diff options
Diffstat (limited to 'fs')
99 files changed, 6241 insertions, 2646 deletions
diff --git a/fs/affs/super.c b/fs/affs/super.c index c70f1e5fc024..022cecb0757d 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -551,7 +551,7 @@ affs_remount(struct super_block *sb, int *flags, char *data) return -EINVAL; } - flush_delayed_work_sync(&sbi->sb_work); + flush_delayed_work(&sbi->sb_work); replace_mount_options(sb, new_opts); sbi->s_flags = mount_flags; diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 587ef5123cd8..7ef637d7f3a5 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -351,9 +351,7 @@ void afs_dispatch_give_up_callbacks(struct work_struct *work) */ void afs_flush_callback_breaks(struct afs_server *server) { - cancel_delayed_work(&server->cb_break_work); - queue_delayed_work(afs_callback_update_worker, - &server->cb_break_work, 0); + mod_delayed_work(afs_callback_update_worker, &server->cb_break_work, 0); } #if 0 diff --git a/fs/afs/server.c b/fs/afs/server.c index d59b7516e943..f342acf3547d 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -285,12 +285,7 @@ static void afs_reap_server(struct work_struct *work) expiry = server->time_of_death + afs_server_timeout; if (expiry > now) { delay = (expiry - now) * HZ; - if (!queue_delayed_work(afs_wq, &afs_server_reaper, - delay)) { - cancel_delayed_work(&afs_server_reaper); - queue_delayed_work(afs_wq, &afs_server_reaper, - delay); - } + mod_delayed_work(afs_wq, &afs_server_reaper, delay); break; } @@ -323,6 +318,5 @@ static void afs_reap_server(struct work_struct *work) void __exit afs_purge_servers(void) { afs_server_timeout = 0; - cancel_delayed_work(&afs_server_reaper); - queue_delayed_work(afs_wq, &afs_server_reaper, 0); + mod_delayed_work(afs_wq, &afs_server_reaper, 0); } diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index 431984d2e372..57bcb1596530 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c @@ -561,12 +561,7 @@ static void afs_vlocation_reaper(struct work_struct *work) if (expiry > now) { delay = (expiry - now) * HZ; _debug("delay %lu", delay); - if (!queue_delayed_work(afs_wq, &afs_vlocation_reap, - delay)) { - cancel_delayed_work(&afs_vlocation_reap); - queue_delayed_work(afs_wq, &afs_vlocation_reap, - delay); - } + mod_delayed_work(afs_wq, &afs_vlocation_reap, delay); break; } @@ -614,13 +609,10 @@ void afs_vlocation_purge(void) spin_lock(&afs_vlocation_updates_lock); list_del_init(&afs_vlocation_updates); spin_unlock(&afs_vlocation_updates_lock); - cancel_delayed_work(&afs_vlocation_update); - queue_delayed_work(afs_vlocation_update_worker, - &afs_vlocation_update, 0); + mod_delayed_work(afs_vlocation_update_worker, &afs_vlocation_update, 0); destroy_workqueue(afs_vlocation_update_worker); - cancel_delayed_work(&afs_vlocation_reap); - queue_delayed_work(afs_wq, &afs_vlocation_reap, 0); + mod_delayed_work(afs_wq, &afs_vlocation_reap, 0); } /* diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0d195b507660..9821b672f5a2 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -116,7 +116,7 @@ struct btrfs_ordered_sum; #define BTRFS_FREE_SPACE_OBJECTID -11ULL /* - * The inode number assigned to the special inode for sotring + * The inode number assigned to the special inode for storing * free ino cache */ #define BTRFS_FREE_INO_OBJECTID -12ULL diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index ab5300595847..c9d703693df0 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -18,7 +18,7 @@ #ifndef __DELAYED_REF__ #define __DELAYED_REF__ -/* these are the possible values of struct btrfs_delayed_ref->action */ +/* these are the possible values of struct btrfs_delayed_ref_node->action */ #define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */ #define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */ #define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */ diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ec154f954646..316b07a866d2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1971,8 +1971,8 @@ out: ordered_extent->len - 1, NULL, GFP_NOFS); /* - * This needs to be dont to make sure anybody waiting knows we are done - * upating everything for this ordered extent. + * This needs to be done to make sure anybody waiting knows we are done + * updating everything for this ordered extent. */ btrfs_remove_ordered_extent(inode, ordered_extent); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 38b42e7bc91d..b65015581744 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1371,10 +1371,8 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, if (srcid) { srcgroup = find_qgroup_rb(fs_info, srcid); - if (!srcgroup) { - ret = -EINVAL; + if (!srcgroup) goto unlock; - } dstgroup->rfer = srcgroup->rfer - level_size; dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size; srcgroup->excl = level_size; @@ -1383,10 +1381,8 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, qgroup_dirty(fs_info, srcgroup); } - if (!inherit) { - ret = -EINVAL; + if (!inherit) goto unlock; - } i_qgroups = (u64 *)(inherit + 1); for (i = 0; i < inherit->num_qgroups; ++i) { diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index a08306a8bec9..2075ddfffa73 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -9,13 +9,14 @@ config CIFS select CRYPTO_ARC4 select CRYPTO_ECB select CRYPTO_DES + select CRYPTO_SHA256 help This is the client VFS module for the Common Internet File System (CIFS) protocol which is the successor to the Server Message Block (SMB) protocol, the native file sharing mechanism for most early PC operating systems. The CIFS protocol is fully supported by - file servers such as Windows 2000 (including Windows 2003, NT 4 - and Windows XP) as well by Samba (which provides excellent CIFS + file servers such as Windows 2000 (including Windows 2003, Windows 2008, + NT 4 and Windows XP) as well by Samba (which provides excellent CIFS server support for Linux and many other operating systems). Limited support for OS/2 and Windows ME and similar servers is provided as well. @@ -114,6 +115,13 @@ config CIFS_POSIX (such as Samba 3.10 and later) which can negotiate CIFS POSIX ACL support. If unsure, say N. +config CIFS_ACL + bool "Provide CIFS ACL support" + depends on CIFS_XATTR && KEYS + help + Allows fetching CIFS/NTFS ACL from the server. The DACL blob + is handed over to the application/caller. + config CIFS_DEBUG2 bool "Enable additional CIFS debugging routines" depends on CIFS @@ -138,21 +146,6 @@ config CIFS_DFS_UPCALL IP addresses) which is needed for implicit mounts of DFS junction points. If unsure, say N. -config CIFS_FSCACHE - bool "Provide CIFS client caching support" - depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y - help - Makes CIFS FS-Cache capable. Say Y here if you want your CIFS data - to be cached locally on disk through the general filesystem cache - manager. If unsure, say N. - -config CIFS_ACL - bool "Provide CIFS ACL support" - depends on CIFS_XATTR && KEYS - help - Allows to fetch CIFS/NTFS ACL from the server. The DACL blob - is handed over to the application/caller. - config CIFS_NFSD_EXPORT bool "Allow nfsd to export CIFS file system (EXPERIMENTAL)" depends on CIFS && EXPERIMENTAL && BROKEN @@ -161,7 +154,7 @@ config CIFS_NFSD_EXPORT config CIFS_SMB2 bool "SMB2 network file system support (EXPERIMENTAL)" - depends on EXPERIMENTAL && INET && BROKEN + depends on CIFS && EXPERIMENTAL && INET select NLS select KEYS select FSCACHE @@ -178,3 +171,12 @@ config CIFS_SMB2 (compared to cifs) due to protocol improvements. Unless you are a developer or tester, say N. + +config CIFS_FSCACHE + bool "Provide CIFS client caching support" + depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y + help + Makes CIFS FS-Cache capable. Say Y here if you want your CIFS data + to be cached locally on disk through the general filesystem cache + manager. If unsure, say N. + diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index feee94309271..aa0d68b086eb 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile @@ -17,4 +17,4 @@ cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o cifs-$(CONFIG_CIFS_SMB2) += smb2ops.o smb2maperror.o smb2transport.o \ - smb2misc.o smb2pdu.o smb2inode.o + smb2misc.o smb2pdu.o smb2inode.o smb2file.o diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 7dab9c04ad52..53cf2aabce87 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -328,7 +328,7 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, } ctoUTF16_out: - return i; + return j; } #ifdef CONFIG_CIFS_SMB2 diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 05f4dc263a23..2ee5c54797fa 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -1222,7 +1222,7 @@ struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb, if (!open_file) return get_cifs_acl_by_path(cifs_sb, path, pacllen); - pntsd = get_cifs_acl_by_fid(cifs_sb, open_file->netfid, pacllen); + pntsd = get_cifs_acl_by_fid(cifs_sb, open_file->fid.netfid, pacllen); cifsFileInfo_put(open_file); return pntsd; } diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 6a0d741159f0..652f5051be09 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -29,6 +29,7 @@ #include "ntlmssp.h" #include <linux/ctype.h> #include <linux/random.h> +#include <linux/highmem.h> /* * Calculate and return the CIFS signature based on the mac key and SMB PDU. @@ -37,11 +38,13 @@ * the sequence number before this function is called. Also, this function * should be called with the server->srv_mutex held. */ -static int cifs_calc_signature(const struct kvec *iov, int n_vec, +static int cifs_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, char *signature) { int i; int rc; + struct kvec *iov = rqst->rq_iov; + int n_vec = rqst->rq_nvec; if (iov == NULL || signature == NULL || server == NULL) return -EINVAL; @@ -91,6 +94,16 @@ static int cifs_calc_signature(const struct kvec *iov, int n_vec, } } + /* now hash over the rq_pages array */ + for (i = 0; i < rqst->rq_npages; i++) { + struct kvec p_iov; + + cifs_rqst_page_to_kvec(rqst, i, &p_iov); + crypto_shash_update(&server->secmech.sdescmd5->shash, + p_iov.iov_base, p_iov.iov_len); + kunmap(rqst->rq_pages[i]); + } + rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature); if (rc) cERROR(1, "%s: Could not generate md5 hash", __func__); @@ -99,12 +112,12 @@ static int cifs_calc_signature(const struct kvec *iov, int n_vec, } /* must be called with server->srv_mutex held */ -int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, +int cifs_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server, __u32 *pexpected_response_sequence_number) { int rc = 0; char smb_signature[20]; - struct smb_hdr *cifs_pdu = (struct smb_hdr *)iov[0].iov_base; + struct smb_hdr *cifs_pdu = (struct smb_hdr *)rqst->rq_iov[0].iov_base; if ((cifs_pdu == NULL) || (server == NULL)) return -EINVAL; @@ -125,7 +138,7 @@ int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, *pexpected_response_sequence_number = server->sequence_number++; server->sequence_number++; - rc = cifs_calc_signature(iov, n_vec, server, smb_signature); + rc = cifs_calc_signature(rqst, server, smb_signature); if (rc) memset(cifs_pdu->Signature.SecuritySignature, 0, 8); else @@ -134,6 +147,15 @@ int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, return rc; } +int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, + __u32 *pexpected_response_sequence) +{ + struct smb_rqst rqst = { .rq_iov = iov, + .rq_nvec = n_vec }; + + return cifs_sign_rqst(&rqst, server, pexpected_response_sequence); +} + /* must be called with server->srv_mutex held */ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, __u32 *pexpected_response_sequence_number) @@ -147,14 +169,14 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, pexpected_response_sequence_number); } -int cifs_verify_signature(struct kvec *iov, unsigned int nr_iov, +int cifs_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, __u32 expected_sequence_number) { unsigned int rc; char server_response_sig[8]; char what_we_think_sig_should_be[20]; - struct smb_hdr *cifs_pdu = (struct smb_hdr *)iov[0].iov_base; + struct smb_hdr *cifs_pdu = (struct smb_hdr *)rqst->rq_iov[0].iov_base; if (cifs_pdu == NULL || server == NULL) return -EINVAL; @@ -186,8 +208,7 @@ int cifs_verify_signature(struct kvec *iov, unsigned int nr_iov, cifs_pdu->Signature.Sequence.Reserved = 0; mutex_lock(&server->srv_mutex); - rc = cifs_calc_signature(iov, nr_iov, server, - what_we_think_sig_should_be); + rc = cifs_calc_signature(rqst, server, what_we_think_sig_should_be); mutex_unlock(&server->srv_mutex); if (rc) @@ -686,12 +707,17 @@ calc_seckey(struct cifs_ses *ses) void cifs_crypto_shash_release(struct TCP_Server_Info *server) { + if (server->secmech.hmacsha256) + crypto_free_shash(server->secmech.hmacsha256); + if (server->secmech.md5) crypto_free_shash(server->secmech.md5); if (server->secmech.hmacmd5) crypto_free_shash(server->secmech.hmacmd5); + kfree(server->secmech.sdeschmacsha256); + kfree(server->secmech.sdeschmacmd5); kfree(server->secmech.sdescmd5); @@ -716,6 +742,13 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server) goto crypto_allocate_md5_fail; } + server->secmech.hmacsha256 = crypto_alloc_shash("hmac(sha256)", 0, 0); + if (IS_ERR(server->secmech.hmacsha256)) { + cERROR(1, "could not allocate crypto hmacsha256\n"); + rc = PTR_ERR(server->secmech.hmacsha256); + goto crypto_allocate_hmacsha256_fail; + } + size = sizeof(struct shash_desc) + crypto_shash_descsize(server->secmech.hmacmd5); server->secmech.sdeschmacmd5 = kmalloc(size, GFP_KERNEL); @@ -727,7 +760,6 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server) server->secmech.sdeschmacmd5->shash.tfm = server->secmech.hmacmd5; server->secmech.sdeschmacmd5->shash.flags = 0x0; - size = sizeof(struct shash_desc) + crypto_shash_descsize(server->secmech.md5); server->secmech.sdescmd5 = kmalloc(size, GFP_KERNEL); @@ -739,12 +771,29 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server) server->secmech.sdescmd5->shash.tfm = server->secmech.md5; server->secmech.sdescmd5->shash.flags = 0x0; + size = sizeof(struct shash_desc) + + crypto_shash_descsize(server->secmech.hmacsha256); + server->secmech.sdeschmacsha256 = kmalloc(size, GFP_KERNEL); + if (!server->secmech.sdeschmacsha256) { + cERROR(1, "%s: Can't alloc hmacsha256\n", __func__); + rc = -ENOMEM; + goto crypto_allocate_hmacsha256_sdesc_fail; + } + server->secmech.sdeschmacsha256->shash.tfm = server->secmech.hmacsha256; + server->secmech.sdeschmacsha256->shash.flags = 0x0; + return 0; +crypto_allocate_hmacsha256_sdesc_fail: + kfree(server->secmech.sdescmd5); + crypto_allocate_md5_sdesc_fail: kfree(server->secmech.sdeschmacmd5); crypto_allocate_hmacmd5_sdesc_fail: + crypto_free_shash(server->secmech.hmacsha256); + +crypto_allocate_hmacsha256_fail: crypto_free_shash(server->secmech.md5); crypto_allocate_md5_fail: diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index db8a404a51dd..a41044a31083 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -36,6 +36,7 @@ #include <linux/kthread.h> #include <linux/freezer.h> #include <linux/namei.h> +#include <linux/random.h> #include <net/ipv6.h> #include "cifsfs.h" #include "cifspdu.h" @@ -51,7 +52,6 @@ #ifdef CONFIG_CIFS_SMB2 #include "smb2pdu.h" #endif -#define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */ int cifsFYI = 0; int cifsERROR = 1; @@ -89,6 +89,10 @@ extern mempool_t *cifs_mid_poolp; struct workqueue_struct *cifsiod_wq; +#ifdef CONFIG_CIFS_SMB2 +__u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE]; +#endif + static int cifs_read_super(struct super_block *sb) { @@ -160,13 +164,12 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) struct super_block *sb = dentry->d_sb; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); - int rc = -EOPNOTSUPP; + struct TCP_Server_Info *server = tcon->ses->server; unsigned int xid; + int rc = 0; xid = get_xid(); - buf->f_type = CIFS_MAGIC_NUMBER; - /* * PATH_MAX may be too long - it would presumably be total path, * but note that some servers (includinng Samba 3) have a shorter @@ -178,27 +181,8 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_files = 0; /* undefined */ buf->f_ffree = 0; /* unlimited */ - /* - * We could add a second check for a QFS Unix capability bit - */ - if ((tcon->ses->capabilities & CAP_UNIX) && - (CIFS_POSIX_EXTENSIONS & le64_to_cpu(tcon->fsUnixInfo.Capability))) - rc = CIFSSMBQFSPosixInfo(xid, tcon, buf); - - /* - * Only need to call the old QFSInfo if failed on newer one, - * e.g. by OS/2. - **/ - if (rc && (tcon->ses->capabilities & CAP_NT_SMBS)) - rc = CIFSSMBQFSInfo(xid, tcon, buf); - - /* - * Some old Windows servers also do not support level 103, retry with - * older level one if old server failed the previous call or we - * bypassed it because we detected that this was an older LANMAN sess - */ - if (rc) - rc = SMBOldQFSInfo(xid, tcon, buf); + if (server->ops->queryfs) + rc = server->ops->queryfs(xid, tcon, buf); free_xid(xid); return 0; @@ -239,9 +223,10 @@ cifs_alloc_inode(struct super_block *sb) return NULL; cifs_inode->cifsAttrs = 0x20; /* default */ cifs_inode->time = 0; - /* Until the file is open and we have gotten oplock - info back from the server, can not assume caching of - file data or metadata */ + /* + * Until the file is open and we have gotten oplock info back from the + * server, can not assume caching of file data or metadata. + */ cifs_set_oplock_level(cifs_inode, 0); cifs_inode->delete_pending = false; cifs_inode->invalid_mapping = false; @@ -249,11 +234,16 @@ cifs_alloc_inode(struct super_block *sb) cifs_inode->server_eof = 0; cifs_inode->uniqueid = 0; cifs_inode->createtime = 0; - - /* Can not set i_flags here - they get immediately overwritten - to zero by the VFS */ -/* cifs_inode->vfs_inode.i_flags = S_NOATIME | S_NOCMTIME;*/ +#ifdef CONFIG_CIFS_SMB2 + get_random_bytes(cifs_inode->lease_key, SMB2_LEASE_KEY_SIZE); +#endif + /* + * Can not set i_flags here - they get immediately overwritten to zero + * by the VFS. + */ + /* cifs_inode->vfs_inode.i_flags = S_NOATIME | S_NOCMTIME; */ INIT_LIST_HEAD(&cifs_inode->openFileList); + INIT_LIST_HEAD(&cifs_inode->llist); return &cifs_inode->vfs_inode; } @@ -360,7 +350,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root) cifs_show_security(s, tcon->ses->server); cifs_show_cache_flavor(s, cifs_sb); - seq_printf(s, ",unc=%s", tcon->treeName); + seq_printf(s, ",unc="); + seq_escape(s, tcon->treeName, " \t\n\\"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) seq_printf(s, ",multiuser"); @@ -957,7 +948,7 @@ cifs_init_once(void *inode) struct cifsInodeInfo *cifsi = inode; inode_init_once(&cifsi->vfs_inode); - mutex_init(&cifsi->lock_mutex); + init_rwsem(&cifsi->lock_sem); } static int @@ -1127,6 +1118,10 @@ init_cifs(void) spin_lock_init(&cifs_file_list_lock); spin_lock_init(&GlobalMid_Lock); +#ifdef CONFIG_CIFS_SMB2 + get_random_bytes(cifs_client_guid, SMB2_CLIENT_GUID_SIZE); +#endif + if (cifs_max_pending < 2) { cifs_max_pending = 2; cFYI(1, "cifs_max_pending set to min of 2"); diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 1c49c5a9b27a..7163419cecd9 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -128,5 +128,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "1.78" +#define CIFS_VERSION "2.0" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 977dc0e85ccb..f5af2527fc69 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -32,6 +32,8 @@ #include "smb2pdu.h" #endif +#define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */ + /* * The sizes of various internal tables and strings */ @@ -128,8 +130,10 @@ struct sdesc { struct cifs_secmech { struct crypto_shash *hmacmd5; /* hmac-md5 hash function */ struct crypto_shash *md5; /* md5 hash function */ + struct crypto_shash *hmacsha256; /* hmac-sha256 hash function */ struct sdesc *sdeschmacmd5; /* ctxt to generate ntlmv2 hash, CR1 */ struct sdesc *sdescmd5; /* ctxt to generate cifs/smb signature */ + struct sdesc *sdeschmacsha256; /* ctxt to generate smb2 signature */ }; /* per smb session structure/fields */ @@ -158,9 +162,24 @@ struct cifs_cred { ***************************************************************** */ +/* + * A smb_rqst represents a complete request to be issued to a server. It's + * formed by a kvec array, followed by an array of pages. Page data is assumed + * to start at the beginning of the first page. + */ +struct smb_rqst { + struct kvec *rq_iov; /* array of kvecs */ + unsigned int rq_nvec; /* number of kvecs in array */ + struct page **rq_pages; /* pointer to array of page ptrs */ + unsigned int rq_npages; /* number pages in array */ + unsigned int rq_pagesz; /* page size to use */ + unsigned int rq_tailsz; /* length of last page */ +}; + enum smb_version { Smb_1 = 1, Smb_21, + Smb_30, }; struct mid_q_entry; @@ -171,17 +190,23 @@ struct cifs_tcon; struct dfs_info3_param; struct cifs_fattr; struct smb_vol; +struct cifs_fid; +struct cifs_readdata; +struct cifs_writedata; +struct cifs_io_parms; +struct cifs_search_info; +struct cifsInodeInfo; struct smb_version_operations { int (*send_cancel)(struct TCP_Server_Info *, void *, struct mid_q_entry *); bool (*compare_fids)(struct cifsFileInfo *, struct cifsFileInfo *); /* setup request: allocate mid, sign message */ - int (*setup_request)(struct cifs_ses *, struct kvec *, unsigned int, - struct mid_q_entry **); + struct mid_q_entry *(*setup_request)(struct cifs_ses *, + struct smb_rqst *); /* setup async request: allocate mid, sign message */ - int (*setup_async_request)(struct TCP_Server_Info *, struct kvec *, - unsigned int, struct mid_q_entry **); + struct mid_q_entry *(*setup_async_request)(struct TCP_Server_Info *, + struct smb_rqst *); /* check response: verify signature, map error */ int (*check_receive)(struct mid_q_entry *, struct TCP_Server_Info *, bool); @@ -212,6 +237,10 @@ struct smb_version_operations { bool (*need_neg)(struct TCP_Server_Info *); /* negotiate to the server */ int (*negotiate)(const unsigned int, struct cifs_ses *); + /* set negotiated write size */ + unsigned int (*negotiate_wsize)(struct cifs_tcon *, struct smb_vol *); + /* set negotiated read size */ + unsigned int (*negotiate_rsize)(struct cifs_tcon *, struct smb_vol *); /* setup smb sessionn */ int (*sess_setup)(const unsigned int, struct cifs_ses *, const struct nls_table *); @@ -235,10 +264,22 @@ struct smb_version_operations { int (*query_path_info)(const unsigned int, struct cifs_tcon *, struct cifs_sb_info *, const char *, FILE_ALL_INFO *, bool *); + /* query file data from the server */ + int (*query_file_info)(const unsigned int, struct cifs_tcon *, + struct cifs_fid *, FILE_ALL_INFO *); /* get server index number */ int (*get_srv_inum)(const unsigned int, struct cifs_tcon *, struct cifs_sb_info *, const char *, u64 *uniqueid, FILE_ALL_INFO *); + /* set size by path */ + int (*set_path_size)(const unsigned int, struct cifs_tcon *, + const char *, __u64, struct cifs_sb_info *, bool); + /* set size by file handle */ + int (*set_file_size)(const unsigned int, struct cifs_tcon *, + struct cifsFileInfo *, __u64, bool); + /* set attributes */ + int (*set_file_info)(struct inode *, const char *, FILE_BASIC_INFO *, + const unsigned int); /* build a full path to the root of the mount */ char * (*build_path_to_root)(struct smb_vol *, struct cifs_sb_info *, struct cifs_tcon *); @@ -256,10 +297,84 @@ struct smb_version_operations { /* remove directory */ int (*rmdir)(const unsigned int, struct cifs_tcon *, const char *, struct cifs_sb_info *); + /* unlink file */ + int (*unlink)(const unsigned int, struct cifs_tcon *, const char *, + struct cifs_sb_info *); + /* open, rename and delete file */ + int (*rename_pending_delete)(const char *, struct dentry *, + const unsigned int); + /* send rename request */ + int (*rename)(const unsigned int, struct cifs_tcon *, const char *, + const char *, struct cifs_sb_info *); + /* send create hardlink request */ + int (*create_hardlink)(const unsigned int, struct cifs_tcon *, + const char *, const char *, + struct cifs_sb_info *); + /* open a file for non-posix mounts */ + int (*open)(const unsigned int, struct cifs_tcon *, const char *, int, + int, int, struct cifs_fid *, __u32 *, FILE_ALL_INFO *, + struct cifs_sb_info *); + /* set fid protocol-specific info */ + void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32); + /* close a file */ + void (*close)(const unsigned int, struct cifs_tcon *, + struct cifs_fid *); + /* send a flush request to the server */ + int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); + /* async read from the server */ + int (*async_readv)(struct cifs_readdata *); + /* async write to the server */ + int (*async_writev)(struct cifs_writedata *); + /* sync read from the server */ + int (*sync_read)(const unsigned int, struct cifsFileInfo *, + struct cifs_io_parms *, unsigned int *, char **, + int *); + /* sync write to the server */ + int (*sync_write)(const unsigned int, struct cifsFileInfo *, + struct cifs_io_parms *, unsigned int *, struct kvec *, + unsigned long); + /* open dir, start readdir */ + int (*query_dir_first)(const unsigned int, struct cifs_tcon *, + const char *, struct cifs_sb_info *, + struct cifs_fid *, __u16, + struct cifs_search_info *); + /* continue readdir */ + int (*query_dir_next)(const unsigned int, struct cifs_tcon *, + struct cifs_fid *, + __u16, struct cifs_search_info *srch_inf); + /* close dir */ + int (*close_dir)(const unsigned int, struct cifs_tcon *, + struct cifs_fid *); + /* calculate a size of SMB message */ + unsigned int (*calc_smb_size)(void *); + /* check for STATUS_PENDING and process it in a positive case */ + bool (*is_status_pending)(char *, struct TCP_Server_Info *, int); + /* send oplock break response */ + int (*oplock_response)(struct cifs_tcon *, struct cifs_fid *, + struct cifsInodeInfo *); + /* query remote filesystem */ + int (*queryfs)(const unsigned int, struct cifs_tcon *, + struct kstatfs *); + /* send mandatory brlock to the server */ + int (*mand_lock)(const unsigned int, struct cifsFileInfo *, __u64, + __u64, __u32, int, int, bool); + /* unlock range of mandatory locks */ + int (*mand_unlock_range)(struct cifsFileInfo *, struct file_lock *, + const unsigned int); + /* push brlocks from the cache to the server */ + int (*push_mand_locks)(struct cifsFileInfo *); + /* get lease key of the inode */ + void (*get_lease_key)(struct inode *, struct cifs_fid *fid); + /* set lease key of the inode */ + void (*set_lease_key)(struct inode *, struct cifs_fid *fid); + /* generate new lease key */ + void (*new_lease_key)(struct cifs_fid *fid); }; struct smb_version_values { char *version_string; + __u16 protocol_id; + __u32 req_capabilities; __u32 large_lock_type; __u32 exclusive_lock_type; __u32 shared_lock_type; @@ -496,6 +611,51 @@ get_next_mid(struct TCP_Server_Info *server) } /* + * When the server supports very large reads and writes via POSIX extensions, + * we can allow up to 2^24-1, minus the size of a READ/WRITE_AND_X header, not + * including the RFC1001 length. + * + * Note that this might make for "interesting" allocation problems during + * writeback however as we have to allocate an array of pointers for the + * pages. A 16M write means ~32kb page array with PAGE_CACHE_SIZE == 4096. + * + * For reads, there is a similar problem as we need to allocate an array + * of kvecs to handle the receive, though that should only need to be done + * once. + */ +#define CIFS_MAX_WSIZE ((1<<24) - 1 - sizeof(WRITE_REQ) + 4) +#define CIFS_MAX_RSIZE ((1<<24) - sizeof(READ_RSP) + 4) + +/* + * When the server doesn't allow large posix writes, only allow a rsize/wsize + * of 2^17-1 minus the size of the call header. That allows for a read or + * write up to the maximum size described by RFC1002. + */ +#define CIFS_MAX_RFC1002_WSIZE ((1<<17) - 1 - sizeof(WRITE_REQ) + 4) +#define CIFS_MAX_RFC1002_RSIZE ((1<<17) - 1 - sizeof(READ_RSP) + 4) + +/* + * The default wsize is 1M. find_get_pages seems to return a maximum of 256 + * pages in a single call. With PAGE_CACHE_SIZE == 4k, this means we can fill + * a single wsize request with a single call. + */ +#define CIFS_DEFAULT_IOSIZE (1024 * 1024) + +/* + * Windows only supports a max of 60kb reads and 65535 byte writes. Default to + * those values when posix extensions aren't in force. In actuality here, we + * use 65536 to allow for a write that is a multiple of 4k. Most servers seem + * to be ok with the extra byte even though Windows doesn't send writes that + * are that large. + * + * Citation: + * + * http://blogs.msdn.com/b/openspecification/archive/2009/04/10/smb-maximum-transmit-buffer-size-and-performance-tuning.aspx + */ +#define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024) +#define CIFS_DEFAULT_NON_POSIX_WSIZE (65536) + +/* * Macros to allow the TCP_Server_Info->net field and related code to drop out * when CONFIG_NET_NS isn't set. */ @@ -559,6 +719,7 @@ struct cifs_ses { __u16 session_flags; #endif /* CONFIG_CIFS_SMB2 */ }; + /* no more than one of the following three session flags may be set */ #define CIFS_SES_NT4 1 #define CIFS_SES_OS2 2 @@ -665,6 +826,7 @@ struct cifs_tcon { u64 resource_id; /* server resource id */ struct fscache_cookie *fscache; /* cookie for share */ #endif + struct list_head pending_opens; /* list of incomplete opens */ /* BB add field for back pointer to sb struct(s)? */ }; @@ -707,6 +869,15 @@ cifs_get_tlink(struct tcon_link *tlink) /* This function is always expected to succeed */ extern struct cifs_tcon *cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb); +#define CIFS_OPLOCK_NO_CHANGE 0xfe + +struct cifs_pending_open { + struct list_head olist; + struct tcon_link *tlink; + __u8 lease_key[16]; + __u32 oplock; +}; + /* * This info hangs off the cifsFileInfo structure, pointed to by llist. * This is used to track byte stream locks on the file @@ -740,16 +911,29 @@ struct cifs_search_info { bool smallBuf:1; /* so we know which buf_release function to call */ }; +struct cifs_fid { + __u16 netfid; +#ifdef CONFIG_CIFS_SMB2 + __u64 persistent_fid; /* persist file id for smb2 */ + __u64 volatile_fid; /* volatile file id for smb2 */ + __u8 lease_key[SMB2_LEASE_KEY_SIZE]; /* lease key for smb2 */ +#endif + struct cifs_pending_open *pending_open; +}; + +struct cifs_fid_locks { + struct list_head llist; + struct cifsFileInfo *cfile; /* fid that owns locks */ + struct list_head locks; /* locks held by fid above */ +}; + struct cifsFileInfo { struct list_head tlist; /* pointer to next fid owned by tcon */ struct list_head flist; /* next fid (file instance) for this inode */ - struct list_head llist; /* - * brlocks held by this fid, protected by - * lock_mutex from cifsInodeInfo structure - */ + struct cifs_fid_locks *llist; /* brlocks held by this fid */ unsigned int uid; /* allows finding which FileInfo structure */ __u32 pid; /* process id who opened file */ - __u16 netfid; /* file id from remote */ + struct cifs_fid fid; /* file id from remote */ /* BB add lock scope info here if needed */ ; /* lock scope id (0 if none) */ struct dentry *dentry; @@ -765,12 +949,60 @@ struct cifsFileInfo { struct cifs_io_parms { __u16 netfid; +#ifdef CONFIG_CIFS_SMB2 + __u64 persistent_fid; /* persist file id for smb2 */ + __u64 volatile_fid; /* volatile file id for smb2 */ +#endif __u32 pid; __u64 offset; unsigned int length; struct cifs_tcon *tcon; }; +struct cifs_readdata; + +/* asynchronous read support */ +struct cifs_readdata { + struct kref refcount; + struct list_head list; + struct completion done; + struct cifsFileInfo *cfile; + struct address_space *mapping; + __u64 offset; + unsigned int bytes; + pid_t pid; + int result; + struct work_struct work; + int (*read_into_pages)(struct TCP_Server_Info *server, + struct cifs_readdata *rdata, + unsigned int len); + struct kvec iov; + unsigned int pagesz; + unsigned int tailsz; + unsigned int nr_pages; + struct page *pages[]; +}; + +struct cifs_writedata; + +/* asynchronous write support */ +struct cifs_writedata { + struct kref refcount; + struct list_head list; + struct completion done; + enum writeback_sync_modes sync_mode; + struct work_struct work; + struct cifsFileInfo *cfile; + __u64 offset; + pid_t pid; + unsigned int bytes; + int result; + unsigned int pagesz; + unsigned int tailsz; + unsigned int nr_pages; + struct page *pages[1]; +}; + /* * Take a reference on the file private data. Must be called with * cifs_file_list_lock held. @@ -790,11 +1022,8 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file); struct cifsInodeInfo { bool can_cache_brlcks; - struct mutex lock_mutex; /* - * protect the field above and llist - * from every cifsFileInfo structure - * from openFileList - */ + struct list_head llist; /* locks helb by this inode */ + struct rw_semaphore lock_sem; /* protect the fields above */ /* BB add in lists for dirty pages i.e. write caching info for oplock */ struct list_head openFileList; __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ @@ -806,6 +1035,9 @@ struct cifsInodeInfo { u64 server_eof; /* current file size on server -- protected by i_lock */ u64 uniqueid; /* server inode number */ u64 createtime; /* creation time on server */ +#ifdef CONFIG_CIFS_SMB2 + __u8 lease_key[SMB2_LEASE_KEY_SIZE]; /* lease key for this inode */ +#endif #ifdef CONFIG_CIFS_FSCACHE struct fscache_cookie *fscache; #endif @@ -1130,7 +1362,7 @@ require use of the stronger protocol */ #define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */ #define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */ -#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2) +#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_NTLMSSP) #define CIFSSEC_MAX (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2) #define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP) /* @@ -1267,7 +1499,13 @@ extern mempool_t *cifs_mid_poolp; #define SMB1_VERSION_STRING "1.0" extern struct smb_version_operations smb1_operations; extern struct smb_version_values smb1_values; +#define SMB20_VERSION_STRING "2.0" +/*extern struct smb_version_operations smb20_operations; */ /* not needed yet */ +extern struct smb_version_values smb20_values; #define SMB21_VERSION_STRING "2.1" extern struct smb_version_operations smb21_operations; extern struct smb_version_values smb21_values; +#define SMB30_VERSION_STRING "3.0" +/*extern struct smb_version_operations smb30_operations; */ /* not needed yet */ +extern struct smb_version_values smb30_values; #endif /* _CIFS_GLOB_H */ diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 3fb03e2c8e86..b9d59a948a2c 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -2210,7 +2210,7 @@ typedef struct { /* data block encoding of response to level 263 QPathInfo */ __u8 DeletePending; __u8 Directory; __u16 Pad2; - __u64 IndexNumber; + __le64 IndexNumber; __le32 EASize; __le32 AccessFlags; __u64 IndexNumber1; diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index f1bbf8305d3a..5144e9fbeb8c 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -24,6 +24,7 @@ struct statfs; struct smb_vol; +struct smb_rqst; /* ***************************************************************** @@ -35,6 +36,8 @@ extern struct smb_hdr *cifs_buf_get(void); extern void cifs_buf_release(void *); extern struct smb_hdr *cifs_small_buf_get(void); extern void cifs_small_buf_release(void *); +extern void cifs_rqst_page_to_kvec(struct smb_rqst *rqst, unsigned int idx, + struct kvec *iov); extern int smb_send(struct TCP_Server_Info *, struct smb_hdr *, unsigned int /* length */); extern unsigned int _get_xid(void); @@ -65,21 +68,22 @@ extern char *cifs_compose_mount_options(const char *sb_mountdata, extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server); extern void DeleteMidQEntry(struct mid_q_entry *midEntry); +extern void cifs_delete_mid(struct mid_q_entry *mid); extern void cifs_wake_up_task(struct mid_q_entry *mid); -extern int cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, - unsigned int nvec, mid_receive_t *receive, - mid_callback_t *callback, void *cbdata, - const int flags); +extern int cifs_call_async(struct TCP_Server_Info *server, + struct smb_rqst *rqst, + mid_receive_t *receive, mid_callback_t *callback, + void *cbdata, const int flags); extern int SendReceive(const unsigned int /* xid */ , struct cifs_ses *, struct smb_hdr * /* input */ , struct smb_hdr * /* out */ , int * /* bytes returned */ , const int); extern int SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses, char *in_buf, int flags); -extern int cifs_setup_request(struct cifs_ses *, struct kvec *, unsigned int, - struct mid_q_entry **); -extern int cifs_setup_async_request(struct TCP_Server_Info *, struct kvec *, - unsigned int, struct mid_q_entry **); +extern struct mid_q_entry *cifs_setup_request(struct cifs_ses *, + struct smb_rqst *); +extern struct mid_q_entry *cifs_setup_async_request(struct TCP_Server_Info *, + struct smb_rqst *); extern int cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, bool log_error); extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *, @@ -99,7 +103,7 @@ extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, unsigned int bytes_written); extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool); extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool); -extern unsigned int smbCalcSize(struct smb_hdr *ptr); +extern unsigned int smbCalcSize(void *buf); extern int decode_negTokenInit(unsigned char *security_blob, int length, struct TCP_Server_Info *server); extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len); @@ -120,10 +124,14 @@ extern u64 cifs_UnixTimeToNT(struct timespec); extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset); extern void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock); - -extern struct cifsFileInfo *cifs_new_fileinfo(__u16 fileHandle, - struct file *file, struct tcon_link *tlink, - __u32 oplock); +extern int cifs_unlock_range(struct cifsFileInfo *cfile, + struct file_lock *flock, const unsigned int xid); +extern int cifs_push_mandatory_locks(struct cifsFileInfo *cfile); + +extern struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, + struct file *file, + struct tcon_link *tlink, + __u32 oplock); extern int cifs_posix_open(char *full_path, struct inode **inode, struct super_block *sb, int mode, unsigned int f_flags, __u32 *oplock, __u16 *netfid, @@ -132,18 +140,23 @@ void cifs_fill_uniqueid(struct super_block *sb, struct cifs_fattr *fattr); extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info, struct cifs_sb_info *cifs_sb); +extern void cifs_dir_info_to_fattr(struct cifs_fattr *, FILE_DIRECTORY_INFO *, + struct cifs_sb_info *); extern void cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr); extern struct inode *cifs_iget(struct super_block *sb, struct cifs_fattr *fattr); -extern int cifs_get_file_info(struct file *filp); extern int cifs_get_inode_info(struct inode **inode, const char *full_path, FILE_ALL_INFO *data, struct super_block *sb, int xid, const __u16 *fid); -extern int cifs_get_file_info_unix(struct file *filp); extern int cifs_get_inode_info_unix(struct inode **pinode, const unsigned char *search_path, struct super_block *sb, unsigned int xid); +extern int cifs_set_file_info(struct inode *inode, struct iattr *attrs, + unsigned int xid, char *full_path, __u32 dosattr); +extern int cifs_rename_pending_delete(const char *full_path, + struct dentry *dentry, + const unsigned int xid); extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, struct inode *inode, const char *path, const __u16 *pfid); @@ -169,6 +182,17 @@ extern struct smb_vol *cifs_get_volume_info(char *mount_data, extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *); extern void cifs_umount(struct cifs_sb_info *); extern void cifs_mark_open_files_invalid(struct cifs_tcon *tcon); +extern bool cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, + __u64 length, __u8 type, + struct cifsLockInfo **conf_lock, + bool rw_check); +extern void cifs_add_pending_open(struct cifs_fid *fid, + struct tcon_link *tlink, + struct cifs_pending_open *open); +extern void cifs_add_pending_open_locked(struct cifs_fid *fid, + struct tcon_link *tlink, + struct cifs_pending_open *open); +extern void cifs_del_pending_open(struct cifs_pending_open *open); #if IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) extern void cifs_dfs_release_automount_timer(void); @@ -179,6 +203,10 @@ extern void cifs_dfs_release_automount_timer(void); void cifs_proc_init(void); void cifs_proc_clean(void); +extern void cifs_move_llist(struct list_head *source, struct list_head *dest); +extern void cifs_free_llist(struct list_head *llist); +extern void cifs_del_lock_waiters(struct cifsLockInfo *lock); + extern int cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses); extern int cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, @@ -190,10 +218,10 @@ extern int CIFSTCon(const unsigned int xid, struct cifs_ses *ses, const struct nls_table *); extern int CIFSFindFirst(const unsigned int xid, struct cifs_tcon *tcon, - const char *searchName, const struct nls_table *nls_codepage, + const char *searchName, struct cifs_sb_info *cifs_sb, __u16 *searchHandle, __u16 search_flags, struct cifs_search_info *psrch_inf, - int map, const char dirsep); + bool msearch); extern int CIFSFindNext(const unsigned int xid, struct cifs_tcon *tcon, __u16 searchHandle, __u16 search_flags, @@ -265,13 +293,11 @@ extern int CIFSSMBSetAttrLegacy(unsigned int xid, struct cifs_tcon *tcon, const struct nls_table *nls_codepage); #endif /* possibly unneeded function */ extern int CIFSSMBSetEOF(const unsigned int xid, struct cifs_tcon *tcon, - const char *fileName, __u64 size, - bool setAllocationSizeFlag, - const struct nls_table *nls_codepage, - int remap_special_chars); + const char *file_name, __u64 size, + struct cifs_sb_info *cifs_sb, bool set_allocation); extern int CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, - __u64 size, __u16 fileHandle, __u32 opener_pid, - bool AllocSizeFlag); + struct cifsFileInfo *cfile, __u64 size, + bool set_allocation); struct cifs_unix_set_info_args { __u64 ctime; @@ -303,22 +329,17 @@ extern int CIFSPOSIXDelFile(const unsigned int xid, struct cifs_tcon *tcon, const struct nls_table *nls_codepage, int remap_special_chars); extern int CIFSSMBDelFile(const unsigned int xid, struct cifs_tcon *tcon, - const char *name, - const struct nls_table *nls_codepage, - int remap_special_chars); + const char *name, struct cifs_sb_info *cifs_sb); extern int CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon, - const char *fromName, const char *toName, - const struct nls_table *nls_codepage, - int remap_special_chars); + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb); extern int CIFSSMBRenameOpenFile(const unsigned int xid, struct cifs_tcon *tcon, int netfid, const char *target_name, const struct nls_table *nls_codepage, int remap_special_chars); -extern int CIFSCreateHardLink(const unsigned int xid, - struct cifs_tcon *tcon, - const char *fromName, const char *toName, - const struct nls_table *nls_codepage, - int remap_special_chars); +extern int CIFSCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon, + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb); extern int CIFSUnixCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon, const char *fromName, const char *toName, @@ -367,8 +388,7 @@ extern int CIFSSMBWrite(const unsigned int xid, struct cifs_io_parms *io_parms, unsigned int *nbytes, const char *buf, const char __user *ubuf, const int long_op); extern int CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms, - unsigned int *nbytes, struct kvec *iov, const int nvec, - const int long_op); + unsigned int *nbytes, struct kvec *iov, const int nvec); extern int CIFSGetSrvInodeNumber(const unsigned int xid, struct cifs_tcon *tcon, const char *search_name, __u64 *inode_number, const struct nls_table *nls_codepage, @@ -397,10 +417,12 @@ extern void sesInfoFree(struct cifs_ses *); extern struct cifs_tcon *tconInfoAlloc(void); extern void tconInfoFree(struct cifs_tcon *); -extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *); +extern int cifs_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server, + __u32 *pexpected_response_sequence_number); extern int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *, __u32 *); -extern int cifs_verify_signature(struct kvec *iov, unsigned int nr_iov, +extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *); +extern int cifs_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, __u32 expected_sequence_number); extern int SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *, @@ -462,45 +484,9 @@ extern int E_md4hash(const unsigned char *passwd, unsigned char *p16, extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8, unsigned char *p24); -/* asynchronous read support */ -struct cifs_readdata { - struct kref refcount; - struct list_head list; - struct completion done; - struct cifsFileInfo *cfile; - struct address_space *mapping; - __u64 offset; - unsigned int bytes; - pid_t pid; - int result; - struct list_head pages; - struct work_struct work; - int (*marshal_iov) (struct cifs_readdata *rdata, - unsigned int remaining); - unsigned int nr_iov; - struct kvec iov[1]; -}; - void cifs_readdata_release(struct kref *refcount); int cifs_async_readv(struct cifs_readdata *rdata); - -/* asynchronous write support */ -struct cifs_writedata { - struct kref refcount; - struct list_head list; - struct completion done; - enum writeback_sync_modes sync_mode; - struct work_struct work; - struct cifsFileInfo *cfile; - __u64 offset; - pid_t pid; - unsigned int bytes; - int result; - void (*marshal_iov) (struct kvec *iov, - struct cifs_writedata *wdata); - unsigned int nr_pages; - struct page *pages[1]; -}; +int cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid); int cifs_async_writev(struct cifs_writedata *wdata); void cifs_writev_complete(struct work_struct *work); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index f0cf934ba877..76d0d2998850 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -86,32 +86,6 @@ static struct { #endif /* CONFIG_CIFS_WEAK_PW_HASH */ #endif /* CIFS_POSIX */ -#ifdef CONFIG_HIGHMEM -/* - * On arches that have high memory, kmap address space is limited. By - * serializing the kmap operations on those arches, we ensure that we don't - * end up with a bunch of threads in writeback with partially mapped page - * arrays, stuck waiting for kmap to come back. That situation prevents - * progress and can deadlock. - */ -static DEFINE_MUTEX(cifs_kmap_mutex); - -static inline void -cifs_kmap_lock(void) -{ - mutex_lock(&cifs_kmap_mutex); -} - -static inline void -cifs_kmap_unlock(void) -{ - mutex_unlock(&cifs_kmap_mutex); -} -#else /* !CONFIG_HIGHMEM */ -#define cifs_kmap_lock() do { ; } while(0) -#define cifs_kmap_unlock() do { ; } while(0) -#endif /* CONFIG_HIGHMEM */ - /* * Mark as invalid, all open files on tree connections since they * were closed when session to server was lost. @@ -751,6 +725,8 @@ CIFSSMBEcho(struct TCP_Server_Info *server) ECHO_REQ *smb; int rc = 0; struct kvec iov; + struct smb_rqst rqst = { .rq_iov = &iov, + .rq_nvec = 1 }; cFYI(1, "In echo request"); @@ -768,7 +744,7 @@ CIFSSMBEcho(struct TCP_Server_Info *server) iov.iov_base = smb; iov.iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4; - rc = cifs_call_async(server, &iov, 1, NULL, cifs_echo_callback, + rc = cifs_call_async(server, &rqst, NULL, cifs_echo_callback, server, CIFS_ASYNC_OP | CIFS_ECHO_OP); if (rc) cFYI(1, "Echo request failed: %d", rc); @@ -902,15 +878,15 @@ PsxDelete: } int -CIFSSMBDelFile(const unsigned int xid, struct cifs_tcon *tcon, - const char *fileName, const struct nls_table *nls_codepage, - int remap) +CIFSSMBDelFile(const unsigned int xid, struct cifs_tcon *tcon, const char *name, + struct cifs_sb_info *cifs_sb) { DELETE_FILE_REQ *pSMB = NULL; DELETE_FILE_RSP *pSMBr = NULL; int rc = 0; int bytes_returned; int name_len; + int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; DelFileRetry: rc = smb_init(SMB_COM_DELETE, 1, tcon, (void **) &pSMB, @@ -919,15 +895,15 @@ DelFileRetry: return rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { - name_len = - cifsConvertToUTF16((__le16 *) pSMB->fileName, fileName, - PATH_MAX, nls_codepage, remap); + name_len = cifsConvertToUTF16((__le16 *) pSMB->fileName, name, + PATH_MAX, cifs_sb->local_nls, + remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve check for buffer overruns BB */ - name_len = strnlen(fileName, PATH_MAX); + name_len = strnlen(name, PATH_MAX); name_len++; /* trailing null */ - strncpy(pSMB->fileName, fileName, name_len); + strncpy(pSMB->fileName, name, name_len); } pSMB->SearchAttributes = cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM); @@ -1440,7 +1416,7 @@ cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) return 0; } -static int +int cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) { int length, len; @@ -1460,10 +1436,10 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) len = min_t(unsigned int, buflen, server->vals->read_rsp_size) - HEADER_SIZE(server) + 1; - rdata->iov[0].iov_base = buf + HEADER_SIZE(server) - 1; - rdata->iov[0].iov_len = len; + rdata->iov.iov_base = buf + HEADER_SIZE(server) - 1; + rdata->iov.iov_len = len; - length = cifs_readv_from_socket(server, rdata->iov, 1, len); + length = cifs_readv_from_socket(server, &rdata->iov, 1, len); if (length < 0) return length; server->total_read += length; @@ -1509,19 +1485,19 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) len = data_offset - server->total_read; if (len > 0) { /* read any junk before data into the rest of smallbuf */ - rdata->iov[0].iov_base = buf + server->total_read; - rdata->iov[0].iov_len = len; - length = cifs_readv_from_socket(server, rdata->iov, 1, len); + rdata->iov.iov_base = buf + server->total_read; + rdata->iov.iov_len = len; + length = cifs_readv_from_socket(server, &rdata->iov, 1, len); if (length < 0) return length; server->total_read += length; } /* set up first iov for signature check */ - rdata->iov[0].iov_base = buf; - rdata->iov[0].iov_len = server->total_read; + rdata->iov.iov_base = buf; + rdata->iov.iov_len = server->total_read; cFYI(1, "0: iov_base=%p iov_len=%zu", - rdata->iov[0].iov_base, rdata->iov[0].iov_len); + rdata->iov.iov_base, rdata->iov.iov_len); /* how much data is in the response? */ data_len = server->ops->read_data_length(buf); @@ -1531,23 +1507,11 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) return cifs_readv_discard(server, mid); } - /* marshal up the page array */ - cifs_kmap_lock(); - len = rdata->marshal_iov(rdata, data_len); - cifs_kmap_unlock(); - data_len -= len; - - /* issue the read if we have any iovecs left to fill */ - if (rdata->nr_iov > 1) { - length = cifs_readv_from_socket(server, &rdata->iov[1], - rdata->nr_iov - 1, len); - if (length < 0) - return length; - server->total_read += length; - } else { - length = 0; - } + length = rdata->read_into_pages(server, rdata, data_len); + if (length < 0) + return length; + server->total_read += length; rdata->bytes = length; cFYI(1, "total_read=%u buflen=%u remaining=%u", server->total_read, @@ -1567,6 +1531,12 @@ cifs_readv_callback(struct mid_q_entry *mid) struct cifs_readdata *rdata = mid->callback_data; struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); struct TCP_Server_Info *server = tcon->ses->server; + struct smb_rqst rqst = { .rq_iov = &rdata->iov, + .rq_nvec = 1, + .rq_pages = rdata->pages, + .rq_npages = rdata->nr_pages, + .rq_pagesz = rdata->pagesz, + .rq_tailsz = rdata->tailsz }; cFYI(1, "%s: mid=%llu state=%d result=%d bytes=%u", __func__, mid->mid, mid->mid_state, rdata->result, rdata->bytes); @@ -1578,9 +1548,8 @@ cifs_readv_callback(struct mid_q_entry *mid) (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { int rc = 0; - rc = cifs_verify_signature(rdata->iov, rdata->nr_iov, - server, - mid->sequence_number + 1); + rc = cifs_verify_signature(&rqst, server, + mid->sequence_number + 1); if (rc) cERROR(1, "SMB signature verification returned " "error = %d", rc); @@ -1610,6 +1579,8 @@ cifs_async_readv(struct cifs_readdata *rdata) READ_REQ *smb = NULL; int wct; struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); + struct smb_rqst rqst = { .rq_iov = &rdata->iov, + .rq_nvec = 1 }; cFYI(1, "%s: offset=%llu bytes=%u", __func__, rdata->offset, rdata->bytes); @@ -1632,7 +1603,7 @@ cifs_async_readv(struct cifs_readdata *rdata) smb->hdr.PidHigh = cpu_to_le16((__u16)(rdata->pid >> 16)); smb->AndXCommand = 0xFF; /* none */ - smb->Fid = rdata->cfile->netfid; + smb->Fid = rdata->cfile->fid.netfid; smb->OffsetLow = cpu_to_le32(rdata->offset & 0xFFFFFFFF); if (wct == 12) smb->OffsetHigh = cpu_to_le32(rdata->offset >> 32); @@ -1649,13 +1620,12 @@ cifs_async_readv(struct cifs_readdata *rdata) } /* 4 for RFC1001 length + 1 for BCC */ - rdata->iov[0].iov_base = smb; - rdata->iov[0].iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4; + rdata->iov.iov_base = smb; + rdata->iov.iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4; kref_get(&rdata->refcount); - rc = cifs_call_async(tcon->ses->server, rdata->iov, 1, - cifs_readv_receive, cifs_readv_callback, - rdata, 0); + rc = cifs_call_async(tcon->ses->server, &rqst, cifs_readv_receive, + cifs_readv_callback, rdata, 0); if (rc == 0) cifs_stats_inc(&tcon->stats.cifs_stats.num_reads); @@ -1926,6 +1896,7 @@ cifs_writev_requeue(struct cifs_writedata *wdata) { int i, rc; struct inode *inode = wdata->cfile->dentry->d_inode; + struct TCP_Server_Info *server; for (i = 0; i < wdata->nr_pages; i++) { lock_page(wdata->pages[i]); @@ -1933,7 +1904,8 @@ cifs_writev_requeue(struct cifs_writedata *wdata) } do { - rc = cifs_async_writev(wdata); + server = tlink_tcon(wdata->cfile->tlink)->ses->server; + rc = server->ops->async_writev(wdata); } while (rc == -EAGAIN); for (i = 0; i < wdata->nr_pages; i++) { @@ -2053,11 +2025,12 @@ cifs_writev_callback(struct mid_q_entry *mid) int cifs_async_writev(struct cifs_writedata *wdata) { - int i, rc = -EACCES; + int rc = -EACCES; WRITE_REQ *smb = NULL; int wct; struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); - struct kvec *iov = NULL; + struct kvec iov; + struct smb_rqst rqst = { }; if (tcon->ses->capabilities & CAP_LARGE_FILES) { wct = 14; @@ -2073,18 +2046,11 @@ cifs_async_writev(struct cifs_writedata *wdata) if (rc) goto async_writev_out; - /* 1 iov per page + 1 for header */ - iov = kzalloc((wdata->nr_pages + 1) * sizeof(*iov), GFP_NOFS); - if (iov == NULL) { - rc = -ENOMEM; - goto async_writev_out; - } - smb->hdr.Pid = cpu_to_le16((__u16)wdata->pid); smb->hdr.PidHigh = cpu_to_le16((__u16)(wdata->pid >> 16)); smb->AndXCommand = 0xFF; /* none */ - smb->Fid = wdata->cfile->netfid; + smb->Fid = wdata->cfile->fid.netfid; smb->OffsetLow = cpu_to_le32(wdata->offset & 0xFFFFFFFF); if (wct == 14) smb->OffsetHigh = cpu_to_le32(wdata->offset >> 32); @@ -2096,18 +2062,15 @@ cifs_async_writev(struct cifs_writedata *wdata) cpu_to_le16(offsetof(struct smb_com_write_req, Data) - 4); /* 4 for RFC1001 length + 1 for BCC */ - iov[0].iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4 + 1; - iov[0].iov_base = smb; + iov.iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4 + 1; + iov.iov_base = smb; - /* - * This function should marshal up the page array into the kvec - * array, reserving [0] for the header. It should kmap the pages - * and set the iov_len properly for each one. It may also set - * wdata->bytes too. - */ - cifs_kmap_lock(); - wdata->marshal_iov(iov, wdata); - cifs_kmap_unlock(); + rqst.rq_iov = &iov; + rqst.rq_nvec = 1; + rqst.rq_pages = wdata->pages; + rqst.rq_npages = wdata->nr_pages; + rqst.rq_pagesz = wdata->pagesz; + rqst.rq_tailsz = wdata->tailsz; cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes); @@ -2123,32 +2086,26 @@ cifs_async_writev(struct cifs_writedata *wdata) (struct smb_com_writex_req *)smb; inc_rfc1001_len(&smbw->hdr, wdata->bytes + 5); put_bcc(wdata->bytes + 5, &smbw->hdr); - iov[0].iov_len += 4; /* pad bigger by four bytes */ + iov.iov_len += 4; /* pad bigger by four bytes */ } kref_get(&wdata->refcount); - rc = cifs_call_async(tcon->ses->server, iov, wdata->nr_pages + 1, - NULL, cifs_writev_callback, wdata, 0); + rc = cifs_call_async(tcon->ses->server, &rqst, NULL, + cifs_writev_callback, wdata, 0); if (rc == 0) cifs_stats_inc(&tcon->stats.cifs_stats.num_writes); else kref_put(&wdata->refcount, cifs_writedata_release); - /* send is done, unmap pages */ - for (i = 0; i < wdata->nr_pages; i++) - kunmap(wdata->pages[i]); - async_writev_out: cifs_small_buf_release(smb); - kfree(iov); return rc; } int CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms, - unsigned int *nbytes, struct kvec *iov, int n_vec, - const int long_op) + unsigned int *nbytes, struct kvec *iov, int n_vec) { int rc = -EACCES; WRITE_REQ *pSMB = NULL; @@ -2219,8 +2176,7 @@ CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms, iov[0].iov_len = smb_hdr_len + 8; - rc = SendReceive2(xid, tcon->ses, iov, n_vec + 1, &resp_buf_type, - long_op); + rc = SendReceive2(xid, tcon->ses, iov, n_vec + 1, &resp_buf_type, 0); cifs_stats_inc(&tcon->stats.cifs_stats.num_writes); if (rc) { cFYI(1, "Send error Write2 = %d", rc); @@ -2557,8 +2513,8 @@ CIFSSMBFlush(const unsigned int xid, struct cifs_tcon *tcon, int smb_file_id) int CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon, - const char *fromName, const char *toName, - const struct nls_table *nls_codepage, int remap) + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb) { int rc = 0; RENAME_REQ *pSMB = NULL; @@ -2566,6 +2522,7 @@ CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon, int bytes_returned; int name_len, name_len2; __u16 count; + int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; cFYI(1, "In CIFSSMBRename"); renameRetry: @@ -2580,9 +2537,9 @@ renameRetry: ATTR_DIRECTORY); if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { - name_len = - cifsConvertToUTF16((__le16 *) pSMB->OldFileName, fromName, - PATH_MAX, nls_codepage, remap); + name_len = cifsConvertToUTF16((__le16 *) pSMB->OldFileName, + from_name, PATH_MAX, + cifs_sb->local_nls, remap); name_len++; /* trailing null */ name_len *= 2; pSMB->OldFileName[name_len] = 0x04; /* pad */ @@ -2590,17 +2547,18 @@ renameRetry: pSMB->OldFileName[name_len + 1] = 0x00; name_len2 = cifsConvertToUTF16((__le16 *)&pSMB->OldFileName[name_len+2], - toName, PATH_MAX, nls_codepage, remap); + to_name, PATH_MAX, cifs_sb->local_nls, + remap); name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ; name_len2 *= 2; /* convert to bytes */ } else { /* BB improve the check for buffer overruns BB */ - name_len = strnlen(fromName, PATH_MAX); + name_len = strnlen(from_name, PATH_MAX); name_len++; /* trailing null */ - strncpy(pSMB->OldFileName, fromName, name_len); - name_len2 = strnlen(toName, PATH_MAX); + strncpy(pSMB->OldFileName, from_name, name_len); + name_len2 = strnlen(to_name, PATH_MAX); name_len2++; /* trailing null */ pSMB->OldFileName[name_len] = 0x04; /* 2nd buffer format */ - strncpy(&pSMB->OldFileName[name_len + 1], toName, name_len2); + strncpy(&pSMB->OldFileName[name_len + 1], to_name, name_len2); name_len2++; /* trailing null */ name_len2++; /* signature byte */ } @@ -2948,8 +2906,8 @@ createHardLinkRetry: int CIFSCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon, - const char *fromName, const char *toName, - const struct nls_table *nls_codepage, int remap) + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb) { int rc = 0; NT_RENAME_REQ *pSMB = NULL; @@ -2957,6 +2915,7 @@ CIFSCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon, int bytes_returned; int name_len, name_len2; __u16 count; + int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; cFYI(1, "In CIFSCreateHardLink"); winCreateHardLinkRetry: @@ -2976,8 +2935,8 @@ winCreateHardLinkRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUTF16((__le16 *) pSMB->OldFileName, fromName, - PATH_MAX, nls_codepage, remap); + cifsConvertToUTF16((__le16 *) pSMB->OldFileName, from_name, + PATH_MAX, cifs_sb->local_nls, remap); name_len++; /* trailing null */ name_len *= 2; @@ -2986,17 +2945,18 @@ winCreateHardLinkRetry: pSMB->OldFileName[name_len + 1] = 0x00; /* pad */ name_len2 = cifsConvertToUTF16((__le16 *)&pSMB->OldFileName[name_len+2], - toName, PATH_MAX, nls_codepage, remap); + to_name, PATH_MAX, cifs_sb->local_nls, + remap); name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ; name_len2 *= 2; /* convert to bytes */ } else { /* BB improve the check for buffer overruns BB */ - name_len = strnlen(fromName, PATH_MAX); + name_len = strnlen(from_name, PATH_MAX); name_len++; /* trailing null */ - strncpy(pSMB->OldFileName, fromName, name_len); - name_len2 = strnlen(toName, PATH_MAX); + strncpy(pSMB->OldFileName, from_name, name_len); + name_len2 = strnlen(to_name, PATH_MAX); name_len2++; /* trailing null */ pSMB->OldFileName[name_len] = 0x04; /* 2nd buffer format */ - strncpy(&pSMB->OldFileName[name_len + 1], toName, name_len2); + strncpy(&pSMB->OldFileName[name_len + 1], to_name, name_len2); name_len2++; /* trailing null */ name_len2++; /* signature byte */ } @@ -4254,10 +4214,9 @@ UnixQPathInfoRetry: /* xid, tcon, searchName and codepage are input parms, rest are returned */ int CIFSFindFirst(const unsigned int xid, struct cifs_tcon *tcon, - const char *searchName, - const struct nls_table *nls_codepage, + const char *searchName, struct cifs_sb_info *cifs_sb, __u16 *pnetfid, __u16 search_flags, - struct cifs_search_info *psrch_inf, int remap, const char dirsep) + struct cifs_search_info *psrch_inf, bool msearch) { /* level 257 SMB_ */ TRANSACTION2_FFIRST_REQ *pSMB = NULL; @@ -4265,8 +4224,9 @@ CIFSFindFirst(const unsigned int xid, struct cifs_tcon *tcon, T2_FFIRST_RSP_PARMS *parms; int rc = 0; int bytes_returned = 0; - int name_len; + int name_len, remap; __u16 params, byte_count; + struct nls_table *nls_codepage; cFYI(1, "In FindFirst for %s", searchName); @@ -4276,6 +4236,9 @@ findFirstRetry: if (rc) return rc; + nls_codepage = cifs_sb->local_nls; + remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; + if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = cifsConvertToUTF16((__le16 *) pSMB->FileName, searchName, @@ -4284,24 +4247,29 @@ findFirstRetry: it got remapped to 0xF03A as if it were part of the directory name instead of a wildcard */ name_len *= 2; - pSMB->FileName[name_len] = dirsep; - pSMB->FileName[name_len+1] = 0; - pSMB->FileName[name_len+2] = '*'; - pSMB->FileName[name_len+3] = 0; - name_len += 4; /* now the trailing null */ - pSMB->FileName[name_len] = 0; /* null terminate just in case */ - pSMB->FileName[name_len+1] = 0; - name_len += 2; + if (msearch) { + pSMB->FileName[name_len] = CIFS_DIR_SEP(cifs_sb); + pSMB->FileName[name_len+1] = 0; + pSMB->FileName[name_len+2] = '*'; + pSMB->FileName[name_len+3] = 0; + name_len += 4; /* now the trailing null */ + /* null terminate just in case */ + pSMB->FileName[name_len] = 0; + pSMB->FileName[name_len+1] = 0; + name_len += 2; + } } else { /* BB add check for overrun of SMB buf BB */ name_len = strnlen(searchName, PATH_MAX); /* BB fix here and in unicode clause above ie if (name_len > buffersize-header) free buffer exit; BB */ strncpy(pSMB->FileName, searchName, name_len); - pSMB->FileName[name_len] = dirsep; - pSMB->FileName[name_len+1] = '*'; - pSMB->FileName[name_len+2] = 0; - name_len += 3; + if (msearch) { + pSMB->FileName[name_len] = CIFS_DIR_SEP(cifs_sb); + pSMB->FileName[name_len+1] = '*'; + pSMB->FileName[name_len+2] = 0; + name_len += 3; + } } params = 12 + name_len /* includes null */ ; @@ -4389,7 +4357,8 @@ findFirstRetry: psrch_inf->last_entry = psrch_inf->srch_entries_start + lnoff; - *pnetfid = parms->SearchHandle; + if (pnetfid) + *pnetfid = parms->SearchHandle; } else { cifs_buf_release(pSMB); } @@ -5417,16 +5386,16 @@ QFSPosixRetry: } -/* We can not use write of zero bytes trick to - set file size due to need for large file support. Also note that - this SetPathInfo is preferred to SetFileInfo based method in next - routine which is only needed to work around a sharing violation bug - in Samba which this routine can run into */ - +/* + * We can not use write of zero bytes trick to set file size due to need for + * large file support. Also note that this SetPathInfo is preferred to + * SetFileInfo based method in next routine which is only needed to work around + * a sharing violation bugin Samba which this routine can run into. + */ int CIFSSMBSetEOF(const unsigned int xid, struct cifs_tcon *tcon, - const char *fileName, __u64 size, bool SetAllocation, - const struct nls_table *nls_codepage, int remap) + const char *file_name, __u64 size, struct cifs_sb_info *cifs_sb, + bool set_allocation) { struct smb_com_transaction2_spi_req *pSMB = NULL; struct smb_com_transaction2_spi_rsp *pSMBr = NULL; @@ -5434,6 +5403,8 @@ CIFSSMBSetEOF(const unsigned int xid, struct cifs_tcon *tcon, int name_len; int rc = 0; int bytes_returned = 0; + int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; + __u16 params, byte_count, data_count, param_offset, offset; cFYI(1, "In SetEOF"); @@ -5445,14 +5416,14 @@ SetEOFRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUTF16((__le16 *) pSMB->FileName, fileName, - PATH_MAX, nls_codepage, remap); + cifsConvertToUTF16((__le16 *) pSMB->FileName, file_name, + PATH_MAX, cifs_sb->local_nls, remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve the check for buffer overruns BB */ - name_len = strnlen(fileName, PATH_MAX); + name_len = strnlen(file_name, PATH_MAX); name_len++; /* trailing null */ - strncpy(pSMB->FileName, fileName, name_len); + strncpy(pSMB->FileName, file_name, name_len); } params = 6 + name_len; data_count = sizeof(struct file_end_of_file_info); @@ -5466,7 +5437,7 @@ SetEOFRetry: param_offset = offsetof(struct smb_com_transaction2_spi_req, InformationLevel) - 4; offset = param_offset + params; - if (SetAllocation) { + if (set_allocation) { if (tcon->ses->capabilities & CAP_INFOLEVEL_PASSTHRU) pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_ALLOCATION_INFO2); @@ -5513,8 +5484,8 @@ SetEOFRetry: } int -CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, __u64 size, - __u16 fid, __u32 pid_of_opener, bool SetAllocation) +CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, + struct cifsFileInfo *cfile, __u64 size, bool set_allocation) { struct smb_com_transaction2_sfi_req *pSMB = NULL; struct file_end_of_file_info *parm_data; @@ -5528,8 +5499,8 @@ CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, __u64 size, if (rc) return rc; - pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener); - pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16)); + pSMB->hdr.Pid = cpu_to_le16((__u16)cfile->pid); + pSMB->hdr.PidHigh = cpu_to_le16((__u16)(cfile->pid >> 16)); params = 6; pSMB->MaxSetupCount = 0; @@ -5558,8 +5529,8 @@ CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, __u64 size, + offset); pSMB->DataOffset = cpu_to_le16(offset); parm_data->FileSize = cpu_to_le64(size); - pSMB->Fid = fid; - if (SetAllocation) { + pSMB->Fid = cfile->fid.netfid; + if (set_allocation) { if (tcon->ses->capabilities & CAP_INFOLEVEL_PASSTHRU) pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_ALLOCATION_INFO2); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 6df6fa14cba8..2fdbe08a7a23 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -82,8 +82,7 @@ enum { Opt_serverino, Opt_noserverino, Opt_rwpidforward, Opt_cifsacl, Opt_nocifsacl, Opt_acl, Opt_noacl, Opt_locallease, - Opt_sign, Opt_seal, Opt_direct, - Opt_strictcache, Opt_noac, + Opt_sign, Opt_seal, Opt_noac, Opt_fsc, Opt_mfsymlinks, Opt_multiuser, Opt_sloppy, @@ -160,10 +159,6 @@ static const match_table_t cifs_mount_option_tokens = { { Opt_locallease, "locallease" }, { Opt_sign, "sign" }, { Opt_seal, "seal" }, - { Opt_direct, "direct" }, - { Opt_direct, "directio" }, - { Opt_direct, "forcedirectio" }, - { Opt_strictcache, "strictcache" }, { Opt_noac, "noac" }, { Opt_fsc, "fsc" }, { Opt_mfsymlinks, "mfsymlinks" }, @@ -277,6 +272,7 @@ static const match_table_t cifs_cacheflavor_tokens = { static const match_table_t cifs_smb_version_tokens = { { Smb_1, SMB1_VERSION_STRING }, { Smb_21, SMB21_VERSION_STRING }, + { Smb_30, SMB30_VERSION_STRING }, }; static int ip_connect(struct TCP_Server_Info *server); @@ -819,6 +815,10 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid) cifs_dump_mem("Bad SMB: ", buf, min_t(unsigned int, server->total_read, 48)); + if (server->ops->is_status_pending && + server->ops->is_status_pending(buf, server, length)) + return -1; + if (!mid) return length; @@ -1075,6 +1075,10 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol) vol->ops = &smb21_operations; vol->vals = &smb21_values; break; + case Smb_30: + vol->ops = &smb21_operations; /* currently identical with 2.1 */ + vol->vals = &smb30_values; + break; #endif default: cERROR(1, "Unknown vers= option specified: %s", value); @@ -1101,8 +1105,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, char *string = NULL; char *tmp_end, *value; char delim; - bool cache_specified = false; - static bool cache_warned = false; separator[0] = ','; separator[1] = 0; @@ -1134,6 +1136,9 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, /* default to using server inode numbers where available */ vol->server_ino = 1; + /* default is to use strict cifs caching semantics */ + vol->strict_io = true; + vol->actimeo = CIFS_DEF_ACTIMEO; /* FIXME: add autonegotiation -- for now, SMB1 is default */ @@ -1317,22 +1322,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, */ vol->seal = 1; break; - case Opt_direct: - cache_specified = true; - vol->direct_io = true; - vol->strict_io = false; - cERROR(1, "The \"directio\" option will be removed in " - "3.7. Please switch to the \"cache=none\" " - "option."); - break; - case Opt_strictcache: - cache_specified = true; - vol->direct_io = false; - vol->strict_io = true; - cERROR(1, "The \"strictcache\" option will be removed " - "in 3.7. Please switch to the \"cache=strict\" " - "option."); - break; case Opt_noac: printk(KERN_WARNING "CIFS: Mount option noac not " "supported. Instead set " @@ -1676,8 +1665,13 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (string == NULL) goto out_nomem; - if (strnicmp(string, "TCP_NODELAY", 11) == 0) + if (strnicmp(string, "TCP_NODELAY", 11) == 0) { + printk(KERN_WARNING "CIFS: the " + "sockopt=TCP_NODELAY option has been " + "deprecated and will be removed " + "in 3.9\n"); vol->sockopt_tcp_nodelay = 1; + } break; case Opt_netbiosname: string = match_strdup(args); @@ -1762,7 +1756,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, goto cifs_parse_mount_err; break; case Opt_cache: - cache_specified = true; string = match_strdup(args); if (string == NULL) goto out_nomem; @@ -1813,14 +1806,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, printk(KERN_NOTICE "CIFS: ignoring forcegid mount option " "specified with no gid= option.\n"); - /* FIXME: remove this block in 3.7 */ - if (!cache_specified && !cache_warned) { - cache_warned = true; - printk(KERN_NOTICE "CIFS: no cache= option specified, using " - "\"cache=loose\". This default will change " - "to \"cache=strict\" in 3.7.\n"); - } - kfree(mountdata_copy); return 0; @@ -2636,6 +2621,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) tcon->retry = volume_info->retry; tcon->nocase = volume_info->nocase; tcon->local_lease = volume_info->local_lease; + INIT_LIST_HEAD(&tcon->pending_opens); spin_lock(&cifs_tcp_ses_lock); list_add(&tcon->tcon_list, &ses->tcon_list); @@ -3261,146 +3247,6 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, "mount option supported"); } -/* - * When the server supports very large reads and writes via POSIX extensions, - * we can allow up to 2^24-1, minus the size of a READ/WRITE_AND_X header, not - * including the RFC1001 length. - * - * Note that this might make for "interesting" allocation problems during - * writeback however as we have to allocate an array of pointers for the - * pages. A 16M write means ~32kb page array with PAGE_CACHE_SIZE == 4096. - * - * For reads, there is a similar problem as we need to allocate an array - * of kvecs to handle the receive, though that should only need to be done - * once. - */ -#define CIFS_MAX_WSIZE ((1<<24) - 1 - sizeof(WRITE_REQ) + 4) -#define CIFS_MAX_RSIZE ((1<<24) - sizeof(READ_RSP) + 4) - -/* - * When the server doesn't allow large posix writes, only allow a rsize/wsize - * of 2^17-1 minus the size of the call header. That allows for a read or - * write up to the maximum size described by RFC1002. - */ -#define CIFS_MAX_RFC1002_WSIZE ((1<<17) - 1 - sizeof(WRITE_REQ) + 4) -#define CIFS_MAX_RFC1002_RSIZE ((1<<17) - 1 - sizeof(READ_RSP) + 4) - -/* - * The default wsize is 1M. find_get_pages seems to return a maximum of 256 - * pages in a single call. With PAGE_CACHE_SIZE == 4k, this means we can fill - * a single wsize request with a single call. - */ -#define CIFS_DEFAULT_IOSIZE (1024 * 1024) - -/* - * Windows only supports a max of 60kb reads and 65535 byte writes. Default to - * those values when posix extensions aren't in force. In actuality here, we - * use 65536 to allow for a write that is a multiple of 4k. Most servers seem - * to be ok with the extra byte even though Windows doesn't send writes that - * are that large. - * - * Citation: - * - * http://blogs.msdn.com/b/openspecification/archive/2009/04/10/smb-maximum-transmit-buffer-size-and-performance-tuning.aspx - */ -#define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024) -#define CIFS_DEFAULT_NON_POSIX_WSIZE (65536) - -/* - * On hosts with high memory, we can't currently support wsize/rsize that are - * larger than we can kmap at once. Cap the rsize/wsize at - * LAST_PKMAP * PAGE_SIZE. We'll never be able to fill a read or write request - * larger than that anyway. - */ -#ifdef CONFIG_HIGHMEM -#define CIFS_KMAP_SIZE_LIMIT (LAST_PKMAP * PAGE_CACHE_SIZE) -#else /* CONFIG_HIGHMEM */ -#define CIFS_KMAP_SIZE_LIMIT (1<<24) -#endif /* CONFIG_HIGHMEM */ - -static unsigned int -cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) -{ - __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); - struct TCP_Server_Info *server = tcon->ses->server; - unsigned int wsize; - - /* start with specified wsize, or default */ - if (pvolume_info->wsize) - wsize = pvolume_info->wsize; - else if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_WRITE_CAP)) - wsize = CIFS_DEFAULT_IOSIZE; - else - wsize = CIFS_DEFAULT_NON_POSIX_WSIZE; - - /* can server support 24-bit write sizes? (via UNIX extensions) */ - if (!tcon->unix_ext || !(unix_cap & CIFS_UNIX_LARGE_WRITE_CAP)) - wsize = min_t(unsigned int, wsize, CIFS_MAX_RFC1002_WSIZE); - - /* - * no CAP_LARGE_WRITE_X or is signing enabled without CAP_UNIX set? - * Limit it to max buffer offered by the server, minus the size of the - * WRITEX header, not including the 4 byte RFC1001 length. - */ - if (!(server->capabilities & CAP_LARGE_WRITE_X) || - (!(server->capabilities & CAP_UNIX) && - (server->sec_mode & (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED)))) - wsize = min_t(unsigned int, wsize, - server->maxBuf - sizeof(WRITE_REQ) + 4); - - /* limit to the amount that we can kmap at once */ - wsize = min_t(unsigned int, wsize, CIFS_KMAP_SIZE_LIMIT); - - /* hard limit of CIFS_MAX_WSIZE */ - wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE); - - return wsize; -} - -static unsigned int -cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) -{ - __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); - struct TCP_Server_Info *server = tcon->ses->server; - unsigned int rsize, defsize; - - /* - * Set default value... - * - * HACK alert! Ancient servers have very small buffers. Even though - * MS-CIFS indicates that servers are only limited by the client's - * bufsize for reads, testing against win98se shows that it throws - * INVALID_PARAMETER errors if you try to request too large a read. - * OS/2 just sends back short reads. - * - * If the server doesn't advertise CAP_LARGE_READ_X, then assume that - * it can't handle a read request larger than its MaxBufferSize either. - */ - if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_READ_CAP)) - defsize = CIFS_DEFAULT_IOSIZE; - else if (server->capabilities & CAP_LARGE_READ_X) - defsize = CIFS_DEFAULT_NON_POSIX_RSIZE; - else - defsize = server->maxBuf - sizeof(READ_RSP); - - rsize = pvolume_info->rsize ? pvolume_info->rsize : defsize; - - /* - * no CAP_LARGE_READ_X? Then MS-CIFS states that we must limit this to - * the client's MaxBufferSize. - */ - if (!(server->capabilities & CAP_LARGE_READ_X)) - rsize = min_t(unsigned int, CIFSMaxBufSize, rsize); - - /* limit to the amount that we can kmap at once */ - rsize = min_t(unsigned int, rsize, CIFS_KMAP_SIZE_LIMIT); - - /* hard limit of CIFS_MAX_RSIZE */ - rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE); - - return rsize; -} - static void cleanup_volume_info_contents(struct smb_vol *volume_info) { @@ -3651,8 +3497,8 @@ try_mount_again: if (!tcon->ipc && server->ops->qfs_tcon) server->ops->qfs_tcon(xid, tcon); - cifs_sb->wsize = cifs_negotiate_wsize(tcon, volume_info); - cifs_sb->rsize = cifs_negotiate_rsize(tcon, volume_info); + cifs_sb->wsize = server->ops->negotiate_wsize(tcon, volume_info); + cifs_sb->rsize = server->ops->negotiate_rsize(tcon, volume_info); /* tune readahead according to rsize */ cifs_sb->bdi.ra_pages = cifs_sb->rsize / PAGE_CACHE_SIZE; diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 781025be48bc..7c0a81283645 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -160,17 +160,18 @@ check_name(struct dentry *direntry) static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, struct tcon_link *tlink, unsigned oflags, umode_t mode, - __u32 *oplock, __u16 *fileHandle, int *created) + __u32 *oplock, struct cifs_fid *fid, int *created) { int rc = -ENOENT; int create_options = CREATE_NOT_DIR; - int desiredAccess; + int desired_access; struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifs_tcon *tcon = tlink_tcon(tlink); char *full_path = NULL; FILE_ALL_INFO *buf = NULL; struct inode *newinode = NULL; int disposition; + struct TCP_Server_Info *server = tcon->ses->server; *oplock = 0; if (tcon->ses->server->oplocks) @@ -185,8 +186,8 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, if (tcon->unix_ext && cap_unix(tcon->ses) && !tcon->broken_posix_open && (CIFS_UNIX_POSIX_PATH_OPS_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))) { - rc = cifs_posix_open(full_path, &newinode, - inode->i_sb, mode, oflags, oplock, fileHandle, xid); + rc = cifs_posix_open(full_path, &newinode, inode->i_sb, mode, + oflags, oplock, &fid->netfid, xid); switch (rc) { case 0: if (newinode == NULL) { @@ -202,7 +203,7 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, * close it and proceed as if it were a normal * lookup. */ - CIFSSMBClose(xid, tcon, *fileHandle); + CIFSSMBClose(xid, tcon, fid->netfid); goto cifs_create_get_file_info; } /* success, no need to query */ @@ -244,11 +245,11 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, */ } - desiredAccess = 0; + desired_access = 0; if (OPEN_FMODE(oflags) & FMODE_READ) - desiredAccess |= GENERIC_READ; /* is this too little? */ + desired_access |= GENERIC_READ; /* is this too little? */ if (OPEN_FMODE(oflags) & FMODE_WRITE) - desiredAccess |= GENERIC_WRITE; + desired_access |= GENERIC_WRITE; disposition = FILE_OVERWRITE_IF; if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) @@ -260,8 +261,15 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, else cFYI(1, "Create flag not set in create function"); - /* BB add processing to set equivalent of mode - e.g. via CreateX with - ACLs */ + /* + * BB add processing to set equivalent of mode - e.g. via CreateX with + * ACLs + */ + + if (!server->ops->open) { + rc = -ENOSYS; + goto out; + } buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); if (buf == NULL) { @@ -279,28 +287,18 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, if (backup_cred(cifs_sb)) create_options |= CREATE_OPEN_BACKUP_INTENT; - if (tcon->ses->capabilities & CAP_NT_SMBS) - rc = CIFSSMBOpen(xid, tcon, full_path, disposition, - desiredAccess, create_options, - fileHandle, oplock, buf, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - else - rc = -EIO; /* no NT SMB support fall into legacy open below */ - - if (rc == -EIO) { - /* old server, retry the open legacy style */ - rc = SMBLegacyOpen(xid, tcon, full_path, disposition, - desiredAccess, create_options, - fileHandle, oplock, buf, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - } + rc = server->ops->open(xid, tcon, full_path, disposition, + desired_access, create_options, fid, oplock, + buf, cifs_sb); if (rc) { cFYI(1, "cifs_create returned 0x%x", rc); goto out; } - /* If Open reported that we actually created a file - then we now have to set the mode if possible */ + /* + * If Open reported that we actually created a file then we now have to + * set the mode if possible. + */ if ((tcon->unix_ext) && (*oplock & CIFS_CREATE_ACTION)) { struct cifs_unix_set_info_args args = { .mode = mode, @@ -321,11 +319,13 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, args.uid = NO_CHANGE_64; args.gid = NO_CHANGE_64; } - CIFSSMBUnixSetFileInfo(xid, tcon, &args, *fileHandle, - current->tgid); + CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid->netfid, + current->tgid); } else { - /* BB implement mode setting via Windows security - descriptors e.g. */ + /* + * BB implement mode setting via Windows security + * descriptors e.g. + */ /* CIFSSMBWinSetPerms(xid,tcon,path,mode,-1,-1,nls);*/ /* Could set r/o dos attribute if mode & 0222 == 0 */ @@ -334,12 +334,14 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, cifs_create_get_file_info: /* server might mask mode so we have to query for it */ if (tcon->unix_ext) - rc = cifs_get_inode_info_unix(&newinode, full_path, - inode->i_sb, xid); + rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb, + xid); else { - rc = cifs_get_inode_info(&newinode, full_path, buf, - inode->i_sb, xid, fileHandle); + rc = cifs_get_inode_info(&newinode, full_path, buf, inode->i_sb, + xid, &fid->netfid); if (newinode) { + if (server->ops->set_lease_key) + server->ops->set_lease_key(newinode, fid); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) newinode->i_mode = mode; if ((*oplock & CIFS_CREATE_ACTION) && @@ -356,7 +358,8 @@ cifs_create_get_file_info: cifs_create_set_dentry: if (rc != 0) { cFYI(1, "Create worked, get_inode_info failed rc = %d", rc); - CIFSSMBClose(xid, tcon, *fileHandle); + if (server->ops->close) + server->ops->close(xid, tcon, fid); goto out; } d_drop(direntry); @@ -377,11 +380,14 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, unsigned int xid; struct tcon_link *tlink; struct cifs_tcon *tcon; - __u16 fileHandle; + struct TCP_Server_Info *server; + struct cifs_fid fid; + struct cifs_pending_open open; __u32 oplock; - struct cifsFileInfo *pfile_info; + struct cifsFileInfo *file_info; - /* Posix open is only called (at lookup time) for file create now. For + /* + * Posix open is only called (at lookup time) for file create now. For * opens (rather than creates), because we do not know if it is a file * or directory yet, and current Samba no longer allows us to do posix * open on dirs, we could end up wasting an open call on what turns out @@ -413,22 +419,34 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, goto out_free_xid; tcon = tlink_tcon(tlink); + server = tcon->ses->server; + + if (server->ops->new_lease_key) + server->ops->new_lease_key(&fid); + + cifs_add_pending_open(&fid, tlink, &open); rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, - &oplock, &fileHandle, opened); + &oplock, &fid, opened); - if (rc) + if (rc) { + cifs_del_pending_open(&open); goto out; + } rc = finish_open(file, direntry, generic_file_open, opened); if (rc) { - CIFSSMBClose(xid, tcon, fileHandle); + if (server->ops->close) + server->ops->close(xid, tcon, &fid); + cifs_del_pending_open(&open); goto out; } - pfile_info = cifs_new_fileinfo(fileHandle, file, tlink, oplock); - if (pfile_info == NULL) { - CIFSSMBClose(xid, tcon, fileHandle); + file_info = cifs_new_fileinfo(&fid, file, tlink, oplock); + if (file_info == NULL) { + if (server->ops->close) + server->ops->close(xid, tcon, &fid); + cifs_del_pending_open(&open); rc = -ENOMEM; } @@ -453,7 +471,9 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, */ unsigned oflags = O_EXCL | O_CREAT | O_RDWR; struct tcon_link *tlink; - __u16 fileHandle; + struct cifs_tcon *tcon; + struct TCP_Server_Info *server; + struct cifs_fid fid; __u32 oplock; int created = FILE_CREATED; @@ -465,10 +485,16 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, if (IS_ERR(tlink)) goto out_free_xid; + tcon = tlink_tcon(tlink); + server = tcon->ses->server; + + if (server->ops->new_lease_key) + server->ops->new_lease_key(&fid); + rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, - &oplock, &fileHandle, &created); - if (!rc) - CIFSSMBClose(xid, tlink_tcon(tlink), fileHandle); + &oplock, &fid, &created); + if (!rc && server->ops->close) + server->ops->close(xid, tcon, &fid); cifs_put_tlink(tlink); out_free_xid: diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 71e9ad9f5961..7d7bbdc4c8e7 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -169,16 +169,20 @@ posix_open_ret: static int cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, - struct cifs_tcon *tcon, unsigned int f_flags, __u32 *poplock, - __u16 *pnetfid, unsigned int xid) + struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock, + struct cifs_fid *fid, unsigned int xid) { int rc; - int desiredAccess; + int desired_access; int disposition; int create_options = CREATE_NOT_DIR; FILE_ALL_INFO *buf; + struct TCP_Server_Info *server = tcon->ses->server; + + if (!server->ops->open) + return -ENOSYS; - desiredAccess = cifs_convert_flags(f_flags); + desired_access = cifs_convert_flags(f_flags); /********************************************************************* * open flag mapping table: @@ -215,16 +219,9 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, if (backup_cred(cifs_sb)) create_options |= CREATE_OPEN_BACKUP_INTENT; - if (tcon->ses->capabilities & CAP_NT_SMBS) - rc = CIFSSMBOpen(xid, tcon, full_path, disposition, - desiredAccess, create_options, pnetfid, poplock, buf, - cifs_sb->local_nls, cifs_sb->mnt_cifs_flags - & CIFS_MOUNT_MAP_SPECIAL_CHR); - else - rc = SMBLegacyOpen(xid, tcon, full_path, disposition, - desiredAccess, CREATE_NOT_DIR, pnetfid, poplock, buf, - cifs_sb->local_nls, cifs_sb->mnt_cifs_flags - & CIFS_MOUNT_MAP_SPECIAL_CHR); + rc = server->ops->open(xid, tcon, full_path, disposition, + desired_access, create_options, fid, oplock, buf, + cifs_sb); if (rc) goto out; @@ -234,7 +231,7 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, xid); else rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb, - xid, pnetfid); + xid, &fid->netfid); out: kfree(buf); @@ -242,48 +239,62 @@ out: } struct cifsFileInfo * -cifs_new_fileinfo(__u16 fileHandle, struct file *file, +cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, struct tcon_link *tlink, __u32 oplock) { struct dentry *dentry = file->f_path.dentry; struct inode *inode = dentry->d_inode; - struct cifsInodeInfo *pCifsInode = CIFS_I(inode); - struct cifsFileInfo *pCifsFile; - - pCifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); - if (pCifsFile == NULL) - return pCifsFile; - - pCifsFile->count = 1; - pCifsFile->netfid = fileHandle; - pCifsFile->pid = current->tgid; - pCifsFile->uid = current_fsuid(); - pCifsFile->dentry = dget(dentry); - pCifsFile->f_flags = file->f_flags; - pCifsFile->invalidHandle = false; - pCifsFile->tlink = cifs_get_tlink(tlink); - mutex_init(&pCifsFile->fh_mutex); - INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break); - INIT_LIST_HEAD(&pCifsFile->llist); + struct cifsInodeInfo *cinode = CIFS_I(inode); + struct cifsFileInfo *cfile; + struct cifs_fid_locks *fdlocks; + struct cifs_tcon *tcon = tlink_tcon(tlink); + + cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); + if (cfile == NULL) + return cfile; + + fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL); + if (!fdlocks) { + kfree(cfile); + return NULL; + } + + INIT_LIST_HEAD(&fdlocks->locks); + fdlocks->cfile = cfile; + cfile->llist = fdlocks; + down_write(&cinode->lock_sem); + list_add(&fdlocks->llist, &cinode->llist); + up_write(&cinode->lock_sem); + + cfile->count = 1; + cfile->pid = current->tgid; + cfile->uid = current_fsuid(); + cfile->dentry = dget(dentry); + cfile->f_flags = file->f_flags; + cfile->invalidHandle = false; + cfile->tlink = cifs_get_tlink(tlink); + INIT_WORK(&cfile->oplock_break, cifs_oplock_break); + mutex_init(&cfile->fh_mutex); spin_lock(&cifs_file_list_lock); - list_add(&pCifsFile->tlist, &(tlink_tcon(tlink)->openFileList)); + if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE) + oplock = fid->pending_open->oplock; + list_del(&fid->pending_open->olist); + + tlink_tcon(tlink)->ses->server->ops->set_fid(cfile, fid, oplock); + + list_add(&cfile->tlist, &tcon->openFileList); /* if readable file instance put first in list*/ if (file->f_mode & FMODE_READ) - list_add(&pCifsFile->flist, &pCifsInode->openFileList); + list_add(&cfile->flist, &cinode->openFileList); else - list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList); + list_add_tail(&cfile->flist, &cinode->openFileList); spin_unlock(&cifs_file_list_lock); - cifs_set_oplock_level(pCifsInode, oplock); - pCifsInode->can_cache_brlcks = pCifsInode->clientCanCacheAll; - - file->private_data = pCifsFile; - return pCifsFile; + file->private_data = cfile; + return cfile; } -static void cifs_del_lock_waiters(struct cifsLockInfo *lock); - struct cifsFileInfo * cifsFileInfo_get(struct cifsFileInfo *cifs_file) { @@ -302,9 +313,12 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) { struct inode *inode = cifs_file->dentry->d_inode; struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); + struct TCP_Server_Info *server = tcon->ses->server; struct cifsInodeInfo *cifsi = CIFS_I(inode); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifsLockInfo *li, *tmp; + struct cifs_fid fid; + struct cifs_pending_open open; spin_lock(&cifs_file_list_lock); if (--cifs_file->count > 0) { @@ -312,6 +326,12 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) return; } + if (server->ops->get_lease_key) + server->ops->get_lease_key(inode, &fid); + + /* store open in pending opens to make sure we don't miss lease break */ + cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open); + /* remove it from the lists */ list_del(&cifs_file->flist); list_del(&cifs_file->tlist); @@ -319,13 +339,13 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) if (list_empty(&cifsi->openFileList)) { cFYI(1, "closing last open instance for inode %p", cifs_file->dentry->d_inode); - - /* in strict cache mode we need invalidate mapping on the last - close because it may cause a error when we open this file - again and get at least level II oplock */ + /* + * In strict cache mode we need invalidate mapping on the last + * close because it may cause a error when we open this file + * again and get at least level II oplock. + */ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) CIFS_I(inode)->invalid_mapping = true; - cifs_set_oplock_level(cifsi, 0); } spin_unlock(&cifs_file_list_lock); @@ -333,23 +353,30 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) cancel_work_sync(&cifs_file->oplock_break); if (!tcon->need_reconnect && !cifs_file->invalidHandle) { + struct TCP_Server_Info *server = tcon->ses->server; unsigned int xid; - int rc; + xid = get_xid(); - rc = CIFSSMBClose(xid, tcon, cifs_file->netfid); - free_xid(xid); + if (server->ops->close) + server->ops->close(xid, tcon, &cifs_file->fid); + _free_xid(xid); } - /* Delete any outstanding lock records. We'll lose them when the file + cifs_del_pending_open(&open); + + /* + * Delete any outstanding lock records. We'll lose them when the file * is closed anyway. */ - mutex_lock(&cifsi->lock_mutex); - list_for_each_entry_safe(li, tmp, &cifs_file->llist, llist) { + down_write(&cifsi->lock_sem); + list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) { list_del(&li->llist); cifs_del_lock_waiters(li); kfree(li); } - mutex_unlock(&cifsi->lock_mutex); + list_del(&cifs_file->llist->llist); + kfree(cifs_file->llist); + up_write(&cifsi->lock_sem); cifs_put_tlink(cifs_file->tlink); dput(cifs_file->dentry); @@ -357,17 +384,20 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) } int cifs_open(struct inode *inode, struct file *file) + { int rc = -EACCES; unsigned int xid; __u32 oplock; struct cifs_sb_info *cifs_sb; + struct TCP_Server_Info *server; struct cifs_tcon *tcon; struct tcon_link *tlink; - struct cifsFileInfo *pCifsFile = NULL; + struct cifsFileInfo *cfile = NULL; char *full_path = NULL; bool posix_open_ok = false; - __u16 netfid; + struct cifs_fid fid; + struct cifs_pending_open open; xid = get_xid(); @@ -378,6 +408,7 @@ int cifs_open(struct inode *inode, struct file *file) return PTR_ERR(tlink); } tcon = tlink_tcon(tlink); + server = tcon->ses->server; full_path = build_path_from_dentry(file->f_path.dentry); if (full_path == NULL) { @@ -388,7 +419,7 @@ int cifs_open(struct inode *inode, struct file *file) cFYI(1, "inode = 0x%p file flags are 0x%x for %s", inode, file->f_flags, full_path); - if (tcon->ses->server->oplocks) + if (server->oplocks) oplock = REQ_OPLOCK; else oplock = 0; @@ -399,7 +430,7 @@ int cifs_open(struct inode *inode, struct file *file) /* can not refresh inode info since size could be stale */ rc = cifs_posix_open(full_path, &inode, inode->i_sb, cifs_sb->mnt_file_mode /* ignored */, - file->f_flags, &oplock, &netfid, xid); + file->f_flags, &oplock, &fid.netfid, xid); if (rc == 0) { cFYI(1, "posix open succeeded"); posix_open_ok = true; @@ -415,20 +446,34 @@ int cifs_open(struct inode *inode, struct file *file) } else if ((rc != -EIO) && (rc != -EREMOTE) && (rc != -EOPNOTSUPP)) /* path not found or net err */ goto out; - /* else fallthrough to retry open the old way on network i/o - or DFS errors */ + /* + * Else fallthrough to retry open the old way on network i/o + * or DFS errors. + */ } + if (server->ops->get_lease_key) + server->ops->get_lease_key(inode, &fid); + + cifs_add_pending_open(&fid, tlink, &open); + if (!posix_open_ok) { + if (server->ops->get_lease_key) + server->ops->get_lease_key(inode, &fid); + rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, - file->f_flags, &oplock, &netfid, xid); - if (rc) + file->f_flags, &oplock, &fid, xid); + if (rc) { + cifs_del_pending_open(&open); goto out; + } } - pCifsFile = cifs_new_fileinfo(netfid, file, tlink, oplock); - if (pCifsFile == NULL) { - CIFSSMBClose(xid, tcon, netfid); + cfile = cifs_new_fileinfo(&fid, file, tlink, oplock); + if (cfile == NULL) { + if (server->ops->close) + server->ops->close(xid, tcon, &fid); + cifs_del_pending_open(&open); rc = -ENOMEM; goto out; } @@ -436,8 +481,10 @@ int cifs_open(struct inode *inode, struct file *file) cifs_fscache_set_inode_cookie(inode, file); if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) { - /* time to set mode which we can not set earlier due to - problems creating new read-only files */ + /* + * Time to set mode which we can not set earlier due to + * problems creating new read-only files. + */ struct cifs_unix_set_info_args args = { .mode = inode->i_mode, .uid = NO_CHANGE_64, @@ -447,8 +494,8 @@ int cifs_open(struct inode *inode, struct file *file) .mtime = NO_CHANGE_64, .device = 0, }; - CIFSSMBUnixSetFileInfo(xid, tcon, &args, netfid, - pCifsFile->pid); + CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid, + cfile->pid); } out: @@ -458,59 +505,66 @@ out: return rc; } -/* Try to reacquire byte range locks that were released when session */ -/* to server was lost */ +/* + * Try to reacquire byte range locks that were released when session + * to server was lost + */ static int cifs_relock_file(struct cifsFileInfo *cifsFile) { int rc = 0; -/* BB list all locks open on this file and relock */ + /* BB list all locks open on this file and relock */ return rc; } -static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush) +static int +cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) { int rc = -EACCES; unsigned int xid; __u32 oplock; struct cifs_sb_info *cifs_sb; struct cifs_tcon *tcon; - struct cifsInodeInfo *pCifsInode; + struct TCP_Server_Info *server; + struct cifsInodeInfo *cinode; struct inode *inode; char *full_path = NULL; - int desiredAccess; + int desired_access; int disposition = FILE_OPEN; int create_options = CREATE_NOT_DIR; - __u16 netfid; + struct cifs_fid fid; xid = get_xid(); - mutex_lock(&pCifsFile->fh_mutex); - if (!pCifsFile->invalidHandle) { - mutex_unlock(&pCifsFile->fh_mutex); + mutex_lock(&cfile->fh_mutex); + if (!cfile->invalidHandle) { + mutex_unlock(&cfile->fh_mutex); rc = 0; free_xid(xid); return rc; } - inode = pCifsFile->dentry->d_inode; + inode = cfile->dentry->d_inode; cifs_sb = CIFS_SB(inode->i_sb); - tcon = tlink_tcon(pCifsFile->tlink); + tcon = tlink_tcon(cfile->tlink); + server = tcon->ses->server; -/* can not grab rename sem here because various ops, including - those that already have the rename sem can end up causing writepage - to get called and if the server was down that means we end up here, - and we can never tell if the caller already has the rename_sem */ - full_path = build_path_from_dentry(pCifsFile->dentry); + /* + * Can not grab rename sem here because various ops, including those + * that already have the rename sem can end up causing writepage to get + * called and if the server was down that means we end up here, and we + * can never tell if the caller already has the rename_sem. + */ + full_path = build_path_from_dentry(cfile->dentry); if (full_path == NULL) { rc = -ENOMEM; - mutex_unlock(&pCifsFile->fh_mutex); + mutex_unlock(&cfile->fh_mutex); free_xid(xid); return rc; } - cFYI(1, "inode = 0x%p file flags 0x%x for %s", - inode, pCifsFile->f_flags, full_path); + cFYI(1, "inode = 0x%p file flags 0x%x for %s", inode, cfile->f_flags, + full_path); if (tcon->ses->server->oplocks) oplock = REQ_OPLOCK; @@ -524,69 +578,72 @@ static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush) * O_CREAT, O_EXCL and O_TRUNC already had their effect on the * original open. Must mask them off for a reopen. */ - unsigned int oflags = pCifsFile->f_flags & + unsigned int oflags = cfile->f_flags & ~(O_CREAT | O_EXCL | O_TRUNC); rc = cifs_posix_open(full_path, NULL, inode->i_sb, - cifs_sb->mnt_file_mode /* ignored */, - oflags, &oplock, &netfid, xid); + cifs_sb->mnt_file_mode /* ignored */, + oflags, &oplock, &fid.netfid, xid); if (rc == 0) { cFYI(1, "posix reopen succeeded"); goto reopen_success; } - /* fallthrough to retry open the old way on errors, especially - in the reconnect path it is important to retry hard */ + /* + * fallthrough to retry open the old way on errors, especially + * in the reconnect path it is important to retry hard + */ } - desiredAccess = cifs_convert_flags(pCifsFile->f_flags); + desired_access = cifs_convert_flags(cfile->f_flags); if (backup_cred(cifs_sb)) create_options |= CREATE_OPEN_BACKUP_INTENT; - /* Can not refresh inode by passing in file_info buf to be returned - by SMBOpen and then calling get_inode_info with returned buf - since file might have write behind data that needs to be flushed - and server version of file size can be stale. If we knew for sure - that inode was not dirty locally we could do this */ + if (server->ops->get_lease_key) + server->ops->get_lease_key(inode, &fid); - rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess, - create_options, &netfid, &oplock, NULL, - cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + /* + * Can not refresh inode by passing in file_info buf to be returned by + * CIFSSMBOpen and then calling get_inode_info with returned buf since + * file might have write behind data that needs to be flushed and server + * version of file size can be stale. If we knew for sure that inode was + * not dirty locally we could do this. + */ + rc = server->ops->open(xid, tcon, full_path, disposition, + desired_access, create_options, &fid, &oplock, + NULL, cifs_sb); if (rc) { - mutex_unlock(&pCifsFile->fh_mutex); - cFYI(1, "cifs_open returned 0x%x", rc); + mutex_unlock(&cfile->fh_mutex); + cFYI(1, "cifs_reopen returned 0x%x", rc); cFYI(1, "oplock: %d", oplock); goto reopen_error_exit; } reopen_success: - pCifsFile->netfid = netfid; - pCifsFile->invalidHandle = false; - mutex_unlock(&pCifsFile->fh_mutex); - pCifsInode = CIFS_I(inode); + cfile->invalidHandle = false; + mutex_unlock(&cfile->fh_mutex); + cinode = CIFS_I(inode); if (can_flush) { rc = filemap_write_and_wait(inode->i_mapping); mapping_set_error(inode->i_mapping, rc); if (tcon->unix_ext) - rc = cifs_get_inode_info_unix(&inode, - full_path, inode->i_sb, xid); + rc = cifs_get_inode_info_unix(&inode, full_path, + inode->i_sb, xid); else - rc = cifs_get_inode_info(&inode, - full_path, NULL, inode->i_sb, - xid, NULL); - } /* else we are writing out data to server already - and could deadlock if we tried to flush data, and - since we do not know if we have data that would - invalidate the current end of file on the server - we can not go to the server to get the new inod - info */ - - cifs_set_oplock_level(pCifsInode, oplock); + rc = cifs_get_inode_info(&inode, full_path, NULL, + inode->i_sb, xid, NULL); + } + /* + * Else we are writing out data to server already and could deadlock if + * we tried to flush data, and since we do not know if we have data that + * would invalidate the current end of file on the server we can not go + * to the server to get the new inode info. + */ - cifs_relock_file(pCifsFile); + server->ops->set_fid(cfile, &fid, oplock); + cifs_relock_file(cfile); reopen_error_exit: kfree(full_path); @@ -609,42 +666,48 @@ int cifs_closedir(struct inode *inode, struct file *file) { int rc = 0; unsigned int xid; - struct cifsFileInfo *pCFileStruct = file->private_data; - char *ptmp; + struct cifsFileInfo *cfile = file->private_data; + struct cifs_tcon *tcon; + struct TCP_Server_Info *server; + char *buf; cFYI(1, "Closedir inode = 0x%p", inode); + if (cfile == NULL) + return rc; + xid = get_xid(); + tcon = tlink_tcon(cfile->tlink); + server = tcon->ses->server; - if (pCFileStruct) { - struct cifs_tcon *pTcon = tlink_tcon(pCFileStruct->tlink); + cFYI(1, "Freeing private data in close dir"); + spin_lock(&cifs_file_list_lock); + if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) { + cfile->invalidHandle = true; + spin_unlock(&cifs_file_list_lock); + if (server->ops->close_dir) + rc = server->ops->close_dir(xid, tcon, &cfile->fid); + else + rc = -ENOSYS; + cFYI(1, "Closing uncompleted readdir with rc %d", rc); + /* not much we can do if it fails anyway, ignore rc */ + rc = 0; + } else + spin_unlock(&cifs_file_list_lock); - cFYI(1, "Freeing private data in close dir"); - spin_lock(&cifs_file_list_lock); - if (!pCFileStruct->srch_inf.endOfSearch && - !pCFileStruct->invalidHandle) { - pCFileStruct->invalidHandle = true; - spin_unlock(&cifs_file_list_lock); - rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid); - cFYI(1, "Closing uncompleted readdir with rc %d", - rc); - /* not much we can do if it fails anyway, ignore rc */ - rc = 0; - } else - spin_unlock(&cifs_file_list_lock); - ptmp = pCFileStruct->srch_inf.ntwrk_buf_start; - if (ptmp) { - cFYI(1, "closedir free smb buf in srch struct"); - pCFileStruct->srch_inf.ntwrk_buf_start = NULL; - if (pCFileStruct->srch_inf.smallBuf) - cifs_small_buf_release(ptmp); - else - cifs_buf_release(ptmp); - } - cifs_put_tlink(pCFileStruct->tlink); - kfree(file->private_data); - file->private_data = NULL; + buf = cfile->srch_inf.ntwrk_buf_start; + if (buf) { + cFYI(1, "closedir free smb buf in srch struct"); + cfile->srch_inf.ntwrk_buf_start = NULL; + if (cfile->srch_inf.smallBuf) + cifs_small_buf_release(buf); + else + cifs_buf_release(buf); } + + cifs_put_tlink(cfile->tlink); + kfree(file->private_data); + file->private_data = NULL; /* BB can we lock the filestruct while this is going on? */ free_xid(xid); return rc; @@ -666,7 +729,7 @@ cifs_lock_init(__u64 offset, __u64 length, __u8 type) return lock; } -static void +void cifs_del_lock_waiters(struct cifsLockInfo *lock) { struct cifsLockInfo *li, *tmp; @@ -677,45 +740,47 @@ cifs_del_lock_waiters(struct cifsLockInfo *lock) } static bool -cifs_find_fid_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, - __u64 length, __u8 type, struct cifsFileInfo *cur, - struct cifsLockInfo **conf_lock) +cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset, + __u64 length, __u8 type, struct cifsFileInfo *cfile, + struct cifsLockInfo **conf_lock, bool rw_check) { struct cifsLockInfo *li; + struct cifsFileInfo *cur_cfile = fdlocks->cfile; struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; - list_for_each_entry(li, &cfile->llist, llist) { + list_for_each_entry(li, &fdlocks->locks, llist) { if (offset + length <= li->offset || offset >= li->offset + li->length) continue; - else if ((type & server->vals->shared_lock_type) && - ((server->ops->compare_fids(cur, cfile) && - current->tgid == li->pid) || type == li->type)) + if (rw_check && server->ops->compare_fids(cfile, cur_cfile) && + current->tgid == li->pid) continue; - else { + if ((type & server->vals->shared_lock_type) && + ((server->ops->compare_fids(cfile, cur_cfile) && + current->tgid == li->pid) || type == li->type)) + continue; + if (conf_lock) *conf_lock = li; - return true; - } + return true; } return false; } -static bool +bool cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length, - __u8 type, struct cifsLockInfo **conf_lock) + __u8 type, struct cifsLockInfo **conf_lock, + bool rw_check) { bool rc = false; - struct cifsFileInfo *fid, *tmp; + struct cifs_fid_locks *cur; struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); - spin_lock(&cifs_file_list_lock); - list_for_each_entry_safe(fid, tmp, &cinode->openFileList, flist) { - rc = cifs_find_fid_lock_conflict(fid, offset, length, type, - cfile, conf_lock); + list_for_each_entry(cur, &cinode->llist, llist) { + rc = cifs_find_fid_lock_conflict(cur, offset, length, type, + cfile, conf_lock, rw_check); if (rc) break; } - spin_unlock(&cifs_file_list_lock); return rc; } @@ -737,10 +802,10 @@ cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length, struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; bool exist; - mutex_lock(&cinode->lock_mutex); + down_read(&cinode->lock_sem); exist = cifs_find_lock_conflict(cfile, offset, length, type, - &conf_lock); + &conf_lock, false); if (exist) { flock->fl_start = conf_lock->offset; flock->fl_end = conf_lock->offset + conf_lock->length - 1; @@ -754,7 +819,7 @@ cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length, else flock->fl_type = F_UNLCK; - mutex_unlock(&cinode->lock_mutex); + up_read(&cinode->lock_sem); return rc; } @@ -762,9 +827,9 @@ static void cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock) { struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); - mutex_lock(&cinode->lock_mutex); - list_add_tail(&lock->llist, &cfile->llist); - mutex_unlock(&cinode->lock_mutex); + down_write(&cinode->lock_sem); + list_add_tail(&lock->llist, &cfile->llist->locks); + up_write(&cinode->lock_sem); } /* @@ -784,13 +849,13 @@ cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock, try_again: exist = false; - mutex_lock(&cinode->lock_mutex); + down_write(&cinode->lock_sem); exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length, - lock->type, &conf_lock); + lock->type, &conf_lock, false); if (!exist && cinode->can_cache_brlcks) { - list_add_tail(&lock->llist, &cfile->llist); - mutex_unlock(&cinode->lock_mutex); + list_add_tail(&lock->llist, &cfile->llist->locks); + up_write(&cinode->lock_sem); return rc; } @@ -800,17 +865,17 @@ try_again: rc = -EACCES; else { list_add_tail(&lock->blist, &conf_lock->blist); - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); rc = wait_event_interruptible(lock->block_q, (lock->blist.prev == &lock->blist) && (lock->blist.next == &lock->blist)); if (!rc) goto try_again; - mutex_lock(&cinode->lock_mutex); + down_write(&cinode->lock_sem); list_del_init(&lock->blist); } - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); return rc; } @@ -831,7 +896,7 @@ cifs_posix_lock_test(struct file *file, struct file_lock *flock) if ((flock->fl_flags & FL_POSIX) == 0) return 1; - mutex_lock(&cinode->lock_mutex); + down_read(&cinode->lock_sem); posix_test_lock(file, flock); if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) { @@ -839,7 +904,7 @@ cifs_posix_lock_test(struct file *file, struct file_lock *flock) rc = 1; } - mutex_unlock(&cinode->lock_mutex); + up_read(&cinode->lock_sem); return rc; } @@ -859,14 +924,14 @@ cifs_posix_lock_set(struct file *file, struct file_lock *flock) return rc; try_again: - mutex_lock(&cinode->lock_mutex); + down_write(&cinode->lock_sem); if (!cinode->can_cache_brlcks) { - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); return rc; } rc = posix_lock_file(file, flock, NULL); - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); if (rc == FILE_LOCK_DEFERRED) { rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next); if (!rc) @@ -876,7 +941,7 @@ try_again: return rc; } -static int +int cifs_push_mandatory_locks(struct cifsFileInfo *cfile) { unsigned int xid; @@ -893,9 +958,10 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) xid = get_xid(); tcon = tlink_tcon(cfile->tlink); - mutex_lock(&cinode->lock_mutex); + /* we are going to update can_cache_brlcks here - need a write access */ + down_write(&cinode->lock_sem); if (!cinode->can_cache_brlcks) { - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); free_xid(xid); return rc; } @@ -906,7 +972,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) */ max_buf = tcon->ses->server->maxBuf; if (!max_buf) { - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); free_xid(xid); return -EINVAL; } @@ -915,7 +981,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) sizeof(LOCKING_ANDX_RANGE); buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); if (!buf) { - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); free_xid(xid); return -ENOMEM; } @@ -923,7 +989,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) for (i = 0; i < 2; i++) { cur = buf; num = 0; - list_for_each_entry_safe(li, tmp, &cfile->llist, llist) { + list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { if (li->type != types[i]) continue; cur->Pid = cpu_to_le16(li->pid); @@ -932,7 +998,8 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) cur->OffsetLow = cpu_to_le32((u32)li->offset); cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32)); if (++num == max_num) { - stored_rc = cifs_lockv(xid, tcon, cfile->netfid, + stored_rc = cifs_lockv(xid, tcon, + cfile->fid.netfid, (__u8)li->type, 0, num, buf); if (stored_rc) @@ -944,7 +1011,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) } if (num) { - stored_rc = cifs_lockv(xid, tcon, cfile->netfid, + stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid, (__u8)types[i], 0, num, buf); if (stored_rc) rc = stored_rc; @@ -952,7 +1019,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) } cinode->can_cache_brlcks = false; - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); kfree(buf); free_xid(xid); @@ -987,9 +1054,10 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) xid = get_xid(); - mutex_lock(&cinode->lock_mutex); + /* we are going to update can_cache_brlcks here - need a write access */ + down_write(&cinode->lock_sem); if (!cinode->can_cache_brlcks) { - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); free_xid(xid); return rc; } @@ -1005,7 +1073,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) /* * Allocating count locks is enough because no FL_POSIX locks can be - * added to the list while we are holding cinode->lock_mutex that + * added to the list while we are holding cinode->lock_sem that * protects locking operations of this inode. */ for (; i < count; i++) { @@ -1038,7 +1106,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) type = CIFS_WRLCK; lck = list_entry(el, struct lock_to_push, llist); lck->pid = flock->fl_pid; - lck->netfid = cfile->netfid; + lck->netfid = cfile->fid.netfid; lck->length = length; lck->type = type; lck->offset = flock->fl_start; @@ -1060,7 +1128,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) out: cinode->can_cache_brlcks = false; - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); free_xid(xid); return rc; @@ -1083,7 +1151,7 @@ cifs_push_locks(struct cifsFileInfo *cfile) ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) return cifs_push_posix_locks(cfile); - return cifs_push_mandatory_locks(cfile); + return tcon->ses->server->ops->push_mand_locks(cfile); } static void @@ -1104,7 +1172,8 @@ cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock, if (flock->fl_flags & FL_LEASE) cFYI(1, "Lease on file - not implemented yet"); if (flock->fl_flags & - (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE))) + (~(FL_POSIX | FL_FLOCK | FL_SLEEP | + FL_ACCESS | FL_LEASE | FL_CLOSE))) cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags); *type = server->vals->large_lock_type; @@ -1134,15 +1203,6 @@ cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock, } static int -cifs_mandatory_lock(unsigned int xid, struct cifsFileInfo *cfile, __u64 offset, - __u64 length, __u32 type, int lock, int unlock, bool wait) -{ - return CIFSSMBLock(xid, tlink_tcon(cfile->tlink), cfile->netfid, - current->tgid, length, offset, unlock, lock, - (__u8)type, wait, 0); -} - -static int cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, bool wait_flag, bool posix_lck, unsigned int xid) { @@ -1151,7 +1211,7 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); struct TCP_Server_Info *server = tcon->ses->server; - __u16 netfid = cfile->netfid; + __u16 netfid = cfile->fid.netfid; if (posix_lck) { int posix_lock_type; @@ -1175,11 +1235,11 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, return rc; /* BB we could chain these into one lock request BB */ - rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, type, - 1, 0, false); + rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type, + 1, 0, false); if (rc == 0) { - rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, - type, 0, 1, false); + rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, + type, 0, 1, false); flock->fl_type = F_UNLCK; if (rc != 0) cERROR(1, "Error unlocking previously locked " @@ -1192,13 +1252,14 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, return 0; } - rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, - type | server->vals->shared_lock_type, 1, 0, - false); + type &= ~server->vals->exclusive_lock_type; + + rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, + type | server->vals->shared_lock_type, + 1, 0, false); if (rc == 0) { - rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, - type | server->vals->shared_lock_type, - 0, 1, false); + rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, + type | server->vals->shared_lock_type, 0, 1, false); flock->fl_type = F_RDLCK; if (rc != 0) cERROR(1, "Error unlocking previously locked " @@ -1209,7 +1270,7 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, return 0; } -static void +void cifs_move_llist(struct list_head *source, struct list_head *dest) { struct list_head *li, *tmp; @@ -1217,7 +1278,7 @@ cifs_move_llist(struct list_head *source, struct list_head *dest) list_move(li, dest); } -static void +void cifs_free_llist(struct list_head *llist) { struct cifsLockInfo *li, *tmp; @@ -1228,7 +1289,7 @@ cifs_free_llist(struct list_head *llist) } } -static int +int cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, unsigned int xid) { @@ -1260,11 +1321,11 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, if (!buf) return -ENOMEM; - mutex_lock(&cinode->lock_mutex); + down_write(&cinode->lock_sem); for (i = 0; i < 2; i++) { cur = buf; num = 0; - list_for_each_entry_safe(li, tmp, &cfile->llist, llist) { + list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { if (flock->fl_start > li->offset || (flock->fl_start + length) < (li->offset + li->length)) @@ -1295,7 +1356,8 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, */ list_move(&li->llist, &tmp_llist); if (++num == max_num) { - stored_rc = cifs_lockv(xid, tcon, cfile->netfid, + stored_rc = cifs_lockv(xid, tcon, + cfile->fid.netfid, li->type, num, 0, buf); if (stored_rc) { /* @@ -1304,7 +1366,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, * list to the head of the file's list. */ cifs_move_llist(&tmp_llist, - &cfile->llist); + &cfile->llist->locks); rc = stored_rc; } else /* @@ -1318,23 +1380,24 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, cur++; } if (num) { - stored_rc = cifs_lockv(xid, tcon, cfile->netfid, + stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid, types[i], num, 0, buf); if (stored_rc) { - cifs_move_llist(&tmp_llist, &cfile->llist); + cifs_move_llist(&tmp_llist, + &cfile->llist->locks); rc = stored_rc; } else cifs_free_llist(&tmp_llist); } } - mutex_unlock(&cinode->lock_mutex); + up_write(&cinode->lock_sem); kfree(buf); return rc; } static int -cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, +cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, bool wait_flag, bool posix_lck, int lock, int unlock, unsigned int xid) { @@ -1343,7 +1406,6 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); struct TCP_Server_Info *server = tcon->ses->server; - __u16 netfid = cfile->netfid; if (posix_lck) { int posix_lock_type; @@ -1360,9 +1422,9 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, if (unlock == 1) posix_lock_type = CIFS_UNLCK; - rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid, - flock->fl_start, length, NULL, - posix_lock_type, wait_flag); + rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid, + current->tgid, flock->fl_start, length, + NULL, posix_lock_type, wait_flag); goto out; } @@ -1379,8 +1441,8 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, if (rc <= 0) goto out; - rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, - type, 1, 0, wait_flag); + rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, + type, 1, 0, wait_flag); if (rc) { kfree(lock); goto out; @@ -1388,7 +1450,7 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, cifs_lock_add(cfile, lock); } else if (unlock) - rc = cifs_unlock_range(cfile, flock, xid); + rc = server->ops->mand_unlock_range(cfile, flock, xid); out: if (flock->fl_flags & FL_POSIX) @@ -1423,7 +1485,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock) tcon->ses->server); cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); - netfid = cfile->netfid; + netfid = cfile->fid.netfid; cinode = CIFS_I(file->f_path.dentry->d_inode); if (cap_unix(tcon->ses) && @@ -1469,15 +1531,16 @@ cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, cifsi->server_eof = end_of_write; } -static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid, - const char *write_data, size_t write_size, - loff_t *poffset) +static ssize_t +cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data, + size_t write_size, loff_t *offset) { int rc = 0; unsigned int bytes_written = 0; unsigned int total_written; struct cifs_sb_info *cifs_sb; - struct cifs_tcon *pTcon; + struct cifs_tcon *tcon; + struct TCP_Server_Info *server; unsigned int xid; struct dentry *dentry = open_file->dentry; struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode); @@ -1486,9 +1549,13 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid, cifs_sb = CIFS_SB(dentry->d_sb); cFYI(1, "write %zd bytes to offset %lld of %s", write_size, - *poffset, dentry->d_name.name); + *offset, dentry->d_name.name); - pTcon = tlink_tcon(open_file->tlink); + tcon = tlink_tcon(open_file->tlink); + server = tcon->ses->server; + + if (!server->ops->sync_write) + return -ENOSYS; xid = get_xid(); @@ -1514,13 +1581,12 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid, /* iov[0] is reserved for smb header */ iov[1].iov_base = (char *)write_data + total_written; iov[1].iov_len = len; - io_parms.netfid = open_file->netfid; io_parms.pid = pid; - io_parms.tcon = pTcon; - io_parms.offset = *poffset; + io_parms.tcon = tcon; + io_parms.offset = *offset; io_parms.length = len; - rc = CIFSSMBWrite2(xid, &io_parms, &bytes_written, iov, - 1, 0); + rc = server->ops->sync_write(xid, open_file, &io_parms, + &bytes_written, iov, 1); } if (rc || (bytes_written == 0)) { if (total_written) @@ -1531,18 +1597,18 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid, } } else { spin_lock(&dentry->d_inode->i_lock); - cifs_update_eof(cifsi, *poffset, bytes_written); + cifs_update_eof(cifsi, *offset, bytes_written); spin_unlock(&dentry->d_inode->i_lock); - *poffset += bytes_written; + *offset += bytes_written; } } - cifs_stats_bytes_written(pTcon, total_written); + cifs_stats_bytes_written(tcon, total_written); if (total_written > 0) { spin_lock(&dentry->d_inode->i_lock); - if (*poffset > dentry->d_inode->i_size) - i_size_write(dentry->d_inode, *poffset); + if (*offset > dentry->d_inode->i_size) + i_size_write(dentry->d_inode, *offset); spin_unlock(&dentry->d_inode->i_lock); } mark_inode_dirty_sync(dentry->d_inode); @@ -1718,27 +1784,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) return rc; } -/* - * Marshal up the iov array, reserving the first one for the header. Also, - * set wdata->bytes. - */ -static void -cifs_writepages_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata) -{ - int i; - struct inode *inode = wdata->cfile->dentry->d_inode; - loff_t size = i_size_read(inode); - - /* marshal up the pages into iov array */ - wdata->bytes = 0; - for (i = 0; i < wdata->nr_pages; i++) { - iov[i + 1].iov_len = min(size - page_offset(wdata->pages[i]), - (loff_t)PAGE_CACHE_SIZE); - iov[i + 1].iov_base = kmap(wdata->pages[i]); - wdata->bytes += iov[i + 1].iov_len; - } -} - static int cifs_writepages(struct address_space *mapping, struct writeback_control *wbc) { @@ -1746,8 +1791,10 @@ static int cifs_writepages(struct address_space *mapping, bool done = false, scanned = false, range_whole = false; pgoff_t end, index; struct cifs_writedata *wdata; + struct TCP_Server_Info *server; struct page *page; int rc = 0; + loff_t isize = i_size_read(mapping->host); /* * If wsize is smaller than the page cache size, default to writing @@ -1852,7 +1899,7 @@ retry: */ set_page_writeback(page); - if (page_offset(page) >= mapping->host->i_size) { + if (page_offset(page) >= isize) { done = true; unlock_page(page); end_page_writeback(page); @@ -1883,7 +1930,12 @@ retry: wdata->sync_mode = wbc->sync_mode; wdata->nr_pages = nr_pages; wdata->offset = page_offset(wdata->pages[0]); - wdata->marshal_iov = cifs_writepages_marshal_iov; + wdata->pagesz = PAGE_CACHE_SIZE; + wdata->tailsz = + min(isize - page_offset(wdata->pages[nr_pages - 1]), + (loff_t)PAGE_CACHE_SIZE); + wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + + wdata->tailsz; do { if (wdata->cfile != NULL) @@ -1896,7 +1948,8 @@ retry: break; } wdata->pid = wdata->cfile->pid; - rc = cifs_async_writev(wdata); + server = tlink_tcon(wdata->cfile->tlink)->ses->server; + rc = server->ops->async_writev(wdata); } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN); for (i = 0; i < nr_pages; ++i) @@ -2054,6 +2107,7 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, unsigned int xid; int rc = 0; struct cifs_tcon *tcon; + struct TCP_Server_Info *server; struct cifsFileInfo *smbfile = file->private_data; struct inode *inode = file->f_path.dentry->d_inode; struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); @@ -2077,8 +2131,13 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, } tcon = tlink_tcon(smbfile->tlink); - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) - rc = CIFSSMBFlush(xid, tcon, smbfile->netfid); + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { + server = tcon->ses->server; + if (server->ops->flush) + rc = server->ops->flush(xid, tcon, &smbfile->fid); + else + rc = -ENOSYS; + } free_xid(xid); mutex_unlock(&inode->i_mutex); @@ -2090,6 +2149,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) unsigned int xid; int rc = 0; struct cifs_tcon *tcon; + struct TCP_Server_Info *server; struct cifsFileInfo *smbfile = file->private_data; struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); struct inode *inode = file->f_mapping->host; @@ -2105,8 +2165,13 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) file->f_path.dentry->d_name.name, datasync); tcon = tlink_tcon(smbfile->tlink); - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) - rc = CIFSSMBFlush(xid, tcon, smbfile->netfid); + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { + server = tcon->ses->server; + if (server->ops->flush) + rc = server->ops->flush(xid, tcon, &smbfile->fid); + else + rc = -ENOSYS; + } free_xid(xid); mutex_unlock(&inode->i_mutex); @@ -2172,20 +2237,6 @@ size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len) } static void -cifs_uncached_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata) -{ - int i; - size_t bytes = wdata->bytes; - - /* marshal up the pages into iov array */ - for (i = 0; i < wdata->nr_pages; i++) { - iov[i + 1].iov_len = min_t(size_t, bytes, PAGE_SIZE); - iov[i + 1].iov_base = kmap(wdata->pages[i]); - bytes -= iov[i + 1].iov_len; - } -} - -static void cifs_uncached_writev_complete(struct work_struct *work) { int i; @@ -2215,6 +2266,9 @@ static int cifs_uncached_retry_writev(struct cifs_writedata *wdata) { int rc; + struct TCP_Server_Info *server; + + server = tlink_tcon(wdata->cfile->tlink)->ses->server; do { if (wdata->cfile->invalidHandle) { @@ -2222,7 +2276,7 @@ cifs_uncached_retry_writev(struct cifs_writedata *wdata) if (rc != 0) continue; } - rc = cifs_async_writev(wdata); + rc = server->ops->async_writev(wdata); } while (rc == -EAGAIN); return rc; @@ -2257,6 +2311,10 @@ cifs_iovec_write(struct file *file, const struct iovec *iov, cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); open_file = file->private_data; tcon = tlink_tcon(open_file->tlink); + + if (!tcon->ses->server->ops->async_writev) + return -ENOSYS; + offset = *poffset; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) @@ -2298,7 +2356,8 @@ cifs_iovec_write(struct file *file, const struct iovec *iov, wdata->cfile = cifsFileInfo_get(open_file); wdata->pid = pid; wdata->bytes = cur_len; - wdata->marshal_iov = cifs_uncached_marshal_iov; + wdata->pagesz = PAGE_SIZE; + wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE); rc = cifs_uncached_retry_writev(wdata); if (rc) { kref_put(&wdata->refcount, cifs_writedata_release); @@ -2376,40 +2435,110 @@ ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov, return written; } -ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static ssize_t +cifs_writev(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { - struct inode *inode; + struct file *file = iocb->ki_filp; + struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; + struct inode *inode = file->f_mapping->host; + struct cifsInodeInfo *cinode = CIFS_I(inode); + struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; + ssize_t rc = -EACCES; - inode = iocb->ki_filp->f_path.dentry->d_inode; + BUG_ON(iocb->ki_pos != pos); - if (CIFS_I(inode)->clientCanCacheAll) - return generic_file_aio_write(iocb, iov, nr_segs, pos); + sb_start_write(inode->i_sb); + + /* + * We need to hold the sem to be sure nobody modifies lock list + * with a brlock that prevents writing. + */ + down_read(&cinode->lock_sem); + if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs), + server->vals->exclusive_lock_type, NULL, + true)) { + mutex_lock(&inode->i_mutex); + rc = __generic_file_aio_write(iocb, iov, nr_segs, + &iocb->ki_pos); + mutex_unlock(&inode->i_mutex); + } + + if (rc > 0 || rc == -EIOCBQUEUED) { + ssize_t err; + + err = generic_write_sync(file, pos, rc); + if (err < 0 && rc > 0) + rc = err; + } + + up_read(&cinode->lock_sem); + sb_end_write(inode->i_sb); + return rc; +} + +ssize_t +cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; + struct cifsInodeInfo *cinode = CIFS_I(inode); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifsFileInfo *cfile = (struct cifsFileInfo *) + iocb->ki_filp->private_data; + struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); +#ifdef CONFIG_CIFS_SMB2 /* - * In strict cache mode we need to write the data to the server exactly - * from the pos to pos+len-1 rather than flush all affected pages - * because it may cause a error with mandatory locks on these pages but - * not on the region from pos to ppos+len-1. + * If we have an oplock for read and want to write a data to the file + * we need to store it in the page cache and then push it to the server + * to be sure the next read will get a valid data. */ + if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead) { + ssize_t written; + int rc; - return cifs_user_writev(iocb, iov, nr_segs, pos); + written = generic_file_aio_write(iocb, iov, nr_segs, pos); + rc = filemap_fdatawrite(inode->i_mapping); + if (rc) + return (ssize_t)rc; + + return written; + } +#endif + + /* + * For non-oplocked files in strict cache mode we need to write the data + * to the server exactly from the pos to pos+len-1 rather than flush all + * affected pages because it may cause a error with mandatory locks on + * these pages but not on the region from pos to ppos+len-1. + */ + + if (!cinode->clientCanCacheAll) + return cifs_user_writev(iocb, iov, nr_segs, pos); + + if (cap_unix(tcon->ses) && + (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && + ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) + return generic_file_aio_write(iocb, iov, nr_segs, pos); + + return cifs_writev(iocb, iov, nr_segs, pos); } static struct cifs_readdata * -cifs_readdata_alloc(unsigned int nr_vecs, work_func_t complete) +cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete) { struct cifs_readdata *rdata; - rdata = kzalloc(sizeof(*rdata) + - sizeof(struct kvec) * nr_vecs, GFP_KERNEL); + rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages), + GFP_KERNEL); if (rdata != NULL) { kref_init(&rdata->refcount); INIT_LIST_HEAD(&rdata->list); init_completion(&rdata->done); INIT_WORK(&rdata->work, complete); - INIT_LIST_HEAD(&rdata->pages); } + return rdata; } @@ -2426,25 +2555,25 @@ cifs_readdata_release(struct kref *refcount) } static int -cifs_read_allocate_pages(struct list_head *list, unsigned int npages) +cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages) { int rc = 0; - struct page *page, *tpage; + struct page *page; unsigned int i; - for (i = 0; i < npages; i++) { + for (i = 0; i < nr_pages; i++) { page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM); if (!page) { rc = -ENOMEM; break; } - list_add(&page->lru, list); + rdata->pages[i] = page; } if (rc) { - list_for_each_entry_safe(page, tpage, list, lru) { - list_del(&page->lru); - put_page(page); + for (i = 0; i < nr_pages; i++) { + put_page(rdata->pages[i]); + rdata->pages[i] = NULL; } } return rc; @@ -2453,13 +2582,13 @@ cifs_read_allocate_pages(struct list_head *list, unsigned int npages) static void cifs_uncached_readdata_release(struct kref *refcount) { - struct page *page, *tpage; struct cifs_readdata *rdata = container_of(refcount, struct cifs_readdata, refcount); + unsigned int i; - list_for_each_entry_safe(page, tpage, &rdata->pages, lru) { - list_del(&page->lru); - put_page(page); + for (i = 0; i < rdata->nr_pages; i++) { + put_page(rdata->pages[i]); + rdata->pages[i] = NULL; } cifs_readdata_release(refcount); } @@ -2468,6 +2597,9 @@ static int cifs_retry_async_readv(struct cifs_readdata *rdata) { int rc; + struct TCP_Server_Info *server; + + server = tlink_tcon(rdata->cfile->tlink)->ses->server; do { if (rdata->cfile->invalidHandle) { @@ -2475,7 +2607,7 @@ cifs_retry_async_readv(struct cifs_readdata *rdata) if (rc != 0) continue; } - rc = cifs_async_readv(rdata); + rc = server->ops->async_readv(rdata); } while (rc == -EAGAIN); return rc; @@ -2500,17 +2632,18 @@ cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov, int rc = 0; struct iov_iter ii; size_t pos = rdata->offset - offset; - struct page *page, *tpage; ssize_t remaining = rdata->bytes; unsigned char *pdata; + unsigned int i; /* set up iov_iter and advance to the correct offset */ iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0); iov_iter_advance(&ii, pos); *copied = 0; - list_for_each_entry_safe(page, tpage, &rdata->pages, lru) { + for (i = 0; i < rdata->nr_pages; i++) { ssize_t copy; + struct page *page = rdata->pages[i]; /* copy a whole page or whatever's left */ copy = min_t(ssize_t, remaining, PAGE_SIZE); @@ -2530,9 +2663,6 @@ cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov, iov_iter_advance(&ii, copy); } } - - list_del(&page->lru); - put_page(page); } return rc; @@ -2544,59 +2674,56 @@ cifs_uncached_readv_complete(struct work_struct *work) struct cifs_readdata *rdata = container_of(work, struct cifs_readdata, work); - /* if the result is non-zero then the pages weren't kmapped */ - if (rdata->result == 0) { - struct page *page; - - list_for_each_entry(page, &rdata->pages, lru) - kunmap(page); - } - complete(&rdata->done); kref_put(&rdata->refcount, cifs_uncached_readdata_release); } static int -cifs_uncached_read_marshal_iov(struct cifs_readdata *rdata, - unsigned int remaining) +cifs_uncached_read_into_pages(struct TCP_Server_Info *server, + struct cifs_readdata *rdata, unsigned int len) { - int len = 0; - struct page *page, *tpage; + int total_read = 0, result = 0; + unsigned int i; + unsigned int nr_pages = rdata->nr_pages; + struct kvec iov; + + rdata->tailsz = PAGE_SIZE; + for (i = 0; i < nr_pages; i++) { + struct page *page = rdata->pages[i]; - rdata->nr_iov = 1; - list_for_each_entry_safe(page, tpage, &rdata->pages, lru) { - if (remaining >= PAGE_SIZE) { + if (len >= PAGE_SIZE) { /* enough data to fill the page */ - rdata->iov[rdata->nr_iov].iov_base = kmap(page); - rdata->iov[rdata->nr_iov].iov_len = PAGE_SIZE; - cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu", - rdata->nr_iov, page->index, - rdata->iov[rdata->nr_iov].iov_base, - rdata->iov[rdata->nr_iov].iov_len); - ++rdata->nr_iov; - len += PAGE_SIZE; - remaining -= PAGE_SIZE; - } else if (remaining > 0) { + iov.iov_base = kmap(page); + iov.iov_len = PAGE_SIZE; + cFYI(1, "%u: iov_base=%p iov_len=%zu", + i, iov.iov_base, iov.iov_len); + len -= PAGE_SIZE; + } else if (len > 0) { /* enough for partial page, fill and zero the rest */ - rdata->iov[rdata->nr_iov].iov_base = kmap(page); - rdata->iov[rdata->nr_iov].iov_len = remaining; - cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu", - rdata->nr_iov, page->index, - rdata->iov[rdata->nr_iov].iov_base, - rdata->iov[rdata->nr_iov].iov_len); - memset(rdata->iov[rdata->nr_iov].iov_base + remaining, - '\0', PAGE_SIZE - remaining); - ++rdata->nr_iov; - len += remaining; - remaining = 0; + iov.iov_base = kmap(page); + iov.iov_len = len; + cFYI(1, "%u: iov_base=%p iov_len=%zu", + i, iov.iov_base, iov.iov_len); + memset(iov.iov_base + len, '\0', PAGE_SIZE - len); + rdata->tailsz = len; + len = 0; } else { /* no need to hold page hostage */ - list_del(&page->lru); + rdata->pages[i] = NULL; + rdata->nr_pages--; put_page(page); + continue; } + + result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len); + kunmap(page); + if (result < 0) + break; + + total_read += result; } - return len; + return total_read > 0 ? total_read : result; } static ssize_t @@ -2627,6 +2754,9 @@ cifs_iovec_read(struct file *file, const struct iovec *iov, open_file = file->private_data; tcon = tlink_tcon(open_file->tlink); + if (!tcon->ses->server->ops->async_readv) + return -ENOSYS; + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) pid = open_file->pid; else @@ -2647,15 +2777,17 @@ cifs_iovec_read(struct file *file, const struct iovec *iov, goto error; } - rc = cifs_read_allocate_pages(&rdata->pages, npages); + rc = cifs_read_allocate_pages(rdata, npages); if (rc) goto error; rdata->cfile = cifsFileInfo_get(open_file); + rdata->nr_pages = npages; rdata->offset = offset; rdata->bytes = cur_len; rdata->pid = pid; - rdata->marshal_iov = cifs_uncached_read_marshal_iov; + rdata->pagesz = PAGE_SIZE; + rdata->read_into_pages = cifs_uncached_read_into_pages; rc = cifs_retry_async_readv(rdata); error: @@ -2706,6 +2838,10 @@ restart_loop: cifs_stats_bytes_read(tcon, total_read); *poffset += total_read; + /* mask nodata case */ + if (rc == -ENODATA) + rc = 0; + return total_read ? total_read : rc; } @@ -2721,15 +2857,17 @@ ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov, return read; } -ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +ssize_t +cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { - struct inode *inode; - - inode = iocb->ki_filp->f_path.dentry->d_inode; - - if (CIFS_I(inode)->clientCanCacheRead) - return generic_file_aio_read(iocb, iov, nr_segs, pos); + struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; + struct cifsInodeInfo *cinode = CIFS_I(inode); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifsFileInfo *cfile = (struct cifsFileInfo *) + iocb->ki_filp->private_data; + struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); + int rc = -EACCES; /* * In strict cache mode we need to read from the server all the time @@ -2739,12 +2877,29 @@ ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, * on pages affected by this read but not on the region from pos to * pos+len-1. */ + if (!cinode->clientCanCacheRead) + return cifs_user_readv(iocb, iov, nr_segs, pos); + + if (cap_unix(tcon->ses) && + (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && + ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) + return generic_file_aio_read(iocb, iov, nr_segs, pos); - return cifs_user_readv(iocb, iov, nr_segs, pos); + /* + * We need to hold the sem to be sure nobody modifies lock list + * with a brlock that prevents reading. + */ + down_read(&cinode->lock_sem); + if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs), + tcon->ses->server->vals->shared_lock_type, + NULL, true)) + rc = generic_file_aio_read(iocb, iov, nr_segs, pos); + up_read(&cinode->lock_sem); + return rc; } -static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, - loff_t *poffset) +static ssize_t +cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset) { int rc = -EACCES; unsigned int bytes_read = 0; @@ -2753,8 +2908,9 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, unsigned int rsize; struct cifs_sb_info *cifs_sb; struct cifs_tcon *tcon; + struct TCP_Server_Info *server; unsigned int xid; - char *current_offset; + char *cur_offset; struct cifsFileInfo *open_file; struct cifs_io_parms io_parms; int buf_type = CIFS_NO_BUFFER; @@ -2773,6 +2929,12 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, } open_file = file->private_data; tcon = tlink_tcon(open_file->tlink); + server = tcon->ses->server; + + if (!server->ops->sync_read) { + free_xid(xid); + return -ENOSYS; + } if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) pid = open_file->pid; @@ -2782,9 +2944,8 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, if ((file->f_flags & O_ACCMODE) == O_WRONLY) cFYI(1, "attempting read on write only file instance"); - for (total_read = 0, current_offset = read_data; - read_size > total_read; - total_read += bytes_read, current_offset += bytes_read) { + for (total_read = 0, cur_offset = read_data; read_size > total_read; + total_read += bytes_read, cur_offset += bytes_read) { current_read_size = min_t(uint, read_size - total_read, rsize); /* * For windows me and 9x we do not want to request more than it @@ -2802,13 +2963,13 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, if (rc != 0) break; } - io_parms.netfid = open_file->netfid; io_parms.pid = pid; io_parms.tcon = tcon; - io_parms.offset = *poffset; + io_parms.offset = *offset; io_parms.length = current_read_size; - rc = CIFSSMBRead(xid, &io_parms, &bytes_read, - ¤t_offset, &buf_type); + rc = server->ops->sync_read(xid, open_file, &io_parms, + &bytes_read, &cur_offset, + &buf_type); } if (rc || (bytes_read == 0)) { if (total_read) { @@ -2819,7 +2980,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, } } else { cifs_stats_bytes_read(tcon, total_read); - *poffset += bytes_read; + *offset += bytes_read; } } free_xid(xid); @@ -2885,16 +3046,16 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma) static void cifs_readv_complete(struct work_struct *work) { + unsigned int i; struct cifs_readdata *rdata = container_of(work, struct cifs_readdata, work); - struct page *page, *tpage; - list_for_each_entry_safe(page, tpage, &rdata->pages, lru) { - list_del(&page->lru); + for (i = 0; i < rdata->nr_pages; i++) { + struct page *page = rdata->pages[i]; + lru_cache_add_file(page); if (rdata->result == 0) { - kunmap(page); flush_dcache_page(page); SetPageUptodate(page); } @@ -2905,49 +3066,48 @@ cifs_readv_complete(struct work_struct *work) cifs_readpage_to_fscache(rdata->mapping->host, page); page_cache_release(page); + rdata->pages[i] = NULL; } kref_put(&rdata->refcount, cifs_readdata_release); } static int -cifs_readpages_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining) +cifs_readpages_read_into_pages(struct TCP_Server_Info *server, + struct cifs_readdata *rdata, unsigned int len) { - int len = 0; - struct page *page, *tpage; + int total_read = 0, result = 0; + unsigned int i; u64 eof; pgoff_t eof_index; + unsigned int nr_pages = rdata->nr_pages; + struct kvec iov; /* determine the eof that the server (probably) has */ eof = CIFS_I(rdata->mapping->host)->server_eof; eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0; cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index); - rdata->nr_iov = 1; - list_for_each_entry_safe(page, tpage, &rdata->pages, lru) { - if (remaining >= PAGE_CACHE_SIZE) { + rdata->tailsz = PAGE_CACHE_SIZE; + for (i = 0; i < nr_pages; i++) { + struct page *page = rdata->pages[i]; + + if (len >= PAGE_CACHE_SIZE) { /* enough data to fill the page */ - rdata->iov[rdata->nr_iov].iov_base = kmap(page); - rdata->iov[rdata->nr_iov].iov_len = PAGE_CACHE_SIZE; + iov.iov_base = kmap(page); + iov.iov_len = PAGE_CACHE_SIZE; cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu", - rdata->nr_iov, page->index, - rdata->iov[rdata->nr_iov].iov_base, - rdata->iov[rdata->nr_iov].iov_len); - ++rdata->nr_iov; - len += PAGE_CACHE_SIZE; - remaining -= PAGE_CACHE_SIZE; - } else if (remaining > 0) { + i, page->index, iov.iov_base, iov.iov_len); + len -= PAGE_CACHE_SIZE; + } else if (len > 0) { /* enough for partial page, fill and zero the rest */ - rdata->iov[rdata->nr_iov].iov_base = kmap(page); - rdata->iov[rdata->nr_iov].iov_len = remaining; + iov.iov_base = kmap(page); + iov.iov_len = len; cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu", - rdata->nr_iov, page->index, - rdata->iov[rdata->nr_iov].iov_base, - rdata->iov[rdata->nr_iov].iov_len); - memset(rdata->iov[rdata->nr_iov].iov_base + remaining, - '\0', PAGE_CACHE_SIZE - remaining); - ++rdata->nr_iov; - len += remaining; - remaining = 0; + i, page->index, iov.iov_base, iov.iov_len); + memset(iov.iov_base + len, + '\0', PAGE_CACHE_SIZE - len); + rdata->tailsz = len; + len = 0; } else if (page->index > eof_index) { /* * The VFS will not try to do readahead past the @@ -2958,22 +3118,33 @@ cifs_readpages_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining) * fill them until the writes are flushed. */ zero_user(page, 0, PAGE_CACHE_SIZE); - list_del(&page->lru); lru_cache_add_file(page); flush_dcache_page(page); SetPageUptodate(page); unlock_page(page); page_cache_release(page); + rdata->pages[i] = NULL; + rdata->nr_pages--; + continue; } else { /* no need to hold page hostage */ - list_del(&page->lru); lru_cache_add_file(page); unlock_page(page); page_cache_release(page); + rdata->pages[i] = NULL; + rdata->nr_pages--; + continue; } + + result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len); + kunmap(page); + if (result < 0) + break; + + total_read += result; } - return len; + return total_read > 0 ? total_read : result; } static int cifs_readpages(struct file *file, struct address_space *mapping, @@ -3027,6 +3198,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, * the rdata->pages, then we want them in increasing order. */ while (!list_empty(page_list)) { + unsigned int i; unsigned int bytes = PAGE_CACHE_SIZE; unsigned int expected_index; unsigned int nr_pages = 1; @@ -3096,14 +3268,18 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, rdata->offset = offset; rdata->bytes = bytes; rdata->pid = pid; - rdata->marshal_iov = cifs_readpages_marshal_iov; - list_splice_init(&tmplist, &rdata->pages); + rdata->pagesz = PAGE_CACHE_SIZE; + rdata->read_into_pages = cifs_readpages_read_into_pages; + + list_for_each_entry_safe(page, tpage, &tmplist, lru) { + list_del(&page->lru); + rdata->pages[rdata->nr_pages++] = page; + } rc = cifs_retry_async_readv(rdata); if (rc != 0) { - list_for_each_entry_safe(page, tpage, &rdata->pages, - lru) { - list_del(&page->lru); + for (i = 0; i < rdata->nr_pages; i++) { + page = rdata->pages[i]; lru_cache_add_file(page); unlock_page(page); page_cache_release(page); @@ -3347,6 +3523,7 @@ void cifs_oplock_break(struct work_struct *work) oplock_break); struct inode *inode = cfile->dentry->d_inode; struct cifsInodeInfo *cinode = CIFS_I(inode); + struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); int rc = 0; if (inode && S_ISREG(inode->i_mode)) { @@ -3374,10 +3551,8 @@ void cifs_oplock_break(struct work_struct *work) * disconnected since oplock already released by the server */ if (!cfile->oplock_break_cancelled) { - rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->netfid, - current->tgid, 0, 0, 0, 0, - LOCKING_ANDX_OPLOCK_RELEASE, false, - cinode->clientCanCacheRead ? 1 : 0); + rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid, + cinode); cFYI(1, "Oplock release rc = %d", rc); } } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index cb79c7edecb0..afdff79651f1 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -282,7 +282,8 @@ cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb) fattr->cf_flags |= CIFS_FATTR_DFS_REFERRAL; } -int cifs_get_file_info_unix(struct file *filp) +static int +cifs_get_file_info_unix(struct file *filp) { int rc; unsigned int xid; @@ -294,7 +295,7 @@ int cifs_get_file_info_unix(struct file *filp) struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); xid = get_xid(); - rc = CIFSSMBUnixQFileInfo(xid, tcon, cfile->netfid, &find_data); + rc = CIFSSMBUnixQFileInfo(xid, tcon, cfile->fid.netfid, &find_data); if (!rc) { cifs_unix_basic_to_fattr(&fattr, &find_data, cifs_sb); } else if (rc == -EREMOTE) { @@ -550,7 +551,8 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, fattr->cf_gid = cifs_sb->mnt_gid; } -int cifs_get_file_info(struct file *filp) +static int +cifs_get_file_info(struct file *filp) { int rc; unsigned int xid; @@ -560,9 +562,13 @@ int cifs_get_file_info(struct file *filp) struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifsFileInfo *cfile = filp->private_data; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); + struct TCP_Server_Info *server = tcon->ses->server; + + if (!server->ops->query_file_info) + return -ENOSYS; xid = get_xid(); - rc = CIFSSMBQFileInfo(xid, tcon, cfile->netfid, &find_data); + rc = server->ops->query_file_info(xid, tcon, &cfile->fid, &find_data); switch (rc) { case 0: cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false); @@ -601,7 +607,9 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, FILE_ALL_INFO *data, struct super_block *sb, int xid, const __u16 *fid) { - int rc = 0, tmprc; + bool validinum = false; + __u16 srchflgs; + int rc = 0, tmprc = ENOSYS; struct cifs_tcon *tcon; struct TCP_Server_Info *server; struct tcon_link *tlink; @@ -609,6 +617,7 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, char *buf = NULL; bool adjust_tz = false; struct cifs_fattr fattr; + struct cifs_search_info *srchinf = NULL; tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) @@ -647,9 +656,38 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, } else if (rc == -EREMOTE) { cifs_create_dfs_fattr(&fattr, sb); rc = 0; - } else { + } else if (rc == -EACCES && backup_cred(cifs_sb)) { + srchinf = kzalloc(sizeof(struct cifs_search_info), + GFP_KERNEL); + if (srchinf == NULL) { + rc = -ENOMEM; + goto cgii_exit; + } + + srchinf->endOfSearch = false; + srchinf->info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO; + + srchflgs = CIFS_SEARCH_CLOSE_ALWAYS | + CIFS_SEARCH_CLOSE_AT_END | + CIFS_SEARCH_BACKUP_SEARCH; + + rc = CIFSFindFirst(xid, tcon, full_path, + cifs_sb, NULL, srchflgs, srchinf, false); + if (!rc) { + data = + (FILE_ALL_INFO *)srchinf->srch_entries_start; + + cifs_dir_info_to_fattr(&fattr, + (FILE_DIRECTORY_INFO *)data, cifs_sb); + fattr.cf_uniqueid = le64_to_cpu( + ((SEARCH_ID_FULL_DIR_INFO *)data)->UniqueId); + validinum = true; + + cifs_buf_release(srchinf->ntwrk_buf_start); + } + kfree(srchinf); + } else goto cgii_exit; - } /* * If an inode wasn't passed in, then get the inode number @@ -660,23 +698,21 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, */ if (*inode == NULL) { if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { - if (server->ops->get_srv_inum) - tmprc = server->ops->get_srv_inum(xid, tcon, - cifs_sb, full_path, &fattr.cf_uniqueid, - data); - else - tmprc = -ENOSYS; - if (tmprc || !fattr.cf_uniqueid) { - cFYI(1, "GetSrvInodeNum rc %d", tmprc); - fattr.cf_uniqueid = iunique(sb, ROOT_I); - cifs_autodisable_serverino(cifs_sb); + if (validinum == false) { + if (server->ops->get_srv_inum) + tmprc = server->ops->get_srv_inum(xid, + tcon, cifs_sb, full_path, + &fattr.cf_uniqueid, data); + if (tmprc) { + cFYI(1, "GetSrvInodeNum rc %d", tmprc); + fattr.cf_uniqueid = iunique(sb, ROOT_I); + cifs_autodisable_serverino(cifs_sb); + } } - } else { + } else fattr.cf_uniqueid = iunique(sb, ROOT_I); - } - } else { + } else fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid; - } /* query for SFU type info if supported and needed */ if (fattr.cf_cifsattrs & ATTR_SYSTEM && @@ -876,25 +912,22 @@ out: return inode; } -static int +int cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid, - char *full_path, __u32 dosattr) + char *full_path, __u32 dosattr) { - int rc; - int oplock = 0; - __u16 netfid; - __u32 netpid; bool set_time = false; - struct cifsFileInfo *open_file; - struct cifsInodeInfo *cifsInode = CIFS_I(inode); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); - struct tcon_link *tlink = NULL; - struct cifs_tcon *pTcon; + struct TCP_Server_Info *server; FILE_BASIC_INFO info_buf; if (attrs == NULL) return -EINVAL; + server = cifs_sb_master_tcon(cifs_sb)->ses->server; + if (!server->ops->set_file_info) + return -ENOSYS; + if (attrs->ia_valid & ATTR_ATIME) { set_time = true; info_buf.LastAccessTime = @@ -925,81 +958,17 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid, info_buf.CreationTime = 0; /* don't change */ info_buf.Attributes = cpu_to_le32(dosattr); - /* - * If the file is already open for write, just use that fileid - */ - open_file = find_writable_file(cifsInode, true); - if (open_file) { - netfid = open_file->netfid; - netpid = open_file->pid; - pTcon = tlink_tcon(open_file->tlink); - goto set_via_filehandle; - } - - tlink = cifs_sb_tlink(cifs_sb); - if (IS_ERR(tlink)) { - rc = PTR_ERR(tlink); - tlink = NULL; - goto out; - } - pTcon = tlink_tcon(tlink); - - /* - * NT4 apparently returns success on this call, but it doesn't - * really work. - */ - if (!(pTcon->ses->flags & CIFS_SES_NT4)) { - rc = CIFSSMBSetPathInfo(xid, pTcon, full_path, - &info_buf, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - if (rc == 0) { - cifsInode->cifsAttrs = dosattr; - goto out; - } else if (rc != -EOPNOTSUPP && rc != -EINVAL) - goto out; - } - - cFYI(1, "calling SetFileInfo since SetPathInfo for " - "times not supported by this server"); - rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, - SYNCHRONIZE | FILE_WRITE_ATTRIBUTES, - CREATE_NOT_DIR, &netfid, &oplock, - NULL, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - - if (rc != 0) { - if (rc == -EIO) - rc = -EINVAL; - goto out; - } - - netpid = current->tgid; - -set_via_filehandle: - rc = CIFSSMBSetFileInfo(xid, pTcon, &info_buf, netfid, netpid); - if (!rc) - cifsInode->cifsAttrs = dosattr; - - if (open_file == NULL) - CIFSSMBClose(xid, pTcon, netfid); - else - cifsFileInfo_put(open_file); -out: - if (tlink != NULL) - cifs_put_tlink(tlink); - return rc; + return server->ops->set_file_info(inode, full_path, &info_buf, xid); } /* - * open the given file (if it isn't already), set the DELETE_ON_CLOSE bit + * Open the given file (if it isn't already), set the DELETE_ON_CLOSE bit * and rename it to a random name that hopefully won't conflict with * anything else. */ -static int -cifs_rename_pending_delete(char *full_path, struct dentry *dentry, - unsigned int xid) +int +cifs_rename_pending_delete(const char *full_path, struct dentry *dentry, + const unsigned int xid) { int oplock = 0; int rc; @@ -1136,6 +1105,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct tcon_link *tlink; struct cifs_tcon *tcon; + struct TCP_Server_Info *server; struct iattr *attrs = NULL; __u32 dosattr = 0, origattr = 0; @@ -1145,6 +1115,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) if (IS_ERR(tlink)) return PTR_ERR(tlink); tcon = tlink_tcon(tlink); + server = tcon->ses->server; xid = get_xid(); @@ -1167,8 +1138,12 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) } retry_std_delete: - rc = CIFSSMBDelFile(xid, tcon, full_path, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + if (!server->ops->unlink) { + rc = -ENOSYS; + goto psx_del_no_retry; + } + + rc = server->ops->unlink(xid, tcon, full_path, cifs_sb); psx_del_no_retry: if (!rc) { @@ -1177,9 +1152,14 @@ psx_del_no_retry: } else if (rc == -ENOENT) { d_drop(dentry); } else if (rc == -ETXTBSY) { - rc = cifs_rename_pending_delete(full_path, dentry, xid); - if (rc == 0) - cifs_drop_nlink(inode); + if (server->ops->rename_pending_delete) { + rc = server->ops->rename_pending_delete(full_path, + dentry, xid); + if (rc == 0) + cifs_drop_nlink(inode); + } + if (rc == -ETXTBSY) + rc = -EBUSY; } else if ((rc == -EACCES) && (dosattr == 0) && inode) { attrs = kzalloc(sizeof(*attrs), GFP_KERNEL); if (attrs == NULL) { @@ -1227,34 +1207,33 @@ unlink_out: } static int -cifs_mkdir_qinfo(struct inode *inode, struct dentry *dentry, umode_t mode, +cifs_mkdir_qinfo(struct inode *parent, struct dentry *dentry, umode_t mode, const char *full_path, struct cifs_sb_info *cifs_sb, struct cifs_tcon *tcon, const unsigned int xid) { int rc = 0; - struct inode *newinode = NULL; + struct inode *inode = NULL; if (tcon->unix_ext) - rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb, + rc = cifs_get_inode_info_unix(&inode, full_path, parent->i_sb, xid); else - rc = cifs_get_inode_info(&newinode, full_path, NULL, - inode->i_sb, xid, NULL); + rc = cifs_get_inode_info(&inode, full_path, NULL, parent->i_sb, + xid, NULL); + if (rc) return rc; - d_instantiate(dentry, newinode); /* * setting nlink not necessary except in cases where we failed to get it - * from the server or was set bogus + * from the server or was set bogus. Also, since this is a brand new + * inode, no need to grab the i_lock before setting the i_nlink. */ - spin_lock(&dentry->d_inode->i_lock); - if ((dentry->d_inode) && (dentry->d_inode->i_nlink < 2)) - set_nlink(dentry->d_inode, 2); - spin_unlock(&dentry->d_inode->i_lock); + if (inode->i_nlink < 2) + set_nlink(inode, 2); mode &= ~current_umask(); /* must turn on setgid bit if parent dir has it */ - if (inode->i_mode & S_ISGID) + if (parent->i_mode & S_ISGID) mode |= S_ISGID; if (tcon->unix_ext) { @@ -1267,8 +1246,8 @@ cifs_mkdir_qinfo(struct inode *inode, struct dentry *dentry, umode_t mode, }; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { args.uid = (__u64)current_fsuid(); - if (inode->i_mode & S_ISGID) - args.gid = (__u64)inode->i_gid; + if (parent->i_mode & S_ISGID) + args.gid = (__u64)parent->i_gid; else args.gid = (__u64)current_fsgid(); } else { @@ -1283,22 +1262,20 @@ cifs_mkdir_qinfo(struct inode *inode, struct dentry *dentry, umode_t mode, struct TCP_Server_Info *server = tcon->ses->server; if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && (mode & S_IWUGO) == 0 && server->ops->mkdir_setinfo) - server->ops->mkdir_setinfo(newinode, full_path, cifs_sb, + server->ops->mkdir_setinfo(inode, full_path, cifs_sb, tcon, xid); - if (dentry->d_inode) { - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) - dentry->d_inode->i_mode = (mode | S_IFDIR); - - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { - dentry->d_inode->i_uid = current_fsuid(); - if (inode->i_mode & S_ISGID) - dentry->d_inode->i_gid = inode->i_gid; - else - dentry->d_inode->i_gid = - current_fsgid(); - } + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) + inode->i_mode = (mode | S_IFDIR); + + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { + inode->i_uid = current_fsuid(); + if (inode->i_mode & S_ISGID) + inode->i_gid = parent->i_gid; + else + inode->i_gid = current_fsgid(); } } + d_instantiate(dentry, inode); return rc; } @@ -1495,29 +1472,32 @@ rmdir_exit: } static int -cifs_do_rename(unsigned int xid, struct dentry *from_dentry, - const char *fromPath, struct dentry *to_dentry, - const char *toPath) +cifs_do_rename(const unsigned int xid, struct dentry *from_dentry, + const char *from_path, struct dentry *to_dentry, + const char *to_path) { struct cifs_sb_info *cifs_sb = CIFS_SB(from_dentry->d_sb); struct tcon_link *tlink; - struct cifs_tcon *pTcon; + struct cifs_tcon *tcon; + struct TCP_Server_Info *server; __u16 srcfid; int oplock, rc; tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) return PTR_ERR(tlink); - pTcon = tlink_tcon(tlink); + tcon = tlink_tcon(tlink); + server = tcon->ses->server; + + if (!server->ops->rename) + return -ENOSYS; /* try path-based rename first */ - rc = CIFSSMBRename(xid, pTcon, fromPath, toPath, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + rc = server->ops->rename(xid, tcon, from_path, to_path, cifs_sb); /* - * don't bother with rename by filehandle unless file is busy and - * source Note that cross directory moves do not work with + * Don't bother with rename by filehandle unless file is busy and + * source. Note that cross directory moves do not work with * rename by filehandle to various Windows servers. */ if (rc == 0 || rc != -ETXTBSY) @@ -1528,29 +1508,28 @@ cifs_do_rename(unsigned int xid, struct dentry *from_dentry, goto do_rename_exit; /* open the file to be renamed -- we need DELETE perms */ - rc = CIFSSMBOpen(xid, pTcon, fromPath, FILE_OPEN, DELETE, + rc = CIFSSMBOpen(xid, tcon, from_path, FILE_OPEN, DELETE, CREATE_NOT_DIR, &srcfid, &oplock, NULL, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - if (rc == 0) { - rc = CIFSSMBRenameOpenFile(xid, pTcon, srcfid, + rc = CIFSSMBRenameOpenFile(xid, tcon, srcfid, (const char *) to_dentry->d_name.name, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - - CIFSSMBClose(xid, pTcon, srcfid); + CIFSSMBClose(xid, tcon, srcfid); } do_rename_exit: cifs_put_tlink(tlink); return rc; } -int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, - struct inode *target_dir, struct dentry *target_dentry) +int +cifs_rename(struct inode *source_dir, struct dentry *source_dentry, + struct inode *target_dir, struct dentry *target_dentry) { - char *fromName = NULL; - char *toName = NULL; + char *from_name = NULL; + char *to_name = NULL; struct cifs_sb_info *cifs_sb; struct tcon_link *tlink; struct cifs_tcon *tcon; @@ -1571,25 +1550,25 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, * we already have the rename sem so we do not need to * grab it again here to protect the path integrity */ - fromName = build_path_from_dentry(source_dentry); - if (fromName == NULL) { + from_name = build_path_from_dentry(source_dentry); + if (from_name == NULL) { rc = -ENOMEM; goto cifs_rename_exit; } - toName = build_path_from_dentry(target_dentry); - if (toName == NULL) { + to_name = build_path_from_dentry(target_dentry); + if (to_name == NULL) { rc = -ENOMEM; goto cifs_rename_exit; } - rc = cifs_do_rename(xid, source_dentry, fromName, - target_dentry, toName); + rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry, + to_name); if (rc == -EEXIST && tcon->unix_ext) { /* - * Are src and dst hardlinks of same inode? We can - * only tell with unix extensions enabled + * Are src and dst hardlinks of same inode? We can only tell + * with unix extensions enabled. */ info_buf_source = kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO), @@ -1600,19 +1579,19 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, } info_buf_target = info_buf_source + 1; - tmprc = CIFSSMBUnixQPathInfo(xid, tcon, fromName, - info_buf_source, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + tmprc = CIFSSMBUnixQPathInfo(xid, tcon, from_name, + info_buf_source, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); if (tmprc != 0) goto unlink_target; - tmprc = CIFSSMBUnixQPathInfo(xid, tcon, toName, - info_buf_target, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + tmprc = CIFSSMBUnixQPathInfo(xid, tcon, to_name, + info_buf_target, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); if (tmprc == 0 && (info_buf_source->UniqueId == info_buf_target->UniqueId)) { @@ -1620,8 +1599,11 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, rc = 0; goto cifs_rename_exit; } - } /* else ... BB we could add the same check for Windows by - checking the UniqueId via FILE_INTERNAL_INFO */ + } + /* + * else ... BB we could add the same check for Windows by + * checking the UniqueId via FILE_INTERNAL_INFO + */ unlink_target: /* Try unlinking the target dentry if it's not negative */ @@ -1629,15 +1611,14 @@ unlink_target: tmprc = cifs_unlink(target_dir, target_dentry); if (tmprc) goto cifs_rename_exit; - - rc = cifs_do_rename(xid, source_dentry, fromName, - target_dentry, toName); + rc = cifs_do_rename(xid, source_dentry, from_name, + target_dentry, to_name); } cifs_rename_exit: kfree(info_buf_source); - kfree(fromName); - kfree(toName); + kfree(from_name); + kfree(to_name); free_xid(xid); cifs_put_tlink(tlink); return rc; @@ -1862,7 +1843,8 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, struct cifsInodeInfo *cifsInode = CIFS_I(inode); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct tcon_link *tlink = NULL; - struct cifs_tcon *pTcon = NULL; + struct cifs_tcon *tcon = NULL; + struct TCP_Server_Info *server; struct cifs_io_parms io_parms; /* @@ -1876,19 +1858,21 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, */ open_file = find_writable_file(cifsInode, true); if (open_file) { - __u16 nfid = open_file->netfid; - __u32 npid = open_file->pid; - pTcon = tlink_tcon(open_file->tlink); - rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, nfid, - npid, false); + tcon = tlink_tcon(open_file->tlink); + server = tcon->ses->server; + if (server->ops->set_file_size) + rc = server->ops->set_file_size(xid, tcon, open_file, + attrs->ia_size, false); + else + rc = -ENOSYS; cifsFileInfo_put(open_file); cFYI(1, "SetFSize for attrs rc = %d", rc); if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { unsigned int bytes_written; - io_parms.netfid = nfid; - io_parms.pid = npid; - io_parms.tcon = pTcon; + io_parms.netfid = open_file->fid.netfid; + io_parms.pid = open_file->pid; + io_parms.tcon = tcon; io_parms.offset = 0; io_parms.length = attrs->ia_size; rc = CIFSSMBWrite(xid, &io_parms, &bytes_written, @@ -1898,52 +1882,55 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, } else rc = -EINVAL; - if (rc != 0) { - if (pTcon == NULL) { - tlink = cifs_sb_tlink(cifs_sb); - if (IS_ERR(tlink)) - return PTR_ERR(tlink); - pTcon = tlink_tcon(tlink); - } + if (!rc) + goto set_size_out; - /* Set file size by pathname rather than by handle - either because no valid, writeable file handle for - it was found or because there was an error setting - it by handle */ - rc = CIFSSMBSetEOF(xid, pTcon, full_path, attrs->ia_size, - false, cifs_sb->local_nls, + if (tcon == NULL) { + tlink = cifs_sb_tlink(cifs_sb); + if (IS_ERR(tlink)) + return PTR_ERR(tlink); + tcon = tlink_tcon(tlink); + server = tcon->ses->server; + } + + /* + * Set file size by pathname rather than by handle either because no + * valid, writeable file handle for it was found or because there was + * an error setting it by handle. + */ + if (server->ops->set_path_size) + rc = server->ops->set_path_size(xid, tcon, full_path, + attrs->ia_size, cifs_sb, false); + else + rc = -ENOSYS; + cFYI(1, "SetEOF by path (setattrs) rc = %d", rc); + if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { + __u16 netfid; + int oplock = 0; + + rc = SMBLegacyOpen(xid, tcon, full_path, FILE_OPEN, + GENERIC_WRITE, CREATE_NOT_DIR, &netfid, + &oplock, NULL, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - cFYI(1, "SetEOF by path (setattrs) rc = %d", rc); - if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { - __u16 netfid; - int oplock = 0; - - rc = SMBLegacyOpen(xid, pTcon, full_path, - FILE_OPEN, GENERIC_WRITE, - CREATE_NOT_DIR, &netfid, &oplock, NULL, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - if (rc == 0) { - unsigned int bytes_written; - - io_parms.netfid = netfid; - io_parms.pid = current->tgid; - io_parms.tcon = pTcon; - io_parms.offset = 0; - io_parms.length = attrs->ia_size; - rc = CIFSSMBWrite(xid, &io_parms, - &bytes_written, - NULL, NULL, 1); - cFYI(1, "wrt seteof rc %d", rc); - CIFSSMBClose(xid, pTcon, netfid); - } + CIFS_MOUNT_MAP_SPECIAL_CHR); + if (rc == 0) { + unsigned int bytes_written; + + io_parms.netfid = netfid; + io_parms.pid = current->tgid; + io_parms.tcon = tcon; + io_parms.offset = 0; + io_parms.length = attrs->ia_size; + rc = CIFSSMBWrite(xid, &io_parms, &bytes_written, NULL, + NULL, 1); + cFYI(1, "wrt seteof rc %d", rc); + CIFSSMBClose(xid, tcon, netfid); } - if (tlink) - cifs_put_tlink(tlink); } + if (tlink) + cifs_put_tlink(tlink); +set_size_out: if (rc == 0) { cifsInode->server_eof = attrs->ia_size; cifs_setsize(inode, attrs->ia_size); @@ -2050,7 +2037,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) args->device = 0; open_file = find_writable_file(cifsInode, true); if (open_file) { - u16 nfid = open_file->netfid; + u16 nfid = open_file->fid.netfid; u32 npid = open_file->pid; pTcon = tlink_tcon(open_file->tlink); rc = CIFSSMBUnixSetFileInfo(xid, pTcon, args, nfid, npid); diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index ae082a66de2f..fd5009d56f9f 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -28,8 +28,6 @@ #include "cifs_debug.h" #include "cifsfs.h" -#define CIFS_IOC_CHECKUMOUNT _IO(0xCF, 2) - long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) { struct inode *inode = filep->f_dentry->d_inode; @@ -51,23 +49,6 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) cifs_sb = CIFS_SB(inode->i_sb); switch (command) { - static bool warned = false; - case CIFS_IOC_CHECKUMOUNT: - if (!warned) { - warned = true; - cERROR(1, "the CIFS_IOC_CHECKMOUNT ioctl will " - "be deprecated in 3.7. Please " - "migrate away from the use of " - "umount.cifs"); - } - cFYI(1, "User unmount attempted"); - if (cifs_sb->mnt_uid == current_uid()) - rc = 0; - else { - rc = -EACCES; - cFYI(1, "uids do not match"); - } - break; #ifdef CONFIG_CIFS_POSIX case FS_IOC_GETFLAGS: if (pSMBFile == NULL) @@ -75,8 +56,9 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) tcon = tlink_tcon(pSMBFile->tlink); caps = le64_to_cpu(tcon->fsUnixInfo.Capability); if (CIFS_UNIX_EXTATTR_CAP & caps) { - rc = CIFSGetExtAttr(xid, tcon, pSMBFile->netfid, - &ExtAttrBits, &ExtAttrMask); + rc = CIFSGetExtAttr(xid, tcon, + pSMBFile->fid.netfid, + &ExtAttrBits, &ExtAttrMask); if (rc == 0) rc = put_user(ExtAttrBits & FS_FL_USER_VISIBLE, @@ -94,8 +76,12 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) rc = -EFAULT; break; } - /* rc= CIFSGetExtAttr(xid,tcon,pSMBFile->netfid, - extAttrBits, &ExtAttrMask);*/ + /* + * rc = CIFSGetExtAttr(xid, tcon, + * pSMBFile->fid.netfid, + * extAttrBits, + * &ExtAttrMask); + */ } cFYI(1, "set flags not implemented yet"); break; diff --git a/fs/cifs/link.c b/fs/cifs/link.c index e6ce3b112875..51dc2fb6e854 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -391,72 +391,86 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode, { int rc = -EACCES; unsigned int xid; - char *fromName = NULL; - char *toName = NULL; + char *from_name = NULL; + char *to_name = NULL; struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct tcon_link *tlink; - struct cifs_tcon *pTcon; + struct cifs_tcon *tcon; + struct TCP_Server_Info *server; struct cifsInodeInfo *cifsInode; tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) return PTR_ERR(tlink); - pTcon = tlink_tcon(tlink); + tcon = tlink_tcon(tlink); xid = get_xid(); - fromName = build_path_from_dentry(old_file); - toName = build_path_from_dentry(direntry); - if ((fromName == NULL) || (toName == NULL)) { + from_name = build_path_from_dentry(old_file); + to_name = build_path_from_dentry(direntry); + if ((from_name == NULL) || (to_name == NULL)) { rc = -ENOMEM; goto cifs_hl_exit; } - if (pTcon->unix_ext) - rc = CIFSUnixCreateHardLink(xid, pTcon, fromName, toName, + if (tcon->unix_ext) + rc = CIFSUnixCreateHardLink(xid, tcon, from_name, to_name, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); else { - rc = CIFSCreateHardLink(xid, pTcon, fromName, toName, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + server = tcon->ses->server; + if (!server->ops->create_hardlink) + return -ENOSYS; + rc = server->ops->create_hardlink(xid, tcon, from_name, to_name, + cifs_sb); if ((rc == -EIO) || (rc == -EINVAL)) rc = -EOPNOTSUPP; } d_drop(direntry); /* force new lookup from server of target */ - /* if source file is cached (oplocked) revalidate will not go to server - until the file is closed or oplock broken so update nlinks locally */ + /* + * if source file is cached (oplocked) revalidate will not go to server + * until the file is closed or oplock broken so update nlinks locally + */ if (old_file->d_inode) { cifsInode = CIFS_I(old_file->d_inode); if (rc == 0) { spin_lock(&old_file->d_inode->i_lock); inc_nlink(old_file->d_inode); spin_unlock(&old_file->d_inode->i_lock); -/* BB should we make this contingent on superblock flag NOATIME? */ -/* old_file->d_inode->i_ctime = CURRENT_TIME;*/ - /* parent dir timestamps will update from srv - within a second, would it really be worth it - to set the parent dir cifs inode time to zero - to force revalidate (faster) for it too? */ + /* + * BB should we make this contingent on superblock flag + * NOATIME? + */ + /* old_file->d_inode->i_ctime = CURRENT_TIME; */ + /* + * parent dir timestamps will update from srv within a + * second, would it really be worth it to set the parent + * dir cifs inode time to zero to force revalidate + * (faster) for it too? + */ } - /* if not oplocked will force revalidate to get info - on source file from srv */ + /* + * if not oplocked will force revalidate to get info on source + * file from srv + */ cifsInode->time = 0; - /* Will update parent dir timestamps from srv within a second. - Would it really be worth it to set the parent dir (cifs - inode) time field to zero to force revalidate on parent - directory faster ie - CIFS_I(inode)->time = 0; */ + /* + * Will update parent dir timestamps from srv within a second. + * Would it really be worth it to set the parent dir (cifs + * inode) time field to zero to force revalidate on parent + * directory faster ie + * + * CIFS_I(inode)->time = 0; + */ } cifs_hl_exit: - kfree(fromName); - kfree(toName); + kfree(from_name); + kfree(to_name); free_xid(xid); cifs_put_tlink(tlink); return rc; diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index ce41fee07e5b..3a00c0d0cead 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -466,7 +466,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) list_for_each(tmp2, &tcon->openFileList) { netfile = list_entry(tmp2, struct cifsFileInfo, tlist); - if (pSMB->Fid != netfile->netfid) + if (pSMB->Fid != netfile->fid.netfid) continue; cFYI(1, "file id match, oplock break"); @@ -579,3 +579,33 @@ backup_cred(struct cifs_sb_info *cifs_sb) return false; } + +void +cifs_del_pending_open(struct cifs_pending_open *open) +{ + spin_lock(&cifs_file_list_lock); + list_del(&open->olist); + spin_unlock(&cifs_file_list_lock); +} + +void +cifs_add_pending_open_locked(struct cifs_fid *fid, struct tcon_link *tlink, + struct cifs_pending_open *open) +{ +#ifdef CONFIG_CIFS_SMB2 + memcpy(open->lease_key, fid->lease_key, SMB2_LEASE_KEY_SIZE); +#endif + open->oplock = CIFS_OPLOCK_NO_CHANGE; + open->tlink = tlink; + fid->pending_open = open; + list_add_tail(&open->olist, &tlink_tcon(tlink)->pending_opens); +} + +void +cifs_add_pending_open(struct cifs_fid *fid, struct tcon_link *tlink, + struct cifs_pending_open *open) +{ + spin_lock(&cifs_file_list_lock); + cifs_add_pending_open_locked(fid, tlink, open); + spin_unlock(&cifs_file_list_lock); +} diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index 581c225f7f50..d5ce9e26696c 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -110,7 +110,7 @@ static const struct smb_to_posix_error mapping_table_ERRSRV[] = { {ERRnoroom, -ENOSPC}, {ERRrmuns, -EUSERS}, {ERRtimeout, -ETIME}, - {ERRnoresource, -ENOBUFS}, + {ERRnoresource, -EREMOTEIO}, {ERRtoomanyuids, -EUSERS}, {ERRbaduid, -EACCES}, {ERRusempx, -EIO}, @@ -412,7 +412,7 @@ static const struct { from NT_STATUS_INSUFFICIENT_RESOURCES to NT_STATUS_INSUFF_SERVER_RESOURCES during the session setup } */ { - ERRDOS, ERRnomem, NT_STATUS_INSUFFICIENT_RESOURCES}, { + ERRDOS, ERRnoresource, NT_STATUS_INSUFFICIENT_RESOURCES}, { ERRDOS, ERRbadpath, NT_STATUS_DFS_EXIT_PATH_FOUND}, { ERRDOS, 23, NT_STATUS_DEVICE_DATA_ERROR}, { ERRHRD, ERRgeneral, NT_STATUS_DEVICE_NOT_CONNECTED}, { @@ -682,7 +682,7 @@ static const struct { ERRHRD, ERRgeneral, NT_STATUS_NO_USER_SESSION_KEY}, { ERRDOS, 59, NT_STATUS_USER_SESSION_DELETED}, { ERRHRD, ERRgeneral, NT_STATUS_RESOURCE_LANG_NOT_FOUND}, { - ERRDOS, ERRnomem, NT_STATUS_INSUFF_SERVER_RESOURCES}, { + ERRDOS, ERRnoresource, NT_STATUS_INSUFF_SERVER_RESOURCES}, { ERRHRD, ERRgeneral, NT_STATUS_INVALID_BUFFER_SIZE}, { ERRHRD, ERRgeneral, NT_STATUS_INVALID_ADDRESS_COMPONENT}, { ERRHRD, ERRgeneral, NT_STATUS_INVALID_ADDRESS_WILDCARD}, { @@ -913,8 +913,9 @@ map_smb_to_linux_error(char *buf, bool logErr) * portion, the number of word parameters and the data portion of the message */ unsigned int -smbCalcSize(struct smb_hdr *ptr) +smbCalcSize(void *buf) { + struct smb_hdr *ptr = (struct smb_hdr *)buf; return (sizeof(struct smb_hdr) + (2 * ptr->WordCount) + 2 /* size of the bcc field */ + get_bcc(ptr)); } diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index d87f82678bc7..f9b5d3d6cf33 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -151,7 +151,7 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) } } -static void +void cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info, struct cifs_sb_info *cifs_sb) { @@ -220,7 +220,8 @@ int get_symlink_reparse_path(char *full_path, struct cifs_sb_info *cifs_sb, } */ -static int initiate_cifs_search(const unsigned int xid, struct file *file) +static int +initiate_cifs_search(const unsigned int xid, struct file *file) { __u16 search_flags; int rc = 0; @@ -229,6 +230,7 @@ static int initiate_cifs_search(const unsigned int xid, struct file *file) struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); struct tcon_link *tlink = NULL; struct cifs_tcon *tcon; + struct TCP_Server_Info *server; if (file->private_data == NULL) { tlink = cifs_sb_tlink(cifs_sb); @@ -248,6 +250,13 @@ static int initiate_cifs_search(const unsigned int xid, struct file *file) tcon = tlink_tcon(cifsFile->tlink); } + server = tcon->ses->server; + + if (!server->ops->query_dir_first) { + rc = -ENOSYS; + goto error_exit; + } + cifsFile->invalidHandle = true; cifsFile->srch_inf.endOfSearch = false; @@ -278,10 +287,10 @@ ffirst_retry: if (backup_cred(cifs_sb)) search_flags |= CIFS_SEARCH_BACKUP_SEARCH; - rc = CIFSFindFirst(xid, tcon, full_path, cifs_sb->local_nls, - &cifsFile->netfid, search_flags, &cifsFile->srch_inf, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb)); + rc = server->ops->query_dir_first(xid, tcon, full_path, cifs_sb, + &cifsFile->fid, search_flags, + &cifsFile->srch_inf); + if (rc == 0) cifsFile->invalidHandle = false; /* BB add following call to handle readdir on new NTFS symlink errors @@ -501,62 +510,67 @@ static int cifs_save_resume_key(const char *current_entry, return rc; } -/* find the corresponding entry in the search */ -/* Note that the SMB server returns search entries for . and .. which - complicates logic here if we choose to parse for them and we do not - assume that they are located in the findfirst return buffer.*/ -/* We start counting in the buffer with entry 2 and increment for every - entry (do not increment for . or .. entry) */ -static int find_cifs_entry(const unsigned int xid, struct cifs_tcon *pTcon, - struct file *file, char **ppCurrentEntry, int *num_to_ret) +/* + * Find the corresponding entry in the search. Note that the SMB server returns + * search entries for . and .. which complicates logic here if we choose to + * parse for them and we do not assume that they are located in the findfirst + * return buffer. We start counting in the buffer with entry 2 and increment for + * every entry (do not increment for . or .. entry). + */ +static int +find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, + struct file *file, char **current_entry, int *num_to_ret) { __u16 search_flags; int rc = 0; int pos_in_buf = 0; loff_t first_entry_in_buffer; loff_t index_to_find = file->f_pos; - struct cifsFileInfo *cifsFile = file->private_data; + struct cifsFileInfo *cfile = file->private_data; struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); + struct TCP_Server_Info *server = tcon->ses->server; /* check if index in the buffer */ - if ((cifsFile == NULL) || (ppCurrentEntry == NULL) || - (num_to_ret == NULL)) + if (!server->ops->query_dir_first || !server->ops->query_dir_next) + return -ENOSYS; + + if ((cfile == NULL) || (current_entry == NULL) || (num_to_ret == NULL)) return -ENOENT; - *ppCurrentEntry = NULL; - first_entry_in_buffer = - cifsFile->srch_inf.index_of_last_entry - - cifsFile->srch_inf.entries_in_buffer; + *current_entry = NULL; + first_entry_in_buffer = cfile->srch_inf.index_of_last_entry - + cfile->srch_inf.entries_in_buffer; - /* if first entry in buf is zero then is first buffer - in search response data which means it is likely . and .. - will be in this buffer, although some servers do not return - . and .. for the root of a drive and for those we need - to start two entries earlier */ + /* + * If first entry in buf is zero then is first buffer + * in search response data which means it is likely . and .. + * will be in this buffer, although some servers do not return + * . and .. for the root of a drive and for those we need + * to start two entries earlier. + */ dump_cifs_file_struct(file, "In fce "); - if (((index_to_find < cifsFile->srch_inf.index_of_last_entry) && - is_dir_changed(file)) || - (index_to_find < first_entry_in_buffer)) { + if (((index_to_find < cfile->srch_inf.index_of_last_entry) && + is_dir_changed(file)) || (index_to_find < first_entry_in_buffer)) { /* close and restart search */ cFYI(1, "search backing up - close and restart search"); spin_lock(&cifs_file_list_lock); - if (!cifsFile->srch_inf.endOfSearch && - !cifsFile->invalidHandle) { - cifsFile->invalidHandle = true; + if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) { + cfile->invalidHandle = true; spin_unlock(&cifs_file_list_lock); - CIFSFindClose(xid, pTcon, cifsFile->netfid); + if (server->ops->close) + server->ops->close(xid, tcon, &cfile->fid); } else spin_unlock(&cifs_file_list_lock); - if (cifsFile->srch_inf.ntwrk_buf_start) { + if (cfile->srch_inf.ntwrk_buf_start) { cFYI(1, "freeing SMB ff cache buf on search rewind"); - if (cifsFile->srch_inf.smallBuf) - cifs_small_buf_release(cifsFile->srch_inf. + if (cfile->srch_inf.smallBuf) + cifs_small_buf_release(cfile->srch_inf. ntwrk_buf_start); else - cifs_buf_release(cifsFile->srch_inf. + cifs_buf_release(cfile->srch_inf. ntwrk_buf_start); - cifsFile->srch_inf.ntwrk_buf_start = NULL; + cfile->srch_inf.ntwrk_buf_start = NULL; } rc = initiate_cifs_search(xid, file); if (rc) { @@ -565,65 +579,64 @@ static int find_cifs_entry(const unsigned int xid, struct cifs_tcon *pTcon, return rc; } /* FindFirst/Next set last_entry to NULL on malformed reply */ - if (cifsFile->srch_inf.last_entry) - cifs_save_resume_key(cifsFile->srch_inf.last_entry, - cifsFile); + if (cfile->srch_inf.last_entry) + cifs_save_resume_key(cfile->srch_inf.last_entry, cfile); } search_flags = CIFS_SEARCH_CLOSE_AT_END | CIFS_SEARCH_RETURN_RESUME; if (backup_cred(cifs_sb)) search_flags |= CIFS_SEARCH_BACKUP_SEARCH; - while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && - (rc == 0) && !cifsFile->srch_inf.endOfSearch) { + while ((index_to_find >= cfile->srch_inf.index_of_last_entry) && + (rc == 0) && !cfile->srch_inf.endOfSearch) { cFYI(1, "calling findnext2"); - rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, search_flags, - &cifsFile->srch_inf); + rc = server->ops->query_dir_next(xid, tcon, &cfile->fid, + search_flags, + &cfile->srch_inf); /* FindFirst/Next set last_entry to NULL on malformed reply */ - if (cifsFile->srch_inf.last_entry) - cifs_save_resume_key(cifsFile->srch_inf.last_entry, - cifsFile); + if (cfile->srch_inf.last_entry) + cifs_save_resume_key(cfile->srch_inf.last_entry, cfile); if (rc) return -ENOENT; } - if (index_to_find < cifsFile->srch_inf.index_of_last_entry) { + if (index_to_find < cfile->srch_inf.index_of_last_entry) { /* we found the buffer that contains the entry */ /* scan and find it */ int i; - char *current_entry; - char *end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + - smbCalcSize((struct smb_hdr *) - cifsFile->srch_inf.ntwrk_buf_start); - - current_entry = cifsFile->srch_inf.srch_entries_start; - first_entry_in_buffer = cifsFile->srch_inf.index_of_last_entry - - cifsFile->srch_inf.entries_in_buffer; + char *cur_ent; + char *end_of_smb = cfile->srch_inf.ntwrk_buf_start + + server->ops->calc_smb_size( + cfile->srch_inf.ntwrk_buf_start); + + cur_ent = cfile->srch_inf.srch_entries_start; + first_entry_in_buffer = cfile->srch_inf.index_of_last_entry + - cfile->srch_inf.entries_in_buffer; pos_in_buf = index_to_find - first_entry_in_buffer; cFYI(1, "found entry - pos_in_buf %d", pos_in_buf); - for (i = 0; (i < (pos_in_buf)) && (current_entry != NULL); i++) { + for (i = 0; (i < (pos_in_buf)) && (cur_ent != NULL); i++) { /* go entry by entry figuring out which is first */ - current_entry = nxt_dir_entry(current_entry, end_of_smb, - cifsFile->srch_inf.info_level); + cur_ent = nxt_dir_entry(cur_ent, end_of_smb, + cfile->srch_inf.info_level); } - if ((current_entry == NULL) && (i < pos_in_buf)) { + if ((cur_ent == NULL) && (i < pos_in_buf)) { /* BB fixme - check if we should flag this error */ cERROR(1, "reached end of buf searching for pos in buf" - " %d index to find %lld rc %d", - pos_in_buf, index_to_find, rc); + " %d index to find %lld rc %d", pos_in_buf, + index_to_find, rc); } rc = 0; - *ppCurrentEntry = current_entry; + *current_entry = cur_ent; } else { cFYI(1, "index not in buffer - could not findnext into it"); return 0; } - if (pos_in_buf >= cifsFile->srch_inf.entries_in_buffer) { + if (pos_in_buf >= cfile->srch_inf.entries_in_buffer) { cFYI(1, "can not return entries pos_in_buf beyond last"); *num_to_ret = 0; } else - *num_to_ret = cifsFile->srch_inf.entries_in_buffer - pos_in_buf; + *num_to_ret = cfile->srch_inf.entries_in_buffer - pos_in_buf; return rc; } @@ -723,7 +736,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) int rc = 0; unsigned int xid; int i; - struct cifs_tcon *pTcon; + struct cifs_tcon *tcon; struct cifsFileInfo *cifsFile = NULL; char *current_entry; int num_to_fill = 0; @@ -781,12 +794,12 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) } } /* else { cifsFile->invalidHandle = true; - CIFSFindClose(xid, pTcon, cifsFile->netfid); + tcon->ses->server->close(xid, tcon, &cifsFile->fid); } */ - pTcon = tlink_tcon(cifsFile->tlink); - rc = find_cifs_entry(xid, pTcon, file, - ¤t_entry, &num_to_fill); + tcon = tlink_tcon(cifsFile->tlink); + rc = find_cifs_entry(xid, tcon, file, ¤t_entry, + &num_to_fill); if (rc) { cFYI(1, "fce error %d", rc); goto rddir2_exit; @@ -798,7 +811,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) } cFYI(1, "loop through %d times filling dir for net buf %p", num_to_fill, cifsFile->srch_inf.ntwrk_buf_start); - max_len = smbCalcSize((struct smb_hdr *) + max_len = tcon->ses->server->ops->calc_smb_size( cifsFile->srch_inf.ntwrk_buf_start); end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len; @@ -815,10 +828,12 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) num_to_fill, i); break; } - /* if buggy server returns . and .. late do - we want to check for that here? */ - rc = cifs_filldir(current_entry, file, - filldir, direntry, tmp_buf, max_len); + /* + * if buggy server returns . and .. late do we want to + * check for that here? + */ + rc = cifs_filldir(current_entry, file, filldir, + direntry, tmp_buf, max_len); if (rc == -EOVERFLOW) { rc = 0; break; diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 382c06d01b38..76809f4d3428 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -876,7 +876,8 @@ ssetup_ntlmssp_authenticate: pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base; smb_buf = (struct smb_hdr *)iov[0].iov_base; - if ((type == RawNTLMSSP) && (smb_buf->Status.CifsError == + if ((type == RawNTLMSSP) && (resp_buf_type != CIFS_NO_BUFFER) && + (smb_buf->Status.CifsError == cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))) { if (phase != NtLmNegotiate) { cERROR(1, "Unexpected more processing error"); diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 3129ac74b819..56cc4be87807 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -17,6 +17,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/pagemap.h> +#include <linux/vfs.h> #include "cifsglob.h" #include "cifsproto.h" #include "cifs_debug.h" @@ -63,7 +65,7 @@ send_nt_cancel(struct TCP_Server_Info *server, void *buf, static bool cifs_compare_fids(struct cifsFileInfo *ob1, struct cifsFileInfo *ob2) { - return ob1->netfid == ob2->netfid; + return ob1->fid.netfid == ob2->fid.netfid; } static unsigned int @@ -410,6 +412,83 @@ cifs_negotiate(const unsigned int xid, struct cifs_ses *ses) return rc; } +static unsigned int +cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) +{ + __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); + struct TCP_Server_Info *server = tcon->ses->server; + unsigned int wsize; + + /* start with specified wsize, or default */ + if (volume_info->wsize) + wsize = volume_info->wsize; + else if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_WRITE_CAP)) + wsize = CIFS_DEFAULT_IOSIZE; + else + wsize = CIFS_DEFAULT_NON_POSIX_WSIZE; + + /* can server support 24-bit write sizes? (via UNIX extensions) */ + if (!tcon->unix_ext || !(unix_cap & CIFS_UNIX_LARGE_WRITE_CAP)) + wsize = min_t(unsigned int, wsize, CIFS_MAX_RFC1002_WSIZE); + + /* + * no CAP_LARGE_WRITE_X or is signing enabled without CAP_UNIX set? + * Limit it to max buffer offered by the server, minus the size of the + * WRITEX header, not including the 4 byte RFC1001 length. + */ + if (!(server->capabilities & CAP_LARGE_WRITE_X) || + (!(server->capabilities & CAP_UNIX) && + (server->sec_mode & (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED)))) + wsize = min_t(unsigned int, wsize, + server->maxBuf - sizeof(WRITE_REQ) + 4); + + /* hard limit of CIFS_MAX_WSIZE */ + wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE); + + return wsize; +} + +static unsigned int +cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) +{ + __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); + struct TCP_Server_Info *server = tcon->ses->server; + unsigned int rsize, defsize; + + /* + * Set default value... + * + * HACK alert! Ancient servers have very small buffers. Even though + * MS-CIFS indicates that servers are only limited by the client's + * bufsize for reads, testing against win98se shows that it throws + * INVALID_PARAMETER errors if you try to request too large a read. + * OS/2 just sends back short reads. + * + * If the server doesn't advertise CAP_LARGE_READ_X, then assume that + * it can't handle a read request larger than its MaxBufferSize either. + */ + if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_READ_CAP)) + defsize = CIFS_DEFAULT_IOSIZE; + else if (server->capabilities & CAP_LARGE_READ_X) + defsize = CIFS_DEFAULT_NON_POSIX_RSIZE; + else + defsize = server->maxBuf - sizeof(READ_RSP); + + rsize = volume_info->rsize ? volume_info->rsize : defsize; + + /* + * no CAP_LARGE_READ_X? Then MS-CIFS states that we must limit this to + * the client's MaxBufferSize. + */ + if (!(server->capabilities & CAP_LARGE_READ_X)) + rsize = min_t(unsigned int, CIFSMaxBufSize, rsize); + + /* hard limit of CIFS_MAX_RSIZE */ + rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE); + + return rsize; +} + static void cifs_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) { @@ -489,6 +568,13 @@ cifs_get_srv_inum(const unsigned int xid, struct cifs_tcon *tcon, CIFS_MOUNT_MAP_SPECIAL_CHR); } +static int +cifs_query_file_info(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_fid *fid, FILE_ALL_INFO *data) +{ + return CIFSSMBQFileInfo(xid, tcon, fid->netfid, data); +} + static char * cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, struct cifs_tcon *tcon) @@ -607,6 +693,219 @@ cifs_mkdir_setinfo(struct inode *inode, const char *full_path, cifsInode->cifsAttrs = dosattrs; } +static int +cifs_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path, + int disposition, int desired_access, int create_options, + struct cifs_fid *fid, __u32 *oplock, FILE_ALL_INFO *buf, + struct cifs_sb_info *cifs_sb) +{ + if (!(tcon->ses->capabilities & CAP_NT_SMBS)) + return SMBLegacyOpen(xid, tcon, path, disposition, + desired_access, create_options, + &fid->netfid, oplock, buf, + cifs_sb->local_nls, cifs_sb->mnt_cifs_flags + & CIFS_MOUNT_MAP_SPECIAL_CHR); + return CIFSSMBOpen(xid, tcon, path, disposition, desired_access, + create_options, &fid->netfid, oplock, buf, + cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); +} + +static void +cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) +{ + struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); + cfile->fid.netfid = fid->netfid; + cifs_set_oplock_level(cinode, oplock); + cinode->can_cache_brlcks = cinode->clientCanCacheAll; +} + +static void +cifs_close_file(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_fid *fid) +{ + CIFSSMBClose(xid, tcon, fid->netfid); +} + +static int +cifs_flush_file(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_fid *fid) +{ + return CIFSSMBFlush(xid, tcon, fid->netfid); +} + +static int +cifs_sync_read(const unsigned int xid, struct cifsFileInfo *cfile, + struct cifs_io_parms *parms, unsigned int *bytes_read, + char **buf, int *buf_type) +{ + parms->netfid = cfile->fid.netfid; + return CIFSSMBRead(xid, parms, bytes_read, buf, buf_type); +} + +static int +cifs_sync_write(const unsigned int xid, struct cifsFileInfo *cfile, + struct cifs_io_parms *parms, unsigned int *written, + struct kvec *iov, unsigned long nr_segs) +{ + + parms->netfid = cfile->fid.netfid; + return CIFSSMBWrite2(xid, parms, written, iov, nr_segs); +} + +static int +smb_set_file_info(struct inode *inode, const char *full_path, + FILE_BASIC_INFO *buf, const unsigned int xid) +{ + int oplock = 0; + int rc; + __u16 netfid; + __u32 netpid; + struct cifsFileInfo *open_file; + struct cifsInodeInfo *cinode = CIFS_I(inode); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct tcon_link *tlink = NULL; + struct cifs_tcon *tcon; + FILE_BASIC_INFO info_buf; + + /* if the file is already open for write, just use that fileid */ + open_file = find_writable_file(cinode, true); + if (open_file) { + netfid = open_file->fid.netfid; + netpid = open_file->pid; + tcon = tlink_tcon(open_file->tlink); + goto set_via_filehandle; + } + + tlink = cifs_sb_tlink(cifs_sb); + if (IS_ERR(tlink)) { + rc = PTR_ERR(tlink); + tlink = NULL; + goto out; + } + tcon = tlink_tcon(tlink); + + /* + * NT4 apparently returns success on this call, but it doesn't really + * work. + */ + if (!(tcon->ses->flags & CIFS_SES_NT4)) { + rc = CIFSSMBSetPathInfo(xid, tcon, full_path, buf, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + if (rc == 0) { + cinode->cifsAttrs = le32_to_cpu(buf->Attributes); + goto out; + } else if (rc != -EOPNOTSUPP && rc != -EINVAL) + goto out; + } + + cFYI(1, "calling SetFileInfo since SetPathInfo for times not supported " + "by this server"); + rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN, + SYNCHRONIZE | FILE_WRITE_ATTRIBUTES, CREATE_NOT_DIR, + &netfid, &oplock, NULL, cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + + if (rc != 0) { + if (rc == -EIO) + rc = -EINVAL; + goto out; + } + + netpid = current->tgid; + +set_via_filehandle: + rc = CIFSSMBSetFileInfo(xid, tcon, &info_buf, netfid, netpid); + if (!rc) + cinode->cifsAttrs = le32_to_cpu(buf->Attributes); + + if (open_file == NULL) + CIFSSMBClose(xid, tcon, netfid); + else + cifsFileInfo_put(open_file); +out: + if (tlink != NULL) + cifs_put_tlink(tlink); + return rc; +} + +static int +cifs_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, + const char *path, struct cifs_sb_info *cifs_sb, + struct cifs_fid *fid, __u16 search_flags, + struct cifs_search_info *srch_inf) +{ + return CIFSFindFirst(xid, tcon, path, cifs_sb, + &fid->netfid, search_flags, srch_inf, true); +} + +static int +cifs_query_dir_next(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_fid *fid, __u16 search_flags, + struct cifs_search_info *srch_inf) +{ + return CIFSFindNext(xid, tcon, fid->netfid, search_flags, srch_inf); +} + +static int +cifs_close_dir(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_fid *fid) +{ + return CIFSFindClose(xid, tcon, fid->netfid); +} + +static int +cifs_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid, + struct cifsInodeInfo *cinode) +{ + return CIFSSMBLock(0, tcon, fid->netfid, current->tgid, 0, 0, 0, 0, + LOCKING_ANDX_OPLOCK_RELEASE, false, + cinode->clientCanCacheRead ? 1 : 0); +} + +static int +cifs_queryfs(const unsigned int xid, struct cifs_tcon *tcon, + struct kstatfs *buf) +{ + int rc = -EOPNOTSUPP; + + buf->f_type = CIFS_MAGIC_NUMBER; + + /* + * We could add a second check for a QFS Unix capability bit + */ + if ((tcon->ses->capabilities & CAP_UNIX) && + (CIFS_POSIX_EXTENSIONS & le64_to_cpu(tcon->fsUnixInfo.Capability))) + rc = CIFSSMBQFSPosixInfo(xid, tcon, buf); + + /* + * Only need to call the old QFSInfo if failed on newer one, + * e.g. by OS/2. + **/ + if (rc && (tcon->ses->capabilities & CAP_NT_SMBS)) + rc = CIFSSMBQFSInfo(xid, tcon, buf); + + /* + * Some old Windows servers also do not support level 103, retry with + * older level one if old server failed the previous call or we + * bypassed it because we detected that this was an older LANMAN sess + */ + if (rc) + rc = SMBOldQFSInfo(xid, tcon, buf); + return rc; +} + +static int +cifs_mand_lock(const unsigned int xid, struct cifsFileInfo *cfile, __u64 offset, + __u64 length, __u32 type, int lock, int unlock, bool wait) +{ + return CIFSSMBLock(xid, tlink_tcon(cfile->tlink), cfile->fid.netfid, + current->tgid, length, offset, unlock, lock, + (__u8)type, wait, 0); +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -630,6 +929,8 @@ struct smb_version_operations smb1_operations = { .check_trans2 = cifs_check_trans2, .need_neg = cifs_need_neg, .negotiate = cifs_negotiate, + .negotiate_wsize = cifs_negotiate_wsize, + .negotiate_rsize = cifs_negotiate_rsize, .sess_setup = CIFS_SessSetup, .logoff = CIFSSMBLogoff, .tree_connect = CIFSTCon, @@ -638,12 +939,37 @@ struct smb_version_operations smb1_operations = { .qfs_tcon = cifs_qfs_tcon, .is_path_accessible = cifs_is_path_accessible, .query_path_info = cifs_query_path_info, + .query_file_info = cifs_query_file_info, .get_srv_inum = cifs_get_srv_inum, + .set_path_size = CIFSSMBSetEOF, + .set_file_size = CIFSSMBSetFileSize, + .set_file_info = smb_set_file_info, .build_path_to_root = cifs_build_path_to_root, .echo = CIFSSMBEcho, .mkdir = CIFSSMBMkDir, .mkdir_setinfo = cifs_mkdir_setinfo, .rmdir = CIFSSMBRmDir, + .unlink = CIFSSMBDelFile, + .rename_pending_delete = cifs_rename_pending_delete, + .rename = CIFSSMBRename, + .create_hardlink = CIFSCreateHardLink, + .open = cifs_open_file, + .set_fid = cifs_set_fid, + .close = cifs_close_file, + .flush = cifs_flush_file, + .async_readv = cifs_async_readv, + .async_writev = cifs_async_writev, + .sync_read = cifs_sync_read, + .sync_write = cifs_sync_write, + .query_dir_first = cifs_query_dir_first, + .query_dir_next = cifs_query_dir_next, + .close_dir = cifs_close_dir, + .calc_smb_size = smbCalcSize, + .oplock_response = cifs_oplock_response, + .queryfs = cifs_queryfs, + .mand_lock = cifs_mand_lock, + .mand_unlock_range = cifs_unlock_range, + .push_mand_locks = cifs_push_mandatory_locks, }; struct smb_version_values smb1_values = { diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c new file mode 100644 index 000000000000..a93eec30a50d --- /dev/null +++ b/fs/cifs/smb2file.c @@ -0,0 +1,302 @@ +/* + * fs/cifs/smb2file.c + * + * Copyright (C) International Business Machines Corp., 2002, 2011 + * Author(s): Steve French (sfrench@us.ibm.com), + * Pavel Shilovsky ((pshilovsky@samba.org) 2012 + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/fs.h> +#include <linux/stat.h> +#include <linux/slab.h> +#include <linux/pagemap.h> +#include <asm/div64.h> +#include "cifsfs.h" +#include "cifspdu.h" +#include "cifsglob.h" +#include "cifsproto.h" +#include "cifs_debug.h" +#include "cifs_fs_sb.h" +#include "cifs_unicode.h" +#include "fscache.h" +#include "smb2proto.h" + +void +smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock) +{ + oplock &= 0xFF; + if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE) + return; + if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE) { + cinode->clientCanCacheAll = true; + cinode->clientCanCacheRead = true; + cFYI(1, "Exclusive Oplock granted on inode %p", + &cinode->vfs_inode); + } else if (oplock == SMB2_OPLOCK_LEVEL_II) { + cinode->clientCanCacheAll = false; + cinode->clientCanCacheRead = true; + cFYI(1, "Level II Oplock granted on inode %p", + &cinode->vfs_inode); + } else { + cinode->clientCanCacheAll = false; + cinode->clientCanCacheRead = false; + } +} + +int +smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path, + int disposition, int desired_access, int create_options, + struct cifs_fid *fid, __u32 *oplock, FILE_ALL_INFO *buf, + struct cifs_sb_info *cifs_sb) +{ + int rc; + __le16 *smb2_path; + struct smb2_file_all_info *smb2_data = NULL; + __u8 smb2_oplock[17]; + + smb2_path = cifs_convert_path_to_utf16(path, cifs_sb); + if (smb2_path == NULL) { + rc = -ENOMEM; + goto out; + } + + smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2, + GFP_KERNEL); + if (smb2_data == NULL) { + rc = -ENOMEM; + goto out; + } + + desired_access |= FILE_READ_ATTRIBUTES; + *smb2_oplock = SMB2_OPLOCK_LEVEL_EXCLUSIVE; + + if (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) + memcpy(smb2_oplock + 1, fid->lease_key, SMB2_LEASE_KEY_SIZE); + + rc = SMB2_open(xid, tcon, smb2_path, &fid->persistent_fid, + &fid->volatile_fid, desired_access, disposition, + 0, 0, smb2_oplock, smb2_data); + if (rc) + goto out; + + if (buf) { + /* open response does not have IndexNumber field - get it */ + rc = SMB2_get_srv_num(xid, tcon, fid->persistent_fid, + fid->volatile_fid, + &smb2_data->IndexNumber); + if (rc) { + /* let get_inode_info disable server inode numbers */ + smb2_data->IndexNumber = 0; + rc = 0; + } + move_smb2_info_to_cifs(buf, smb2_data); + } + + *oplock = *smb2_oplock; +out: + kfree(smb2_data); + kfree(smb2_path); + return rc; +} + +int +smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, + const unsigned int xid) +{ + int rc = 0, stored_rc; + unsigned int max_num, num = 0, max_buf; + struct smb2_lock_element *buf, *cur; + struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); + struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); + struct cifsLockInfo *li, *tmp; + __u64 length = 1 + flock->fl_end - flock->fl_start; + struct list_head tmp_llist; + + INIT_LIST_HEAD(&tmp_llist); + + /* + * Accessing maxBuf is racy with cifs_reconnect - need to store value + * and check it for zero before using. + */ + max_buf = tcon->ses->server->maxBuf; + if (!max_buf) + return -EINVAL; + + max_num = max_buf / sizeof(struct smb2_lock_element); + buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL); + if (!buf) + return -ENOMEM; + + cur = buf; + + down_write(&cinode->lock_sem); + list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { + if (flock->fl_start > li->offset || + (flock->fl_start + length) < + (li->offset + li->length)) + continue; + if (current->tgid != li->pid) + continue; + if (cinode->can_cache_brlcks) { + /* + * We can cache brlock requests - simply remove a lock + * from the file's list. + */ + list_del(&li->llist); + cifs_del_lock_waiters(li); + kfree(li); + continue; + } + cur->Length = cpu_to_le64(li->length); + cur->Offset = cpu_to_le64(li->offset); + cur->Flags = cpu_to_le32(SMB2_LOCKFLAG_UNLOCK); + /* + * We need to save a lock here to let us add it again to the + * file's list if the unlock range request fails on the server. + */ + list_move(&li->llist, &tmp_llist); + if (++num == max_num) { + stored_rc = smb2_lockv(xid, tcon, + cfile->fid.persistent_fid, + cfile->fid.volatile_fid, + current->tgid, num, buf); + if (stored_rc) { + /* + * We failed on the unlock range request - add + * all locks from the tmp list to the head of + * the file's list. + */ + cifs_move_llist(&tmp_llist, + &cfile->llist->locks); + rc = stored_rc; + } else + /* + * The unlock range request succeed - free the + * tmp list. + */ + cifs_free_llist(&tmp_llist); + cur = buf; + num = 0; + } else + cur++; + } + if (num) { + stored_rc = smb2_lockv(xid, tcon, cfile->fid.persistent_fid, + cfile->fid.volatile_fid, current->tgid, + num, buf); + if (stored_rc) { + cifs_move_llist(&tmp_llist, &cfile->llist->locks); + rc = stored_rc; + } else + cifs_free_llist(&tmp_llist); + } + up_write(&cinode->lock_sem); + + kfree(buf); + return rc; +} + +static int +smb2_push_mand_fdlocks(struct cifs_fid_locks *fdlocks, const unsigned int xid, + struct smb2_lock_element *buf, unsigned int max_num) +{ + int rc = 0, stored_rc; + struct cifsFileInfo *cfile = fdlocks->cfile; + struct cifsLockInfo *li; + unsigned int num = 0; + struct smb2_lock_element *cur = buf; + struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); + + list_for_each_entry(li, &fdlocks->locks, llist) { + cur->Length = cpu_to_le64(li->length); + cur->Offset = cpu_to_le64(li->offset); + cur->Flags = cpu_to_le32(li->type | + SMB2_LOCKFLAG_FAIL_IMMEDIATELY); + if (++num == max_num) { + stored_rc = smb2_lockv(xid, tcon, + cfile->fid.persistent_fid, + cfile->fid.volatile_fid, + current->tgid, num, buf); + if (stored_rc) + rc = stored_rc; + cur = buf; + num = 0; + } else + cur++; + } + if (num) { + stored_rc = smb2_lockv(xid, tcon, + cfile->fid.persistent_fid, + cfile->fid.volatile_fid, + current->tgid, num, buf); + if (stored_rc) + rc = stored_rc; + } + + return rc; +} + +int +smb2_push_mandatory_locks(struct cifsFileInfo *cfile) +{ + int rc = 0, stored_rc; + unsigned int xid; + unsigned int max_num, max_buf; + struct smb2_lock_element *buf; + struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); + struct cifs_fid_locks *fdlocks; + + xid = get_xid(); + /* we are going to update can_cache_brlcks here - need a write access */ + down_write(&cinode->lock_sem); + if (!cinode->can_cache_brlcks) { + up_write(&cinode->lock_sem); + free_xid(xid); + return rc; + } + + /* + * Accessing maxBuf is racy with cifs_reconnect - need to store value + * and check it for zero before using. + */ + max_buf = tlink_tcon(cfile->tlink)->ses->server->maxBuf; + if (!max_buf) { + up_write(&cinode->lock_sem); + free_xid(xid); + return -EINVAL; + } + + max_num = max_buf / sizeof(struct smb2_lock_element); + buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL); + if (!buf) { + up_write(&cinode->lock_sem); + free_xid(xid); + return -ENOMEM; + } + + list_for_each_entry(fdlocks, &cinode->llist, llist) { + stored_rc = smb2_push_mand_fdlocks(fdlocks, xid, buf, max_num); + if (stored_rc) + rc = stored_rc; + } + + cinode->can_cache_brlcks = false; + kfree(buf); + + up_write(&cinode->lock_sem); + free_xid(xid); + return rc; +} diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h index 33c1d89090c0..7c0e2143e775 100644 --- a/fs/cifs/smb2glob.h +++ b/fs/cifs/smb2glob.h @@ -23,6 +23,8 @@ #ifndef _SMB2_GLOB_H #define _SMB2_GLOB_H +#define SMB2_MAGIC_NUMBER 0xFE534D42 + /* ***************************************************************** * Constants go here @@ -40,5 +42,17 @@ #define SMB2_OP_MKDIR 5 #define SMB2_OP_RENAME 6 #define SMB2_OP_DELETE 7 +#define SMB2_OP_HARDLINK 8 +#define SMB2_OP_SET_EOF 9 + +/* Used when constructing chained read requests. */ +#define CHAINED_REQUEST 1 +#define START_OF_CHAIN 2 +#define END_OF_CHAIN 4 +#define RELATED_REQUEST 8 + +#define SMB2_SIGNATURE_SIZE (16) +#define SMB2_NTLMV2_SESSKEY_SIZE (16) +#define SMB2_HMACSHA256_SIZE (32) #endif /* _SMB2_GLOB_H */ diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 2aa5cb08c526..706482452df4 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -47,6 +47,7 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, int rc, tmprc = 0; u64 persistent_fid, volatile_fid; __le16 *utf16_path; + __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb); if (!utf16_path) @@ -54,7 +55,7 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid, desired_access, create_disposition, file_attributes, - create_options); + create_options, &oplock, NULL); if (rc) { kfree(utf16_path); return rc; @@ -74,6 +75,22 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, * SMB2_open() call. */ break; + case SMB2_OP_RENAME: + tmprc = SMB2_rename(xid, tcon, persistent_fid, volatile_fid, + (__le16 *)data); + break; + case SMB2_OP_HARDLINK: + tmprc = SMB2_set_hardlink(xid, tcon, persistent_fid, + volatile_fid, (__le16 *)data); + break; + case SMB2_OP_SET_EOF: + tmprc = SMB2_set_eof(xid, tcon, persistent_fid, volatile_fid, + current->tgid, (__le64 *)data); + break; + case SMB2_OP_SET_INFO: + tmprc = SMB2_set_info(xid, tcon, persistent_fid, volatile_fid, + (FILE_BASIC_INFO *)data); + break; default: cERROR(1, "Invalid command"); break; @@ -86,7 +103,7 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, return rc; } -static void +void move_smb2_info_to_cifs(FILE_ALL_INFO *dst, struct smb2_file_all_info *src) { memcpy(dst, src, (size_t)(&src->CurrentByteOffset) - (size_t)src); @@ -161,3 +178,80 @@ smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, 0, CREATE_NOT_FILE | CREATE_DELETE_ON_CLOSE, NULL, SMB2_OP_DELETE); } + +int +smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name, + struct cifs_sb_info *cifs_sb) +{ + return smb2_open_op_close(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN, + 0, CREATE_DELETE_ON_CLOSE, NULL, + SMB2_OP_DELETE); +} + +static int +smb2_set_path_attr(const unsigned int xid, struct cifs_tcon *tcon, + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb, __u32 access, int command) +{ + __le16 *smb2_to_name = NULL; + int rc; + + smb2_to_name = cifs_convert_path_to_utf16(to_name, cifs_sb); + if (smb2_to_name == NULL) { + rc = -ENOMEM; + goto smb2_rename_path; + } + + rc = smb2_open_op_close(xid, tcon, cifs_sb, from_name, access, + FILE_OPEN, 0, 0, smb2_to_name, command); +smb2_rename_path: + kfree(smb2_to_name); + return rc; +} + +int +smb2_rename_path(const unsigned int xid, struct cifs_tcon *tcon, + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb) +{ + return smb2_set_path_attr(xid, tcon, from_name, to_name, cifs_sb, + DELETE, SMB2_OP_RENAME); +} + +int +smb2_create_hardlink(const unsigned int xid, struct cifs_tcon *tcon, + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb) +{ + return smb2_set_path_attr(xid, tcon, from_name, to_name, cifs_sb, + FILE_READ_ATTRIBUTES, SMB2_OP_HARDLINK); +} + +int +smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon, + const char *full_path, __u64 size, + struct cifs_sb_info *cifs_sb, bool set_alloc) +{ + __le64 eof = cpu_to_le64(size); + return smb2_open_op_close(xid, tcon, cifs_sb, full_path, + FILE_WRITE_DATA, FILE_OPEN, 0, 0, &eof, + SMB2_OP_SET_EOF); +} + +int +smb2_set_file_info(struct inode *inode, const char *full_path, + FILE_BASIC_INFO *buf, const unsigned int xid) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct tcon_link *tlink; + int rc; + + tlink = cifs_sb_tlink(cifs_sb); + if (IS_ERR(tlink)) + return PTR_ERR(tlink); + rc = smb2_open_op_close(xid, tlink_tcon(tlink), cifs_sb, full_path, + FILE_WRITE_ATTRIBUTES, FILE_OPEN, 0, 0, buf, + SMB2_OP_SET_INFO); + cifs_put_tlink(tlink); + return rc; +} diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index be41478acc05..494c912c76fe 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c @@ -453,7 +453,8 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_FILE_INVALID, -EIO, "STATUS_FILE_INVALID"}, {STATUS_ALLOTTED_SPACE_EXCEEDED, -EIO, "STATUS_ALLOTTED_SPACE_EXCEEDED"}, - {STATUS_INSUFFICIENT_RESOURCES, -EIO, "STATUS_INSUFFICIENT_RESOURCES"}, + {STATUS_INSUFFICIENT_RESOURCES, -EREMOTEIO, + "STATUS_INSUFFICIENT_RESOURCES"}, {STATUS_DFS_EXIT_PATH_FOUND, -EIO, "STATUS_DFS_EXIT_PATH_FOUND"}, {STATUS_DEVICE_DATA_ERROR, -EIO, "STATUS_DEVICE_DATA_ERROR"}, {STATUS_DEVICE_NOT_CONNECTED, -EIO, "STATUS_DEVICE_NOT_CONNECTED"}, @@ -2455,7 +2456,8 @@ map_smb2_to_linux_error(char *buf, bool log_err) return 0; /* mask facility */ - if (log_err && (smb2err != (STATUS_MORE_PROCESSING_REQUIRED))) + if (log_err && (smb2err != STATUS_MORE_PROCESSING_REQUIRED) && + (smb2err != STATUS_END_OF_FILE)) smb2_print_status(smb2err); else if (cifsFYI & CIFS_RC) smb2_print_status(smb2err); diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index e4d3b9964167..7b1c5e3287fb 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -142,12 +142,19 @@ smb2_check_message(char *buf, unsigned int length) } if (smb2_rsp_struct_sizes[command] != pdu->StructureSize2) { - if (hdr->Status == 0 || - pdu->StructureSize2 != SMB2_ERROR_STRUCTURE_SIZE2) { + if (command != SMB2_OPLOCK_BREAK_HE && (hdr->Status == 0 || + pdu->StructureSize2 != SMB2_ERROR_STRUCTURE_SIZE2)) { /* error packets have 9 byte structure size */ cERROR(1, "Illegal response size %u for command %d", le16_to_cpu(pdu->StructureSize2), command); return 1; + } else if (command == SMB2_OPLOCK_BREAK_HE && (hdr->Status == 0) + && (le16_to_cpu(pdu->StructureSize2) != 44) + && (le16_to_cpu(pdu->StructureSize2) != 36)) { + /* special case for SMB2.1 lease break message */ + cERROR(1, "Illegal response size %d for oplock break", + le16_to_cpu(pdu->StructureSize2)); + return 1; } } @@ -162,6 +169,9 @@ smb2_check_message(char *buf, unsigned int length) if (4 + len != clc_len) { cFYI(1, "Calculated size %u length %u mismatch mid %llu", clc_len, 4 + len, mid); + /* Windows 7 server returns 24 bytes more */ + if (clc_len + 20 == len && command == SMB2_OPLOCK_BREAK_HE) + return 0; /* server can return one byte more */ if (clc_len == 4 + len + 1) return 0; @@ -244,7 +254,15 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr) ((struct smb2_query_info_rsp *)hdr)->OutputBufferLength); break; case SMB2_READ: + *off = ((struct smb2_read_rsp *)hdr)->DataOffset; + *len = le32_to_cpu(((struct smb2_read_rsp *)hdr)->DataLength); + break; case SMB2_QUERY_DIRECTORY: + *off = le16_to_cpu( + ((struct smb2_query_directory_rsp *)hdr)->OutputBufferOffset); + *len = le32_to_cpu( + ((struct smb2_query_directory_rsp *)hdr)->OutputBufferLength); + break; case SMB2_IOCTL: case SMB2_CHANGE_NOTIFY: default: @@ -287,8 +305,9 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr) * portion, the number of word parameters and the data portion of the message. */ unsigned int -smb2_calc_size(struct smb2_hdr *hdr) +smb2_calc_size(void *buf) { + struct smb2_hdr *hdr = (struct smb2_hdr *)buf; struct smb2_pdu *pdu = (struct smb2_pdu *)hdr; int offset; /* the offset from the beginning of SMB to data area */ int data_length; /* the length of the variable length data area */ @@ -347,3 +366,218 @@ cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb) CIFS_MOUNT_MAP_SPECIAL_CHR); return to; } + +__le32 +smb2_get_lease_state(struct cifsInodeInfo *cinode) +{ + if (cinode->clientCanCacheAll) + return SMB2_LEASE_WRITE_CACHING | SMB2_LEASE_READ_CACHING; + else if (cinode->clientCanCacheRead) + return SMB2_LEASE_READ_CACHING; + return 0; +} + +__u8 smb2_map_lease_to_oplock(__le32 lease_state) +{ + if (lease_state & SMB2_LEASE_WRITE_CACHING) { + if (lease_state & SMB2_LEASE_HANDLE_CACHING) + return SMB2_OPLOCK_LEVEL_BATCH; + else + return SMB2_OPLOCK_LEVEL_EXCLUSIVE; + } else if (lease_state & SMB2_LEASE_READ_CACHING) + return SMB2_OPLOCK_LEVEL_II; + return 0; +} + +struct smb2_lease_break_work { + struct work_struct lease_break; + struct tcon_link *tlink; + __u8 lease_key[16]; + __le32 lease_state; +}; + +static void +cifs_ses_oplock_break(struct work_struct *work) +{ + struct smb2_lease_break_work *lw = container_of(work, + struct smb2_lease_break_work, lease_break); + int rc; + + rc = SMB2_lease_break(0, tlink_tcon(lw->tlink), lw->lease_key, + lw->lease_state); + cFYI(1, "Lease release rc %d", rc); + cifs_put_tlink(lw->tlink); + kfree(lw); +} + +static bool +smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) +{ + struct smb2_lease_break *rsp = (struct smb2_lease_break *)buffer; + struct list_head *tmp, *tmp1, *tmp2; + struct cifs_ses *ses; + struct cifs_tcon *tcon; + struct cifsInodeInfo *cinode; + struct cifsFileInfo *cfile; + struct cifs_pending_open *open; + struct smb2_lease_break_work *lw; + bool found; + int ack_req = le32_to_cpu(rsp->Flags & + SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED); + + lw = kmalloc(sizeof(struct smb2_lease_break_work), GFP_KERNEL); + if (!lw) { + cERROR(1, "Memory allocation failed during lease break check"); + return false; + } + + INIT_WORK(&lw->lease_break, cifs_ses_oplock_break); + lw->lease_state = rsp->NewLeaseState; + + cFYI(1, "Checking for lease break"); + + /* look up tcon based on tid & uid */ + spin_lock(&cifs_tcp_ses_lock); + list_for_each(tmp, &server->smb_ses_list) { + ses = list_entry(tmp, struct cifs_ses, smb_ses_list); + + spin_lock(&cifs_file_list_lock); + list_for_each(tmp1, &ses->tcon_list) { + tcon = list_entry(tmp1, struct cifs_tcon, tcon_list); + + cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks); + list_for_each(tmp2, &tcon->openFileList) { + cfile = list_entry(tmp2, struct cifsFileInfo, + tlist); + cinode = CIFS_I(cfile->dentry->d_inode); + + if (memcmp(cinode->lease_key, rsp->LeaseKey, + SMB2_LEASE_KEY_SIZE)) + continue; + + cFYI(1, "found in the open list"); + cFYI(1, "lease key match, lease break 0x%d", + le32_to_cpu(rsp->NewLeaseState)); + + smb2_set_oplock_level(cinode, + smb2_map_lease_to_oplock(rsp->NewLeaseState)); + + if (ack_req) + cfile->oplock_break_cancelled = false; + else + cfile->oplock_break_cancelled = true; + + queue_work(cifsiod_wq, &cfile->oplock_break); + + spin_unlock(&cifs_file_list_lock); + spin_unlock(&cifs_tcp_ses_lock); + return true; + } + + found = false; + list_for_each_entry(open, &tcon->pending_opens, olist) { + if (memcmp(open->lease_key, rsp->LeaseKey, + SMB2_LEASE_KEY_SIZE)) + continue; + + if (!found && ack_req) { + found = true; + memcpy(lw->lease_key, open->lease_key, + SMB2_LEASE_KEY_SIZE); + lw->tlink = cifs_get_tlink(open->tlink); + queue_work(cifsiod_wq, + &lw->lease_break); + } + + cFYI(1, "found in the pending open list"); + cFYI(1, "lease key match, lease break 0x%d", + le32_to_cpu(rsp->NewLeaseState)); + + open->oplock = + smb2_map_lease_to_oplock(rsp->NewLeaseState); + } + if (found) { + spin_unlock(&cifs_file_list_lock); + spin_unlock(&cifs_tcp_ses_lock); + return true; + } + } + spin_unlock(&cifs_file_list_lock); + } + spin_unlock(&cifs_tcp_ses_lock); + kfree(lw); + cFYI(1, "Can not process lease break - no lease matched"); + return false; +} + +bool +smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) +{ + struct smb2_oplock_break *rsp = (struct smb2_oplock_break *)buffer; + struct list_head *tmp, *tmp1, *tmp2; + struct cifs_ses *ses; + struct cifs_tcon *tcon; + struct cifsInodeInfo *cinode; + struct cifsFileInfo *cfile; + + cFYI(1, "Checking for oplock break"); + + if (rsp->hdr.Command != SMB2_OPLOCK_BREAK) + return false; + + if (rsp->StructureSize != + smb2_rsp_struct_sizes[SMB2_OPLOCK_BREAK_HE]) { + if (le16_to_cpu(rsp->StructureSize) == 44) + return smb2_is_valid_lease_break(buffer, server); + else + return false; + } + + cFYI(1, "oplock level 0x%d", rsp->OplockLevel); + + /* look up tcon based on tid & uid */ + spin_lock(&cifs_tcp_ses_lock); + list_for_each(tmp, &server->smb_ses_list) { + ses = list_entry(tmp, struct cifs_ses, smb_ses_list); + list_for_each(tmp1, &ses->tcon_list) { + tcon = list_entry(tmp1, struct cifs_tcon, tcon_list); + + cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks); + spin_lock(&cifs_file_list_lock); + list_for_each(tmp2, &tcon->openFileList) { + cfile = list_entry(tmp2, struct cifsFileInfo, + tlist); + if (rsp->PersistentFid != + cfile->fid.persistent_fid || + rsp->VolatileFid != + cfile->fid.volatile_fid) + continue; + + cFYI(1, "file id match, oplock break"); + cinode = CIFS_I(cfile->dentry->d_inode); + + if (!cinode->clientCanCacheAll && + rsp->OplockLevel == SMB2_OPLOCK_LEVEL_NONE) + cfile->oplock_break_cancelled = true; + else + cfile->oplock_break_cancelled = false; + + smb2_set_oplock_level(cinode, + rsp->OplockLevel ? SMB2_OPLOCK_LEVEL_II : 0); + + queue_work(cifsiod_wq, &cfile->oplock_break); + + spin_unlock(&cifs_file_list_lock); + spin_unlock(&cifs_tcp_ses_lock); + return true; + } + spin_unlock(&cifs_file_list_lock); + spin_unlock(&cifs_tcp_ses_lock); + cFYI(1, "No matching file for oplock break"); + return true; + } + } + spin_unlock(&cifs_tcp_ses_lock); + cFYI(1, "Can not process oplock break for non-existent connection"); + return false; +} diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 826209bf3684..4d9dbe0b7385 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -17,11 +17,15 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/pagemap.h> +#include <linux/vfs.h> #include "cifsglob.h" #include "smb2pdu.h" #include "smb2proto.h" #include "cifsproto.h" #include "cifs_debug.h" +#include "smb2status.h" +#include "smb2glob.h" static int change_conf(struct TCP_Server_Info *server) @@ -63,6 +67,17 @@ smb2_add_credits(struct TCP_Server_Info *server, const unsigned int add, server->in_flight--; if (server->in_flight == 0 && (optype & CIFS_OP_MASK) != CIFS_NEG_OP) rc = change_conf(server); + /* + * Sometimes server returns 0 credits on oplock break ack - we need to + * rebalance credits in this case. + */ + else if (server->in_flight > 0 && server->oplock_credits == 0 && + server->oplocks) { + if (server->credits > 1) { + server->credits--; + server->oplock_credits++; + } + } spin_unlock(&server->req_lock); wake_up(&server->request_q); if (rc) @@ -157,6 +172,42 @@ smb2_negotiate(const unsigned int xid, struct cifs_ses *ses) return rc; } +static unsigned int +smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) +{ + struct TCP_Server_Info *server = tcon->ses->server; + unsigned int wsize; + + /* start with specified wsize, or default */ + wsize = volume_info->wsize ? volume_info->wsize : CIFS_DEFAULT_IOSIZE; + wsize = min_t(unsigned int, wsize, server->max_write); + /* + * limit write size to 2 ** 16, because we don't support multicredit + * requests now. + */ + wsize = min_t(unsigned int, wsize, 2 << 15); + + return wsize; +} + +static unsigned int +smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) +{ + struct TCP_Server_Info *server = tcon->ses->server; + unsigned int rsize; + + /* start with specified rsize, or default */ + rsize = volume_info->rsize ? volume_info->rsize : CIFS_DEFAULT_IOSIZE; + rsize = min_t(unsigned int, rsize, server->max_read); + /* + * limit write size to 2 ** 16, because we don't support multicredit + * requests now. + */ + rsize = min_t(unsigned int, rsize, 2 << 15); + + return rsize; +} + static int smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path) @@ -164,13 +215,14 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, int rc; __u64 persistent_fid, volatile_fid; __le16 *utf16_path; + __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb); if (!utf16_path) return -ENOMEM; rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid, - FILE_READ_ATTRIBUTES, FILE_OPEN, 0, 0); + FILE_READ_ATTRIBUTES, FILE_OPEN, 0, 0, &oplock, NULL); if (rc) { kfree(utf16_path); return rc; @@ -190,6 +242,26 @@ smb2_get_srv_inum(const unsigned int xid, struct cifs_tcon *tcon, return 0; } +static int +smb2_query_file_info(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_fid *fid, FILE_ALL_INFO *data) +{ + int rc; + struct smb2_file_all_info *smb2_data; + + smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2, + GFP_KERNEL); + if (smb2_data == NULL) + return -ENOMEM; + + rc = SMB2_query_info(xid, tcon, fid->persistent_fid, fid->volatile_fid, + smb2_data); + if (!rc) + move_smb2_info_to_cifs(data, smb2_data); + kfree(smb2_data); + return rc; +} + static char * smb2_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, struct cifs_tcon *tcon) @@ -292,7 +364,221 @@ smb2_print_stats(struct seq_file *m, struct cifs_tcon *tcon) #endif } +static void +smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) +{ + struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); + cfile->fid.persistent_fid = fid->persistent_fid; + cfile->fid.volatile_fid = fid->volatile_fid; + smb2_set_oplock_level(cinode, oplock); + cinode->can_cache_brlcks = cinode->clientCanCacheAll; +} + +static void +smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_fid *fid) +{ + SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); +} + +static int +smb2_flush_file(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_fid *fid) +{ + return SMB2_flush(xid, tcon, fid->persistent_fid, fid->volatile_fid); +} + +static unsigned int +smb2_read_data_offset(char *buf) +{ + struct smb2_read_rsp *rsp = (struct smb2_read_rsp *)buf; + return rsp->DataOffset; +} + +static unsigned int +smb2_read_data_length(char *buf) +{ + struct smb2_read_rsp *rsp = (struct smb2_read_rsp *)buf; + return le32_to_cpu(rsp->DataLength); +} + + +static int +smb2_sync_read(const unsigned int xid, struct cifsFileInfo *cfile, + struct cifs_io_parms *parms, unsigned int *bytes_read, + char **buf, int *buf_type) +{ + parms->persistent_fid = cfile->fid.persistent_fid; + parms->volatile_fid = cfile->fid.volatile_fid; + return SMB2_read(xid, parms, bytes_read, buf, buf_type); +} + +static int +smb2_sync_write(const unsigned int xid, struct cifsFileInfo *cfile, + struct cifs_io_parms *parms, unsigned int *written, + struct kvec *iov, unsigned long nr_segs) +{ + + parms->persistent_fid = cfile->fid.persistent_fid; + parms->volatile_fid = cfile->fid.volatile_fid; + return SMB2_write(xid, parms, written, iov, nr_segs); +} + +static int +smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon, + struct cifsFileInfo *cfile, __u64 size, bool set_alloc) +{ + __le64 eof = cpu_to_le64(size); + return SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, + cfile->fid.volatile_fid, cfile->pid, &eof); +} + +static int +smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, + const char *path, struct cifs_sb_info *cifs_sb, + struct cifs_fid *fid, __u16 search_flags, + struct cifs_search_info *srch_inf) +{ + __le16 *utf16_path; + int rc; + __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; + __u64 persistent_fid, volatile_fid; + + utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); + if (!utf16_path) + return -ENOMEM; + + rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid, + FILE_READ_ATTRIBUTES | FILE_READ_DATA, FILE_OPEN, 0, 0, + &oplock, NULL); + kfree(utf16_path); + if (rc) { + cERROR(1, "open dir failed"); + return rc; + } + + srch_inf->entries_in_buffer = 0; + srch_inf->index_of_last_entry = 0; + fid->persistent_fid = persistent_fid; + fid->volatile_fid = volatile_fid; + + rc = SMB2_query_directory(xid, tcon, persistent_fid, volatile_fid, 0, + srch_inf); + if (rc) { + cERROR(1, "query directory failed"); + SMB2_close(xid, tcon, persistent_fid, volatile_fid); + } + return rc; +} + +static int +smb2_query_dir_next(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_fid *fid, __u16 search_flags, + struct cifs_search_info *srch_inf) +{ + return SMB2_query_directory(xid, tcon, fid->persistent_fid, + fid->volatile_fid, 0, srch_inf); +} + +static int +smb2_close_dir(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_fid *fid) +{ + return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); +} + +/* +* If we negotiate SMB2 protocol and get STATUS_PENDING - update +* the number of credits and return true. Otherwise - return false. +*/ +static bool +smb2_is_status_pending(char *buf, struct TCP_Server_Info *server, int length) +{ + struct smb2_hdr *hdr = (struct smb2_hdr *)buf; + + if (hdr->Status != STATUS_PENDING) + return false; + + if (!length) { + spin_lock(&server->req_lock); + server->credits += le16_to_cpu(hdr->CreditRequest); + spin_unlock(&server->req_lock); + wake_up(&server->request_q); + } + + return true; +} + +static int +smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid, + struct cifsInodeInfo *cinode) +{ + if (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) + return SMB2_lease_break(0, tcon, cinode->lease_key, + smb2_get_lease_state(cinode)); + + return SMB2_oplock_break(0, tcon, fid->persistent_fid, + fid->volatile_fid, + cinode->clientCanCacheRead ? 1 : 0); +} + +static int +smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon, + struct kstatfs *buf) +{ + int rc; + u64 persistent_fid, volatile_fid; + __le16 srch_path = 0; /* Null - open root of share */ + u8 oplock = SMB2_OPLOCK_LEVEL_NONE; + + rc = SMB2_open(xid, tcon, &srch_path, &persistent_fid, &volatile_fid, + FILE_READ_ATTRIBUTES, FILE_OPEN, 0, 0, &oplock, NULL); + if (rc) + return rc; + buf->f_type = SMB2_MAGIC_NUMBER; + rc = SMB2_QFS_info(xid, tcon, persistent_fid, volatile_fid, buf); + SMB2_close(xid, tcon, persistent_fid, volatile_fid); + return rc; +} + +static bool +smb2_compare_fids(struct cifsFileInfo *ob1, struct cifsFileInfo *ob2) +{ + return ob1->fid.persistent_fid == ob2->fid.persistent_fid && + ob1->fid.volatile_fid == ob2->fid.volatile_fid; +} + +static int +smb2_mand_lock(const unsigned int xid, struct cifsFileInfo *cfile, __u64 offset, + __u64 length, __u32 type, int lock, int unlock, bool wait) +{ + if (unlock && !lock) + type = SMB2_LOCKFLAG_UNLOCK; + return SMB2_lock(xid, tlink_tcon(cfile->tlink), + cfile->fid.persistent_fid, cfile->fid.volatile_fid, + current->tgid, length, offset, type, wait); +} + +static void +smb2_get_lease_key(struct inode *inode, struct cifs_fid *fid) +{ + memcpy(fid->lease_key, CIFS_I(inode)->lease_key, SMB2_LEASE_KEY_SIZE); +} + +static void +smb2_set_lease_key(struct inode *inode, struct cifs_fid *fid) +{ + memcpy(CIFS_I(inode)->lease_key, fid->lease_key, SMB2_LEASE_KEY_SIZE); +} + +static void +smb2_new_lease_key(struct cifs_fid *fid) +{ + get_random_bytes(fid->lease_key, SMB2_LEASE_KEY_SIZE); +} + struct smb_version_operations smb21_operations = { + .compare_fids = smb2_compare_fids, .setup_request = smb2_setup_request, .setup_async_request = smb2_setup_async_request, .check_receive = smb2_check_receive, @@ -301,13 +587,19 @@ struct smb_version_operations smb21_operations = { .get_credits_field = smb2_get_credits_field, .get_credits = smb2_get_credits, .get_next_mid = smb2_get_next_mid, + .read_data_offset = smb2_read_data_offset, + .read_data_length = smb2_read_data_length, + .map_error = map_smb2_to_linux_error, .find_mid = smb2_find_mid, .check_message = smb2_check_message, .dump_detail = smb2_dump_detail, .clear_stats = smb2_clear_stats, .print_stats = smb2_print_stats, + .is_oplock_break = smb2_is_valid_oplock_break, .need_neg = smb2_need_neg, .negotiate = smb2_negotiate, + .negotiate_wsize = smb2_negotiate_wsize, + .negotiate_rsize = smb2_negotiate_rsize, .sess_setup = SMB2_sess_setup, .logoff = SMB2_logoff, .tree_connect = SMB2_tcon, @@ -317,16 +609,68 @@ struct smb_version_operations smb21_operations = { .echo = SMB2_echo, .query_path_info = smb2_query_path_info, .get_srv_inum = smb2_get_srv_inum, + .query_file_info = smb2_query_file_info, + .set_path_size = smb2_set_path_size, + .set_file_size = smb2_set_file_size, + .set_file_info = smb2_set_file_info, .build_path_to_root = smb2_build_path_to_root, .mkdir = smb2_mkdir, .mkdir_setinfo = smb2_mkdir_setinfo, .rmdir = smb2_rmdir, + .unlink = smb2_unlink, + .rename = smb2_rename_path, + .create_hardlink = smb2_create_hardlink, + .open = smb2_open_file, + .set_fid = smb2_set_fid, + .close = smb2_close_file, + .flush = smb2_flush_file, + .async_readv = smb2_async_readv, + .async_writev = smb2_async_writev, + .sync_read = smb2_sync_read, + .sync_write = smb2_sync_write, + .query_dir_first = smb2_query_dir_first, + .query_dir_next = smb2_query_dir_next, + .close_dir = smb2_close_dir, + .calc_smb_size = smb2_calc_size, + .is_status_pending = smb2_is_status_pending, + .oplock_response = smb2_oplock_response, + .queryfs = smb2_queryfs, + .mand_lock = smb2_mand_lock, + .mand_unlock_range = smb2_unlock_range, + .push_mand_locks = smb2_push_mandatory_locks, + .get_lease_key = smb2_get_lease_key, + .set_lease_key = smb2_set_lease_key, + .new_lease_key = smb2_new_lease_key, }; struct smb_version_values smb21_values = { .version_string = SMB21_VERSION_STRING, + .protocol_id = SMB21_PROT_ID, + .req_capabilities = 0, /* MBZ on negotiate req until SMB3 dialect */ + .large_lock_type = 0, + .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, + .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, + .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK, + .header_size = sizeof(struct smb2_hdr), + .max_header_size = MAX_SMB2_HDR_SIZE, + .read_rsp_size = sizeof(struct smb2_read_rsp) - 1, + .lock_cmd = SMB2_LOCK, + .cap_unix = 0, + .cap_nt_find = SMB2_NT_FIND, + .cap_large_files = SMB2_LARGE_FILES, +}; + +struct smb_version_values smb30_values = { + .version_string = SMB30_VERSION_STRING, + .protocol_id = SMB30_PROT_ID, + .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU, + .large_lock_type = 0, + .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, + .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, + .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK, .header_size = sizeof(struct smb2_hdr), .max_header_size = MAX_SMB2_HDR_SIZE, + .read_rsp_size = sizeof(struct smb2_read_rsp) - 1, .lock_cmd = SMB2_LOCK, .cap_unix = 0, .cap_nt_find = SMB2_NT_FIND, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 62b3f17d0613..cf33622cdac8 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1,7 +1,7 @@ /* * fs/cifs/smb2pdu.c * - * Copyright (C) International Business Machines Corp., 2009, 2011 + * Copyright (C) International Business Machines Corp., 2009, 2012 * Etersoft, 2012 * Author(s): Steve French (sfrench@us.ibm.com) * Pavel Shilovsky (pshilovsky@samba.org) 2012 @@ -31,7 +31,9 @@ #include <linux/fs.h> #include <linux/kernel.h> #include <linux/vfs.h> +#include <linux/task_io_accounting_ops.h> #include <linux/uaccess.h> +#include <linux/pagemap.h> #include <linux/xattr.h> #include "smb2pdu.h" #include "cifsglob.h" @@ -42,6 +44,8 @@ #include "cifs_debug.h" #include "ntlmssp.h" #include "smb2status.h" +#include "smb2glob.h" +#include "cifspdu.h" /* * The following table defines the expected "StructureSize" of SMB2 requests @@ -115,9 +119,9 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ , /* BB how does SMB2 do case sensitive? */ /* if (tcon->nocase) hdr->Flags |= SMBFLG_CASELESS; */ - /* if (tcon->ses && tcon->ses->server && + if (tcon->ses && tcon->ses->server && (tcon->ses->server->sec_mode & SECMODE_SIGN_REQUIRED)) - hdr->Flags |= SMB2_FLAGS_SIGNED; */ + hdr->Flags |= SMB2_FLAGS_SIGNED; out: pdu->StructureSize2 = cpu_to_le16(parmsize); return; @@ -300,24 +304,6 @@ free_rsp_buf(int resp_buftype, void *rsp) cifs_buf_release(rsp); } -#define SMB2_NUM_PROT 1 - -#define SMB2_PROT 0 -#define SMB21_PROT 1 -#define BAD_PROT 0xFFFF - -#define SMB2_PROT_ID 0x0202 -#define SMB21_PROT_ID 0x0210 -#define BAD_PROT_ID 0xFFFF - -static struct { - int index; - __le16 name; -} smb2protocols[] = { - {SMB2_PROT, cpu_to_le16(SMB2_PROT_ID)}, - {SMB21_PROT, cpu_to_le16(SMB21_PROT_ID)}, - {BAD_PROT, cpu_to_le16(BAD_PROT_ID)} -}; /* * @@ -344,7 +330,6 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) int resp_buftype; struct TCP_Server_Info *server; unsigned int sec_flags; - u16 i; u16 temp = 0; int blob_offset, blob_length; char *security_blob; @@ -373,11 +358,10 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) req->hdr.SessionId = 0; - for (i = 0; i < SMB2_NUM_PROT; i++) - req->Dialects[i] = smb2protocols[i].name; + req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id); - req->DialectCount = cpu_to_le16(i); - inc_rfc1001_len(req, i * 2); + req->DialectCount = cpu_to_le16(1); /* One vers= at a time for now */ + inc_rfc1001_len(req, 2); /* only one of SMB2 signing flags may be set in SMB2 request */ if ((sec_flags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) @@ -387,7 +371,9 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) req->SecurityMode = cpu_to_le16(temp); - req->Capabilities = cpu_to_le32(SMB2_GLOBAL_CAP_DFS); + req->Capabilities = cpu_to_le32(ses->server->vals->req_capabilities); + + memcpy(req->ClientGUID, cifs_client_guid, SMB2_CLIENT_GUID_SIZE); iov[0].iov_base = (char *)req; /* 4 for rfc1002 length field */ @@ -403,17 +389,16 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) if (rc != 0) goto neg_exit; - if (rsp == NULL) { - rc = -EIO; - goto neg_exit; - } - cFYI(1, "mode 0x%x", rsp->SecurityMode); - if (rsp->DialectRevision == smb2protocols[SMB21_PROT].name) + /* BB we may eventually want to match the negotiated vs. requested + dialect, even though we are only requesting one at a time */ + if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) + cFYI(1, "negotiated smb2.0 dialect"); + else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) cFYI(1, "negotiated smb2.1 dialect"); - else if (rsp->DialectRevision == smb2protocols[SMB2_PROT].name) - cFYI(1, "negotiated smb2 dialect"); + else if (rsp->DialectRevision == cpu_to_le16(SMB30_PROT_ID)) + cFYI(1, "negotiated smb3.0 dialect"); else { cERROR(1, "Illegal dialect returned by server %d", le16_to_cpu(rsp->DialectRevision)); @@ -438,6 +423,38 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) rc = -EIO; goto neg_exit; } + + cFYI(1, "sec_flags 0x%x", sec_flags); + if (sec_flags & CIFSSEC_MUST_SIGN) { + cFYI(1, "Signing required"); + if (!(server->sec_mode & (SMB2_NEGOTIATE_SIGNING_REQUIRED | + SMB2_NEGOTIATE_SIGNING_ENABLED))) { + cERROR(1, "signing required but server lacks support"); + rc = -EOPNOTSUPP; + goto neg_exit; + } + server->sec_mode |= SECMODE_SIGN_REQUIRED; + } else if (sec_flags & CIFSSEC_MAY_SIGN) { + cFYI(1, "Signing optional"); + if (server->sec_mode & SMB2_NEGOTIATE_SIGNING_REQUIRED) { + cFYI(1, "Server requires signing"); + server->sec_mode |= SECMODE_SIGN_REQUIRED; + } else { + server->sec_mode &= + ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); + } + } else { + cFYI(1, "Signing disabled"); + if (server->sec_mode & SMB2_NEGOTIATE_SIGNING_REQUIRED) { + cERROR(1, "Server requires packet signing to be enabled" + " in /proc/fs/cifs/SecurityFlags."); + rc = -EOPNOTSUPP; + goto neg_exit; + } + server->sec_mode &= + ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); + } + #ifdef CONFIG_SMB2_ASN1 /* BB REMOVEME when updated asn1.c ready */ rc = decode_neg_token_init(security_blob, blob_length, &server->sec_type); @@ -599,13 +616,14 @@ ssetup_ntlmssp_authenticate: kfree(security_blob); rsp = (struct smb2_sess_setup_rsp *)iov[0].iov_base; - if (rsp->hdr.Status == STATUS_MORE_PROCESSING_REQUIRED) { + if (resp_buftype != CIFS_NO_BUFFER && + rsp->hdr.Status == STATUS_MORE_PROCESSING_REQUIRED) { if (phase != NtLmNegotiate) { cERROR(1, "Unexpected more processing error"); goto ssetup_exit; } if (offsetof(struct smb2_sess_setup_rsp, Buffer) - 4 != - le16_to_cpu(rsp->SecurityBufferOffset)) { + le16_to_cpu(rsp->SecurityBufferOffset)) { cERROR(1, "Invalid security buffer offset %d", le16_to_cpu(rsp->SecurityBufferOffset)); rc = -EIO; @@ -631,11 +649,6 @@ ssetup_ntlmssp_authenticate: if (rc != 0) goto ssetup_exit; - if (rsp == NULL) { - rc = -EIO; - goto ssetup_exit; - } - ses->session_flags = le16_to_cpu(rsp->SessionFlags); ssetup_exit: free_rsp_buf(resp_buftype, rsp); @@ -666,6 +679,8 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses) /* since no tcon, smb2_init can not do this, so do here */ req->hdr.SessionId = ses->Suid; + if (server->sec_mode & SECMODE_SIGN_REQUIRED) + req->hdr.Flags |= SMB2_FLAGS_SIGNED; rc = SendReceiveNoRsp(xid, ses, (char *) &req->hdr, 0); /* @@ -753,11 +768,6 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, goto tcon_error_exit; } - if (rsp == NULL) { - rc = -EIO; - goto tcon_exit; - } - if (tcon == NULL) { ses->ipc_tid = rsp->hdr.TreeId; goto tcon_exit; @@ -830,18 +840,87 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon) return rc; } +static struct create_lease * +create_lease_buf(u8 *lease_key, u8 oplock) +{ + struct create_lease *buf; + + buf = kmalloc(sizeof(struct create_lease), GFP_KERNEL); + if (!buf) + return NULL; + + memset(buf, 0, sizeof(struct create_lease)); + + buf->lcontext.LeaseKeyLow = cpu_to_le64(*((u64 *)lease_key)); + buf->lcontext.LeaseKeyHigh = cpu_to_le64(*((u64 *)(lease_key + 8))); + if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE) + buf->lcontext.LeaseState = SMB2_LEASE_WRITE_CACHING | + SMB2_LEASE_READ_CACHING; + else if (oplock == SMB2_OPLOCK_LEVEL_II) + buf->lcontext.LeaseState = SMB2_LEASE_READ_CACHING; + else if (oplock == SMB2_OPLOCK_LEVEL_BATCH) + buf->lcontext.LeaseState = SMB2_LEASE_HANDLE_CACHING | + SMB2_LEASE_READ_CACHING | + SMB2_LEASE_WRITE_CACHING; + + buf->ccontext.DataOffset = cpu_to_le16(offsetof + (struct create_lease, lcontext)); + buf->ccontext.DataLength = cpu_to_le32(sizeof(struct lease_context)); + buf->ccontext.NameOffset = cpu_to_le16(offsetof + (struct create_lease, Name)); + buf->ccontext.NameLength = cpu_to_le16(4); + buf->Name[0] = 'R'; + buf->Name[1] = 'q'; + buf->Name[2] = 'L'; + buf->Name[3] = 's'; + return buf; +} + +static __u8 +parse_lease_state(struct smb2_create_rsp *rsp) +{ + char *data_offset; + struct create_lease *lc; + bool found = false; + + data_offset = (char *)rsp; + data_offset += 4 + le32_to_cpu(rsp->CreateContextsOffset); + lc = (struct create_lease *)data_offset; + do { + char *name = le16_to_cpu(lc->ccontext.NameOffset) + (char *)lc; + if (le16_to_cpu(lc->ccontext.NameLength) != 4 || + strncmp(name, "RqLs", 4)) { + lc = (struct create_lease *)((char *)lc + + le32_to_cpu(lc->ccontext.Next)); + continue; + } + if (lc->lcontext.LeaseFlags & SMB2_LEASE_FLAG_BREAK_IN_PROGRESS) + return SMB2_OPLOCK_LEVEL_NOCHANGE; + found = true; + break; + } while (le32_to_cpu(lc->ccontext.Next) != 0); + + if (!found) + return 0; + + return smb2_map_lease_to_oplock(lc->lcontext.LeaseState); +} + int SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, u64 *persistent_fid, u64 *volatile_fid, __u32 desired_access, - __u32 create_disposition, __u32 file_attributes, __u32 create_options) + __u32 create_disposition, __u32 file_attributes, __u32 create_options, + __u8 *oplock, struct smb2_file_all_info *buf) { struct smb2_create_req *req; struct smb2_create_rsp *rsp; struct TCP_Server_Info *server; struct cifs_ses *ses = tcon->ses; - struct kvec iov[2]; + struct kvec iov[3]; int resp_buftype; int uni_path_len; + __le16 *copy_path = NULL; + int copy_size; int rc = 0; int num_iovecs = 2; @@ -856,10 +935,6 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, if (rc) return rc; - if (enable_oplocks) - req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_BATCH; - else - req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_NONE; req->ImpersonationLevel = IL_IMPERSONATION; req->DesiredAccess = cpu_to_le32(desired_access); /* File attributes ignored on open (used in create though) */ @@ -869,7 +944,7 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, req->CreateOptions = cpu_to_le32(create_options); uni_path_len = (2 * UniStrnlen((wchar_t *)path, PATH_MAX)) + 2; req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req) - - 1 /* pad */ - 4 /* do not count rfc1001 len field */); + - 8 /* pad */ - 4 /* do not count rfc1001 len field */); iov[0].iov_base = (char *)req; /* 4 for rfc1002 length field */ @@ -880,6 +955,20 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, req->NameLength = cpu_to_le16(uni_path_len - 2); /* -1 since last byte is buf[0] which is sent below (path) */ iov[0].iov_len--; + if (uni_path_len % 8 != 0) { + copy_size = uni_path_len / 8 * 8; + if (copy_size < uni_path_len) + copy_size += 8; + + copy_path = kzalloc(copy_size, GFP_KERNEL); + if (!copy_path) + return -ENOMEM; + memcpy((char *)copy_path, (const char *)path, + uni_path_len); + uni_path_len = copy_size; + path = copy_path; + } + iov[1].iov_len = uni_path_len; iov[1].iov_base = path; /* @@ -888,10 +977,37 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, */ inc_rfc1001_len(req, uni_path_len - 1); } else { + iov[0].iov_len += 7; + req->hdr.smb2_buf_length = cpu_to_be32(be32_to_cpu( + req->hdr.smb2_buf_length) + 8 - 1); num_iovecs = 1; req->NameLength = 0; } + if (!server->oplocks) + *oplock = SMB2_OPLOCK_LEVEL_NONE; + + if (!(tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) || + *oplock == SMB2_OPLOCK_LEVEL_NONE) + req->RequestedOplockLevel = *oplock; + else { + iov[num_iovecs].iov_base = create_lease_buf(oplock+1, *oplock); + if (iov[num_iovecs].iov_base == NULL) { + cifs_small_buf_release(req); + kfree(copy_path); + return -ENOMEM; + } + iov[num_iovecs].iov_len = sizeof(struct create_lease); + req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_LEASE; + req->CreateContextsOffset = cpu_to_le32( + sizeof(struct smb2_create_req) - 4 - 8 + + iov[num_iovecs-1].iov_len); + req->CreateContextsLength = cpu_to_le32( + sizeof(struct create_lease)); + inc_rfc1001_len(&req->hdr, sizeof(struct create_lease)); + num_iovecs++; + } + rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0); rsp = (struct smb2_create_rsp *)iov[0].iov_base; @@ -900,13 +1016,24 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, goto creat_exit; } - if (rsp == NULL) { - rc = -EIO; - goto creat_exit; - } *persistent_fid = rsp->PersistentFileId; *volatile_fid = rsp->VolatileFileId; + + if (buf) { + memcpy(buf, &rsp->CreationTime, 32); + buf->AllocationSize = rsp->AllocationSize; + buf->EndOfFile = rsp->EndofFile; + buf->Attributes = rsp->FileAttributes; + buf->NumberOfLinks = cpu_to_le32(1); + buf->DeletePending = 0; + } + + if (rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE) + *oplock = parse_lease_state(rsp); + else + *oplock = rsp->OplockLevel; creat_exit: + kfree(copy_path); free_rsp_buf(resp_buftype, rsp); return rc; } @@ -950,11 +1077,6 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, goto close_exit; } - if (rsp == NULL) { - rc = -EIO; - goto close_exit; - } - /* BB FIXME - decode close response, update inode for caching */ close_exit: @@ -1019,10 +1141,10 @@ validate_and_copy_buf(unsigned int offset, unsigned int buffer_length, return 0; } -int -SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, - u64 persistent_fid, u64 volatile_fid, - struct smb2_file_all_info *data) +static int +query_info(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, u8 info_class, + size_t output_len, size_t min_len, void *data) { struct smb2_query_info_req *req; struct smb2_query_info_rsp *rsp = NULL; @@ -1044,37 +1166,56 @@ SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, return rc; req->InfoType = SMB2_O_INFO_FILE; - req->FileInfoClass = FILE_ALL_INFORMATION; + req->FileInfoClass = info_class; req->PersistentFileId = persistent_fid; req->VolatileFileId = volatile_fid; /* 4 for rfc1002 length field and 1 for Buffer */ req->InputBufferOffset = cpu_to_le16(sizeof(struct smb2_query_info_req) - 1 - 4); - req->OutputBufferLength = - cpu_to_le32(sizeof(struct smb2_file_all_info) + MAX_NAME * 2); + req->OutputBufferLength = cpu_to_le32(output_len); iov[0].iov_base = (char *)req; /* 4 for rfc1002 length field */ iov[0].iov_len = get_rfc1002_length(req) + 4; rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, 0); + rsp = (struct smb2_query_info_rsp *)iov[0].iov_base; + if (rc) { cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE); goto qinf_exit; } - rsp = (struct smb2_query_info_rsp *)iov[0].iov_base; - rc = validate_and_copy_buf(le16_to_cpu(rsp->OutputBufferOffset), le32_to_cpu(rsp->OutputBufferLength), - &rsp->hdr, sizeof(struct smb2_file_all_info), - (char *)data); + &rsp->hdr, min_len, data); qinf_exit: free_rsp_buf(resp_buftype, rsp); return rc; } +int +SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, + struct smb2_file_all_info *data) +{ + return query_info(xid, tcon, persistent_fid, volatile_fid, + FILE_ALL_INFORMATION, + sizeof(struct smb2_file_all_info) + MAX_NAME * 2, + sizeof(struct smb2_file_all_info), data); +} + +int +SMB2_get_srv_num(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, __le64 *uniqueid) +{ + return query_info(xid, tcon, persistent_fid, volatile_fid, + FILE_INTERNAL_INFORMATION, + sizeof(struct smb2_file_internal_info), + sizeof(struct smb2_file_internal_info), uniqueid); +} + /* * This is a no-op for now. We're not really interested in the reply, but * rather in the fact that the server sent one and that server->lstrp @@ -1102,6 +1243,8 @@ SMB2_echo(struct TCP_Server_Info *server) struct smb2_echo_req *req; int rc = 0; struct kvec iov; + struct smb_rqst rqst = { .rq_iov = &iov, + .rq_nvec = 1 }; cFYI(1, "In echo request"); @@ -1115,7 +1258,7 @@ SMB2_echo(struct TCP_Server_Info *server) /* 4 for rfc1002 length field */ iov.iov_len = get_rfc1002_length(req) + 4; - rc = cifs_call_async(server, &iov, 1, NULL, smb2_echo_callback, server, + rc = cifs_call_async(server, &rqst, NULL, smb2_echo_callback, server, CIFS_ECHO_OP); if (rc) cFYI(1, "Echo request failed: %d", rc); @@ -1123,3 +1266,945 @@ SMB2_echo(struct TCP_Server_Info *server) cifs_small_buf_release(req); return rc; } + +int +SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, + u64 volatile_fid) +{ + struct smb2_flush_req *req; + struct TCP_Server_Info *server; + struct cifs_ses *ses = tcon->ses; + struct kvec iov[1]; + int resp_buftype; + int rc = 0; + + cFYI(1, "Flush"); + + if (ses && (ses->server)) + server = ses->server; + else + return -EIO; + + rc = small_smb2_init(SMB2_FLUSH, tcon, (void **) &req); + if (rc) + return rc; + + req->PersistentFileId = persistent_fid; + req->VolatileFileId = volatile_fid; + + iov[0].iov_base = (char *)req; + /* 4 for rfc1002 length field */ + iov[0].iov_len = get_rfc1002_length(req) + 4; + + rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, 0); + + if ((rc != 0) && tcon) + cifs_stats_fail_inc(tcon, SMB2_FLUSH_HE); + + free_rsp_buf(resp_buftype, iov[0].iov_base); + return rc; +} + +/* + * To form a chain of read requests, any read requests after the first should + * have the end_of_chain boolean set to true. + */ +static int +smb2_new_read_req(struct kvec *iov, struct cifs_io_parms *io_parms, + unsigned int remaining_bytes, int request_type) +{ + int rc = -EACCES; + struct smb2_read_req *req = NULL; + + rc = small_smb2_init(SMB2_READ, io_parms->tcon, (void **) &req); + if (rc) + return rc; + if (io_parms->tcon->ses->server == NULL) + return -ECONNABORTED; + + req->hdr.ProcessId = cpu_to_le32(io_parms->pid); + + req->PersistentFileId = io_parms->persistent_fid; + req->VolatileFileId = io_parms->volatile_fid; + req->ReadChannelInfoOffset = 0; /* reserved */ + req->ReadChannelInfoLength = 0; /* reserved */ + req->Channel = 0; /* reserved */ + req->MinimumCount = 0; + req->Length = cpu_to_le32(io_parms->length); + req->Offset = cpu_to_le64(io_parms->offset); + + if (request_type & CHAINED_REQUEST) { + if (!(request_type & END_OF_CHAIN)) { + /* 4 for rfc1002 length field */ + req->hdr.NextCommand = + cpu_to_le32(get_rfc1002_length(req) + 4); + } else /* END_OF_CHAIN */ + req->hdr.NextCommand = 0; + if (request_type & RELATED_REQUEST) { + req->hdr.Flags |= SMB2_FLAGS_RELATED_OPERATIONS; + /* + * Related requests use info from previous read request + * in chain. + */ + req->hdr.SessionId = 0xFFFFFFFF; + req->hdr.TreeId = 0xFFFFFFFF; + req->PersistentFileId = 0xFFFFFFFF; + req->VolatileFileId = 0xFFFFFFFF; + } + } + if (remaining_bytes > io_parms->length) + req->RemainingBytes = cpu_to_le32(remaining_bytes); + else + req->RemainingBytes = 0; + + iov[0].iov_base = (char *)req; + /* 4 for rfc1002 length field */ + iov[0].iov_len = get_rfc1002_length(req) + 4; + return rc; +} + +static void +smb2_readv_callback(struct mid_q_entry *mid) +{ + struct cifs_readdata *rdata = mid->callback_data; + struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); + struct TCP_Server_Info *server = tcon->ses->server; + struct smb2_hdr *buf = (struct smb2_hdr *)rdata->iov.iov_base; + unsigned int credits_received = 1; + struct smb_rqst rqst = { .rq_iov = &rdata->iov, + .rq_nvec = 1, + .rq_pages = rdata->pages, + .rq_npages = rdata->nr_pages, + .rq_pagesz = rdata->pagesz, + .rq_tailsz = rdata->tailsz }; + + cFYI(1, "%s: mid=%llu state=%d result=%d bytes=%u", __func__, + mid->mid, mid->mid_state, rdata->result, rdata->bytes); + + switch (mid->mid_state) { + case MID_RESPONSE_RECEIVED: + credits_received = le16_to_cpu(buf->CreditRequest); + /* result already set, check signature */ + if (server->sec_mode & + (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { + int rc; + + rc = smb2_verify_signature(&rqst, server); + if (rc) + cERROR(1, "SMB signature verification returned " + "error = %d", rc); + } + /* FIXME: should this be counted toward the initiating task? */ + task_io_account_read(rdata->bytes); + cifs_stats_bytes_read(tcon, rdata->bytes); + break; + case MID_REQUEST_SUBMITTED: + case MID_RETRY_NEEDED: + rdata->result = -EAGAIN; + break; + default: + if (rdata->result != -ENODATA) + rdata->result = -EIO; + } + + if (rdata->result) + cifs_stats_fail_inc(tcon, SMB2_READ_HE); + + queue_work(cifsiod_wq, &rdata->work); + DeleteMidQEntry(mid); + add_credits(server, credits_received, 0); +} + +/* smb2_async_readv - send an async write, and set up mid to handle result */ +int +smb2_async_readv(struct cifs_readdata *rdata) +{ + int rc; + struct smb2_hdr *buf; + struct cifs_io_parms io_parms; + struct smb_rqst rqst = { .rq_iov = &rdata->iov, + .rq_nvec = 1 }; + + cFYI(1, "%s: offset=%llu bytes=%u", __func__, + rdata->offset, rdata->bytes); + + io_parms.tcon = tlink_tcon(rdata->cfile->tlink); + io_parms.offset = rdata->offset; + io_parms.length = rdata->bytes; + io_parms.persistent_fid = rdata->cfile->fid.persistent_fid; + io_parms.volatile_fid = rdata->cfile->fid.volatile_fid; + io_parms.pid = rdata->pid; + rc = smb2_new_read_req(&rdata->iov, &io_parms, 0, 0); + if (rc) + return rc; + + buf = (struct smb2_hdr *)rdata->iov.iov_base; + /* 4 for rfc1002 length field */ + rdata->iov.iov_len = get_rfc1002_length(rdata->iov.iov_base) + 4; + + kref_get(&rdata->refcount); + rc = cifs_call_async(io_parms.tcon->ses->server, &rqst, + cifs_readv_receive, smb2_readv_callback, + rdata, 0); + if (rc) { + kref_put(&rdata->refcount, cifs_readdata_release); + cifs_stats_fail_inc(io_parms.tcon, SMB2_READ_HE); + } + + cifs_small_buf_release(buf); + return rc; +} + +int +SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, + unsigned int *nbytes, char **buf, int *buf_type) +{ + int resp_buftype, rc = -EACCES; + struct smb2_read_rsp *rsp = NULL; + struct kvec iov[1]; + + *nbytes = 0; + rc = smb2_new_read_req(iov, io_parms, 0, 0); + if (rc) + return rc; + + rc = SendReceive2(xid, io_parms->tcon->ses, iov, 1, + &resp_buftype, CIFS_LOG_ERROR); + + rsp = (struct smb2_read_rsp *)iov[0].iov_base; + + if (rsp->hdr.Status == STATUS_END_OF_FILE) { + free_rsp_buf(resp_buftype, iov[0].iov_base); + return 0; + } + + if (rc) { + cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE); + cERROR(1, "Send error in read = %d", rc); + } else { + *nbytes = le32_to_cpu(rsp->DataLength); + if ((*nbytes > CIFS_MAX_MSGSIZE) || + (*nbytes > io_parms->length)) { + cFYI(1, "bad length %d for count %d", *nbytes, + io_parms->length); + rc = -EIO; + *nbytes = 0; + } + } + + if (*buf) { + memcpy(*buf, (char *)rsp->hdr.ProtocolId + rsp->DataOffset, + *nbytes); + free_rsp_buf(resp_buftype, iov[0].iov_base); + } else if (resp_buftype != CIFS_NO_BUFFER) { + *buf = iov[0].iov_base; + if (resp_buftype == CIFS_SMALL_BUFFER) + *buf_type = CIFS_SMALL_BUFFER; + else if (resp_buftype == CIFS_LARGE_BUFFER) + *buf_type = CIFS_LARGE_BUFFER; + } + return rc; +} + +/* + * Check the mid_state and signature on received buffer (if any), and queue the + * workqueue completion task. + */ +static void +smb2_writev_callback(struct mid_q_entry *mid) +{ + struct cifs_writedata *wdata = mid->callback_data; + struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); + unsigned int written; + struct smb2_write_rsp *rsp = (struct smb2_write_rsp *)mid->resp_buf; + unsigned int credits_received = 1; + + switch (mid->mid_state) { + case MID_RESPONSE_RECEIVED: + credits_received = le16_to_cpu(rsp->hdr.CreditRequest); + wdata->result = smb2_check_receive(mid, tcon->ses->server, 0); + if (wdata->result != 0) + break; + + written = le32_to_cpu(rsp->DataLength); + /* + * Mask off high 16 bits when bytes written as returned + * by the server is greater than bytes requested by the + * client. OS/2 servers are known to set incorrect + * CountHigh values. + */ + if (written > wdata->bytes) + written &= 0xFFFF; + + if (written < wdata->bytes) + wdata->result = -ENOSPC; + else + wdata->bytes = written; + break; + case MID_REQUEST_SUBMITTED: + case MID_RETRY_NEEDED: + wdata->result = -EAGAIN; + break; + default: + wdata->result = -EIO; + break; + } + + if (wdata->result) + cifs_stats_fail_inc(tcon, SMB2_WRITE_HE); + + queue_work(cifsiod_wq, &wdata->work); + DeleteMidQEntry(mid); + add_credits(tcon->ses->server, credits_received, 0); +} + +/* smb2_async_writev - send an async write, and set up mid to handle result */ +int +smb2_async_writev(struct cifs_writedata *wdata) +{ + int rc = -EACCES; + struct smb2_write_req *req = NULL; + struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); + struct kvec iov; + struct smb_rqst rqst; + + rc = small_smb2_init(SMB2_WRITE, tcon, (void **) &req); + if (rc) + goto async_writev_out; + + req->hdr.ProcessId = cpu_to_le32(wdata->cfile->pid); + + req->PersistentFileId = wdata->cfile->fid.persistent_fid; + req->VolatileFileId = wdata->cfile->fid.volatile_fid; + req->WriteChannelInfoOffset = 0; + req->WriteChannelInfoLength = 0; + req->Channel = 0; + req->Offset = cpu_to_le64(wdata->offset); + /* 4 for rfc1002 length field */ + req->DataOffset = cpu_to_le16( + offsetof(struct smb2_write_req, Buffer) - 4); + req->RemainingBytes = 0; + + /* 4 for rfc1002 length field and 1 for Buffer */ + iov.iov_len = get_rfc1002_length(req) + 4 - 1; + iov.iov_base = req; + + rqst.rq_iov = &iov; + rqst.rq_nvec = 1; + rqst.rq_pages = wdata->pages; + rqst.rq_npages = wdata->nr_pages; + rqst.rq_pagesz = wdata->pagesz; + rqst.rq_tailsz = wdata->tailsz; + + cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes); + + req->Length = cpu_to_le32(wdata->bytes); + + inc_rfc1001_len(&req->hdr, wdata->bytes - 1 /* Buffer */); + + kref_get(&wdata->refcount); + rc = cifs_call_async(tcon->ses->server, &rqst, NULL, + smb2_writev_callback, wdata, 0); + + if (rc) { + kref_put(&wdata->refcount, cifs_writedata_release); + cifs_stats_fail_inc(tcon, SMB2_WRITE_HE); + } + +async_writev_out: + cifs_small_buf_release(req); + return rc; +} + +/* + * SMB2_write function gets iov pointer to kvec array with n_vec as a length. + * The length field from io_parms must be at least 1 and indicates a number of + * elements with data to write that begins with position 1 in iov array. All + * data length is specified by count. + */ +int +SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, + unsigned int *nbytes, struct kvec *iov, int n_vec) +{ + int rc = 0; + struct smb2_write_req *req = NULL; + struct smb2_write_rsp *rsp = NULL; + int resp_buftype; + *nbytes = 0; + + if (n_vec < 1) + return rc; + + rc = small_smb2_init(SMB2_WRITE, io_parms->tcon, (void **) &req); + if (rc) + return rc; + + if (io_parms->tcon->ses->server == NULL) + return -ECONNABORTED; + + req->hdr.ProcessId = cpu_to_le32(io_parms->pid); + + req->PersistentFileId = io_parms->persistent_fid; + req->VolatileFileId = io_parms->volatile_fid; + req->WriteChannelInfoOffset = 0; + req->WriteChannelInfoLength = 0; + req->Channel = 0; + req->Length = cpu_to_le32(io_parms->length); + req->Offset = cpu_to_le64(io_parms->offset); + /* 4 for rfc1002 length field */ + req->DataOffset = cpu_to_le16( + offsetof(struct smb2_write_req, Buffer) - 4); + req->RemainingBytes = 0; + + iov[0].iov_base = (char *)req; + /* 4 for rfc1002 length field and 1 for Buffer */ + iov[0].iov_len = get_rfc1002_length(req) + 4 - 1; + + /* length of entire message including data to be written */ + inc_rfc1001_len(req, io_parms->length - 1 /* Buffer */); + + rc = SendReceive2(xid, io_parms->tcon->ses, iov, n_vec + 1, + &resp_buftype, 0); + rsp = (struct smb2_write_rsp *)iov[0].iov_base; + + if (rc) { + cifs_stats_fail_inc(io_parms->tcon, SMB2_WRITE_HE); + cERROR(1, "Send error in write = %d", rc); + } else + *nbytes = le32_to_cpu(rsp->DataLength); + + free_rsp_buf(resp_buftype, rsp); + return rc; +} + +static unsigned int +num_entries(char *bufstart, char *end_of_buf, char **lastentry, size_t size) +{ + int len; + unsigned int entrycount = 0; + unsigned int next_offset = 0; + FILE_DIRECTORY_INFO *entryptr; + + if (bufstart == NULL) + return 0; + + entryptr = (FILE_DIRECTORY_INFO *)bufstart; + + while (1) { + entryptr = (FILE_DIRECTORY_INFO *) + ((char *)entryptr + next_offset); + + if ((char *)entryptr + size > end_of_buf) { + cERROR(1, "malformed search entry would overflow"); + break; + } + + len = le32_to_cpu(entryptr->FileNameLength); + if ((char *)entryptr + len + size > end_of_buf) { + cERROR(1, "directory entry name would overflow frame " + "end of buf %p", end_of_buf); + break; + } + + *lastentry = (char *)entryptr; + entrycount++; + + next_offset = le32_to_cpu(entryptr->NextEntryOffset); + if (!next_offset) + break; + } + + return entrycount; +} + +/* + * Readdir/FindFirst + */ +int +SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, int index, + struct cifs_search_info *srch_inf) +{ + struct smb2_query_directory_req *req; + struct smb2_query_directory_rsp *rsp = NULL; + struct kvec iov[2]; + int rc = 0; + int len; + int resp_buftype; + unsigned char *bufptr; + struct TCP_Server_Info *server; + struct cifs_ses *ses = tcon->ses; + __le16 asteriks = cpu_to_le16('*'); + char *end_of_smb; + unsigned int output_size = CIFSMaxBufSize; + size_t info_buf_size; + + if (ses && (ses->server)) + server = ses->server; + else + return -EIO; + + rc = small_smb2_init(SMB2_QUERY_DIRECTORY, tcon, (void **) &req); + if (rc) + return rc; + + switch (srch_inf->info_level) { + case SMB_FIND_FILE_DIRECTORY_INFO: + req->FileInformationClass = FILE_DIRECTORY_INFORMATION; + info_buf_size = sizeof(FILE_DIRECTORY_INFO) - 1; + break; + case SMB_FIND_FILE_ID_FULL_DIR_INFO: + req->FileInformationClass = FILEID_FULL_DIRECTORY_INFORMATION; + info_buf_size = sizeof(SEARCH_ID_FULL_DIR_INFO) - 1; + break; + default: + cERROR(1, "info level %u isn't supported", + srch_inf->info_level); + rc = -EINVAL; + goto qdir_exit; + } + + req->FileIndex = cpu_to_le32(index); + req->PersistentFileId = persistent_fid; + req->VolatileFileId = volatile_fid; + + len = 0x2; + bufptr = req->Buffer; + memcpy(bufptr, &asteriks, len); + + req->FileNameOffset = + cpu_to_le16(sizeof(struct smb2_query_directory_req) - 1 - 4); + req->FileNameLength = cpu_to_le16(len); + /* + * BB could be 30 bytes or so longer if we used SMB2 specific + * buffer lengths, but this is safe and close enough. + */ + output_size = min_t(unsigned int, output_size, server->maxBuf); + output_size = min_t(unsigned int, output_size, 2 << 15); + req->OutputBufferLength = cpu_to_le32(output_size); + + iov[0].iov_base = (char *)req; + /* 4 for RFC1001 length and 1 for Buffer */ + iov[0].iov_len = get_rfc1002_length(req) + 4 - 1; + + iov[1].iov_base = (char *)(req->Buffer); + iov[1].iov_len = len; + + inc_rfc1001_len(req, len - 1 /* Buffer */); + + rc = SendReceive2(xid, ses, iov, 2, &resp_buftype, 0); + rsp = (struct smb2_query_directory_rsp *)iov[0].iov_base; + + if (rc) { + cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE); + goto qdir_exit; + } + + rc = validate_buf(le16_to_cpu(rsp->OutputBufferOffset), + le32_to_cpu(rsp->OutputBufferLength), &rsp->hdr, + info_buf_size); + if (rc) + goto qdir_exit; + + srch_inf->unicode = true; + + if (srch_inf->ntwrk_buf_start) { + if (srch_inf->smallBuf) + cifs_small_buf_release(srch_inf->ntwrk_buf_start); + else + cifs_buf_release(srch_inf->ntwrk_buf_start); + } + srch_inf->ntwrk_buf_start = (char *)rsp; + srch_inf->srch_entries_start = srch_inf->last_entry = 4 /* rfclen */ + + (char *)&rsp->hdr + le16_to_cpu(rsp->OutputBufferOffset); + /* 4 for rfc1002 length field */ + end_of_smb = get_rfc1002_length(rsp) + 4 + (char *)&rsp->hdr; + srch_inf->entries_in_buffer = + num_entries(srch_inf->srch_entries_start, end_of_smb, + &srch_inf->last_entry, info_buf_size); + srch_inf->index_of_last_entry += srch_inf->entries_in_buffer; + cFYI(1, "num entries %d last_index %lld srch start %p srch end %p", + srch_inf->entries_in_buffer, srch_inf->index_of_last_entry, + srch_inf->srch_entries_start, srch_inf->last_entry); + if (resp_buftype == CIFS_LARGE_BUFFER) + srch_inf->smallBuf = false; + else if (resp_buftype == CIFS_SMALL_BUFFER) + srch_inf->smallBuf = true; + else + cERROR(1, "illegal search buffer type"); + + if (rsp->hdr.Status == STATUS_NO_MORE_FILES) + srch_inf->endOfSearch = 1; + else + srch_inf->endOfSearch = 0; + + return rc; + +qdir_exit: + free_rsp_buf(resp_buftype, rsp); + return rc; +} + +static int +send_set_info(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, u32 pid, int info_class, + unsigned int num, void **data, unsigned int *size) +{ + struct smb2_set_info_req *req; + struct smb2_set_info_rsp *rsp = NULL; + struct kvec *iov; + int rc = 0; + int resp_buftype; + unsigned int i; + struct TCP_Server_Info *server; + struct cifs_ses *ses = tcon->ses; + + if (ses && (ses->server)) + server = ses->server; + else + return -EIO; + + if (!num) + return -EINVAL; + + iov = kmalloc(sizeof(struct kvec) * num, GFP_KERNEL); + if (!iov) + return -ENOMEM; + + rc = small_smb2_init(SMB2_SET_INFO, tcon, (void **) &req); + if (rc) { + kfree(iov); + return rc; + } + + req->hdr.ProcessId = cpu_to_le32(pid); + + req->InfoType = SMB2_O_INFO_FILE; + req->FileInfoClass = info_class; + req->PersistentFileId = persistent_fid; + req->VolatileFileId = volatile_fid; + + /* 4 for RFC1001 length and 1 for Buffer */ + req->BufferOffset = + cpu_to_le16(sizeof(struct smb2_set_info_req) - 1 - 4); + req->BufferLength = cpu_to_le32(*size); + + inc_rfc1001_len(req, *size - 1 /* Buffer */); + + memcpy(req->Buffer, *data, *size); + + iov[0].iov_base = (char *)req; + /* 4 for RFC1001 length */ + iov[0].iov_len = get_rfc1002_length(req) + 4; + + for (i = 1; i < num; i++) { + inc_rfc1001_len(req, size[i]); + le32_add_cpu(&req->BufferLength, size[i]); + iov[i].iov_base = (char *)data[i]; + iov[i].iov_len = size[i]; + } + + rc = SendReceive2(xid, ses, iov, num, &resp_buftype, 0); + rsp = (struct smb2_set_info_rsp *)iov[0].iov_base; + + if (rc != 0) { + cifs_stats_fail_inc(tcon, SMB2_SET_INFO_HE); + goto out; + } +out: + free_rsp_buf(resp_buftype, rsp); + kfree(iov); + return rc; +} + +int +SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, __le16 *target_file) +{ + struct smb2_file_rename_info info; + void **data; + unsigned int size[2]; + int rc; + int len = (2 * UniStrnlen((wchar_t *)target_file, PATH_MAX)); + + data = kmalloc(sizeof(void *) * 2, GFP_KERNEL); + if (!data) + return -ENOMEM; + + info.ReplaceIfExists = 1; /* 1 = replace existing target with new */ + /* 0 = fail if target already exists */ + info.RootDirectory = 0; /* MBZ for network ops (why does spec say?) */ + info.FileNameLength = cpu_to_le32(len); + + data[0] = &info; + size[0] = sizeof(struct smb2_file_rename_info); + + data[1] = target_file; + size[1] = len + 2 /* null */; + + rc = send_set_info(xid, tcon, persistent_fid, volatile_fid, + current->tgid, FILE_RENAME_INFORMATION, 2, data, + size); + kfree(data); + return rc; +} + +int +SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, __le16 *target_file) +{ + struct smb2_file_link_info info; + void **data; + unsigned int size[2]; + int rc; + int len = (2 * UniStrnlen((wchar_t *)target_file, PATH_MAX)); + + data = kmalloc(sizeof(void *) * 2, GFP_KERNEL); + if (!data) + return -ENOMEM; + + info.ReplaceIfExists = 0; /* 1 = replace existing link with new */ + /* 0 = fail if link already exists */ + info.RootDirectory = 0; /* MBZ for network ops (why does spec say?) */ + info.FileNameLength = cpu_to_le32(len); + + data[0] = &info; + size[0] = sizeof(struct smb2_file_link_info); + + data[1] = target_file; + size[1] = len + 2 /* null */; + + rc = send_set_info(xid, tcon, persistent_fid, volatile_fid, + current->tgid, FILE_LINK_INFORMATION, 2, data, size); + kfree(data); + return rc; +} + +int +SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, + u64 volatile_fid, u32 pid, __le64 *eof) +{ + struct smb2_file_eof_info info; + void *data; + unsigned int size; + + info.EndOfFile = *eof; + + data = &info; + size = sizeof(struct smb2_file_eof_info); + + return send_set_info(xid, tcon, persistent_fid, volatile_fid, pid, + FILE_END_OF_FILE_INFORMATION, 1, &data, &size); +} + +int +SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, FILE_BASIC_INFO *buf) +{ + unsigned int size; + size = sizeof(FILE_BASIC_INFO); + return send_set_info(xid, tcon, persistent_fid, volatile_fid, + current->tgid, FILE_BASIC_INFORMATION, 1, + (void **)&buf, &size); +} + +int +SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon, + const u64 persistent_fid, const u64 volatile_fid, + __u8 oplock_level) +{ + int rc; + struct smb2_oplock_break *req = NULL; + + cFYI(1, "SMB2_oplock_break"); + rc = small_smb2_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req); + + if (rc) + return rc; + + req->VolatileFid = volatile_fid; + req->PersistentFid = persistent_fid; + req->OplockLevel = oplock_level; + req->hdr.CreditRequest = cpu_to_le16(1); + + rc = SendReceiveNoRsp(xid, tcon->ses, (char *) req, CIFS_OBREAK_OP); + /* SMB2 buffer freed by function above */ + + if (rc) { + cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE); + cFYI(1, "Send error in Oplock Break = %d", rc); + } + + return rc; +} + +static void +copy_fs_info_to_kstatfs(struct smb2_fs_full_size_info *pfs_inf, + struct kstatfs *kst) +{ + kst->f_bsize = le32_to_cpu(pfs_inf->BytesPerSector) * + le32_to_cpu(pfs_inf->SectorsPerAllocationUnit); + kst->f_blocks = le64_to_cpu(pfs_inf->TotalAllocationUnits); + kst->f_bfree = le64_to_cpu(pfs_inf->ActualAvailableAllocationUnits); + kst->f_bavail = le64_to_cpu(pfs_inf->CallerAvailableAllocationUnits); + return; +} + +static int +build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level, + int outbuf_len, u64 persistent_fid, u64 volatile_fid) +{ + int rc; + struct smb2_query_info_req *req; + + cFYI(1, "Query FSInfo level %d", level); + + if ((tcon->ses == NULL) || (tcon->ses->server == NULL)) + return -EIO; + + rc = small_smb2_init(SMB2_QUERY_INFO, tcon, (void **) &req); + if (rc) + return rc; + + req->InfoType = SMB2_O_INFO_FILESYSTEM; + req->FileInfoClass = level; + req->PersistentFileId = persistent_fid; + req->VolatileFileId = volatile_fid; + /* 4 for rfc1002 length field and 1 for pad */ + req->InputBufferOffset = + cpu_to_le16(sizeof(struct smb2_query_info_req) - 1 - 4); + req->OutputBufferLength = cpu_to_le32( + outbuf_len + sizeof(struct smb2_query_info_rsp) - 1 - 4); + + iov->iov_base = (char *)req; + /* 4 for rfc1002 length field */ + iov->iov_len = get_rfc1002_length(req) + 4; + return 0; +} + +int +SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, struct kstatfs *fsdata) +{ + struct smb2_query_info_rsp *rsp = NULL; + struct kvec iov; + int rc = 0; + int resp_buftype; + struct cifs_ses *ses = tcon->ses; + struct smb2_fs_full_size_info *info = NULL; + + rc = build_qfs_info_req(&iov, tcon, FS_FULL_SIZE_INFORMATION, + sizeof(struct smb2_fs_full_size_info), + persistent_fid, volatile_fid); + if (rc) + return rc; + + rc = SendReceive2(xid, ses, &iov, 1, &resp_buftype, 0); + if (rc) { + cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE); + goto qinf_exit; + } + rsp = (struct smb2_query_info_rsp *)iov.iov_base; + + info = (struct smb2_fs_full_size_info *)(4 /* RFC1001 len */ + + le16_to_cpu(rsp->OutputBufferOffset) + (char *)&rsp->hdr); + rc = validate_buf(le16_to_cpu(rsp->OutputBufferOffset), + le32_to_cpu(rsp->OutputBufferLength), &rsp->hdr, + sizeof(struct smb2_fs_full_size_info)); + if (!rc) + copy_fs_info_to_kstatfs(info, fsdata); + +qinf_exit: + free_rsp_buf(resp_buftype, iov.iov_base); + return rc; +} + +int +smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon, + const __u64 persist_fid, const __u64 volatile_fid, const __u32 pid, + const __u32 num_lock, struct smb2_lock_element *buf) +{ + int rc = 0; + struct smb2_lock_req *req = NULL; + struct kvec iov[2]; + int resp_buf_type; + unsigned int count; + + cFYI(1, "smb2_lockv num lock %d", num_lock); + + rc = small_smb2_init(SMB2_LOCK, tcon, (void **) &req); + if (rc) + return rc; + + req->hdr.ProcessId = cpu_to_le32(pid); + req->LockCount = cpu_to_le16(num_lock); + + req->PersistentFileId = persist_fid; + req->VolatileFileId = volatile_fid; + + count = num_lock * sizeof(struct smb2_lock_element); + inc_rfc1001_len(req, count - sizeof(struct smb2_lock_element)); + + iov[0].iov_base = (char *)req; + /* 4 for rfc1002 length field and count for all locks */ + iov[0].iov_len = get_rfc1002_length(req) + 4 - count; + iov[1].iov_base = (char *)buf; + iov[1].iov_len = count; + + cifs_stats_inc(&tcon->stats.cifs_stats.num_locks); + rc = SendReceive2(xid, tcon->ses, iov, 2, &resp_buf_type, CIFS_NO_RESP); + if (rc) { + cFYI(1, "Send error in smb2_lockv = %d", rc); + cifs_stats_fail_inc(tcon, SMB2_LOCK_HE); + } + + return rc; +} + +int +SMB2_lock(const unsigned int xid, struct cifs_tcon *tcon, + const __u64 persist_fid, const __u64 volatile_fid, const __u32 pid, + const __u64 length, const __u64 offset, const __u32 lock_flags, + const bool wait) +{ + struct smb2_lock_element lock; + + lock.Offset = cpu_to_le64(offset); + lock.Length = cpu_to_le64(length); + lock.Flags = cpu_to_le32(lock_flags); + if (!wait && lock_flags != SMB2_LOCKFLAG_UNLOCK) + lock.Flags |= cpu_to_le32(SMB2_LOCKFLAG_FAIL_IMMEDIATELY); + + return smb2_lockv(xid, tcon, persist_fid, volatile_fid, pid, 1, &lock); +} + +int +SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon, + __u8 *lease_key, const __le32 lease_state) +{ + int rc; + struct smb2_lease_ack *req = NULL; + + cFYI(1, "SMB2_lease_break"); + rc = small_smb2_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req); + + if (rc) + return rc; + + req->hdr.CreditRequest = cpu_to_le16(1); + req->StructureSize = cpu_to_le16(36); + inc_rfc1001_len(req, 12); + + memcpy(req->LeaseKey, lease_key, 16); + req->LeaseState = lease_state; + + rc = SendReceiveNoRsp(xid, tcon->ses, (char *) req, CIFS_OBREAK_OP); + /* SMB2 buffer freed by function above */ + + if (rc) { + cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE); + cFYI(1, "Send error in Lease Break = %d", rc); + } + + return rc; +} diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 15dc8eea8273..4cb4ced258cb 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -150,6 +150,10 @@ struct smb2_err_rsp { __u8 ErrorData[1]; /* variable length */ } __packed; +#define SMB2_CLIENT_GUID_SIZE 16 + +extern __u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE]; + struct smb2_negotiate_req { struct smb2_hdr hdr; __le16 StructureSize; /* Must be 36 */ @@ -157,11 +161,17 @@ struct smb2_negotiate_req { __le16 SecurityMode; __le16 Reserved; /* MBZ */ __le32 Capabilities; - __u8 ClientGUID[16]; /* MBZ */ + __u8 ClientGUID[SMB2_CLIENT_GUID_SIZE]; __le64 ClientStartTime; /* MBZ */ - __le16 Dialects[2]; /* variable length */ + __le16 Dialects[1]; /* One dialect (vers=) at a time for now */ } __packed; +/* Dialects */ +#define SMB20_PROT_ID 0x0202 +#define SMB21_PROT_ID 0x0210 +#define SMB30_PROT_ID 0x0300 +#define BAD_PROT_ID 0xFFFF + /* SecurityMode flags */ #define SMB2_NEGOTIATE_SIGNING_ENABLED 0x0001 #define SMB2_NEGOTIATE_SIGNING_REQUIRED 0x0002 @@ -169,6 +179,10 @@ struct smb2_negotiate_req { #define SMB2_GLOBAL_CAP_DFS 0x00000001 #define SMB2_GLOBAL_CAP_LEASING 0x00000002 /* Resp only New to SMB2.1 */ #define SMB2_GLOBAL_CAP_LARGE_MTU 0X00000004 /* Resp only New to SMB2.1 */ +#define SMB2_GLOBAL_CAP_MULTI_CHANNEL 0x00000008 /* New to SMB3 */ +#define SMB2_GLOBAL_CAP_PERSISTENT_HANDLES 0x00000010 /* New to SMB3 */ +#define SMB2_GLOBAL_CAP_DIRECTORY_LEASING 0x00000020 /* New to SMB3 */ +#define SMB2_GLOBAL_CAP_ENCRYPTION 0x00000040 /* New to SMB3 */ /* Internal types */ #define SMB2_NT_FIND 0x00100000 #define SMB2_LARGE_FILES 0x00200000 @@ -307,6 +321,8 @@ struct smb2_tree_disconnect_rsp { #define SMB2_OPLOCK_LEVEL_EXCLUSIVE 0x08 #define SMB2_OPLOCK_LEVEL_BATCH 0x09 #define SMB2_OPLOCK_LEVEL_LEASE 0xFF +/* Non-spec internal type */ +#define SMB2_OPLOCK_LEVEL_NOCHANGE 0x99 /* Desired Access Flags */ #define FILE_READ_DATA_LE cpu_to_le32(0x00000001) @@ -404,7 +420,7 @@ struct smb2_create_req { __le16 NameLength; __le32 CreateContextsOffset; __le32 CreateContextsLength; - __u8 Buffer[1]; + __u8 Buffer[8]; } __packed; struct smb2_create_rsp { @@ -428,6 +444,39 @@ struct smb2_create_rsp { __u8 Buffer[1]; } __packed; +struct create_context { + __le32 Next; + __le16 NameOffset; + __le16 NameLength; + __le16 Reserved; + __le16 DataOffset; + __le32 DataLength; + __u8 Buffer[0]; +} __packed; + +#define SMB2_LEASE_NONE __constant_cpu_to_le32(0x00) +#define SMB2_LEASE_READ_CACHING __constant_cpu_to_le32(0x01) +#define SMB2_LEASE_HANDLE_CACHING __constant_cpu_to_le32(0x02) +#define SMB2_LEASE_WRITE_CACHING __constant_cpu_to_le32(0x04) + +#define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS __constant_cpu_to_le32(0x02) + +#define SMB2_LEASE_KEY_SIZE 16 + +struct lease_context { + __le64 LeaseKeyLow; + __le64 LeaseKeyHigh; + __le32 LeaseState; + __le32 LeaseFlags; + __le64 LeaseDuration; +} __packed; + +struct create_lease { + struct create_context ccontext; + __u8 Name[8]; + struct lease_context lcontext; +} __packed; + /* Currently defined values for close flags */ #define SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB cpu_to_le16(0x0001) struct smb2_close_req { @@ -453,6 +502,108 @@ struct smb2_close_rsp { __le32 Attributes; } __packed; +struct smb2_flush_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 24 */ + __le16 Reserved1; + __le32 Reserved2; + __u64 PersistentFileId; /* opaque endianness */ + __u64 VolatileFileId; /* opaque endianness */ +} __packed; + +struct smb2_flush_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; + __le16 Reserved; +} __packed; + +struct smb2_read_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 49 */ + __u8 Padding; /* offset from start of SMB2 header to place read */ + __u8 Reserved; + __le32 Length; + __le64 Offset; + __u64 PersistentFileId; /* opaque endianness */ + __u64 VolatileFileId; /* opaque endianness */ + __le32 MinimumCount; + __le32 Channel; /* Reserved MBZ */ + __le32 RemainingBytes; + __le16 ReadChannelInfoOffset; /* Reserved MBZ */ + __le16 ReadChannelInfoLength; /* Reserved MBZ */ + __u8 Buffer[1]; +} __packed; + +struct smb2_read_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 17 */ + __u8 DataOffset; + __u8 Reserved; + __le32 DataLength; + __le32 DataRemaining; + __u32 Reserved2; + __u8 Buffer[1]; +} __packed; + +/* For write request Flags field below the following flag is defined: */ +#define SMB2_WRITEFLAG_WRITE_THROUGH 0x00000001 + +struct smb2_write_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 49 */ + __le16 DataOffset; /* offset from start of SMB2 header to write data */ + __le32 Length; + __le64 Offset; + __u64 PersistentFileId; /* opaque endianness */ + __u64 VolatileFileId; /* opaque endianness */ + __le32 Channel; /* Reserved MBZ */ + __le32 RemainingBytes; + __le16 WriteChannelInfoOffset; /* Reserved MBZ */ + __le16 WriteChannelInfoLength; /* Reserved MBZ */ + __le32 Flags; + __u8 Buffer[1]; +} __packed; + +struct smb2_write_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 17 */ + __u8 DataOffset; + __u8 Reserved; + __le32 DataLength; + __le32 DataRemaining; + __u32 Reserved2; + __u8 Buffer[1]; +} __packed; + +#define SMB2_LOCKFLAG_SHARED_LOCK 0x0001 +#define SMB2_LOCKFLAG_EXCLUSIVE_LOCK 0x0002 +#define SMB2_LOCKFLAG_UNLOCK 0x0004 +#define SMB2_LOCKFLAG_FAIL_IMMEDIATELY 0x0010 + +struct smb2_lock_element { + __le64 Offset; + __le64 Length; + __le32 Flags; + __le32 Reserved; +} __packed; + +struct smb2_lock_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 48 */ + __le16 LockCount; + __le32 Reserved; + __u64 PersistentFileId; /* opaque endianness */ + __u64 VolatileFileId; /* opaque endianness */ + /* Followed by at least one */ + struct smb2_lock_element locks[1]; +} __packed; + +struct smb2_lock_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 4 */ + __le16 Reserved; +} __packed; + struct smb2_echo_req { struct smb2_hdr hdr; __le16 StructureSize; /* Must be 4 */ @@ -465,6 +616,34 @@ struct smb2_echo_rsp { __u16 Reserved; } __packed; +/* search (query_directory) Flags field */ +#define SMB2_RESTART_SCANS 0x01 +#define SMB2_RETURN_SINGLE_ENTRY 0x02 +#define SMB2_INDEX_SPECIFIED 0x04 +#define SMB2_REOPEN 0x10 + +struct smb2_query_directory_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 33 */ + __u8 FileInformationClass; + __u8 Flags; + __le32 FileIndex; + __u64 PersistentFileId; /* opaque endianness */ + __u64 VolatileFileId; /* opaque endianness */ + __le16 FileNameOffset; + __le16 FileNameLength; + __le32 OutputBufferLength; + __u8 Buffer[1]; +} __packed; + +struct smb2_query_directory_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 9 */ + __le16 OutputBufferOffset; + __le32 OutputBufferLength; + __u8 Buffer[1]; +} __packed; + /* Possible InfoType values */ #define SMB2_O_INFO_FILE 0x01 #define SMB2_O_INFO_FILESYSTEM 0x02 @@ -495,11 +674,84 @@ struct smb2_query_info_rsp { __u8 Buffer[1]; } __packed; +struct smb2_set_info_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 33 */ + __u8 InfoType; + __u8 FileInfoClass; + __le32 BufferLength; + __le16 BufferOffset; + __u16 Reserved; + __le32 AdditionalInformation; + __u64 PersistentFileId; /* opaque endianness */ + __u64 VolatileFileId; /* opaque endianness */ + __u8 Buffer[1]; +} __packed; + +struct smb2_set_info_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 2 */ +} __packed; + +struct smb2_oplock_break { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 24 */ + __u8 OplockLevel; + __u8 Reserved; + __le32 Reserved2; + __u64 PersistentFid; + __u64 VolatileFid; +} __packed; + +#define SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED cpu_to_le32(0x01) + +struct smb2_lease_break { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 44 */ + __le16 Reserved; + __le32 Flags; + __u8 LeaseKey[16]; + __le32 CurrentLeaseState; + __le32 NewLeaseState; + __le32 BreakReason; + __le32 AccessMaskHint; + __le32 ShareMaskHint; +} __packed; + +struct smb2_lease_ack { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 36 */ + __le16 Reserved; + __le32 Flags; + __u8 LeaseKey[16]; + __le32 LeaseState; + __le64 LeaseDuration; +} __packed; + /* * PDU infolevel structure definitions * BB consider moving to a different header */ +/* File System Information Classes */ +#define FS_VOLUME_INFORMATION 1 /* Query */ +#define FS_LABEL_INFORMATION 2 /* Set */ +#define FS_SIZE_INFORMATION 3 /* Query */ +#define FS_DEVICE_INFORMATION 4 /* Query */ +#define FS_ATTRIBUTE_INFORMATION 5 /* Query */ +#define FS_CONTROL_INFORMATION 6 /* Query, Set */ +#define FS_FULL_SIZE_INFORMATION 7 /* Query */ +#define FS_OBJECT_ID_INFORMATION 8 /* Query, Set */ +#define FS_DRIVER_PATH_INFORMATION 9 /* Query */ + +struct smb2_fs_full_size_info { + __le64 TotalAllocationUnits; + __le64 CallerAvailableAllocationUnits; + __le64 ActualAvailableAllocationUnits; + __le32 SectorsPerAllocationUnit; + __le32 BytesPerSector; +} __packed; + /* partial list of QUERY INFO levels */ #define FILE_DIRECTORY_INFORMATION 1 #define FILE_FULL_DIRECTORY_INFORMATION 2 @@ -548,6 +800,28 @@ struct smb2_query_info_rsp { #define FILEID_GLOBAL_TX_DIRECTORY_INFORMATION 50 #define FILE_STANDARD_LINK_INFORMATION 54 +struct smb2_file_internal_info { + __le64 IndexNumber; +} __packed; /* level 6 Query */ + +struct smb2_file_rename_info { /* encoding of request for level 10 */ + __u8 ReplaceIfExists; /* 1 = replace existing target with new */ + /* 0 = fail if target already exists */ + __u8 Reserved[7]; + __u64 RootDirectory; /* MBZ for network operations (why says spec?) */ + __le32 FileNameLength; + char FileName[0]; /* New name to be assigned */ +} __packed; /* level 10 Set */ + +struct smb2_file_link_info { /* encoding of request for level 11 */ + __u8 ReplaceIfExists; /* 1 = replace existing link with new */ + /* 0 = fail if link already exists */ + __u8 Reserved[7]; + __u64 RootDirectory; /* MBZ for network operations (why says spec?) */ + __le32 FileNameLength; + char FileName[0]; /* Name to be assigned to new link */ +} __packed; /* level 11 Set */ + /* * This level 18, although with struct with same name is different from cifs * level 0x107. Level 0x107 has an extra u64 between AccessFlags and @@ -576,4 +850,8 @@ struct smb2_file_all_info { /* data block encoding of response to level 18 */ char FileName[1]; } __packed; /* level 18 Query */ +struct smb2_file_eof_info { /* encoding of request for level 10 */ + __le64 EndOfFile; /* new end of file value */ +} __packed; /* level 20 Set */ + #endif /* _SMB2PDU_H */ diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index bfaa7b148afd..7d25f8b14f93 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -26,6 +26,7 @@ #include <linux/key-type.h> struct statfs; +struct smb_rqst; /* ***************************************************************** @@ -34,24 +35,35 @@ struct statfs; */ extern int map_smb2_to_linux_error(char *buf, bool log_err); extern int smb2_check_message(char *buf, unsigned int length); -extern unsigned int smb2_calc_size(struct smb2_hdr *hdr); +extern unsigned int smb2_calc_size(void *buf); extern char *smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr); extern __le16 *cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb); +extern int smb2_verify_signature(struct smb_rqst *, struct TCP_Server_Info *); extern int smb2_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, bool log_error); -extern int smb2_setup_request(struct cifs_ses *ses, struct kvec *iov, - unsigned int nvec, struct mid_q_entry **ret_mid); -extern int smb2_setup_async_request(struct TCP_Server_Info *server, - struct kvec *iov, unsigned int nvec, - struct mid_q_entry **ret_mid); +extern struct mid_q_entry *smb2_setup_request(struct cifs_ses *ses, + struct smb_rqst *rqst); +extern struct mid_q_entry *smb2_setup_async_request( + struct TCP_Server_Info *server, struct smb_rqst *rqst); extern void smb2_echo_request(struct work_struct *work); +extern __le32 smb2_get_lease_state(struct cifsInodeInfo *cinode); +extern __u8 smb2_map_lease_to_oplock(__le32 lease_state); +extern bool smb2_is_valid_oplock_break(char *buffer, + struct TCP_Server_Info *srv); +extern void move_smb2_info_to_cifs(FILE_ALL_INFO *dst, + struct smb2_file_all_info *src); extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, FILE_ALL_INFO *data, bool *adjust_tz); +extern int smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon, + const char *full_path, __u64 size, + struct cifs_sb_info *cifs_sb, bool set_alloc); +extern int smb2_set_file_info(struct inode *inode, const char *full_path, + FILE_BASIC_INFO *buf, const unsigned int xid); extern int smb2_mkdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb); extern void smb2_mkdir_setinfo(struct inode *inode, const char *full_path, @@ -59,6 +71,24 @@ extern void smb2_mkdir_setinfo(struct inode *inode, const char *full_path, struct cifs_tcon *tcon, const unsigned int xid); extern int smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb); +extern int smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, + const char *name, struct cifs_sb_info *cifs_sb); +extern int smb2_rename_path(const unsigned int xid, struct cifs_tcon *tcon, + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb); +extern int smb2_create_hardlink(const unsigned int xid, struct cifs_tcon *tcon, + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb); + +extern int smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, + const char *full_path, int disposition, + int desired_access, int create_options, + struct cifs_fid *fid, __u32 *oplock, + FILE_ALL_INFO *buf, struct cifs_sb_info *cifs_sb); +extern void smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock); +extern int smb2_unlock_range(struct cifsFileInfo *cfile, + struct file_lock *flock, const unsigned int xid); +extern int smb2_push_mandatory_locks(struct cifsFileInfo *cfile); /* * SMB2 Worker functions - most of protocol specific implementation details @@ -75,12 +105,55 @@ extern int SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon); extern int SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, u64 *persistent_fid, u64 *volatile_fid, __u32 desired_access, __u32 create_disposition, - __u32 file_attributes, __u32 create_options); + __u32 file_attributes, __u32 create_options, + __u8 *oplock, struct smb2_file_all_info *buf); extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id); +extern int SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_file_id, u64 volatile_file_id); extern int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id, struct smb2_file_all_info *data); +extern int SMB2_get_srv_num(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, + __le64 *uniqueid); +extern int smb2_async_readv(struct cifs_readdata *rdata); +extern int SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, + unsigned int *nbytes, char **buf, int *buf_type); +extern int smb2_async_writev(struct cifs_writedata *wdata); +extern int SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, + unsigned int *nbytes, struct kvec *iov, int n_vec); extern int SMB2_echo(struct TCP_Server_Info *server); +extern int SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, int index, + struct cifs_search_info *srch_inf); +extern int SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, + __le16 *target_file); +extern int SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, + __le16 *target_file); +extern int SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, u32 pid, + __le64 *eof); +extern int SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, + FILE_BASIC_INFO *buf); +extern int SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon, + const u64 persistent_fid, const u64 volatile_fid, + const __u8 oplock_level); +extern int SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_file_id, u64 volatile_file_id, + struct kstatfs *FSData); +extern int SMB2_lock(const unsigned int xid, struct cifs_tcon *tcon, + const __u64 persist_fid, const __u64 volatile_fid, + const __u32 pid, const __u64 length, const __u64 offset, + const __u32 lockFlags, const bool wait); +extern int smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon, + const __u64 persist_fid, const __u64 volatile_fid, + const __u32 pid, const __u32 num_lock, + struct smb2_lock_element *buf); +extern int SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon, + __u8 *lease_key, const __le32 lease_state); #endif /* _SMB2PROTO_H */ diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 31f5d420b3ea..2a5fdf26f79f 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -30,12 +30,156 @@ #include <linux/uaccess.h> #include <asm/processor.h> #include <linux/mempool.h> +#include <linux/highmem.h> #include "smb2pdu.h" #include "cifsglob.h" #include "cifsproto.h" #include "smb2proto.h" #include "cifs_debug.h" #include "smb2status.h" +#include "smb2glob.h" + +static int +smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) +{ + int i, rc; + unsigned char smb2_signature[SMB2_HMACSHA256_SIZE]; + unsigned char *sigptr = smb2_signature; + struct kvec *iov = rqst->rq_iov; + int n_vec = rqst->rq_nvec; + struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base; + + memset(smb2_signature, 0x0, SMB2_HMACSHA256_SIZE); + memset(smb2_pdu->Signature, 0x0, SMB2_SIGNATURE_SIZE); + + rc = crypto_shash_setkey(server->secmech.hmacsha256, + server->session_key.response, SMB2_NTLMV2_SESSKEY_SIZE); + if (rc) { + cERROR(1, "%s: Could not update with response\n", __func__); + return rc; + } + + rc = crypto_shash_init(&server->secmech.sdeschmacsha256->shash); + if (rc) { + cERROR(1, "%s: Could not init md5\n", __func__); + return rc; + } + + for (i = 0; i < n_vec; i++) { + if (iov[i].iov_len == 0) + continue; + if (iov[i].iov_base == NULL) { + cERROR(1, "null iovec entry"); + return -EIO; + } + /* + * The first entry includes a length field (which does not get + * signed that occupies the first 4 bytes before the header). + */ + if (i == 0) { + if (iov[0].iov_len <= 8) /* cmd field at offset 9 */ + break; /* nothing to sign or corrupt header */ + rc = + crypto_shash_update( + &server->secmech.sdeschmacsha256->shash, + iov[i].iov_base + 4, iov[i].iov_len - 4); + } else { + rc = + crypto_shash_update( + &server->secmech.sdeschmacsha256->shash, + iov[i].iov_base, iov[i].iov_len); + } + if (rc) { + cERROR(1, "%s: Could not update with payload\n", + __func__); + return rc; + } + } + + /* now hash over the rq_pages array */ + for (i = 0; i < rqst->rq_npages; i++) { + struct kvec p_iov; + + cifs_rqst_page_to_kvec(rqst, i, &p_iov); + crypto_shash_update(&server->secmech.sdeschmacsha256->shash, + p_iov.iov_base, p_iov.iov_len); + kunmap(rqst->rq_pages[i]); + } + + rc = crypto_shash_final(&server->secmech.sdeschmacsha256->shash, + sigptr); + if (rc) + cERROR(1, "%s: Could not generate sha256 hash\n", __func__); + + memcpy(smb2_pdu->Signature, sigptr, SMB2_SIGNATURE_SIZE); + + return rc; +} + +/* must be called with server->srv_mutex held */ +static int +smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server) +{ + int rc = 0; + struct smb2_hdr *smb2_pdu = rqst->rq_iov[0].iov_base; + + if (!(smb2_pdu->Flags & SMB2_FLAGS_SIGNED) || + server->tcpStatus == CifsNeedNegotiate) + return rc; + + if (!server->session_estab) { + strncpy(smb2_pdu->Signature, "BSRSPYL", 8); + return rc; + } + + rc = smb2_calc_signature(rqst, server); + + return rc; +} + +int +smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) +{ + unsigned int rc; + char server_response_sig[16]; + struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)rqst->rq_iov[0].iov_base; + + if ((smb2_pdu->Command == SMB2_NEGOTIATE) || + (smb2_pdu->Command == SMB2_OPLOCK_BREAK) || + (!server->session_estab)) + return 0; + + /* + * BB what if signatures are supposed to be on for session but + * server does not send one? BB + */ + + /* Do not need to verify session setups with signature "BSRSPYL " */ + if (memcmp(smb2_pdu->Signature, "BSRSPYL ", 8) == 0) + cFYI(1, "dummy signature received for smb command 0x%x", + smb2_pdu->Command); + + /* + * Save off the origiginal signature so we can modify the smb and check + * our calculated signature against what the server sent. + */ + memcpy(server_response_sig, smb2_pdu->Signature, SMB2_SIGNATURE_SIZE); + + memset(smb2_pdu->Signature, 0, SMB2_SIGNATURE_SIZE); + + mutex_lock(&server->srv_mutex); + rc = smb2_calc_signature(rqst, server); + mutex_unlock(&server->srv_mutex); + + if (rc) + return rc; + + if (memcmp(server_response_sig, smb2_pdu->Signature, + SMB2_SIGNATURE_SIZE)) + return -EACCES; + else + return 0; +} /* * Set message id for the request. Should be called after wait_for_free_request @@ -115,58 +259,66 @@ smb2_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, bool log_error) { unsigned int len = get_rfc1002_length(mid->resp_buf); + struct kvec iov; + struct smb_rqst rqst = { .rq_iov = &iov, + .rq_nvec = 1 }; + + iov.iov_base = (char *)mid->resp_buf; + iov.iov_len = get_rfc1002_length(mid->resp_buf) + 4; dump_smb(mid->resp_buf, min_t(u32, 80, len)); /* convert the length into a more usable form */ - /* BB - uncomment with SMB2 signing implementation */ - /* if ((len > 24) && + if ((len > 24) && (server->sec_mode & (SECMODE_SIGN_REQUIRED|SECMODE_SIGN_ENABLED))) { - if (smb2_verify_signature(mid->resp_buf, server)) - cERROR(1, "Unexpected SMB signature"); - } */ + int rc; + + rc = smb2_verify_signature(&rqst, server); + if (rc) + cERROR(1, "SMB signature verification returned error = " + "%d", rc); + } return map_smb2_to_linux_error(mid->resp_buf, log_error); } -int -smb2_setup_request(struct cifs_ses *ses, struct kvec *iov, - unsigned int nvec, struct mid_q_entry **ret_mid) +struct mid_q_entry * +smb2_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst) { int rc; - struct smb2_hdr *hdr = (struct smb2_hdr *)iov[0].iov_base; + struct smb2_hdr *hdr = (struct smb2_hdr *)rqst->rq_iov[0].iov_base; struct mid_q_entry *mid; smb2_seq_num_into_buf(ses->server, hdr); rc = smb2_get_mid_entry(ses, hdr, &mid); if (rc) - return rc; - /* rc = smb2_sign_smb2(iov, nvec, ses->server); - if (rc) - delete_mid(mid); */ - *ret_mid = mid; - return rc; + return ERR_PTR(rc); + rc = smb2_sign_rqst(rqst, ses->server); + if (rc) { + cifs_delete_mid(mid); + return ERR_PTR(rc); + } + return mid; } -int -smb2_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov, - unsigned int nvec, struct mid_q_entry **ret_mid) +struct mid_q_entry * +smb2_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst) { - int rc = 0; - struct smb2_hdr *hdr = (struct smb2_hdr *)iov[0].iov_base; + int rc; + struct smb2_hdr *hdr = (struct smb2_hdr *)rqst->rq_iov[0].iov_base; struct mid_q_entry *mid; smb2_seq_num_into_buf(server, hdr); mid = smb2_mid_entry_alloc(hdr, server); if (mid == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); - /* rc = smb2_sign_smb2(iov, nvec, server); + rc = smb2_sign_rqst(rqst, server); if (rc) { DeleteMidQEntry(mid); - return rc; - }*/ - *ret_mid = mid; - return rc; + return ERR_PTR(rc); + } + + return mid; } diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index d9b639b95fa8..2126ab185045 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -27,6 +27,8 @@ #include <linux/net.h> #include <linux/delay.h> #include <linux/freezer.h> +#include <linux/tcp.h> +#include <linux/highmem.h> #include <asm/uaccess.h> #include <asm/processor.h> #include <linux/mempool.h> @@ -109,8 +111,8 @@ DeleteMidQEntry(struct mid_q_entry *midEntry) mempool_free(midEntry, cifs_mid_poolp); } -static void -delete_mid(struct mid_q_entry *mid) +void +cifs_delete_mid(struct mid_q_entry *mid) { spin_lock(&GlobalMid_Lock); list_del(&mid->qhead); @@ -119,18 +121,29 @@ delete_mid(struct mid_q_entry *mid) DeleteMidQEntry(mid); } +/* + * smb_send_kvec - send an array of kvecs to the server + * @server: Server to send the data to + * @iov: Pointer to array of kvecs + * @n_vec: length of kvec array + * @sent: amount of data sent on socket is stored here + * + * Our basic "send data to server" function. Should be called with srv_mutex + * held. The caller is responsible for handling the results. + */ static int -smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) +smb_send_kvec(struct TCP_Server_Info *server, struct kvec *iov, size_t n_vec, + size_t *sent) { int rc = 0; int i = 0; struct msghdr smb_msg; - unsigned int len = iov[0].iov_len; - unsigned int total_len; - int first_vec = 0; - unsigned int smb_buf_length = get_rfc1002_length(iov[0].iov_base); + unsigned int remaining; + size_t first_vec = 0; struct socket *ssocket = server->ssocket; + *sent = 0; + if (ssocket == NULL) return -ENOTSOCK; /* BB eventually add reconnect code here */ @@ -143,56 +156,60 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) else smb_msg.msg_flags = MSG_NOSIGNAL; - total_len = 0; + remaining = 0; for (i = 0; i < n_vec; i++) - total_len += iov[i].iov_len; - - cFYI(1, "Sending smb: total_len %d", total_len); - dump_smb(iov[0].iov_base, len); + remaining += iov[i].iov_len; i = 0; - while (total_len) { + while (remaining) { + /* + * If blocking send, we try 3 times, since each can block + * for 5 seconds. For nonblocking we have to try more + * but wait increasing amounts of time allowing time for + * socket to clear. The overall time we wait in either + * case to send on the socket is about 15 seconds. + * Similarly we wait for 15 seconds for a response from + * the server in SendReceive[2] for the server to send + * a response back for most types of requests (except + * SMB Write past end of file which can be slow, and + * blocking lock operations). NFS waits slightly longer + * than CIFS, but this can make it take longer for + * nonresponsive servers to be detected and 15 seconds + * is more than enough time for modern networks to + * send a packet. In most cases if we fail to send + * after the retries we will kill the socket and + * reconnect which may clear the network problem. + */ rc = kernel_sendmsg(ssocket, &smb_msg, &iov[first_vec], - n_vec - first_vec, total_len); - if ((rc == -ENOSPC) || (rc == -EAGAIN)) { + n_vec - first_vec, remaining); + if (rc == -ENOSPC || rc == -EAGAIN) { i++; - /* - * If blocking send we try 3 times, since each can block - * for 5 seconds. For nonblocking we have to try more - * but wait increasing amounts of time allowing time for - * socket to clear. The overall time we wait in either - * case to send on the socket is about 15 seconds. - * Similarly we wait for 15 seconds for a response from - * the server in SendReceive[2] for the server to send - * a response back for most types of requests (except - * SMB Write past end of file which can be slow, and - * blocking lock operations). NFS waits slightly longer - * than CIFS, but this can make it take longer for - * nonresponsive servers to be detected and 15 seconds - * is more than enough time for modern networks to - * send a packet. In most cases if we fail to send - * after the retries we will kill the socket and - * reconnect which may clear the network problem. - */ - if ((i >= 14) || (!server->noblocksnd && (i > 2))) { - cERROR(1, "sends on sock %p stuck for 15 seconds", - ssocket); + if (i >= 14 || (!server->noblocksnd && (i > 2))) { + cERROR(1, "sends on sock %p stuck for 15 " + "seconds", ssocket); rc = -EAGAIN; break; } msleep(1 << i); continue; } + if (rc < 0) break; - if (rc == total_len) { - total_len = 0; + /* send was at least partially successful */ + *sent += rc; + + if (rc == remaining) { + remaining = 0; break; - } else if (rc > total_len) { - cERROR(1, "sent %d requested %d", rc, total_len); + } + + if (rc > remaining) { + cERROR(1, "sent %d requested %d", rc, remaining); break; } + if (rc == 0) { /* should never happen, letting socket clear before retrying is our only obvious option here */ @@ -200,7 +217,9 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) msleep(500); continue; } - total_len -= rc; + + remaining -= rc; + /* the line below resets i */ for (i = first_vec; i < n_vec; i++) { if (iov[i].iov_len) { @@ -215,16 +234,97 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) } } } + i = 0; /* in case we get ENOSPC on the next send */ + rc = 0; } + return rc; +} + +/** + * rqst_page_to_kvec - Turn a slot in the smb_rqst page array into a kvec + * @rqst: pointer to smb_rqst + * @idx: index into the array of the page + * @iov: pointer to struct kvec that will hold the result + * + * Helper function to convert a slot in the rqst->rq_pages array into a kvec. + * The page will be kmapped and the address placed into iov_base. The length + * will then be adjusted according to the ptailoff. + */ +void +cifs_rqst_page_to_kvec(struct smb_rqst *rqst, unsigned int idx, + struct kvec *iov) +{ + /* + * FIXME: We could avoid this kmap altogether if we used + * kernel_sendpage instead of kernel_sendmsg. That will only + * work if signing is disabled though as sendpage inlines the + * page directly into the fraglist. If userspace modifies the + * page after we calculate the signature, then the server will + * reject it and may break the connection. kernel_sendmsg does + * an extra copy of the data and avoids that issue. + */ + iov->iov_base = kmap(rqst->rq_pages[idx]); + + /* if last page, don't send beyond this offset into page */ + if (idx == (rqst->rq_npages - 1)) + iov->iov_len = rqst->rq_tailsz; + else + iov->iov_len = rqst->rq_pagesz; +} + +static int +smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst) +{ + int rc; + struct kvec *iov = rqst->rq_iov; + int n_vec = rqst->rq_nvec; + unsigned int smb_buf_length = get_rfc1002_length(iov[0].iov_base); + unsigned int i; + size_t total_len = 0, sent; + struct socket *ssocket = server->ssocket; + int val = 1; + + cFYI(1, "Sending smb: smb_len=%u", smb_buf_length); + dump_smb(iov[0].iov_base, iov[0].iov_len); + + /* cork the socket */ + kernel_setsockopt(ssocket, SOL_TCP, TCP_CORK, + (char *)&val, sizeof(val)); + + rc = smb_send_kvec(server, iov, n_vec, &sent); + if (rc < 0) + goto uncork; + + total_len += sent; + + /* now walk the page array and send each page in it */ + for (i = 0; i < rqst->rq_npages; i++) { + struct kvec p_iov; + + cifs_rqst_page_to_kvec(rqst, i, &p_iov); + rc = smb_send_kvec(server, &p_iov, 1, &sent); + kunmap(rqst->rq_pages[i]); + if (rc < 0) + break; + + total_len += sent; + } + +uncork: + /* uncork it */ + val = 0; + kernel_setsockopt(ssocket, SOL_TCP, TCP_CORK, + (char *)&val, sizeof(val)); if ((total_len > 0) && (total_len != smb_buf_length + 4)) { - cFYI(1, "partial send (%d remaining), terminating session", - total_len); - /* If we have only sent part of an SMB then the next SMB - could be taken as the remainder of this one. We need - to kill the socket so the server throws away the partial - SMB */ + cFYI(1, "partial send (wanted=%u sent=%zu): terminating " + "session", smb_buf_length + 4, total_len); + /* + * If we have only sent part of an SMB then the next SMB could + * be taken as the remainder of this one. We need to kill the + * socket so the server throws away the partial SMB + */ server->tcpStatus = CifsNeedReconnect; } @@ -236,6 +336,15 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) return rc; } +static int +smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) +{ + struct smb_rqst rqst = { .rq_iov = iov, + .rq_nvec = n_vec }; + + return smb_send_rqst(server, &rqst); +} + int smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer, unsigned int smb_buf_length) @@ -345,12 +454,11 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ) return 0; } -int -cifs_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov, - unsigned int nvec, struct mid_q_entry **ret_mid) +struct mid_q_entry * +cifs_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst) { int rc; - struct smb_hdr *hdr = (struct smb_hdr *)iov[0].iov_base; + struct smb_hdr *hdr = (struct smb_hdr *)rqst->rq_iov[0].iov_base; struct mid_q_entry *mid; /* enable signing if server requires it */ @@ -359,16 +467,15 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov, mid = AllocMidQEntry(hdr, server); if (mid == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); - rc = cifs_sign_smbv(iov, nvec, server, &mid->sequence_number); + rc = cifs_sign_rqst(rqst, server, &mid->sequence_number); if (rc) { DeleteMidQEntry(mid); - return rc; + return ERR_PTR(rc); } - *ret_mid = mid; - return 0; + return mid; } /* @@ -376,9 +483,9 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov, * the result. Caller is responsible for dealing with timeouts. */ int -cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, - unsigned int nvec, mid_receive_t *receive, - mid_callback_t *callback, void *cbdata, const int flags) +cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, + mid_receive_t *receive, mid_callback_t *callback, + void *cbdata, const int flags) { int rc, timeout, optype; struct mid_q_entry *mid; @@ -391,12 +498,12 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, return rc; mutex_lock(&server->srv_mutex); - rc = server->ops->setup_async_request(server, iov, nvec, &mid); - if (rc) { + mid = server->ops->setup_async_request(server, rqst); + if (IS_ERR(mid)) { mutex_unlock(&server->srv_mutex); add_credits(server, 1, optype); wake_up(&server->request_q); - return rc; + return PTR_ERR(mid); } mid->receive = receive; @@ -411,7 +518,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, cifs_in_send_inc(server); - rc = smb_sendv(server, iov, nvec); + rc = smb_send_rqst(server, rqst); cifs_in_send_dec(server); cifs_save_when_sent(mid); mutex_unlock(&server->srv_mutex); @@ -419,7 +526,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, if (rc == 0) return 0; - delete_mid(mid); + cifs_delete_mid(mid); add_credits(server, 1, optype); wake_up(&server->request_q); return rc; @@ -504,11 +611,13 @@ cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { struct kvec iov; int rc = 0; + struct smb_rqst rqst = { .rq_iov = &iov, + .rq_nvec = 1 }; iov.iov_base = mid->resp_buf; iov.iov_len = len; /* FIXME: add code to kill session */ - rc = cifs_verify_signature(&iov, 1, server, + rc = cifs_verify_signature(&rqst, server, mid->sequence_number + 1); if (rc) cERROR(1, "SMB signature verification returned error = " @@ -519,22 +628,22 @@ cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, return map_smb_to_linux_error(mid->resp_buf, log_error); } -int -cifs_setup_request(struct cifs_ses *ses, struct kvec *iov, - unsigned int nvec, struct mid_q_entry **ret_mid) +struct mid_q_entry * +cifs_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst) { int rc; - struct smb_hdr *hdr = (struct smb_hdr *)iov[0].iov_base; + struct smb_hdr *hdr = (struct smb_hdr *)rqst->rq_iov[0].iov_base; struct mid_q_entry *mid; rc = allocate_mid(ses, hdr, &mid); if (rc) - return rc; - rc = cifs_sign_smbv(iov, nvec, ses->server, &mid->sequence_number); - if (rc) - delete_mid(mid); - *ret_mid = mid; - return rc; + return ERR_PTR(rc); + rc = cifs_sign_rqst(rqst, ses->server, &mid->sequence_number); + if (rc) { + cifs_delete_mid(mid); + return ERR_PTR(rc); + } + return mid; } int @@ -547,6 +656,8 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, struct mid_q_entry *midQ; char *buf = iov[0].iov_base; unsigned int credits = 1; + struct smb_rqst rqst = { .rq_iov = iov, + .rq_nvec = n_vec }; timeout = flags & CIFS_TIMEOUT_MASK; optype = flags & CIFS_OP_MASK; @@ -584,13 +695,13 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, mutex_lock(&ses->server->srv_mutex); - rc = ses->server->ops->setup_request(ses, iov, n_vec, &midQ); - if (rc) { + midQ = ses->server->ops->setup_request(ses, &rqst); + if (IS_ERR(midQ)) { mutex_unlock(&ses->server->srv_mutex); cifs_small_buf_release(buf); /* Update # of requests on wire to server */ add_credits(ses->server, 1, optype); - return rc; + return PTR_ERR(midQ); } midQ->mid_state = MID_REQUEST_SUBMITTED; @@ -652,11 +763,11 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, rc = ses->server->ops->check_receive(midQ, ses->server, flags & CIFS_LOG_ERROR); - /* mark it so buf will not be freed by delete_mid */ + /* mark it so buf will not be freed by cifs_delete_mid */ if ((flags & CIFS_NO_RESP) == 0) midQ->resp_buf = NULL; out: - delete_mid(midQ); + cifs_delete_mid(midQ); add_credits(ses->server, credits, optype); return rc; @@ -762,7 +873,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4); rc = cifs_check_receive(midQ, ses->server, 0); out: - delete_mid(midQ); + cifs_delete_mid(midQ); add_credits(ses->server, 1, 0); return rc; @@ -846,7 +957,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, rc = cifs_sign_smb(in_buf, ses->server, &midQ->sequence_number); if (rc) { - delete_mid(midQ); + cifs_delete_mid(midQ); mutex_unlock(&ses->server->srv_mutex); return rc; } @@ -859,7 +970,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, mutex_unlock(&ses->server->srv_mutex); if (rc < 0) { - delete_mid(midQ); + cifs_delete_mid(midQ); return rc; } @@ -880,7 +991,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, blocking lock to return. */ rc = send_cancel(ses->server, in_buf, midQ); if (rc) { - delete_mid(midQ); + cifs_delete_mid(midQ); return rc; } } else { @@ -892,7 +1003,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, /* If we get -ENOLCK back the lock may have already been removed. Don't exit in this case. */ if (rc && rc != -ENOLCK) { - delete_mid(midQ); + cifs_delete_mid(midQ); return rc; } } @@ -929,7 +1040,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4); rc = cifs_check_receive(midQ, ses->server, 0); out: - delete_mid(midQ); + cifs_delete_mid(midQ); if (rstart && rc == -EACCES) return -ERESTARTSYS; return rc; diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index debdfe0fc809..59f8db4a39a7 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -866,6 +866,12 @@ COMPATIBLE_IOCTL(TIOCGPTN) COMPATIBLE_IOCTL(TIOCSPTLCK) COMPATIBLE_IOCTL(TIOCSERGETLSR) COMPATIBLE_IOCTL(TIOCSIG) +#ifdef TIOCSRS485 +COMPATIBLE_IOCTL(TIOCSRS485) +#endif +#ifdef TIOCGRS485 +COMPATIBLE_IOCTL(TIOCGRS485) +#endif #ifdef TCGETS2 COMPATIBLE_IOCTL(TCGETS2) COMPATIBLE_IOCTL(TCSETS2) diff --git a/fs/dcache.c b/fs/dcache.c index 8086636bf796..693f95bf1cae 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -389,7 +389,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) * Inform try_to_ascend() that we are no longer attached to the * dentry tree */ - dentry->d_flags |= DCACHE_DISCONNECTED; + dentry->d_flags |= DCACHE_DENTRY_KILLED; if (parent) spin_unlock(&parent->d_lock); dentry_iput(dentry); @@ -1048,7 +1048,7 @@ static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq * or deletion */ if (new != old->d_parent || - (old->d_flags & DCACHE_DISCONNECTED) || + (old->d_flags & DCACHE_DENTRY_KILLED) || (!locked && read_seqretry(&rename_lock, seq))) { spin_unlock(&new->d_lock); new = NULL; @@ -1134,6 +1134,8 @@ positive: return 1; rename_retry: + if (locked) + goto again; locked = 1; write_seqlock(&rename_lock); goto again; @@ -1141,7 +1143,7 @@ rename_retry: EXPORT_SYMBOL(have_submounts); /* - * Search the dentry child list for the specified parent, + * Search the dentry child list of the specified parent, * and move any unused dentries to the end of the unused * list for prune_dcache(). We descend to the next level * whenever the d_subdirs list is non-empty and continue @@ -1236,6 +1238,8 @@ out: rename_retry: if (found) return found; + if (locked) + goto again; locked = 1; write_seqlock(&rename_lock); goto again; @@ -3035,6 +3039,8 @@ resume: return; rename_retry: + if (locked) + goto again; locked = 1; write_seqlock(&rename_lock); goto again; diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 2340f6978d6e..c5ca6ae5a30c 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -526,73 +526,51 @@ struct array_data { u32 elements; }; -static int u32_array_open(struct inode *inode, struct file *file) -{ - file->private_data = NULL; - return nonseekable_open(inode, file); -} - -static size_t format_array(char *buf, size_t bufsize, const char *fmt, - u32 *array, u32 array_size) +static size_t u32_format_array(char *buf, size_t bufsize, + u32 *array, int array_size) { size_t ret = 0; - u32 i; - for (i = 0; i < array_size; i++) { + while (--array_size >= 0) { size_t len; + char term = array_size ? ' ' : '\n'; - len = snprintf(buf, bufsize, fmt, array[i]); - len++; /* ' ' or '\n' */ + len = snprintf(buf, bufsize, "%u%c", *array++, term); ret += len; - if (buf) { - buf += len; - bufsize -= len; - buf[-1] = (i == array_size-1) ? '\n' : ' '; - } + buf += len; + bufsize -= len; } - - ret++; /* \0 */ - if (buf) - *buf = '\0'; - return ret; } -static char *format_array_alloc(const char *fmt, u32 *array, - u32 array_size) +static int u32_array_open(struct inode *inode, struct file *file) { - size_t len = format_array(NULL, 0, fmt, array, array_size); - char *ret; - - ret = kmalloc(len, GFP_KERNEL); - if (ret == NULL) - return NULL; + struct array_data *data = inode->i_private; + int size, elements = data->elements; + char *buf; + + /* + * Max size: + * - 10 digits + ' '/'\n' = 11 bytes per number + * - terminating NUL character + */ + size = elements*11; + buf = kmalloc(size+1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + buf[size] = 0; + + file->private_data = buf; + u32_format_array(buf, size, data->array, data->elements); - format_array(ret, len, fmt, array, array_size); - return ret; + return nonseekable_open(inode, file); } static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { - struct inode *inode = file->f_path.dentry->d_inode; - struct array_data *data = inode->i_private; - size_t size; - - if (*ppos == 0) { - if (file->private_data) { - kfree(file->private_data); - file->private_data = NULL; - } - - file->private_data = format_array_alloc("%u", data->array, - data->elements); - } - - size = 0; - if (file->private_data) - size = strlen(file->private_data); + size_t size = strlen(file->private_data); return simple_read_from_buffer(buf, len, ppos, file->private_data, size); diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 4733eab34a23..6393fd61d5c4 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -28,7 +28,7 @@ #include <linux/magic.h> #include <linux/slab.h> -#define DEBUGFS_DEFAULT_MODE 0755 +#define DEBUGFS_DEFAULT_MODE 0700 static struct vfsmount *debugfs_mount; static int debugfs_mount_count; @@ -291,9 +291,9 @@ static struct file_system_type debug_fs_type = { .kill_sb = kill_litter_super, }; -struct dentry *__create_file(const char *name, umode_t mode, - struct dentry *parent, void *data, - const struct file_operations *fops) +static struct dentry *__create_file(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops) { struct dentry *dentry = NULL; int error; diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c index 63dc19c54d5a..27a6ba9aaeec 100644 --- a/fs/dlm/ast.c +++ b/fs/dlm/ast.c @@ -15,8 +15,8 @@ #include "lock.h" #include "user.h" -static uint64_t dlm_cb_seq; -static spinlock_t dlm_cb_seq_spin; +static uint64_t dlm_cb_seq; +static DEFINE_SPINLOCK(dlm_cb_seq_spin); static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb) { diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 9ccf7346834a..a0387dd8b1f0 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -750,6 +750,7 @@ static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf, static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len) { struct sockaddr_storage *addr; + int rv; if (len != sizeof(struct sockaddr_storage)) return -EINVAL; @@ -762,6 +763,13 @@ static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len) return -ENOMEM; memcpy(addr, buf, len); + + rv = dlm_lowcomms_addr(cm->nodeid, addr, len); + if (rv) { + kfree(addr); + return rv; + } + cm->addr[cm->addr_count++] = addr; return len; } @@ -878,34 +886,7 @@ static void put_space(struct dlm_space *sp) config_item_put(&sp->group.cg_item); } -static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y) -{ - switch (x->ss_family) { - case AF_INET: { - struct sockaddr_in *sinx = (struct sockaddr_in *)x; - struct sockaddr_in *siny = (struct sockaddr_in *)y; - if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr) - return 0; - if (sinx->sin_port != siny->sin_port) - return 0; - break; - } - case AF_INET6: { - struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x; - struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y; - if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr)) - return 0; - if (sinx->sin6_port != siny->sin6_port) - return 0; - break; - } - default: - return 0; - } - return 1; -} - -static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr) +static struct dlm_comm *get_comm(int nodeid) { struct config_item *i; struct dlm_comm *cm = NULL; @@ -919,19 +900,11 @@ static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr) list_for_each_entry(i, &comm_list->cg_children, ci_entry) { cm = config_item_to_comm(i); - if (nodeid) { - if (cm->nodeid != nodeid) - continue; - found = 1; - config_item_get(i); - break; - } else { - if (!cm->addr_count || !addr_compare(cm->addr[0], addr)) - continue; - found = 1; - config_item_get(i); - break; - } + if (cm->nodeid != nodeid) + continue; + found = 1; + config_item_get(i); + break; } mutex_unlock(&clusters_root.subsys.su_mutex); @@ -995,7 +968,7 @@ int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out, int dlm_comm_seq(int nodeid, uint32_t *seq) { - struct dlm_comm *cm = get_comm(nodeid, NULL); + struct dlm_comm *cm = get_comm(nodeid); if (!cm) return -EEXIST; *seq = cm->seq; @@ -1003,28 +976,6 @@ int dlm_comm_seq(int nodeid, uint32_t *seq) return 0; } -int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr) -{ - struct dlm_comm *cm = get_comm(nodeid, NULL); - if (!cm) - return -EEXIST; - if (!cm->addr_count) - return -ENOENT; - memcpy(addr, cm->addr[0], sizeof(*addr)); - put_comm(cm); - return 0; -} - -int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid) -{ - struct dlm_comm *cm = get_comm(0, addr); - if (!cm) - return -EEXIST; - *nodeid = cm->nodeid; - put_comm(cm); - return 0; -} - int dlm_our_nodeid(void) { return local_comm ? local_comm->nodeid : 0; diff --git a/fs/dlm/config.h b/fs/dlm/config.h index dbd35a08f3a5..f30697bc2780 100644 --- a/fs/dlm/config.h +++ b/fs/dlm/config.h @@ -46,8 +46,6 @@ void dlm_config_exit(void); int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out, int *count_out); int dlm_comm_seq(int nodeid, uint32_t *seq); -int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr); -int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid); int dlm_our_nodeid(void); int dlm_our_addr(struct sockaddr_storage *addr, int num); diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 9d3e485f88c8..871c1abf6029 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -604,6 +604,7 @@ struct dlm_ls { struct idr ls_recover_idr; spinlock_t ls_recover_idr_lock; wait_queue_head_t ls_wait_general; + wait_queue_head_t ls_recover_lock_wait; struct mutex ls_clear_proc_locks; struct list_head ls_root_list; /* root resources */ @@ -616,15 +617,40 @@ struct dlm_ls { char ls_name[1]; }; -#define LSFL_WORK 0 -#define LSFL_RUNNING 1 -#define LSFL_RECOVERY_STOP 2 -#define LSFL_RCOM_READY 3 -#define LSFL_RCOM_WAIT 4 -#define LSFL_UEVENT_WAIT 5 -#define LSFL_TIMEWARN 6 -#define LSFL_CB_DELAY 7 -#define LSFL_NODIR 8 +/* + * LSFL_RECOVER_STOP - dlm_ls_stop() sets this to tell dlm recovery routines + * that they should abort what they're doing so new recovery can be started. + * + * LSFL_RECOVER_DOWN - dlm_ls_stop() sets this to tell dlm_recoverd that it + * should do down_write() on the in_recovery rw_semaphore. (doing down_write + * within dlm_ls_stop causes complaints about the lock acquired/released + * in different contexts.) + * + * LSFL_RECOVER_LOCK - dlm_recoverd holds the in_recovery rw_semaphore. + * It sets this after it is done with down_write() on the in_recovery + * rw_semaphore and clears it after it has released the rw_semaphore. + * + * LSFL_RECOVER_WORK - dlm_ls_start() sets this to tell dlm_recoverd that it + * should begin recovery of the lockspace. + * + * LSFL_RUNNING - set when normal locking activity is enabled. + * dlm_ls_stop() clears this to tell dlm locking routines that they should + * quit what they are doing so recovery can run. dlm_recoverd sets + * this after recovery is finished. + */ + +#define LSFL_RECOVER_STOP 0 +#define LSFL_RECOVER_DOWN 1 +#define LSFL_RECOVER_LOCK 2 +#define LSFL_RECOVER_WORK 3 +#define LSFL_RUNNING 4 + +#define LSFL_RCOM_READY 5 +#define LSFL_RCOM_WAIT 6 +#define LSFL_UEVENT_WAIT 7 +#define LSFL_TIMEWARN 8 +#define LSFL_CB_DELAY 9 +#define LSFL_NODIR 10 /* much of this is just saving user space pointers associated with the lock that we pass back to the user lib with an ast */ @@ -667,7 +693,7 @@ static inline int dlm_locking_stopped(struct dlm_ls *ls) static inline int dlm_recovery_stopped(struct dlm_ls *ls) { - return test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); + return test_bit(LSFL_RECOVER_STOP, &ls->ls_flags); } static inline int dlm_no_directory(struct dlm_ls *ls) diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 952557d00ccd..2e99fb0c9737 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -582,8 +582,6 @@ static int new_lockspace(const char *name, const char *cluster, INIT_LIST_HEAD(&ls->ls_root_list); init_rwsem(&ls->ls_root_sem); - down_write(&ls->ls_in_recovery); - spin_lock(&lslist_lock); ls->ls_create_count = 1; list_add(&ls->ls_list, &lslist); @@ -597,13 +595,24 @@ static int new_lockspace(const char *name, const char *cluster, } } - /* needs to find ls in lslist */ + init_waitqueue_head(&ls->ls_recover_lock_wait); + + /* + * Once started, dlm_recoverd first looks for ls in lslist, then + * initializes ls_in_recovery as locked in "down" mode. We need + * to wait for the wakeup from dlm_recoverd because in_recovery + * has to start out in down mode. + */ + error = dlm_recoverd_start(ls); if (error) { log_error(ls, "can't start dlm_recoverd %d", error); goto out_callback; } + wait_event(ls->ls_recover_lock_wait, + test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags)); + ls->ls_kobj.kset = dlm_kset; error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL, "%s", ls->ls_name); diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 5c1b0e38c7a4..331ea4f94efd 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -140,6 +140,16 @@ struct writequeue_entry { struct connection *con; }; +struct dlm_node_addr { + struct list_head list; + int nodeid; + int addr_count; + struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT]; +}; + +static LIST_HEAD(dlm_node_addrs); +static DEFINE_SPINLOCK(dlm_node_addrs_spin); + static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT]; static int dlm_local_count; static int dlm_allow_conn; @@ -264,31 +274,146 @@ static struct connection *assoc2con(int assoc_id) return NULL; } -static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr) +static struct dlm_node_addr *find_node_addr(int nodeid) +{ + struct dlm_node_addr *na; + + list_for_each_entry(na, &dlm_node_addrs, list) { + if (na->nodeid == nodeid) + return na; + } + return NULL; +} + +static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y) +{ + switch (x->ss_family) { + case AF_INET: { + struct sockaddr_in *sinx = (struct sockaddr_in *)x; + struct sockaddr_in *siny = (struct sockaddr_in *)y; + if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr) + return 0; + if (sinx->sin_port != siny->sin_port) + return 0; + break; + } + case AF_INET6: { + struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x; + struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y; + if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr)) + return 0; + if (sinx->sin6_port != siny->sin6_port) + return 0; + break; + } + default: + return 0; + } + return 1; +} + +static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out, + struct sockaddr *sa_out) { - struct sockaddr_storage addr; - int error; + struct sockaddr_storage sas; + struct dlm_node_addr *na; if (!dlm_local_count) return -1; - error = dlm_nodeid_to_addr(nodeid, &addr); - if (error) - return error; + spin_lock(&dlm_node_addrs_spin); + na = find_node_addr(nodeid); + if (na && na->addr_count) + memcpy(&sas, na->addr[0], sizeof(struct sockaddr_storage)); + spin_unlock(&dlm_node_addrs_spin); + + if (!na) + return -EEXIST; + + if (!na->addr_count) + return -ENOENT; + + if (sas_out) + memcpy(sas_out, &sas, sizeof(struct sockaddr_storage)); + + if (!sa_out) + return 0; if (dlm_local_addr[0]->ss_family == AF_INET) { - struct sockaddr_in *in4 = (struct sockaddr_in *) &addr; - struct sockaddr_in *ret4 = (struct sockaddr_in *) retaddr; + struct sockaddr_in *in4 = (struct sockaddr_in *) &sas; + struct sockaddr_in *ret4 = (struct sockaddr_in *) sa_out; ret4->sin_addr.s_addr = in4->sin_addr.s_addr; } else { - struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr; - struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr; + struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &sas; + struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) sa_out; ret6->sin6_addr = in6->sin6_addr; } return 0; } +static int addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid) +{ + struct dlm_node_addr *na; + int rv = -EEXIST; + + spin_lock(&dlm_node_addrs_spin); + list_for_each_entry(na, &dlm_node_addrs, list) { + if (!na->addr_count) + continue; + + if (!addr_compare(na->addr[0], addr)) + continue; + + *nodeid = na->nodeid; + rv = 0; + break; + } + spin_unlock(&dlm_node_addrs_spin); + return rv; +} + +int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len) +{ + struct sockaddr_storage *new_addr; + struct dlm_node_addr *new_node, *na; + + new_node = kzalloc(sizeof(struct dlm_node_addr), GFP_NOFS); + if (!new_node) + return -ENOMEM; + + new_addr = kzalloc(sizeof(struct sockaddr_storage), GFP_NOFS); + if (!new_addr) { + kfree(new_node); + return -ENOMEM; + } + + memcpy(new_addr, addr, len); + + spin_lock(&dlm_node_addrs_spin); + na = find_node_addr(nodeid); + if (!na) { + new_node->nodeid = nodeid; + new_node->addr[0] = new_addr; + new_node->addr_count = 1; + list_add(&new_node->list, &dlm_node_addrs); + spin_unlock(&dlm_node_addrs_spin); + return 0; + } + + if (na->addr_count >= DLM_MAX_ADDR_COUNT) { + spin_unlock(&dlm_node_addrs_spin); + kfree(new_addr); + kfree(new_node); + return -ENOSPC; + } + + na->addr[na->addr_count++] = new_addr; + spin_unlock(&dlm_node_addrs_spin); + kfree(new_node); + return 0; +} + /* Data available on socket or listen socket received a connect */ static void lowcomms_data_ready(struct sock *sk, int count_unused) { @@ -348,7 +473,7 @@ int dlm_lowcomms_connect_node(int nodeid) } /* Make a socket active */ -static int add_sock(struct socket *sock, struct connection *con) +static void add_sock(struct socket *sock, struct connection *con) { con->sock = sock; @@ -358,7 +483,6 @@ static int add_sock(struct socket *sock, struct connection *con) con->sock->sk->sk_state_change = lowcomms_state_change; con->sock->sk->sk_user_data = con; con->sock->sk->sk_allocation = GFP_NOFS; - return 0; } /* Add the port number to an IPv6 or 4 sockaddr and return the address @@ -510,7 +634,7 @@ static void process_sctp_notification(struct connection *con, return; } make_sockaddr(&prim.ssp_addr, 0, &addr_len); - if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) { + if (addr_to_nodeid(&prim.ssp_addr, &nodeid)) { unsigned char *b=(unsigned char *)&prim.ssp_addr; log_print("reject connect from unknown addr"); print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, @@ -747,7 +871,7 @@ static int tcp_accept_from_sock(struct connection *con) /* Get the new node's NODEID */ make_sockaddr(&peeraddr, 0, &len); - if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) { + if (addr_to_nodeid(&peeraddr, &nodeid)) { unsigned char *b=(unsigned char *)&peeraddr; log_print("connect from non cluster node"); print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, @@ -862,7 +986,7 @@ static void sctp_init_assoc(struct connection *con) if (con->retries++ > MAX_CONNECT_RETRIES) return; - if (nodeid_to_addr(con->nodeid, (struct sockaddr *)&rem_addr)) { + if (nodeid_to_addr(con->nodeid, NULL, (struct sockaddr *)&rem_addr)) { log_print("no address for nodeid %d", con->nodeid); return; } @@ -928,11 +1052,11 @@ static void sctp_init_assoc(struct connection *con) /* Connect a new socket to its peer */ static void tcp_connect_to_sock(struct connection *con) { - int result = -EHOSTUNREACH; struct sockaddr_storage saddr, src_addr; int addr_len; struct socket *sock = NULL; int one = 1; + int result; if (con->nodeid == 0) { log_print("attempt to connect sock 0 foiled"); @@ -944,10 +1068,8 @@ static void tcp_connect_to_sock(struct connection *con) goto out; /* Some odd races can cause double-connects, ignore them */ - if (con->sock) { - result = 0; + if (con->sock) goto out; - } /* Create a socket to communicate with */ result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM, @@ -956,8 +1078,11 @@ static void tcp_connect_to_sock(struct connection *con) goto out_err; memset(&saddr, 0, sizeof(saddr)); - if (dlm_nodeid_to_addr(con->nodeid, &saddr)) + result = nodeid_to_addr(con->nodeid, &saddr, NULL); + if (result < 0) { + log_print("no address for nodeid %d", con->nodeid); goto out_err; + } sock->sk->sk_user_data = con; con->rx_action = receive_from_sock; @@ -983,8 +1108,7 @@ static void tcp_connect_to_sock(struct connection *con) kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one, sizeof(one)); - result = - sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len, + result = sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len, O_NONBLOCK); if (result == -EINPROGRESS) result = 0; @@ -1002,11 +1126,17 @@ out_err: * Some errors are fatal and this list might need adjusting. For other * errors we try again until the max number of retries is reached. */ - if (result != -EHOSTUNREACH && result != -ENETUNREACH && - result != -ENETDOWN && result != -EINVAL - && result != -EPROTONOSUPPORT) { + if (result != -EHOSTUNREACH && + result != -ENETUNREACH && + result != -ENETDOWN && + result != -EINVAL && + result != -EPROTONOSUPPORT) { + log_print("connect %d try %d error %d", con->nodeid, + con->retries, result); + mutex_unlock(&con->sock_mutex); + msleep(1000); lowcomms_connect_sock(con); - result = 0; + return; } out: mutex_unlock(&con->sock_mutex); @@ -1044,10 +1174,8 @@ static struct socket *tcp_create_listen_sock(struct connection *con, if (result < 0) { log_print("Failed to set SO_REUSEADDR on socket: %d", result); } - sock->sk->sk_user_data = con; con->rx_action = tcp_accept_from_sock; con->connect_action = tcp_connect_to_sock; - con->sock = sock; /* Bind to our port */ make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len); @@ -1358,8 +1486,7 @@ static void send_to_sock(struct connection *con) } cond_resched(); goto out; - } - if (ret <= 0) + } else if (ret < 0) goto send_error; } @@ -1376,7 +1503,6 @@ static void send_to_sock(struct connection *con) if (e->len == 0 && e->users == 0) { list_del(&e->list); free_entry(e); - continue; } } spin_unlock(&con->writequeue_lock); @@ -1394,7 +1520,6 @@ out_connect: mutex_unlock(&con->sock_mutex); if (!test_bit(CF_INIT_PENDING, &con->flags)) lowcomms_connect_sock(con); - return; } static void clean_one_writequeue(struct connection *con) @@ -1414,6 +1539,7 @@ static void clean_one_writequeue(struct connection *con) int dlm_lowcomms_close(int nodeid) { struct connection *con; + struct dlm_node_addr *na; log_print("closing connection to node %d", nodeid); con = nodeid2con(nodeid, 0); @@ -1428,6 +1554,17 @@ int dlm_lowcomms_close(int nodeid) clean_one_writequeue(con); close_connection(con, true); } + + spin_lock(&dlm_node_addrs_spin); + na = find_node_addr(nodeid); + if (na) { + list_del(&na->list); + while (na->addr_count--) + kfree(na->addr[na->addr_count]); + kfree(na); + } + spin_unlock(&dlm_node_addrs_spin); + return 0; } @@ -1577,3 +1714,17 @@ fail_destroy: fail: return error; } + +void dlm_lowcomms_exit(void) +{ + struct dlm_node_addr *na, *safe; + + spin_lock(&dlm_node_addrs_spin); + list_for_each_entry_safe(na, safe, &dlm_node_addrs, list) { + list_del(&na->list); + while (na->addr_count--) + kfree(na->addr[na->addr_count]); + kfree(na); + } + spin_unlock(&dlm_node_addrs_spin); +} diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h index 1311e6426287..67462e54fc2f 100644 --- a/fs/dlm/lowcomms.h +++ b/fs/dlm/lowcomms.h @@ -16,10 +16,12 @@ int dlm_lowcomms_start(void); void dlm_lowcomms_stop(void); +void dlm_lowcomms_exit(void); int dlm_lowcomms_close(int nodeid); void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc); void dlm_lowcomms_commit_buffer(void *mh); int dlm_lowcomms_connect_node(int nodeid); +int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len); #endif /* __LOWCOMMS_DOT_H__ */ diff --git a/fs/dlm/main.c b/fs/dlm/main.c index 5a59efa0bb46..079c0bd71ab7 100644 --- a/fs/dlm/main.c +++ b/fs/dlm/main.c @@ -17,6 +17,7 @@ #include "user.h" #include "memory.h" #include "config.h" +#include "lowcomms.h" static int __init init_dlm(void) { @@ -78,6 +79,7 @@ static void __exit exit_dlm(void) dlm_config_exit(); dlm_memory_exit(); dlm_lockspace_exit(); + dlm_lowcomms_exit(); dlm_unregister_debugfs(); } diff --git a/fs/dlm/member.c b/fs/dlm/member.c index 862640a36d5c..476557b54921 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -616,13 +616,13 @@ int dlm_ls_stop(struct dlm_ls *ls) down_write(&ls->ls_recv_active); /* - * Abort any recovery that's in progress (see RECOVERY_STOP, + * Abort any recovery that's in progress (see RECOVER_STOP, * dlm_recovery_stopped()) and tell any other threads running in the * dlm to quit any processing (see RUNNING, dlm_locking_stopped()). */ spin_lock(&ls->ls_recover_lock); - set_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); + set_bit(LSFL_RECOVER_STOP, &ls->ls_flags); new = test_and_clear_bit(LSFL_RUNNING, &ls->ls_flags); ls->ls_recover_seq++; spin_unlock(&ls->ls_recover_lock); @@ -642,12 +642,16 @@ int dlm_ls_stop(struct dlm_ls *ls) * when recovery is complete. */ - if (new) - down_write(&ls->ls_in_recovery); + if (new) { + set_bit(LSFL_RECOVER_DOWN, &ls->ls_flags); + wake_up_process(ls->ls_recoverd_task); + wait_event(ls->ls_recover_lock_wait, + test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags)); + } /* * The recoverd suspend/resume makes sure that dlm_recoverd (if - * running) has noticed RECOVERY_STOP above and quit processing the + * running) has noticed RECOVER_STOP above and quit processing the * previous recovery. */ @@ -709,7 +713,8 @@ int dlm_ls_start(struct dlm_ls *ls) kfree(rv_old); } - dlm_recoverd_kick(ls); + set_bit(LSFL_RECOVER_WORK, &ls->ls_flags); + wake_up_process(ls->ls_recoverd_task); return 0; fail: diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c index 87f1a56eab32..9d61947d473a 100644 --- a/fs/dlm/rcom.c +++ b/fs/dlm/rcom.c @@ -581,7 +581,7 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) spin_lock(&ls->ls_recover_lock); status = ls->ls_recover_status; - stop = test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); + stop = test_bit(LSFL_RECOVER_STOP, &ls->ls_flags); seq = ls->ls_recover_seq; spin_unlock(&ls->ls_recover_lock); diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index 88ce65ff021e..32f9f8926ec3 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c @@ -41,6 +41,7 @@ static int enable_locking(struct dlm_ls *ls, uint64_t seq) set_bit(LSFL_RUNNING, &ls->ls_flags); /* unblocks processes waiting to enter the dlm */ up_write(&ls->ls_in_recovery); + clear_bit(LSFL_RECOVER_LOCK, &ls->ls_flags); error = 0; } spin_unlock(&ls->ls_recover_lock); @@ -262,7 +263,7 @@ static void do_ls_recovery(struct dlm_ls *ls) rv = ls->ls_recover_args; ls->ls_recover_args = NULL; if (rv && ls->ls_recover_seq == rv->seq) - clear_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); + clear_bit(LSFL_RECOVER_STOP, &ls->ls_flags); spin_unlock(&ls->ls_recover_lock); if (rv) { @@ -282,26 +283,34 @@ static int dlm_recoverd(void *arg) return -1; } + down_write(&ls->ls_in_recovery); + set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags); + wake_up(&ls->ls_recover_lock_wait); + while (!kthread_should_stop()) { set_current_state(TASK_INTERRUPTIBLE); - if (!test_bit(LSFL_WORK, &ls->ls_flags)) + if (!test_bit(LSFL_RECOVER_WORK, &ls->ls_flags) && + !test_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) schedule(); set_current_state(TASK_RUNNING); - if (test_and_clear_bit(LSFL_WORK, &ls->ls_flags)) + if (test_and_clear_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) { + down_write(&ls->ls_in_recovery); + set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags); + wake_up(&ls->ls_recover_lock_wait); + } + + if (test_and_clear_bit(LSFL_RECOVER_WORK, &ls->ls_flags)) do_ls_recovery(ls); } + if (test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags)) + up_write(&ls->ls_in_recovery); + dlm_put_lockspace(ls); return 0; } -void dlm_recoverd_kick(struct dlm_ls *ls) -{ - set_bit(LSFL_WORK, &ls->ls_flags); - wake_up_process(ls->ls_recoverd_task); -} - int dlm_recoverd_start(struct dlm_ls *ls) { struct task_struct *p; diff --git a/fs/dlm/recoverd.h b/fs/dlm/recoverd.h index 866657c5d69d..8856079733fa 100644 --- a/fs/dlm/recoverd.h +++ b/fs/dlm/recoverd.h @@ -14,7 +14,6 @@ #ifndef __RECOVERD_DOT_H__ #define __RECOVERD_DOT_H__ -void dlm_recoverd_kick(struct dlm_ls *ls); void dlm_recoverd_stop(struct dlm_ls *ls); int dlm_recoverd_start(struct dlm_ls *ls); void dlm_recoverd_suspend(struct dlm_ls *ls); diff --git a/fs/dlm/user.c b/fs/dlm/user.c index eb4ed9ba3098..7ff49852b0cb 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -503,6 +503,13 @@ static ssize_t device_write(struct file *file, const char __user *buf, #endif return -EINVAL; +#ifdef CONFIG_COMPAT + if (count > sizeof(struct dlm_write_request32) + DLM_RESNAME_MAXLEN) +#else + if (count > sizeof(struct dlm_write_request) + DLM_RESNAME_MAXLEN) +#endif + return -EINVAL; + kbuf = kzalloc(count + 1, GFP_NOFS); if (!kbuf) return -ENOMEM; diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 44ce5c6a541d..d45ba4568128 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -275,8 +275,14 @@ out: static int ecryptfs_flush(struct file *file, fl_owner_t td) { - return file->f_mode & FMODE_WRITE - ? filemap_write_and_wait(file->f_mapping) : 0; + struct file *lower_file = ecryptfs_file_to_lower(file); + + if (lower_file->f_op && lower_file->f_op->flush) { + filemap_write_and_wait(file->f_mapping); + return lower_file->f_op->flush(lower_file, td); + } + + return 0; } static int ecryptfs_release(struct inode *inode, struct file *file) diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 534b129ea676..cc7709e7c508 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -619,6 +619,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct dentry *lower_old_dir_dentry; struct dentry *lower_new_dir_dentry; struct dentry *trap = NULL; + struct inode *target_inode; lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); @@ -626,6 +627,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, dget(lower_new_dentry); lower_old_dir_dentry = dget_parent(lower_old_dentry); lower_new_dir_dentry = dget_parent(lower_new_dentry); + target_inode = new_dentry->d_inode; trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); /* source should not be ancestor of target */ if (trap == lower_old_dentry) { @@ -641,6 +643,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, lower_new_dir_dentry->d_inode, lower_new_dentry); if (rc) goto out_lock; + if (target_inode) + fsstack_copy_attr_all(target_inode, + ecryptfs_inode_to_lower(target_inode)); fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode); if (new_dir != old_dir) fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode); diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 2768138eefee..9b627c15010a 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -162,6 +162,7 @@ void ecryptfs_put_lower_file(struct inode *inode) inode_info = ecryptfs_inode_to_private(inode); if (atomic_dec_and_mutex_lock(&inode_info->lower_file_count, &inode_info->lower_file_mutex)) { + filemap_write_and_wait(inode->i_mapping); fput(inode_info->lower_file); inode_info->lower_file = NULL; mutex_unlock(&inode_info->lower_file_mutex); diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 376aa77f3ca7..2616d0ea5c5c 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -479,7 +479,7 @@ void ext2_discard_reservation(struct inode *inode) /** * ext2_free_blocks() -- Free given blocks and update quota and i_blocks * @inode: inode - * @block: start physcial block to free + * @block: start physical block to free * @count: number of blocks to free */ void ext2_free_blocks (struct inode * inode, unsigned long block, diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index 90d901f0486b..7320a66e958f 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -483,7 +483,7 @@ void ext3_discard_reservation(struct inode *inode) * ext3_free_blocks_sb() -- Free given blocks and update quota * @handle: handle to this transaction * @sb: super block - * @block: start physcial block to free + * @block: start physical block to free * @count: number of blocks to free * @pdquot_freed_blocks: pointer to quota */ diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index ff574b4e345e..7e87e37a372a 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -3207,7 +3207,7 @@ out_brelse: * * - Within generic_file_write() for O_SYNC files. * Here, there will be no transaction running. We wait for any running - * trasnaction to commit. + * transaction to commit. * * - Within sys_sync(), kupdate and such. * We wait on commit, if tol to. diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index dff171c3a123..c862ee5fe79d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3313,7 +3313,7 @@ int ext4_discard_partial_page_buffers(handle_t *handle, * handle: The journal handle * inode: The files inode * page: A locked page that contains the offset "from" - * from: The starting byte offset (from the begining of the file) + * from: The starting byte offset (from the beginning of the file) * to begin discarding * len: The length of bytes to discard * flags: Optional flags that may be used: @@ -3321,11 +3321,11 @@ int ext4_discard_partial_page_buffers(handle_t *handle, * EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED * Only zero the regions of the page whose buffer heads * have already been unmapped. This flag is appropriate - * for updateing the contents of a page whose blocks may + * for updating the contents of a page whose blocks may * have already been released, and we only want to zero * out the regions that correspond to those released blocks. * - * Returns zero on sucess or negative on failure. + * Returns zero on success or negative on failure. */ static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, struct inode *inode, struct page *page, loff_t from, @@ -3486,7 +3486,7 @@ int ext4_can_truncate(struct inode *inode) * @offset: The offset where the hole will begin * @len: The length of the hole * - * Returns: 0 on sucess or negative on failure + * Returns: 0 on success or negative on failure */ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) @@ -4008,7 +4008,7 @@ static int ext4_inode_blocks_set(handle_t *handle, if (i_blocks <= ~0U) { /* - * i_blocks can be represnted in a 32 bit variable + * i_blocks can be represented in a 32 bit variable * as multiple of 512 bytes */ raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); @@ -4169,7 +4169,7 @@ out_brelse: * * - Within generic_file_write() for O_SYNC files. * Here, there will be no transaction running. We wait for any running - * trasnaction to commit. + * transaction to commit. * * - Within sys_sync(), kupdate and such. * We wait on commit, if tol to. @@ -4413,7 +4413,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) * worse case, the indexs blocks spread over different block groups * * If datablocks are discontiguous, they are possible to spread over - * different block groups too. If they are contiuguous, with flexbg, + * different block groups too. If they are contiguous, with flexbg, * they could still across block group boundary. * * Also account for superblock, inode, quota and xattr blocks diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 8eae94771c45..08778f6cdfe9 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4709,7 +4709,7 @@ error_return: * ext4_group_add_blocks() -- Add given blocks to an existing group * @handle: handle to this transaction * @sb: super block - * @block: start physcial block to add to the block group + * @block: start physical block to add to the block group * @count: number of blocks to free * * This marks the blocks as free in the bitmap and buddy. diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index be3efc4f64f4..6d46c0d78338 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -577,10 +577,6 @@ static long writeback_chunk_size(struct backing_dev_info *bdi, /* * Write a portion of b_io inodes which belong to @sb. * - * If @only_this_sb is true, then find and write all such - * inodes. Otherwise write only ones which go sequentially - * in reverse order. - * * Return the number of pages and/or inodes written. */ static long writeback_sb_inodes(struct super_block *sb, diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index d6526347d386..01c4975da4bc 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -612,6 +612,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, struct gfs2_sbd *sdp = GFS2_SB(mapping->host); struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); unsigned int data_blocks = 0, ind_blocks = 0, rblocks; + unsigned requested = 0; int alloc_required; int error = 0; pgoff_t index = pos >> PAGE_CACHE_SHIFT; @@ -641,7 +642,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, if (error) goto out_unlock; - error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); + requested = data_blocks + ind_blocks; + error = gfs2_inplace_reserve(ip, requested); if (error) goto out_qunlock; } @@ -654,7 +656,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, if (&ip->i_inode == sdp->sd_rindex) rblocks += 2 * RES_STATFS; if (alloc_required) - rblocks += gfs2_rg_blocks(ip); + rblocks += gfs2_rg_blocks(ip, requested); error = gfs2_trans_begin(sdp, rblocks, PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); @@ -868,8 +870,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, brelse(dibh); failed: gfs2_trans_end(sdp); - if (gfs2_mb_reserved(ip)) - gfs2_inplace_release(ip); + gfs2_inplace_release(ip); if (ip->i_res->rs_qa_qd_num) gfs2_quota_unlock(ip); if (inode == sdp->sd_rindex) { @@ -1023,7 +1024,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, offset, nr_segs, gfs2_get_block_direct, NULL, NULL, 0); out: - gfs2_glock_dq_m(1, &gh); + gfs2_glock_dq(&gh); gfs2_holder_uninit(&gh); return rv; } diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 49cd7dd4a9fa..1fd3ae237bdd 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -786,7 +786,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, goto out_rlist; if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */ - gfs2_rs_deltree(ip->i_res); + gfs2_rs_deltree(ip, ip->i_res); error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE + RES_INDIRECT + RES_STATFS + RES_QUOTA, diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index d1d791ef38de..30e21997a1a1 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -323,6 +323,29 @@ static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } /** + * gfs2_size_hint - Give a hint to the size of a write request + * @file: The struct file + * @offset: The file offset of the write + * @size: The length of the write + * + * When we are about to do a write, this function records the total + * write size in order to provide a suitable hint to the lower layers + * about how many blocks will be required. + * + */ + +static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size) +{ + struct inode *inode = filep->f_dentry->d_inode; + struct gfs2_sbd *sdp = GFS2_SB(inode); + struct gfs2_inode *ip = GFS2_I(inode); + size_t blks = (size + sdp->sd_sb.sb_bsize - 1) >> sdp->sd_sb.sb_bsize_shift; + int hint = min_t(size_t, INT_MAX, blks); + + atomic_set(&ip->i_res->rs_sizehint, hint); +} + +/** * gfs2_allocate_page_backing - Use bmap to allocate blocks * @page: The (locked) page to allocate backing for * @@ -382,8 +405,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (ret) return ret; - atomic_set(&ip->i_res->rs_sizehint, - PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift); + gfs2_size_hint(vma->vm_file, pos, PAGE_CACHE_SIZE); gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); @@ -419,7 +441,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) rblocks += data_blocks ? data_blocks : 1; if (ind_blocks || data_blocks) { rblocks += RES_STATFS + RES_QUOTA; - rblocks += gfs2_rg_blocks(ip); + rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks); } ret = gfs2_trans_begin(sdp, rblocks, 0); if (ret) @@ -663,7 +685,8 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (ret) return ret; - atomic_set(&ip->i_res->rs_sizehint, writesize >> sdp->sd_sb.sb_bsize_shift); + gfs2_size_hint(file, pos, writesize); + if (file->f_flags & O_APPEND) { struct gfs2_holder gh; @@ -789,7 +812,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, if (unlikely(error)) goto out_uninit; - atomic_set(&ip->i_res->rs_sizehint, len >> sdp->sd_sb.sb_bsize_shift); + gfs2_size_hint(file, offset, len); while (len > 0) { if (len < bytes) @@ -822,7 +845,7 @@ retry: &max_bytes, &data_blocks, &ind_blocks); rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + - RES_RG_HDR + gfs2_rg_blocks(ip); + RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks); if (gfs2_is_jdata(ip)) rblocks += data_blocks ? data_blocks : 1; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 1ed81f40da0d..e6c2fd53cab2 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -186,20 +186,6 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) } /** - * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list - * @gl: the glock - * - * If the glock is demotable, then we add it (or move it) to the end - * of the glock LRU list. - */ - -static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) -{ - if (demote_ok(gl)) - gfs2_glock_add_to_lru(gl); -} - -/** * gfs2_glock_put_nolock() - Decrement reference count on glock * @gl: The glock to put * @@ -883,7 +869,14 @@ static int gfs2_glock_demote_wait(void *word) return 0; } -static void wait_on_holder(struct gfs2_holder *gh) +/** + * gfs2_glock_wait - wait on a glock acquisition + * @gh: the glock holder + * + * Returns: 0 on success + */ + +int gfs2_glock_wait(struct gfs2_holder *gh) { unsigned long time1 = jiffies; @@ -894,12 +887,7 @@ static void wait_on_holder(struct gfs2_holder *gh) gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time + GL_GLOCK_HOLD_INCR, GL_GLOCK_MAX_HOLD); -} - -static void wait_on_demote(struct gfs2_glock *gl) -{ - might_sleep(); - wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE); + return gh->gh_error; } /** @@ -929,19 +917,6 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state, trace_gfs2_demote_rq(gl); } -/** - * gfs2_glock_wait - wait on a glock acquisition - * @gh: the glock holder - * - * Returns: 0 on success - */ - -int gfs2_glock_wait(struct gfs2_holder *gh) -{ - wait_on_holder(gh); - return gh->gh_error; -} - void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) { struct va_format vaf; @@ -979,7 +954,7 @@ __acquires(&gl->gl_spin) struct gfs2_sbd *sdp = gl->gl_sbd; struct list_head *insert_pt = NULL; struct gfs2_holder *gh2; - int try_lock = 0; + int try_futile = 0; BUG_ON(gh->gh_owner_pid == NULL); if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) @@ -987,7 +962,7 @@ __acquires(&gl->gl_spin) if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) { if (test_bit(GLF_LOCK, &gl->gl_flags)) - try_lock = 1; + try_futile = !may_grant(gl, gh); if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) goto fail; } @@ -996,9 +971,8 @@ __acquires(&gl->gl_spin) if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid && (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK))) goto trap_recursive; - if (try_lock && - !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) && - !may_grant(gl, gh)) { + if (try_futile && + !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) { fail: gh->gh_error = GLR_TRYFAILED; gfs2_holder_wake(gh); @@ -1121,8 +1095,9 @@ void gfs2_glock_dq(struct gfs2_holder *gh) !test_bit(GLF_DEMOTE, &gl->gl_flags)) fast_path = 1; } - if (!test_bit(GLF_LFLUSH, &gl->gl_flags)) - __gfs2_glock_schedule_for_reclaim(gl); + if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl)) + gfs2_glock_add_to_lru(gl); + trace_gfs2_glock_queue(gh, 0); spin_unlock(&gl->gl_spin); if (likely(fast_path)) @@ -1141,7 +1116,8 @@ void gfs2_glock_dq_wait(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; gfs2_glock_dq(gh); - wait_on_demote(gl); + might_sleep(); + wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE); } /** diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 4bdcf3784187..32cc4fde975c 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -94,6 +94,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) /* A shortened, inline version of gfs2_trans_begin() */ tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64)); tr.tr_ip = (unsigned long)__builtin_return_address(0); + sb_start_intwrite(sdp->sd_vfs); gfs2_log_reserve(sdp, tr.tr_reserved); BUG_ON(current->journal_info); current->journal_info = &tr; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index aaecc8085fc5..3d469d37345e 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -99,9 +99,26 @@ struct gfs2_rgrpd { #define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */ spinlock_t rd_rsspin; /* protects reservation related vars */ struct rb_root rd_rstree; /* multi-block reservation tree */ - u32 rd_rs_cnt; /* count of current reservations */ }; +struct gfs2_rbm { + struct gfs2_rgrpd *rgd; + struct gfs2_bitmap *bi; /* Bitmap must belong to the rgd */ + u32 offset; /* The offset is bitmap relative */ +}; + +static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm) +{ + return rbm->rgd->rd_data0 + (rbm->bi->bi_start * GFS2_NBBY) + rbm->offset; +} + +static inline bool gfs2_rbm_eq(const struct gfs2_rbm *rbm1, + const struct gfs2_rbm *rbm2) +{ + return (rbm1->rgd == rbm2->rgd) && (rbm1->bi == rbm2->bi) && + (rbm1->offset == rbm2->offset); +} + enum gfs2_state_bits { BH_Pinned = BH_PrivateStart, BH_Escaped = BH_PrivateStart + 1, @@ -250,18 +267,11 @@ struct gfs2_blkreserv { /* components used during write (step 1): */ atomic_t rs_sizehint; /* hint of the write size */ - /* components used during inplace_reserve (step 2): */ - u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */ - - /* components used during get_local_rgrp (step 3): */ - struct gfs2_rgrpd *rs_rgd; /* pointer to the gfs2_rgrpd */ struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */ struct rb_node rs_node; /* link to other block reservations */ - - /* components used during block searches and assignments (step 4): */ - struct gfs2_bitmap *rs_bi; /* bitmap for the current allocation */ - u32 rs_biblk; /* start block relative to the bi */ + struct gfs2_rbm rs_rbm; /* Start of reservation */ u32 rs_free; /* how many blocks are still free */ + u64 rs_inum; /* Inode number for reservation */ /* ancillary quota stuff */ struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS]; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 4ce22e547308..381893ceefa4 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -712,14 +712,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) goto fail_gunlock2; - /* The newly created inode needs a reservation so it can allocate - xattrs. At the same time, we want new blocks allocated to the new - dinode to be as contiguous as possible. Since we allocated the - dinode block under the directory's reservation, we transfer - ownership of that reservation to the new inode. The directory - doesn't need a reservation unless it needs a new allocation. */ - ip->i_res = dip->i_res; - dip->i_res = NULL; + error = gfs2_rs_alloc(ip); + if (error) + goto fail_gunlock2; error = gfs2_acl_create(dip, inode); if (error) @@ -737,10 +732,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, brelse(bh); gfs2_trans_end(sdp); - /* Check if we reserved space in the rgrp. Function link_dinode may - not, depending on whether alloc is required. */ - if (gfs2_mb_reserved(dip)) - gfs2_inplace_release(dip); + gfs2_inplace_release(dip); gfs2_quota_unlock(dip); mark_inode_dirty(inode); gfs2_glock_dq_uninit_m(2, ghs); @@ -897,7 +889,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, goto out_gunlock_q; error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + - gfs2_rg_blocks(dip) + + gfs2_rg_blocks(dip, sdp->sd_max_dirres) + 2 * RES_DINODE + RES_STATFS + RES_QUOTA, 0); if (error) @@ -1378,7 +1370,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, goto out_gunlock_q; error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + - gfs2_rg_blocks(ndip) + + gfs2_rg_blocks(ndip, sdp->sd_max_dirres) + 4 * RES_DINODE + 4 * RES_LEAF + RES_STATFS + RES_QUOTA + 4, 0); if (error) @@ -1722,7 +1714,9 @@ static int gfs2_setxattr(struct dentry *dentry, const char *name, gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); if (ret == 0) { - ret = generic_setxattr(dentry, name, data, size, flags); + ret = gfs2_rs_alloc(ip); + if (ret == 0) + ret = generic_setxattr(dentry, name, data, size, flags); gfs2_glock_dq(&gh); } gfs2_holder_uninit(&gh); @@ -1757,7 +1751,9 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name) gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); if (ret == 0) { - ret = generic_removexattr(dentry, name); + ret = gfs2_rs_alloc(ip); + if (ret == 0) + ret = generic_removexattr(dentry, name); gfs2_glock_dq(&gh); } gfs2_holder_uninit(&gh); diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 4a38db739ca0..0fb6539b0c8c 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -1289,7 +1289,7 @@ static void gdlm_unmount(struct gfs2_sbd *sdp) spin_lock(&ls->ls_recover_spin); set_bit(DFL_UNMOUNT, &ls->ls_recover_flags); spin_unlock(&ls->ls_recover_spin); - flush_delayed_work_sync(&sdp->sd_control_work); + flush_delayed_work(&sdp->sd_control_work); /* mounted_lock and control_lock will be purged in dlm recovery */ release: diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index e5af9dc420ef..e443966c8106 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -19,6 +19,7 @@ #include <linux/mount.h> #include <linux/gfs2_ondisk.h> #include <linux/quotaops.h> +#include <linux/lockdep.h> #include "gfs2.h" #include "incore.h" @@ -766,6 +767,7 @@ fail: return error; } +static struct lock_class_key gfs2_quota_imutex_key; static int init_inodes(struct gfs2_sbd *sdp, int undo) { @@ -803,6 +805,12 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) fs_err(sdp, "can't get quota file inode: %d\n", error); goto fail_rindex; } + /* + * i_mutex on quota files is special. Since this inode is hidden system + * file, we are safe to define locking ourselves. + */ + lockdep_set_class(&sdp->sd_quota_inode->i_mutex, + &gfs2_quota_imutex_key); error = gfs2_rindex_update(sdp); if (error) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index a3bde91645c2..4021deca61ef 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -765,6 +765,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) struct gfs2_holder *ghs, i_gh; unsigned int qx, x; struct gfs2_quota_data *qd; + unsigned reserved; loff_t offset; unsigned int nalloc = 0, blocks; int error; @@ -781,7 +782,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) return -ENOMEM; sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL); - mutex_lock_nested(&ip->i_inode.i_mutex, I_MUTEX_QUOTA); + mutex_lock(&ip->i_inode.i_mutex); for (qx = 0; qx < num_qd; qx++) { error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, &ghs[qx]); @@ -811,13 +812,13 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) * two blocks need to be updated instead of 1 */ blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3; - error = gfs2_inplace_reserve(ip, 1 + - (nalloc * (data_blocks + ind_blocks))); + reserved = 1 + (nalloc * (data_blocks + ind_blocks)); + error = gfs2_inplace_reserve(ip, reserved); if (error) goto out_alloc; if (nalloc) - blocks += gfs2_rg_blocks(ip) + nalloc * ind_blocks + RES_STATFS; + blocks += gfs2_rg_blocks(ip, reserved) + nalloc * ind_blocks + RES_STATFS; error = gfs2_trans_begin(sdp, blocks, 0); if (error) @@ -1598,7 +1599,7 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, error = gfs2_inplace_reserve(ip, blocks); if (error) goto out_i; - blocks += gfs2_rg_blocks(ip); + blocks += gfs2_rg_blocks(ip, blocks); } /* Some quotas span block boundaries and can update two blocks, diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 4d34887a601d..3cc402ce6fea 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -35,9 +35,6 @@ #define BFITNOENT ((u32)~0) #define NO_BLOCK ((u64)~0) -#define RSRV_CONTENTION_FACTOR 4 -#define RGRP_RSRV_MAX_CONTENDERS 2 - #if BITS_PER_LONG == 32 #define LBITMASK (0x55555555UL) #define LBITSKIP55 (0x55555555UL) @@ -67,53 +64,48 @@ static const char valid_change[16] = { 1, 0, 0, 0 }; -static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, - unsigned char old_state, - struct gfs2_bitmap **rbi); +static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, + const struct gfs2_inode *ip, bool nowrap); + /** * gfs2_setbit - Set a bit in the bitmaps - * @rgd: the resource group descriptor - * @buf2: the clone buffer that holds the bitmaps - * @bi: the bitmap structure - * @block: the block to set + * @rbm: The position of the bit to set + * @do_clone: Also set the clone bitmap, if it exists * @new_state: the new state of the block * */ -static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf2, - struct gfs2_bitmap *bi, u32 block, +static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone, unsigned char new_state) { unsigned char *byte1, *byte2, *end, cur_state; - unsigned int buflen = bi->bi_len; - const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; + unsigned int buflen = rbm->bi->bi_len; + const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; - byte1 = bi->bi_bh->b_data + bi->bi_offset + (block / GFS2_NBBY); - end = bi->bi_bh->b_data + bi->bi_offset + buflen; + byte1 = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY); + end = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + buflen; BUG_ON(byte1 >= end); cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; if (unlikely(!valid_change[new_state * 4 + cur_state])) { - printk(KERN_WARNING "GFS2: buf_blk = 0x%llx old_state=%d, " - "new_state=%d\n", - (unsigned long long)block, cur_state, new_state); - printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%lx\n", - (unsigned long long)rgd->rd_addr, - (unsigned long)bi->bi_start); - printk(KERN_WARNING "GFS2: bi_offset=0x%lx bi_len=0x%lx\n", - (unsigned long)bi->bi_offset, - (unsigned long)bi->bi_len); + printk(KERN_WARNING "GFS2: buf_blk = 0x%x old_state=%d, " + "new_state=%d\n", rbm->offset, cur_state, new_state); + printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%x\n", + (unsigned long long)rbm->rgd->rd_addr, + rbm->bi->bi_start); + printk(KERN_WARNING "GFS2: bi_offset=0x%x bi_len=0x%x\n", + rbm->bi->bi_offset, rbm->bi->bi_len); dump_stack(); - gfs2_consist_rgrpd(rgd); + gfs2_consist_rgrpd(rbm->rgd); return; } *byte1 ^= (cur_state ^ new_state) << bit; - if (buf2) { - byte2 = buf2 + bi->bi_offset + (block / GFS2_NBBY); + if (do_clone && rbm->bi->bi_clone) { + byte2 = rbm->bi->bi_clone + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY); cur_state = (*byte2 >> bit) & GFS2_BIT_MASK; *byte2 ^= (cur_state ^ new_state) << bit; } @@ -121,30 +113,21 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf2, /** * gfs2_testbit - test a bit in the bitmaps - * @rgd: the resource group descriptor - * @buffer: the buffer that holds the bitmaps - * @buflen: the length (in bytes) of the buffer - * @block: the block to read + * @rbm: The bit to test * + * Returns: The two bit block state of the requested bit */ -static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, - const unsigned char *buffer, - unsigned int buflen, u32 block) +static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm) { - const unsigned char *byte, *end; - unsigned char cur_state; + const u8 *buffer = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset; + const u8 *byte; unsigned int bit; - byte = buffer + (block / GFS2_NBBY); - bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; - end = buffer + buflen; - - gfs2_assert(rgd->rd_sbd, byte < end); + byte = buffer + (rbm->offset / GFS2_NBBY); + bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; - cur_state = (*byte >> bit) & GFS2_BIT_MASK; - - return cur_state; + return (*byte >> bit) & GFS2_BIT_MASK; } /** @@ -192,7 +175,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) */ static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) { - u64 startblk = gfs2_rs_startblk(rs); + u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm); if (blk >= startblk + rs->rs_free) return 1; @@ -202,36 +185,6 @@ static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) } /** - * rs_find - Find a rgrp multi-block reservation that contains a given block - * @rgd: The rgrp - * @rgblk: The block we're looking for, relative to the rgrp - */ -static struct gfs2_blkreserv *rs_find(struct gfs2_rgrpd *rgd, u32 rgblk) -{ - struct rb_node **newn; - int rc; - u64 fsblk = rgblk + rgd->rd_data0; - - spin_lock(&rgd->rd_rsspin); - newn = &rgd->rd_rstree.rb_node; - while (*newn) { - struct gfs2_blkreserv *cur = - rb_entry(*newn, struct gfs2_blkreserv, rs_node); - rc = rs_cmp(fsblk, 1, cur); - if (rc < 0) - newn = &((*newn)->rb_left); - else if (rc > 0) - newn = &((*newn)->rb_right); - else { - spin_unlock(&rgd->rd_rsspin); - return cur; - } - } - spin_unlock(&rgd->rd_rsspin); - return NULL; -} - -/** * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing * a block in a given allocation state. * @buf: the buffer that holds the bitmaps @@ -262,8 +215,6 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, u64 mask = 0x5555555555555555ULL; u32 bit; - BUG_ON(state > 3); - /* Mask off bits we don't care about at the start of the search */ mask <<= spoint; tmp = gfs2_bit_search(ptr, mask, state); @@ -285,6 +236,131 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, } /** + * gfs2_rbm_from_block - Set the rbm based upon rgd and block number + * @rbm: The rbm with rgd already set correctly + * @block: The block number (filesystem relative) + * + * This sets the bi and offset members of an rbm based on a + * resource group and a filesystem relative block number. The + * resource group must be set in the rbm on entry, the bi and + * offset members will be set by this function. + * + * Returns: 0 on success, or an error code + */ + +static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) +{ + u64 rblock = block - rbm->rgd->rd_data0; + u32 goal = (u32)rblock; + int x; + + if (WARN_ON_ONCE(rblock > UINT_MAX)) + return -EINVAL; + if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) + return -E2BIG; + + for (x = 0; x < rbm->rgd->rd_length; x++) { + rbm->bi = rbm->rgd->rd_bits + x; + if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) { + rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY); + break; + } + } + + return 0; +} + +/** + * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned + * @rbm: Position to search (value/result) + * @n_unaligned: Number of unaligned blocks to check + * @len: Decremented for each block found (terminate on zero) + * + * Returns: true if a non-free block is encountered + */ + +static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len) +{ + u64 block; + u32 n; + u8 res; + + for (n = 0; n < n_unaligned; n++) { + res = gfs2_testbit(rbm); + if (res != GFS2_BLKST_FREE) + return true; + (*len)--; + if (*len == 0) + return true; + block = gfs2_rbm_to_block(rbm); + if (gfs2_rbm_from_block(rbm, block + 1)) + return true; + } + + return false; +} + +/** + * gfs2_free_extlen - Return extent length of free blocks + * @rbm: Starting position + * @len: Max length to check + * + * Starting at the block specified by the rbm, see how many free blocks + * there are, not reading more than len blocks ahead. This can be done + * using memchr_inv when the blocks are byte aligned, but has to be done + * on a block by block basis in case of unaligned blocks. Also this + * function can cope with bitmap boundaries (although it must stop on + * a resource group boundary) + * + * Returns: Number of free blocks in the extent + */ + +static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len) +{ + struct gfs2_rbm rbm = *rrbm; + u32 n_unaligned = rbm.offset & 3; + u32 size = len; + u32 bytes; + u32 chunk_size; + u8 *ptr, *start, *end; + u64 block; + + if (n_unaligned && + gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len)) + goto out; + + n_unaligned = len & 3; + /* Start is now byte aligned */ + while (len > 3) { + start = rbm.bi->bi_bh->b_data; + if (rbm.bi->bi_clone) + start = rbm.bi->bi_clone; + end = start + rbm.bi->bi_bh->b_size; + start += rbm.bi->bi_offset; + BUG_ON(rbm.offset & 3); + start += (rbm.offset / GFS2_NBBY); + bytes = min_t(u32, len / GFS2_NBBY, (end - start)); + ptr = memchr_inv(start, 0, bytes); + chunk_size = ((ptr == NULL) ? bytes : (ptr - start)); + chunk_size *= GFS2_NBBY; + BUG_ON(len < chunk_size); + len -= chunk_size; + block = gfs2_rbm_to_block(&rbm); + gfs2_rbm_from_block(&rbm, block + chunk_size); + n_unaligned = 3; + if (ptr) + break; + n_unaligned = len & 3; + } + + /* Deal with any bits left over at the end */ + if (n_unaligned) + gfs2_unaligned_extlen(&rbm, n_unaligned, &len); +out: + return size - len; +} + +/** * gfs2_bitcount - count the number of bits in a certain state * @rgd: the resource group descriptor * @buffer: the buffer that holds the bitmaps @@ -487,6 +563,8 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) if (!res) error = -ENOMEM; + RB_CLEAR_NODE(&res->rs_node); + down_write(&ip->i_rw_mutex); if (ip->i_res) kmem_cache_free(gfs2_rsrv_cachep, res); @@ -496,11 +574,12 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) return error; } -static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs) +static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) { - gfs2_print_dbg(seq, " r: %llu s:%llu b:%u f:%u\n", - rs->rs_rgd->rd_addr, gfs2_rs_startblk(rs), rs->rs_biblk, - rs->rs_free); + gfs2_print_dbg(seq, " B: n:%llu s:%llu b:%u f:%u\n", + (unsigned long long)rs->rs_inum, + (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm), + rs->rs_rbm.offset, rs->rs_free); } /** @@ -508,41 +587,26 @@ static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs) * @rs: The reservation to remove * */ -static void __rs_deltree(struct gfs2_blkreserv *rs) +static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) { struct gfs2_rgrpd *rgd; if (!gfs2_rs_active(rs)) return; - rgd = rs->rs_rgd; - /* We can't do this: The reason is that when the rgrp is invalidated, - it's in the "middle" of acquiring the glock, but the HOLDER bit - isn't set yet: - BUG_ON(!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl));*/ - trace_gfs2_rs(NULL, rs, TRACE_RS_TREEDEL); - - if (!RB_EMPTY_ROOT(&rgd->rd_rstree)) - rb_erase(&rs->rs_node, &rgd->rd_rstree); - BUG_ON(!rgd->rd_rs_cnt); - rgd->rd_rs_cnt--; + rgd = rs->rs_rbm.rgd; + trace_gfs2_rs(rs, TRACE_RS_TREEDEL); + rb_erase(&rs->rs_node, &rgd->rd_rstree); + RB_CLEAR_NODE(&rs->rs_node); if (rs->rs_free) { /* return reserved blocks to the rgrp and the ip */ - BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free); - rs->rs_rgd->rd_reserved -= rs->rs_free; + BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free); + rs->rs_rbm.rgd->rd_reserved -= rs->rs_free; rs->rs_free = 0; - clear_bit(GBF_FULL, &rs->rs_bi->bi_flags); + clear_bit(GBF_FULL, &rs->rs_rbm.bi->bi_flags); smp_mb__after_clear_bit(); } - /* We can't change any of the step 1 or step 2 components of the rs. - E.g. We can't set rs_rgd to NULL because the rgd glock is held and - dequeued through this pointer. - Can't: atomic_set(&rs->rs_sizehint, 0); - Can't: rs->rs_requested = 0; - Can't: rs->rs_rgd = NULL;*/ - rs->rs_bi = NULL; - rs->rs_biblk = 0; } /** @@ -550,17 +614,16 @@ static void __rs_deltree(struct gfs2_blkreserv *rs) * @rs: The reservation to remove * */ -void gfs2_rs_deltree(struct gfs2_blkreserv *rs) +void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) { struct gfs2_rgrpd *rgd; - if (!gfs2_rs_active(rs)) - return; - - rgd = rs->rs_rgd; - spin_lock(&rgd->rd_rsspin); - __rs_deltree(rs); - spin_unlock(&rgd->rd_rsspin); + rgd = rs->rs_rbm.rgd; + if (rgd) { + spin_lock(&rgd->rd_rsspin); + __rs_deltree(ip, rs); + spin_unlock(&rgd->rd_rsspin); + } } /** @@ -572,8 +635,7 @@ void gfs2_rs_delete(struct gfs2_inode *ip) { down_write(&ip->i_rw_mutex); if (ip->i_res) { - gfs2_rs_deltree(ip->i_res); - trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE); + gfs2_rs_deltree(ip, ip->i_res); BUG_ON(ip->i_res->rs_free); kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); ip->i_res = NULL; @@ -597,7 +659,7 @@ static void return_all_reservations(struct gfs2_rgrpd *rgd) spin_lock(&rgd->rd_rsspin); while ((n = rb_first(&rgd->rd_rstree))) { rs = rb_entry(n, struct gfs2_blkreserv, rs_node); - __rs_deltree(rs); + __rs_deltree(NULL, rs); } spin_unlock(&rgd->rd_rsspin); } @@ -1270,211 +1332,276 @@ out: /** * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree - * @bi: the bitmap with the blocks * @ip: the inode structure - * @biblk: the 32-bit block number relative to the start of the bitmap - * @amount: the number of blocks to reserve * - * Returns: NULL - reservation was already taken, so not inserted - * pointer to the inserted reservation */ -static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, - struct gfs2_inode *ip, u32 biblk, - int amount) +static void rs_insert(struct gfs2_inode *ip) { struct rb_node **newn, *parent = NULL; int rc; struct gfs2_blkreserv *rs = ip->i_res; - struct gfs2_rgrpd *rgd = rs->rs_rgd; - u64 fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0; + struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd; + u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm); + + BUG_ON(gfs2_rs_active(rs)); spin_lock(&rgd->rd_rsspin); newn = &rgd->rd_rstree.rb_node; - BUG_ON(!ip->i_res); - BUG_ON(gfs2_rs_active(rs)); - /* Figure out where to put new node */ - /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/ while (*newn) { struct gfs2_blkreserv *cur = rb_entry(*newn, struct gfs2_blkreserv, rs_node); parent = *newn; - rc = rs_cmp(fsblock, amount, cur); + rc = rs_cmp(fsblock, rs->rs_free, cur); if (rc > 0) newn = &((*newn)->rb_right); else if (rc < 0) newn = &((*newn)->rb_left); else { spin_unlock(&rgd->rd_rsspin); - return NULL; /* reservation already in use */ + WARN_ON(1); + return; } } - /* Do our reservation work */ - rs = ip->i_res; - rs->rs_free = amount; - rs->rs_biblk = biblk; - rs->rs_bi = bi; rb_link_node(&rs->rs_node, parent, newn); rb_insert_color(&rs->rs_node, &rgd->rd_rstree); - /* Do our inode accounting for the reservation */ - /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/ - /* Do our rgrp accounting for the reservation */ - rgd->rd_reserved += amount; /* blocks reserved */ - rgd->rd_rs_cnt++; /* number of in-tree reservations */ + rgd->rd_reserved += rs->rs_free; /* blocks reserved */ spin_unlock(&rgd->rd_rsspin); - trace_gfs2_rs(ip, rs, TRACE_RS_INSERT); - return rs; + trace_gfs2_rs(rs, TRACE_RS_INSERT); } /** - * unclaimed_blocks - return number of blocks that aren't spoken for - */ -static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd) -{ - return rgd->rd_free_clone - rgd->rd_reserved; -} - -/** - * rg_mblk_search - find a group of multiple free blocks + * rg_mblk_search - find a group of multiple free blocks to form a reservation * @rgd: the resource group descriptor - * @rs: the block reservation * @ip: pointer to the inode for which we're reserving blocks + * @requested: number of blocks required for this allocation * - * This is very similar to rgblk_search, except we're looking for whole - * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing - * on aligned dwords for speed's sake. - * - * Returns: 0 if successful or BFITNOENT if there isn't enough free space */ -static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) +static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, + unsigned requested) { - struct gfs2_bitmap *bi = rgd->rd_bits; - const u32 length = rgd->rd_length; - u32 blk; - unsigned int buf, x, search_bytes; - u8 *buffer = NULL; - u8 *ptr, *end, *nonzero; - u32 goal, rsv_bytes; - struct gfs2_blkreserv *rs; - u32 best_rs_bytes, unclaimed; - int best_rs_blocks; + struct gfs2_rbm rbm = { .rgd = rgd, }; + u64 goal; + struct gfs2_blkreserv *rs = ip->i_res; + u32 extlen; + u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved; + int ret; + + extlen = max_t(u32, atomic_read(&rs->rs_sizehint), requested); + extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks); + if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen)) + return; /* Find bitmap block that contains bits for goal block */ if (rgrp_contains_block(rgd, ip->i_goal)) - goal = ip->i_goal - rgd->rd_data0; + goal = ip->i_goal; else - goal = rgd->rd_last_alloc; - for (buf = 0; buf < length; buf++) { - bi = rgd->rd_bits + buf; - /* Convert scope of "goal" from rgrp-wide to within - found bit block */ - if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) { - goal -= bi->bi_start * GFS2_NBBY; - goto do_search; - } + goal = rgd->rd_last_alloc + rgd->rd_data0; + + if (WARN_ON(gfs2_rbm_from_block(&rbm, goal))) + return; + + ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, extlen, ip, true); + if (ret == 0) { + rs->rs_rbm = rbm; + rs->rs_free = extlen; + rs->rs_inum = ip->i_no_addr; + rs_insert(ip); } - buf = 0; - goal = 0; - -do_search: - best_rs_blocks = max_t(int, atomic_read(&ip->i_res->rs_sizehint), - (RGRP_RSRV_MINBLKS * rgd->rd_length)); - best_rs_bytes = (best_rs_blocks * - (1 + (RSRV_CONTENTION_FACTOR * rgd->rd_rs_cnt))) / - GFS2_NBBY; /* 1 + is for our not-yet-created reservation */ - best_rs_bytes = ALIGN(best_rs_bytes, sizeof(u64)); - unclaimed = unclaimed_blocks(rgd); - if (best_rs_bytes * GFS2_NBBY > unclaimed) - best_rs_bytes = unclaimed >> GFS2_BIT_SIZE; - - for (x = 0; x <= length; x++) { - bi = rgd->rd_bits + buf; +} - if (test_bit(GBF_FULL, &bi->bi_flags)) - goto skip; +/** + * gfs2_next_unreserved_block - Return next block that is not reserved + * @rgd: The resource group + * @block: The starting block + * @length: The required length + * @ip: Ignore any reservations for this inode + * + * If the block does not appear in any reservation, then return the + * block number unchanged. If it does appear in the reservation, then + * keep looking through the tree of reservations in order to find the + * first block number which is not reserved. + */ - WARN_ON(!buffer_uptodate(bi->bi_bh)); - if (bi->bi_clone) - buffer = bi->bi_clone + bi->bi_offset; +static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, + u32 length, + const struct gfs2_inode *ip) +{ + struct gfs2_blkreserv *rs; + struct rb_node *n; + int rc; + + spin_lock(&rgd->rd_rsspin); + n = rgd->rd_rstree.rb_node; + while (n) { + rs = rb_entry(n, struct gfs2_blkreserv, rs_node); + rc = rs_cmp(block, length, rs); + if (rc < 0) + n = n->rb_left; + else if (rc > 0) + n = n->rb_right; else - buffer = bi->bi_bh->b_data + bi->bi_offset; - - /* We have to keep the reservations aligned on u64 boundaries - otherwise we could get situations where a byte can't be - used because it's after a reservation, but a free bit still - is within the reservation's area. */ - ptr = buffer + ALIGN(goal >> GFS2_BIT_SIZE, sizeof(u64)); - end = (buffer + bi->bi_len); - while (ptr < end) { - rsv_bytes = 0; - if ((ptr + best_rs_bytes) <= end) - search_bytes = best_rs_bytes; - else - search_bytes = end - ptr; - BUG_ON(!search_bytes); - nonzero = memchr_inv(ptr, 0, search_bytes); - /* If the lot is all zeroes, reserve the whole size. If - there's enough zeroes to satisfy the request, use - what we can. If there's not enough, keep looking. */ - if (nonzero == NULL) - rsv_bytes = search_bytes; - else if ((nonzero - ptr) * GFS2_NBBY >= - ip->i_res->rs_requested) - rsv_bytes = (nonzero - ptr); - - if (rsv_bytes) { - blk = ((ptr - buffer) * GFS2_NBBY); - BUG_ON(blk >= bi->bi_len * GFS2_NBBY); - rs = rs_insert(bi, ip, blk, - rsv_bytes * GFS2_NBBY); - if (IS_ERR(rs)) - return PTR_ERR(rs); - if (rs) - return 0; - } - ptr += ALIGN(search_bytes, sizeof(u64)); + break; + } + + if (n) { + while ((rs_cmp(block, length, rs) == 0) && (ip->i_res != rs)) { + block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free; + n = n->rb_right; + if (n == NULL) + break; + rs = rb_entry(n, struct gfs2_blkreserv, rs_node); } -skip: - /* Try next bitmap block (wrap back to rgrp header - if at end) */ - buf++; - buf %= length; - goal = 0; } - return BFITNOENT; + spin_unlock(&rgd->rd_rsspin); + return block; } /** - * try_rgrp_fit - See if a given reservation will fit in a given RG - * @rgd: the RG data - * @ip: the inode + * gfs2_reservation_check_and_update - Check for reservations during block alloc + * @rbm: The current position in the resource group + * @ip: The inode for which we are searching for blocks + * @minext: The minimum extent length * - * If there's room for the requested blocks to be allocated from the RG: - * This will try to get a multi-block reservation first, and if that doesn't - * fit, it will take what it can. + * This checks the current position in the rgrp to see whether there is + * a reservation covering this block. If not then this function is a + * no-op. If there is, then the position is moved to the end of the + * contiguous reservation(s) so that we are pointing at the first + * non-reserved block. * - * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) + * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error */ -static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) +static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, + const struct gfs2_inode *ip, + u32 minext) { - struct gfs2_blkreserv *rs = ip->i_res; + u64 block = gfs2_rbm_to_block(rbm); + u32 extlen = 1; + u64 nblock; + int ret; - if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) + /* + * If we have a minimum extent length, then skip over any extent + * which is less than the min extent length in size. + */ + if (minext) { + extlen = gfs2_free_extlen(rbm, minext); + nblock = block + extlen; + if (extlen < minext) + goto fail; + } + + /* + * Check the extent which has been found against the reservations + * and skip if parts of it are already reserved + */ + nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip); + if (nblock == block) return 0; - /* Look for a multi-block reservation. */ - if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS && - rg_mblk_search(rgd, ip) != BFITNOENT) - return 1; - if (unclaimed_blocks(rgd) >= rs->rs_requested) - return 1; +fail: + ret = gfs2_rbm_from_block(rbm, nblock); + if (ret < 0) + return ret; + return 1; +} - return 0; +/** + * gfs2_rbm_find - Look for blocks of a particular state + * @rbm: Value/result starting position and final position + * @state: The state which we want to find + * @minext: The requested extent length (0 for a single block) + * @ip: If set, check for reservations + * @nowrap: Stop looking at the end of the rgrp, rather than wrapping + * around until we've reached the starting point. + * + * Side effects: + * - If looking for free blocks, we set GBF_FULL on each bitmap which + * has no free blocks in it. + * + * Returns: 0 on success, -ENOSPC if there is no block of the requested state + */ + +static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, + const struct gfs2_inode *ip, bool nowrap) +{ + struct buffer_head *bh; + struct gfs2_bitmap *initial_bi; + u32 initial_offset; + u32 offset; + u8 *buffer; + int index; + int n = 0; + int iters = rbm->rgd->rd_length; + int ret; + + /* If we are not starting at the beginning of a bitmap, then we + * need to add one to the bitmap count to ensure that we search + * the starting bitmap twice. + */ + if (rbm->offset != 0) + iters++; + + while(1) { + if (test_bit(GBF_FULL, &rbm->bi->bi_flags) && + (state == GFS2_BLKST_FREE)) + goto next_bitmap; + + bh = rbm->bi->bi_bh; + buffer = bh->b_data + rbm->bi->bi_offset; + WARN_ON(!buffer_uptodate(bh)); + if (state != GFS2_BLKST_UNLINKED && rbm->bi->bi_clone) + buffer = rbm->bi->bi_clone + rbm->bi->bi_offset; + initial_offset = rbm->offset; + offset = gfs2_bitfit(buffer, rbm->bi->bi_len, rbm->offset, state); + if (offset == BFITNOENT) + goto bitmap_full; + rbm->offset = offset; + if (ip == NULL) + return 0; + + initial_bi = rbm->bi; + ret = gfs2_reservation_check_and_update(rbm, ip, minext); + if (ret == 0) + return 0; + if (ret > 0) { + n += (rbm->bi - initial_bi); + goto next_iter; + } + if (ret == -E2BIG) { + index = 0; + rbm->offset = 0; + n += (rbm->bi - initial_bi); + goto res_covered_end_of_rgrp; + } + return ret; + +bitmap_full: /* Mark bitmap as full and fall through */ + if ((state == GFS2_BLKST_FREE) && initial_offset == 0) + set_bit(GBF_FULL, &rbm->bi->bi_flags); + +next_bitmap: /* Find next bitmap in the rgrp */ + rbm->offset = 0; + index = rbm->bi - rbm->rgd->rd_bits; + index++; + if (index == rbm->rgd->rd_length) + index = 0; +res_covered_end_of_rgrp: + rbm->bi = &rbm->rgd->rd_bits[index]; + if ((index == 0) && nowrap) + break; + n++; +next_iter: + if (n >= iters) + break; + } + + return -ENOSPC; } /** @@ -1489,34 +1616,33 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip) { - u32 goal = 0, block; - u64 no_addr; + u64 block; struct gfs2_sbd *sdp = rgd->rd_sbd; struct gfs2_glock *gl; struct gfs2_inode *ip; int error; int found = 0; - struct gfs2_bitmap *bi; + struct gfs2_rbm rbm = { .rgd = rgd, .bi = rgd->rd_bits, .offset = 0 }; - while (goal < rgd->rd_data) { + while (1) { down_write(&sdp->sd_log_flush_lock); - block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, &bi); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, 0, NULL, true); up_write(&sdp->sd_log_flush_lock); - if (block == BFITNOENT) + if (error == -ENOSPC) + break; + if (WARN_ON_ONCE(error)) break; - block = gfs2_bi2rgd_blk(bi, block); - /* rgblk_search can return a block < goal, so we need to - keep it marching forward. */ - no_addr = block + rgd->rd_data0; - goal = max(block + 1, goal + 1); - if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked) + block = gfs2_rbm_to_block(&rbm); + if (gfs2_rbm_from_block(&rbm, block + 1)) + break; + if (*last_unlinked != NO_BLOCK && block <= *last_unlinked) continue; - if (no_addr == skip) + if (block == skip) continue; - *last_unlinked = no_addr; + *last_unlinked = block; - error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &gl); + error = gfs2_glock_get(sdp, block, &gfs2_inode_glops, CREATE, &gl); if (error) continue; @@ -1543,6 +1669,19 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip return; } +static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) +{ + struct gfs2_rgrpd *rgd = *pos; + + rgd = gfs2_rgrpd_get_next(rgd); + if (rgd == NULL) + rgd = gfs2_rgrpd_get_next(NULL); + *pos = rgd; + if (rgd != begin) /* If we didn't wrap */ + return true; + return false; +} + /** * gfs2_inplace_reserve - Reserve space in the filesystem * @ip: the inode to reserve space for @@ -1562,103 +1701,96 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) if (sdp->sd_args.ar_rgrplvb) flags |= GL_SKIP; - rs->rs_requested = requested; - if (gfs2_assert_warn(sdp, requested)) { - error = -EINVAL; - goto out; - } + if (gfs2_assert_warn(sdp, requested)) + return -EINVAL; if (gfs2_rs_active(rs)) { - begin = rs->rs_rgd; + begin = rs->rs_rbm.rgd; flags = 0; /* Yoda: Do or do not. There is no try */ } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) { - rs->rs_rgd = begin = ip->i_rgd; + rs->rs_rbm.rgd = begin = ip->i_rgd; } else { - rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); + rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); } - if (rs->rs_rgd == NULL) + if (rs->rs_rbm.rgd == NULL) return -EBADSLT; while (loops < 3) { - rg_locked = 0; - - if (gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) { - rg_locked = 1; - error = 0; - } else if (!loops && !gfs2_rs_active(rs) && - rs->rs_rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) { - /* If the rgrp already is maxed out for contenders, - we can eliminate it as a "first pass" without even - requesting the rgrp glock. */ - error = GLR_TRYFAILED; - } else { - error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl, + rg_locked = 1; + + if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { + rg_locked = 0; + error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, LM_ST_EXCLUSIVE, flags, &rs->rs_rgd_gh); - if (!error && sdp->sd_args.ar_rgrplvb) { - error = update_rgrp_lvb(rs->rs_rgd); - if (error) { + if (error == GLR_TRYFAILED) + goto next_rgrp; + if (unlikely(error)) + return error; + if (sdp->sd_args.ar_rgrplvb) { + error = update_rgrp_lvb(rs->rs_rbm.rgd); + if (unlikely(error)) { gfs2_glock_dq_uninit(&rs->rs_rgd_gh); return error; } } } - switch (error) { - case 0: - if (gfs2_rs_active(rs)) { - if (unclaimed_blocks(rs->rs_rgd) + - rs->rs_free >= rs->rs_requested) { - ip->i_rgd = rs->rs_rgd; - return 0; - } - /* We have a multi-block reservation, but the - rgrp doesn't have enough free blocks to - satisfy the request. Free the reservation - and look for a suitable rgrp. */ - gfs2_rs_deltree(rs); - } - if (try_rgrp_fit(rs->rs_rgd, ip)) { - if (sdp->sd_args.ar_rgrplvb) - gfs2_rgrp_bh_get(rs->rs_rgd); - ip->i_rgd = rs->rs_rgd; - return 0; - } - if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) { - if (sdp->sd_args.ar_rgrplvb) - gfs2_rgrp_bh_get(rs->rs_rgd); - try_rgrp_unlink(rs->rs_rgd, &last_unlinked, - ip->i_no_addr); - } - if (!rg_locked) - gfs2_glock_dq_uninit(&rs->rs_rgd_gh); - /* fall through */ - case GLR_TRYFAILED: - rs->rs_rgd = gfs2_rgrpd_get_next(rs->rs_rgd); - rs->rs_rgd = rs->rs_rgd ? : begin; /* if NULL, wrap */ - if (rs->rs_rgd != begin) /* If we didn't wrap */ - break; - flags &= ~LM_FLAG_TRY; - loops++; - /* Check that fs hasn't grown if writing to rindex */ - if (ip == GFS2_I(sdp->sd_rindex) && - !sdp->sd_rindex_uptodate) { - error = gfs2_ri_update(ip); - if (error) - goto out; - } else if (loops == 2) - /* Flushing the log may release space */ - gfs2_log_flush(sdp, NULL); - break; - default: - goto out; + /* Skip unuseable resource groups */ + if (rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) + goto skip_rgrp; + + if (sdp->sd_args.ar_rgrplvb) + gfs2_rgrp_bh_get(rs->rs_rbm.rgd); + + /* Get a reservation if we don't already have one */ + if (!gfs2_rs_active(rs)) + rg_mblk_search(rs->rs_rbm.rgd, ip, requested); + + /* Skip rgrps when we can't get a reservation on first pass */ + if (!gfs2_rs_active(rs) && (loops < 1)) + goto check_rgrp; + + /* If rgrp has enough free space, use it */ + if (rs->rs_rbm.rgd->rd_free_clone >= requested) { + ip->i_rgd = rs->rs_rbm.rgd; + return 0; + } + + /* Drop reservation, if we couldn't use reserved rgrp */ + if (gfs2_rs_active(rs)) + gfs2_rs_deltree(ip, rs); +check_rgrp: + /* Check for unlinked inodes which can be reclaimed */ + if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) + try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked, + ip->i_no_addr); +skip_rgrp: + /* Unlock rgrp if required */ + if (!rg_locked) + gfs2_glock_dq_uninit(&rs->rs_rgd_gh); +next_rgrp: + /* Find the next rgrp, and continue looking */ + if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin)) + continue; + + /* If we've scanned all the rgrps, but found no free blocks + * then this checks for some less likely conditions before + * trying again. + */ + flags &= ~LM_FLAG_TRY; + loops++; + /* Check that fs hasn't grown if writing to rindex */ + if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) { + error = gfs2_ri_update(ip); + if (error) + return error; } + /* Flushing the log may release space */ + if (loops == 2) + gfs2_log_flush(sdp, NULL); } - error = -ENOSPC; -out: - if (error) - rs->rs_requested = 0; - return error; + return -ENOSPC; } /** @@ -1672,15 +1804,8 @@ void gfs2_inplace_release(struct gfs2_inode *ip) { struct gfs2_blkreserv *rs = ip->i_res; - if (!rs) - return; - - if (!rs->rs_free) - gfs2_rs_deltree(rs); - if (rs->rs_rgd_gh.gh_gl) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); - rs->rs_requested = 0; } /** @@ -1693,173 +1818,47 @@ void gfs2_inplace_release(struct gfs2_inode *ip) static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) { - struct gfs2_bitmap *bi = NULL; - u32 length, rgrp_block, buf_block; - unsigned int buf; - unsigned char type; - - length = rgd->rd_length; - rgrp_block = block - rgd->rd_data0; - - for (buf = 0; buf < length; buf++) { - bi = rgd->rd_bits + buf; - if (rgrp_block < (bi->bi_start + bi->bi_len) * GFS2_NBBY) - break; - } + struct gfs2_rbm rbm = { .rgd = rgd, }; + int ret; - gfs2_assert(rgd->rd_sbd, buf < length); - buf_block = rgrp_block - bi->bi_start * GFS2_NBBY; + ret = gfs2_rbm_from_block(&rbm, block); + WARN_ON_ONCE(ret != 0); - type = gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset, - bi->bi_len, buf_block); - - return type; + return gfs2_testbit(&rbm); } -/** - * rgblk_search - find a block in @state - * @rgd: the resource group descriptor - * @goal: the goal block within the RG (start here to search for avail block) - * @state: GFS2_BLKST_XXX the before-allocation state to find - * @rbi: address of the pointer to the bitmap containing the block found - * - * Walk rgrp's bitmap to find bits that represent a block in @state. - * - * This function never fails, because we wouldn't call it unless we - * know (from reservation results, etc.) that a block is available. - * - * Scope of @goal is just within rgrp, not the whole filesystem. - * Scope of @returned block is just within bitmap, not the whole filesystem. - * - * Returns: the block number found relative to the bitmap rbi - */ - -static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, unsigned char state, - struct gfs2_bitmap **rbi) -{ - struct gfs2_bitmap *bi = NULL; - const u32 length = rgd->rd_length; - u32 biblk = BFITNOENT; - unsigned int buf, x; - const u8 *buffer = NULL; - - *rbi = NULL; - /* Find bitmap block that contains bits for goal block */ - for (buf = 0; buf < length; buf++) { - bi = rgd->rd_bits + buf; - /* Convert scope of "goal" from rgrp-wide to within found bit block */ - if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) { - goal -= bi->bi_start * GFS2_NBBY; - goto do_search; - } - } - buf = 0; - goal = 0; - -do_search: - /* Search (up to entire) bitmap in this rgrp for allocatable block. - "x <= length", instead of "x < length", because we typically start - the search in the middle of a bit block, but if we can't find an - allocatable block anywhere else, we want to be able wrap around and - search in the first part of our first-searched bit block. */ - for (x = 0; x <= length; x++) { - bi = rgd->rd_bits + buf; - - if (test_bit(GBF_FULL, &bi->bi_flags) && - (state == GFS2_BLKST_FREE)) - goto skip; - - /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone - bitmaps, so we must search the originals for that. */ - buffer = bi->bi_bh->b_data + bi->bi_offset; - WARN_ON(!buffer_uptodate(bi->bi_bh)); - if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) - buffer = bi->bi_clone + bi->bi_offset; - - while (1) { - struct gfs2_blkreserv *rs; - u32 rgblk; - - biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state); - if (biblk == BFITNOENT) - break; - /* Check if this block is reserved() */ - rgblk = gfs2_bi2rgd_blk(bi, biblk); - rs = rs_find(rgd, rgblk); - if (rs == NULL) - break; - - BUG_ON(rs->rs_bi != bi); - biblk = BFITNOENT; - /* This should jump to the first block after the - reservation. */ - goal = rs->rs_biblk + rs->rs_free; - if (goal >= bi->bi_len * GFS2_NBBY) - break; - } - if (biblk != BFITNOENT) - break; - - if ((goal == 0) && (state == GFS2_BLKST_FREE)) - set_bit(GBF_FULL, &bi->bi_flags); - - /* Try next bitmap block (wrap back to rgrp header if at end) */ -skip: - buf++; - buf %= length; - goal = 0; - } - - if (biblk != BFITNOENT) - *rbi = bi; - - return biblk; -} /** * gfs2_alloc_extent - allocate an extent from a given bitmap - * @rgd: the resource group descriptor - * @bi: the bitmap within the rgrp - * @blk: the block within the bitmap + * @rbm: the resource group information * @dinode: TRUE if the first block we allocate is for a dinode - * @n: The extent length + * @n: The extent length (value/result) * - * Add the found bitmap buffer to the transaction. + * Add the bitmap buffer to the transaction. * Set the found bits to @new_state to change block's allocation state. - * Returns: starting block number of the extent (fs scope) */ -static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi, - u32 blk, bool dinode, unsigned int *n) +static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, + unsigned int *n) { + struct gfs2_rbm pos = { .rgd = rbm->rgd, }; const unsigned int elen = *n; - u32 goal, rgblk; - const u8 *buffer = NULL; - struct gfs2_blkreserv *rs; - - *n = 0; - buffer = bi->bi_bh->b_data + bi->bi_offset; - gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); - gfs2_setbit(rgd, bi->bi_clone, bi, blk, - dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); - (*n)++; - goal = blk; + u64 block; + int ret; + + *n = 1; + block = gfs2_rbm_to_block(rbm); + gfs2_trans_add_bh(rbm->rgd->rd_gl, rbm->bi->bi_bh, 1); + gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); + block++; while (*n < elen) { - goal++; - if (goal >= (bi->bi_len * GFS2_NBBY)) - break; - rgblk = gfs2_bi2rgd_blk(bi, goal); - rs = rs_find(rgd, rgblk); - if (rs) /* Oops, we bumped into someone's reservation */ - break; - if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != - GFS2_BLKST_FREE) + ret = gfs2_rbm_from_block(&pos, block); + if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE) break; - gfs2_setbit(rgd, bi->bi_clone, bi, goal, GFS2_BLKST_USED); + gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1); + gfs2_setbit(&pos, true, GFS2_BLKST_USED); (*n)++; + block++; } - blk = gfs2_bi2rgd_blk(bi, blk); - rgd->rd_last_alloc = blk + *n - 1; - return rgd->rd_data0 + blk; } /** @@ -1875,46 +1874,30 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi, static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, u32 blen, unsigned char new_state) { - struct gfs2_rgrpd *rgd; - struct gfs2_bitmap *bi = NULL; - u32 length, rgrp_blk, buf_blk; - unsigned int buf; + struct gfs2_rbm rbm; - rgd = gfs2_blk2rgrpd(sdp, bstart, 1); - if (!rgd) { + rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1); + if (!rbm.rgd) { if (gfs2_consist(sdp)) fs_err(sdp, "block = %llu\n", (unsigned long long)bstart); return NULL; } - length = rgd->rd_length; - - rgrp_blk = bstart - rgd->rd_data0; - while (blen--) { - for (buf = 0; buf < length; buf++) { - bi = rgd->rd_bits + buf; - if (rgrp_blk < (bi->bi_start + bi->bi_len) * GFS2_NBBY) - break; - } - - gfs2_assert(rgd->rd_sbd, buf < length); - - buf_blk = rgrp_blk - bi->bi_start * GFS2_NBBY; - rgrp_blk++; - - if (!bi->bi_clone) { - bi->bi_clone = kmalloc(bi->bi_bh->b_size, - GFP_NOFS | __GFP_NOFAIL); - memcpy(bi->bi_clone + bi->bi_offset, - bi->bi_bh->b_data + bi->bi_offset, - bi->bi_len); + gfs2_rbm_from_block(&rbm, bstart); + bstart++; + if (!rbm.bi->bi_clone) { + rbm.bi->bi_clone = kmalloc(rbm.bi->bi_bh->b_size, + GFP_NOFS | __GFP_NOFAIL); + memcpy(rbm.bi->bi_clone + rbm.bi->bi_offset, + rbm.bi->bi_bh->b_data + rbm.bi->bi_offset, + rbm.bi->bi_len); } - gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); - gfs2_setbit(rgd, NULL, bi, buf_blk, new_state); + gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.bi->bi_bh, 1); + gfs2_setbit(&rbm, false, new_state); } - return rgd; + return rbm.rgd; } /** @@ -1956,56 +1939,41 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) } /** - * claim_reserved_blks - Claim previously reserved blocks - * @ip: the inode that's claiming the reservation - * @dinode: 1 if this block is a dinode block, otherwise data block - * @nblocks: desired extent length + * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation + * @ip: The inode we have just allocated blocks for + * @rbm: The start of the allocated blocks + * @len: The extent length * - * Lay claim to previously allocated block reservation blocks. - * Returns: Starting block number of the blocks claimed. - * Sets *nblocks to the actual extent length allocated. + * Adjusts a reservation after an allocation has taken place. If the + * reservation does not match the allocation, or if it is now empty + * then it is removed. */ -static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode, - unsigned int *nblocks) + +static void gfs2_adjust_reservation(struct gfs2_inode *ip, + const struct gfs2_rbm *rbm, unsigned len) { struct gfs2_blkreserv *rs = ip->i_res; - struct gfs2_rgrpd *rgd = rs->rs_rgd; - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_bitmap *bi; - u64 start_block = gfs2_rs_startblk(rs); - const unsigned int elen = *nblocks; - - /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/ - gfs2_assert_withdraw(sdp, rgd); - /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/ - bi = rs->rs_bi; - gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); - - for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) { - /* Make sure the bitmap hasn't changed */ - gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_biblk, - dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); - rs->rs_biblk++; - rs->rs_free--; - - BUG_ON(!rgd->rd_reserved); - rgd->rd_reserved--; - dinode = false; - trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM); - } - - if (!rs->rs_free) { - struct gfs2_rgrpd *rgd = ip->i_res->rs_rgd; + struct gfs2_rgrpd *rgd = rbm->rgd; + unsigned rlen; + u64 block; + int ret; - gfs2_rs_deltree(rs); - /* -nblocks because we haven't returned to do the math yet. - I'm doing the math backwards to prevent negative numbers, - but think of it as: - if (unclaimed_blocks(rgd) - *nblocks >= RGRP_RSRV_MINBLKS */ - if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS + *nblocks) - rg_mblk_search(rgd, ip); + spin_lock(&rgd->rd_rsspin); + if (gfs2_rs_active(rs)) { + if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) { + block = gfs2_rbm_to_block(rbm); + ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len); + rlen = min(rs->rs_free, len); + rs->rs_free -= rlen; + rgd->rd_reserved -= rlen; + trace_gfs2_rs(rs, TRACE_RS_CLAIM); + if (rs->rs_free && !ret) + goto out; + } + __rs_deltree(ip, rs); } - return start_block; +out: + spin_unlock(&rgd->rd_rsspin); } /** @@ -2024,47 +1992,40 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *dibh; - struct gfs2_rgrpd *rgd; + struct gfs2_rbm rbm = { .rgd = ip->i_rgd, }; unsigned int ndata; - u32 goal, blk; /* block, within the rgrp scope */ + u64 goal; u64 block; /* block, within the file system scope */ int error; - struct gfs2_bitmap *bi; - /* Only happens if there is a bug in gfs2, return something distinctive - * to ensure that it is noticed. - */ - if (ip->i_res->rs_requested == 0) - return -ECANCELED; - - /* Check if we have a multi-block reservation, and if so, claim the - next free block from it. */ - if (gfs2_rs_active(ip->i_res)) { - BUG_ON(!ip->i_res->rs_free); - rgd = ip->i_res->rs_rgd; - block = claim_reserved_blks(ip, dinode, nblocks); - } else { - rgd = ip->i_rgd; + if (gfs2_rs_active(ip->i_res)) + goal = gfs2_rbm_to_block(&ip->i_res->rs_rbm); + else if (!dinode && rgrp_contains_block(rbm.rgd, ip->i_goal)) + goal = ip->i_goal; + else + goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0; - if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) - goal = ip->i_goal - rgd->rd_data0; - else - goal = rgd->rd_last_alloc; - - blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi); - - /* Since all blocks are reserved in advance, this shouldn't - happen */ - if (blk == BFITNOENT) { - printk(KERN_WARNING "BFITNOENT, nblocks=%u\n", - *nblocks); - printk(KERN_WARNING "FULL=%d\n", - test_bit(GBF_FULL, &rgd->rd_bits->bi_flags)); - goto rgrp_error; - } + gfs2_rbm_from_block(&rbm, goal); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, ip, false); - block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); + if (error == -ENOSPC) { + gfs2_rbm_from_block(&rbm, goal); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, NULL, false); + } + + /* Since all blocks are reserved in advance, this shouldn't happen */ + if (error) { + fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d\n", + (unsigned long long)ip->i_no_addr, error, *nblocks, + test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags)); + goto rgrp_error; } + + gfs2_alloc_extent(&rbm, dinode, nblocks); + block = gfs2_rbm_to_block(&rbm); + rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0; + if (gfs2_rs_active(ip->i_res)) + gfs2_adjust_reservation(ip, &rbm, *nblocks); ndata = *nblocks; if (dinode) ndata--; @@ -2081,22 +2042,22 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, brelse(dibh); } } - if (rgd->rd_free < *nblocks) { + if (rbm.rgd->rd_free < *nblocks) { printk(KERN_WARNING "nblocks=%u\n", *nblocks); goto rgrp_error; } - rgd->rd_free -= *nblocks; + rbm.rgd->rd_free -= *nblocks; if (dinode) { - rgd->rd_dinodes++; - *generation = rgd->rd_igeneration++; + rbm.rgd->rd_dinodes++; + *generation = rbm.rgd->rd_igeneration++; if (*generation == 0) - *generation = rgd->rd_igeneration++; + *generation = rbm.rgd->rd_igeneration++; } - gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); - gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); - gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); + gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh, 1); + gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data); + gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data); gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); if (dinode) @@ -2110,14 +2071,14 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_quota_change(ip, ndata, ip->i_inode.i_uid, ip->i_inode.i_gid); - rgd->rd_free_clone -= *nblocks; - trace_gfs2_block_alloc(ip, rgd, block, *nblocks, + rbm.rgd->rd_free_clone -= *nblocks; + trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); *bn = block; return 0; rgrp_error: - gfs2_rgrp_error(rgd); + gfs2_rgrp_error(rbm.rgd); return -EIO; } diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index ca6e26729b86..24077958dcf6 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -46,7 +46,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, bool dinode, u64 *generation); extern int gfs2_rs_alloc(struct gfs2_inode *ip); -extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs); +extern void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs); extern void gfs2_rs_delete(struct gfs2_inode *ip); extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta); extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); @@ -73,30 +73,10 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); extern int gfs2_fitrim(struct file *filp, void __user *argp); -/* This is how to tell if a multi-block reservation is "inplace" reserved: */ -static inline int gfs2_mb_reserved(struct gfs2_inode *ip) +/* This is how to tell if a reservation is in the rgrp tree: */ +static inline bool gfs2_rs_active(struct gfs2_blkreserv *rs) { - if (ip->i_res && ip->i_res->rs_requested) - return 1; - return 0; -} - -/* This is how to tell if a multi-block reservation is in the rgrp tree: */ -static inline int gfs2_rs_active(struct gfs2_blkreserv *rs) -{ - if (rs && rs->rs_bi) - return 1; - return 0; -} - -static inline u32 gfs2_bi2rgd_blk(const struct gfs2_bitmap *bi, u32 blk) -{ - return (bi->bi_start * GFS2_NBBY) + blk; -} - -static inline u64 gfs2_rs_startblk(const struct gfs2_blkreserv *rs) -{ - return gfs2_bi2rgd_blk(rs->rs_bi, rs->rs_biblk) + rs->rs_rgd->rd_data0; + return rs && !RB_EMPTY_NODE(&rs->rs_node); } #endif /* __RGRP_DOT_H__ */ diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index fc3168f47a14..bc737261f234 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1366,6 +1366,8 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) val = sdp->sd_tune.gt_statfs_quantum; if (val != 30) seq_printf(s, ",statfs_quantum=%d", val); + else if (sdp->sd_tune.gt_statfs_slow) + seq_puts(s, ",statfs_quantum=0"); val = sdp->sd_tune.gt_quota_quantum; if (val != 60) seq_printf(s, ",quota_quantum=%d", val); @@ -1543,6 +1545,11 @@ static void gfs2_evict_inode(struct inode *inode) out_truncate: gfs2_log_flush(sdp, ip->i_gl); + if (test_bit(GLF_DIRTY, &ip->i_gl->gl_flags)) { + struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl); + filemap_fdatawrite(metamapping); + filemap_fdatawait(metamapping); + } write_inode_now(inode, 1); gfs2_ail_flush(ip->i_gl, 0); @@ -1557,7 +1564,7 @@ out_truncate: out_unlock: /* Error path for case 1 */ if (gfs2_rs_active(ip->i_res)) - gfs2_rs_deltree(ip->i_res); + gfs2_rs_deltree(ip, ip->i_res); if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) gfs2_glock_dq(&ip->i_iopen_gh); @@ -1572,7 +1579,7 @@ out: clear_inode(inode); gfs2_dir_hash_inval(ip); ip->i_gl->gl_object = NULL; - flush_delayed_work_sync(&ip->i_gl->gl_work); + flush_delayed_work(&ip->i_gl->gl_work); gfs2_glock_add_to_lru(ip->i_gl); gfs2_glock_put(ip->i_gl); ip->i_gl = NULL; diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index a25c252fe412..bbdc78af60ca 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -509,10 +509,9 @@ TRACE_EVENT(gfs2_block_alloc, /* Keep track of multi-block reservations as they are allocated/freed */ TRACE_EVENT(gfs2_rs, - TP_PROTO(const struct gfs2_inode *ip, const struct gfs2_blkreserv *rs, - u8 func), + TP_PROTO(const struct gfs2_blkreserv *rs, u8 func), - TP_ARGS(ip, rs, func), + TP_ARGS(rs, func), TP_STRUCT__entry( __field( dev_t, dev ) @@ -526,18 +525,17 @@ TRACE_EVENT(gfs2_rs, ), TP_fast_assign( - __entry->dev = rs->rs_rgd ? rs->rs_rgd->rd_sbd->sd_vfs->s_dev : 0; - __entry->rd_addr = rs->rs_rgd ? rs->rs_rgd->rd_addr : 0; - __entry->rd_free_clone = rs->rs_rgd ? rs->rs_rgd->rd_free_clone : 0; - __entry->rd_reserved = rs->rs_rgd ? rs->rs_rgd->rd_reserved : 0; - __entry->inum = ip ? ip->i_no_addr : 0; - __entry->start = gfs2_rs_startblk(rs); + __entry->dev = rs->rs_rbm.rgd->rd_sbd->sd_vfs->s_dev; + __entry->rd_addr = rs->rs_rbm.rgd->rd_addr; + __entry->rd_free_clone = rs->rs_rbm.rgd->rd_free_clone; + __entry->rd_reserved = rs->rs_rbm.rgd->rd_reserved; + __entry->inum = rs->rs_inum; + __entry->start = gfs2_rbm_to_block(&rs->rs_rbm); __entry->free = rs->rs_free; __entry->func = func; ), - TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s " - "f:%lu", + TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s f:%lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->inum, (unsigned long long)__entry->start, diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index 41f42cdccbb8..bf2ae9aeee7a 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h @@ -28,11 +28,10 @@ struct gfs2_glock; /* reserve either the number of blocks to be allocated plus the rg header * block, or all of the blocks in the rg, whichever is smaller */ -static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip) +static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip, unsigned requested) { - const struct gfs2_blkreserv *rs = ip->i_res; - if (rs && rs->rs_requested < ip->i_rgd->rd_length) - return rs->rs_requested + 1; + if (requested < ip->i_rgd->rd_length) + return requested + 1; return ip->i_rgd->rd_length; } diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 27a0b4a901f5..db330e5518cd 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -448,17 +448,18 @@ ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size) } /** - * ea_get_unstuffed - actually copies the unstuffed data into the - * request buffer + * ea_iter_unstuffed - copies the unstuffed xattr data to/from the + * request buffer * @ip: The GFS2 inode * @ea: The extended attribute header structure - * @data: The data to be copied + * @din: The data to be copied in + * @dout: The data to be copied out (one of din,dout will be NULL) * * Returns: errno */ -static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, - char *data) +static int gfs2_iter_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, + const char *din, char *dout) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head **bh; @@ -467,6 +468,8 @@ static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, __be64 *dataptrs = GFS2_EA2DATAPTRS(ea); unsigned int x; int error = 0; + unsigned char *pos; + unsigned cp_size; bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS); if (!bh) @@ -497,12 +500,21 @@ static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, goto out; } - memcpy(data, bh[x]->b_data + sizeof(struct gfs2_meta_header), - (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize); + pos = bh[x]->b_data + sizeof(struct gfs2_meta_header); + cp_size = (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize; - amount -= sdp->sd_jbsize; - data += sdp->sd_jbsize; + if (dout) { + memcpy(dout, pos, cp_size); + dout += sdp->sd_jbsize; + } + + if (din) { + gfs2_trans_add_bh(ip->i_gl, bh[x], 1); + memcpy(pos, din, cp_size); + din += sdp->sd_jbsize; + } + amount -= sdp->sd_jbsize; brelse(bh[x]); } @@ -523,7 +535,7 @@ static int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el, memcpy(data, GFS2_EA2DATA(el->el_ea), len); return len; } - ret = ea_get_unstuffed(ip, el->el_ea, data); + ret = gfs2_iter_unstuffed(ip, el->el_ea, NULL, data); if (ret < 0) return ret; return len; @@ -727,7 +739,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, goto out_gunlock_q; error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), - blks + gfs2_rg_blocks(ip) + + blks + gfs2_rg_blocks(ip, blks) + RES_DINODE + RES_STATFS + RES_QUOTA, 0); if (error) goto out_ipres; @@ -1220,69 +1232,23 @@ static int gfs2_xattr_set(struct dentry *dentry, const char *name, size, flags, type); } + static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, char *data) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct buffer_head **bh; unsigned int amount = GFS2_EA_DATA_LEN(ea); unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize); - __be64 *dataptrs = GFS2_EA2DATAPTRS(ea); - unsigned int x; - int error; - - bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS); - if (!bh) - return -ENOMEM; - - error = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0); - if (error) - goto out; - - for (x = 0; x < nptrs; x++) { - error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs), 0, - bh + x); - if (error) { - while (x--) - brelse(bh[x]); - goto fail; - } - dataptrs++; - } - - for (x = 0; x < nptrs; x++) { - error = gfs2_meta_wait(sdp, bh[x]); - if (error) { - for (; x < nptrs; x++) - brelse(bh[x]); - goto fail; - } - if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) { - for (; x < nptrs; x++) - brelse(bh[x]); - error = -EIO; - goto fail; - } - - gfs2_trans_add_bh(ip->i_gl, bh[x], 1); - - memcpy(bh[x]->b_data + sizeof(struct gfs2_meta_header), data, - (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize); - - amount -= sdp->sd_jbsize; - data += sdp->sd_jbsize; - - brelse(bh[x]); - } + int ret; -out: - kfree(bh); - return error; + ret = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0); + if (ret) + return ret; -fail: + ret = gfs2_iter_unstuffed(ip, ea, data, NULL); gfs2_trans_end(sdp); - kfree(bh); - return error; + + return ret; } int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index ee1bc55677f1..553909395270 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -644,7 +644,7 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end, /* sync the superblock to buffers */ sb = inode->i_sb; - flush_delayed_work_sync(&HFS_SB(sb)->mdb_work); + flush_delayed_work(&HFS_SB(sb)->mdb_work); /* .. finally sync the buffers to disk */ err = sync_blockdev(sb->s_bdev); if (!ret) diff --git a/fs/libfs.c b/fs/libfs.c index a74cb1725ac6..7cc37ca19cd8 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -874,7 +874,7 @@ struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid, EXPORT_SYMBOL_GPL(generic_fh_to_dentry); /** - * generic_fh_to_dentry - generic helper for the fh_to_parent export operation + * generic_fh_to_parent - generic helper for the fh_to_parent export operation * @sb: filesystem to do the file handle conversion on * @fid: file handle to convert * @fh_len: length of the file handle in bytes diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index fb1a2bedbe97..8d80c990dffd 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -289,7 +289,6 @@ static void nlmsvc_free_block(struct kref *kref) dprintk("lockd: freeing block %p...\n", block); /* Remove block from file's list of blocks */ - mutex_lock(&file->f_mutex); list_del_init(&block->b_flist); mutex_unlock(&file->f_mutex); @@ -303,7 +302,7 @@ static void nlmsvc_free_block(struct kref *kref) static void nlmsvc_release_block(struct nlm_block *block) { if (block != NULL) - kref_put(&block->b_count, nlmsvc_free_block); + kref_put_mutex(&block->b_count, nlmsvc_free_block, &block->b_file->f_mutex); } /* diff --git a/fs/namespace.c b/fs/namespace.c index 4d31f73e2561..7bdf7907413f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1886,8 +1886,14 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) return err; err = -EINVAL; - if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(real_mount(path->mnt))) - goto unlock; + if (unlikely(!check_mnt(real_mount(path->mnt)))) { + /* that's acceptable only for automounts done in private ns */ + if (!(mnt_flags & MNT_SHRINKABLE)) + goto unlock; + /* ... and for those we'd better have mountpoint still alive */ + if (!real_mount(path->mnt)->mnt_ns) + goto unlock; + } /* Refuse the same filesystem on the same mount point */ err = -EBUSY; diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 333df07ae3bd..eaa74323663a 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -314,11 +314,11 @@ static void ncp_stop_tasks(struct ncp_server *server) { release_sock(sk); del_timer_sync(&server->timeout_tm); - flush_work_sync(&server->rcv.tq); + flush_work(&server->rcv.tq); if (sk->sk_socket->type == SOCK_STREAM) - flush_work_sync(&server->tx.tq); + flush_work(&server->tx.tq); else - flush_work_sync(&server->timeout_tq); + flush_work(&server->timeout_tq); } static int ncp_show_options(struct seq_file *seq, struct dentry *root) diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 6930bec91bca..1720d32ffa54 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -117,8 +117,7 @@ nfs4_schedule_state_renewal(struct nfs_client *clp) timeout = 5 * HZ; dprintk("%s: requeueing work. Lease period = %ld\n", __func__, (timeout + HZ - 1) / HZ); - cancel_delayed_work(&clp->cl_renewd); - schedule_delayed_work(&clp->cl_renewd, timeout); + mod_delayed_work(system_wq, &clp->cl_renewd, timeout); set_bit(NFS_CS_RENEWD, &clp->cl_res_state); spin_unlock(&clp->cl_lock); } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index b8eda700584b..d2c7f5db0847 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1537,7 +1537,7 @@ static int nfs_parse_mount_options(char *raw, /* * verify that any proto=/mountproto= options match the address - * familiies in the addr=/mountaddr= options. + * families in the addr=/mountaddr= options. */ if (protofamily != AF_UNSPEC && protofamily != mnt->nfs_server.address.ss_family) diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c index 8f9cea1597af..c19897d0fe14 100644 --- a/fs/ocfs2/cluster/quorum.c +++ b/fs/ocfs2/cluster/quorum.c @@ -327,5 +327,5 @@ void o2quo_exit(void) { struct o2quo_state *qs = &o2quo_state; - flush_work_sync(&qs->qs_work); + flush_work(&qs->qs_work); } diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index dfafeb2b05a0..eb7cc91b7258 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -462,9 +462,6 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, err = ERR_PTR(-ENOMEM); inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); - if (h) - sysctl_head_finish(h); - if (!inode) goto out; @@ -473,6 +470,8 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, d_add(dentry, inode); out: + if (h) + sysctl_head_finish(h); sysctl_head_finish(head); return err; } diff --git a/fs/stat.c b/fs/stat.c index b6ff11825fc8..208039eec6c7 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -58,7 +58,7 @@ EXPORT_SYMBOL(vfs_getattr); int vfs_fstat(unsigned int fd, struct kstat *stat) { int fput_needed; - struct file *f = fget_light(fd, &fput_needed); + struct file *f = fget_raw_light(fd, &fput_needed); int error = -EBADF; if (f) { @@ -326,7 +326,7 @@ SYSCALL_DEFINE3(readlink, const char __user *, path, char __user *, buf, /* ---------- LFS-64 ----------- */ -#ifdef __ARCH_WANT_STAT64 +#if defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_COMPAT_STAT64) #ifndef INIT_STRUCT_STAT64_PADDING # define INIT_STRUCT_STAT64_PADDING(st) memset(&st, 0, sizeof(st)) @@ -415,7 +415,7 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, const char __user *, filename, return error; return cp_new_stat64(&stat, statbuf); } -#endif /* __ARCH_WANT_STAT64 */ +#endif /* __ARCH_WANT_STAT64 || __ARCH_WANT_COMPAT_STAT64 */ /* Caller is here responsible for sufficient locking (ie. inode->i_lock) */ void __inode_add_bytes(struct inode *inode, loff_t bytes) diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index a7ac78f8e67a..3c9eb5624f5e 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -113,7 +113,7 @@ int sysfs_create_link(struct kobject *kobj, struct kobject *target, * @target: object we're pointing to. * @name: name of the symlink. * - * This function does the same as sysf_create_link(), but it + * This function does the same as sysfs_create_link(), but it * doesn't warn if the link already exists. */ int sysfs_create_link_nowarn(struct kobject *kobj, struct kobject *target, diff --git a/fs/xattr.c b/fs/xattr.c index 4d45b7189e7e..014f11321fd9 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -791,3 +791,183 @@ EXPORT_SYMBOL(generic_getxattr); EXPORT_SYMBOL(generic_listxattr); EXPORT_SYMBOL(generic_setxattr); EXPORT_SYMBOL(generic_removexattr); + +/* + * Allocate new xattr and copy in the value; but leave the name to callers. + */ +struct simple_xattr *simple_xattr_alloc(const void *value, size_t size) +{ + struct simple_xattr *new_xattr; + size_t len; + + /* wrap around? */ + len = sizeof(*new_xattr) + size; + if (len <= sizeof(*new_xattr)) + return NULL; + + new_xattr = kmalloc(len, GFP_KERNEL); + if (!new_xattr) + return NULL; + + new_xattr->size = size; + memcpy(new_xattr->value, value, size); + return new_xattr; +} + +/* + * xattr GET operation for in-memory/pseudo filesystems + */ +int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, + void *buffer, size_t size) +{ + struct simple_xattr *xattr; + int ret = -ENODATA; + + spin_lock(&xattrs->lock); + list_for_each_entry(xattr, &xattrs->head, list) { + if (strcmp(name, xattr->name)) + continue; + + ret = xattr->size; + if (buffer) { + if (size < xattr->size) + ret = -ERANGE; + else + memcpy(buffer, xattr->value, xattr->size); + } + break; + } + spin_unlock(&xattrs->lock); + return ret; +} + +static int __simple_xattr_set(struct simple_xattrs *xattrs, const char *name, + const void *value, size_t size, int flags) +{ + struct simple_xattr *xattr; + struct simple_xattr *uninitialized_var(new_xattr); + int err = 0; + + /* value == NULL means remove */ + if (value) { + new_xattr = simple_xattr_alloc(value, size); + if (!new_xattr) + return -ENOMEM; + + new_xattr->name = kstrdup(name, GFP_KERNEL); + if (!new_xattr->name) { + kfree(new_xattr); + return -ENOMEM; + } + } + + spin_lock(&xattrs->lock); + list_for_each_entry(xattr, &xattrs->head, list) { + if (!strcmp(name, xattr->name)) { + if (flags & XATTR_CREATE) { + xattr = new_xattr; + err = -EEXIST; + } else if (new_xattr) { + list_replace(&xattr->list, &new_xattr->list); + } else { + list_del(&xattr->list); + } + goto out; + } + } + if (flags & XATTR_REPLACE) { + xattr = new_xattr; + err = -ENODATA; + } else { + list_add(&new_xattr->list, &xattrs->head); + xattr = NULL; + } +out: + spin_unlock(&xattrs->lock); + if (xattr) { + kfree(xattr->name); + kfree(xattr); + } + return err; + +} + +/** + * simple_xattr_set - xattr SET operation for in-memory/pseudo filesystems + * @xattrs: target simple_xattr list + * @name: name of the new extended attribute + * @value: value of the new xattr. If %NULL, will remove the attribute + * @size: size of the new xattr + * @flags: %XATTR_{CREATE|REPLACE} + * + * %XATTR_CREATE is set, the xattr shouldn't exist already; otherwise fails + * with -EEXIST. If %XATTR_REPLACE is set, the xattr should exist; + * otherwise, fails with -ENODATA. + * + * Returns 0 on success, -errno on failure. + */ +int simple_xattr_set(struct simple_xattrs *xattrs, const char *name, + const void *value, size_t size, int flags) +{ + if (size == 0) + value = ""; /* empty EA, do not remove */ + return __simple_xattr_set(xattrs, name, value, size, flags); +} + +/* + * xattr REMOVE operation for in-memory/pseudo filesystems + */ +int simple_xattr_remove(struct simple_xattrs *xattrs, const char *name) +{ + return __simple_xattr_set(xattrs, name, NULL, 0, XATTR_REPLACE); +} + +static bool xattr_is_trusted(const char *name) +{ + return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN); +} + +/* + * xattr LIST operation for in-memory/pseudo filesystems + */ +ssize_t simple_xattr_list(struct simple_xattrs *xattrs, char *buffer, + size_t size) +{ + bool trusted = capable(CAP_SYS_ADMIN); + struct simple_xattr *xattr; + size_t used = 0; + + spin_lock(&xattrs->lock); + list_for_each_entry(xattr, &xattrs->head, list) { + size_t len; + + /* skip "trusted." attributes for unprivileged callers */ + if (!trusted && xattr_is_trusted(xattr->name)) + continue; + + len = strlen(xattr->name) + 1; + used += len; + if (buffer) { + if (size < used) { + used = -ERANGE; + break; + } + memcpy(buffer, xattr->name, len); + buffer += len; + } + } + spin_unlock(&xattrs->lock); + + return used; +} + +/* + * Adds an extended attribute to the list + */ +void simple_xattr_list_add(struct simple_xattrs *xattrs, + struct simple_xattr *new_xattr) +{ + spin_lock(&xattrs->lock); + list_add(&new_xattr->list, &xattrs->head); + spin_unlock(&xattrs->lock); +} diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index d7a9dd735e1e..933b7930b863 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -96,6 +96,7 @@ xfs_buf_lru_add( atomic_inc(&bp->b_hold); list_add_tail(&bp->b_lru, &btp->bt_lru); btp->bt_lru_nr++; + bp->b_lru_flags &= ~_XBF_LRU_DISPOSE; } spin_unlock(&btp->bt_lru_lock); } @@ -154,7 +155,8 @@ xfs_buf_stale( struct xfs_buftarg *btp = bp->b_target; spin_lock(&btp->bt_lru_lock); - if (!list_empty(&bp->b_lru)) { + if (!list_empty(&bp->b_lru) && + !(bp->b_lru_flags & _XBF_LRU_DISPOSE)) { list_del_init(&bp->b_lru); btp->bt_lru_nr--; atomic_dec(&bp->b_hold); @@ -1501,6 +1503,7 @@ xfs_buftarg_shrink( */ list_move(&bp->b_lru, &dispose); btp->bt_lru_nr--; + bp->b_lru_flags |= _XBF_LRU_DISPOSE; } spin_unlock(&btp->bt_lru_lock); diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index d03b73b9604e..7c0b6a0a1557 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -38,27 +38,28 @@ typedef enum { XBRW_ZERO = 3, /* Zero target memory */ } xfs_buf_rw_t; -#define XBF_READ (1 << 0) /* buffer intended for reading from device */ -#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ -#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ -#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ -#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ -#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ +#define XBF_READ (1 << 0) /* buffer intended for reading from device */ +#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ +#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ +#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ +#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ +#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ /* I/O hints for the BIO layer */ -#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ -#define XBF_FUA (1 << 11)/* force cache write through mode */ -#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ +#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ +#define XBF_FUA (1 << 11)/* force cache write through mode */ +#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ /* flags used only as arguments to access routines */ -#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ -#define XBF_UNMAPPED (1 << 17)/* do not map the buffer */ +#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ +#define XBF_UNMAPPED (1 << 17)/* do not map the buffer */ /* flags used only internally */ -#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ -#define _XBF_KMEM (1 << 21)/* backed by heap memory */ -#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ -#define _XBF_COMPOUND (1 << 23)/* compound buffer */ +#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ +#define _XBF_KMEM (1 << 21)/* backed by heap memory */ +#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ +#define _XBF_COMPOUND (1 << 23)/* compound buffer */ +#define _XBF_LRU_DISPOSE (1 << 24)/* buffer being discarded */ typedef unsigned int xfs_buf_flags_t; @@ -72,12 +73,13 @@ typedef unsigned int xfs_buf_flags_t; { XBF_SYNCIO, "SYNCIO" }, \ { XBF_FUA, "FUA" }, \ { XBF_FLUSH, "FLUSH" }, \ - { XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\ + { XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\ { XBF_UNMAPPED, "UNMAPPED" }, /* ditto */\ { _XBF_PAGES, "PAGES" }, \ { _XBF_KMEM, "KMEM" }, \ { _XBF_DELWRI_Q, "DELWRI_Q" }, \ - { _XBF_COMPOUND, "COMPOUND" } + { _XBF_COMPOUND, "COMPOUND" }, \ + { _XBF_LRU_DISPOSE, "LRU_DISPOSE" } typedef struct xfs_buftarg { dev_t bt_dev; @@ -124,7 +126,12 @@ typedef struct xfs_buf { xfs_buf_flags_t b_flags; /* status flags */ struct semaphore b_sema; /* semaphore for lockables */ + /* + * concurrent access to b_lru and b_lru_flags are protected by + * bt_lru_lock and not by b_sema + */ struct list_head b_lru; /* lru list */ + xfs_buf_flags_t b_lru_flags; /* internal lru status flags */ wait_queue_head_t b_waiters; /* unpin waiters */ struct list_head b_list; struct xfs_perag *b_pag; /* contains rbtree root */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index bdaf4cb9f4a2..001537f92caf 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -919,6 +919,7 @@ xfs_fs_put_super( struct xfs_mount *mp = XFS_M(sb); xfs_filestream_unmount(mp); + cancel_delayed_work_sync(&mp->m_sync_work); xfs_unmountfs(mp); xfs_syncd_stop(mp); xfs_freesb(mp); @@ -953,7 +954,7 @@ xfs_fs_sync_fs( * We schedule xfssyncd now (now that the disk is * active) instead of later (when it might not be). */ - flush_delayed_work_sync(&mp->m_sync_work); + flush_delayed_work(&mp->m_sync_work); } return 0; diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c index 96548176db80..9500caf15acf 100644 --- a/fs/xfs/xfs_sync.c +++ b/fs/xfs/xfs_sync.c @@ -475,7 +475,7 @@ xfs_flush_inodes( struct xfs_mount *mp = ip->i_mount; queue_work(xfs_syncd_wq, &mp->m_flush_work); - flush_work_sync(&mp->m_flush_work); + flush_work(&mp->m_flush_work); } STATIC void |