/*
 * (C) 2001 Clemson University and The University of Chicago
 *
 * See COPYING in top-level directory.
 */

#include "protocol.h"
#include "orangefs-kernel.h"
#include "orangefs-bufmap.h"

/*
 * decode routine used by kmod to deal with the blob sent from
 * userspace for readdirs. The blob starts with a
 * struct orangefs_readdir_response_s header (everything up to the
 * dirent_array member) and is followed by zero or more of these
 * sub-blobs:
 *   __u32   - represents length of the character string that follows.
 *   string  - between 1 and ORANGEFS_NAME_MAX bytes long.
 *   padding - (if needed) to cause the __u32 plus the string to be
 *             eight byte aligned.
 *   khandle - sizeof(khandle) bytes.
 *
 * @ptr:     start of the blob.
 * @size:    number of bytes available at @ptr.
 * @readdir: filled in with the token, the entry count and a freshly
 *           kcalloc'ed dirent_array.
 *
 * On success returns the number of bytes consumed from @ptr; the caller
 * owns readdir->dirent_array and must kfree() it. Each d_name points
 * into the blob itself (nothing is copied), so the blob must stay alive
 * for as long as the entries are used.
 *
 * On failure returns -EINVAL (malformed blob) or -ENOMEM, and
 * readdir->dirent_array is left NULL or unallocated.
 */
static long decode_dirents(char *ptr, size_t size,
			   struct orangefs_readdir_response_s *readdir)
{
	int i;
	struct orangefs_readdir_response_s *rd =
		(struct orangefs_readdir_response_s *) ptr;
	char *buf = ptr;
	int khandle_size = sizeof(struct orangefs_khandle);
	size_t offset = offsetof(struct orangefs_readdir_response_s,
				 dirent_array);
	/* 8 reflects eight byte alignment */
	int smallest_blob = khandle_size + 8;
	__u32 len;
	int aligned_len;
	int sizeof_u32 = sizeof(__u32);
	long ret;

	gossip_debug(GOSSIP_DIR_DEBUG, "%s: size:%zu:\n", __func__, size);

	/* size is = offset on empty dirs, > offset on non-empty dirs... */
	if (size < offset) {
		gossip_err("%s: size:%zu: offset:%zu:\n",
			   __func__, size, offset);
		ret = -EINVAL;
		goto out;
	}

	/*
	 * A header-only blob cannot describe any entries. The count must
	 * be read from the blob header (rd): the output structure has not
	 * been filled in yet at this point.
	 */
	if ((size == offset) && (rd->orangefs_dirent_outcount != 0)) {
		gossip_err("%s: size:%zu: dirent_outcount:%d:\n",
			   __func__, size, rd->orangefs_dirent_outcount);
		ret = -EINVAL;
		goto out;
	}

	readdir->token = rd->token;
	readdir->orangefs_dirent_outcount = rd->orangefs_dirent_outcount;
	readdir->dirent_array = kcalloc(readdir->orangefs_dirent_outcount,
					sizeof(*readdir->dirent_array),
					GFP_KERNEL);
	if (readdir->dirent_array == NULL) {
		gossip_err("%s: kcalloc failed.\n", __func__);
		ret = -ENOMEM;
		goto out;
	}

	/* Step over the header; size now counts the remaining payload. */
	buf += offset;
	size -= offset;

	for (i = 0; i < readdir->orangefs_dirent_outcount; i++) {
		/* Guarantees the __u32 length below can be read safely. */
		if (size < smallest_blob) {
			gossip_err("%s: size:%zu: smallest_blob:%d:\n",
				   __func__, size, smallest_blob);
			ret = -EINVAL;
			goto free;
		}

		len = *(__u32 *)buf;
		if ((len < 1) || (len > ORANGEFS_NAME_MAX)) {
			gossip_err("%s: len:%d:\n", __func__, len);
			ret = -EINVAL;
			goto free;
		}

		gossip_debug(GOSSIP_DIR_DEBUG,
			     "%s: size:%zu: len:%d:\n",
			     __func__, size, len);

		readdir->dirent_array[i].d_name = buf + sizeof_u32;
		readdir->dirent_array[i].d_length = len;

		/*
		 * Calculate "aligned" length of this string and its
		 * associated __u32 descriptor.
		 * NOTE(review): the "+ 1" presumably accounts for a NUL
		 * terminator following the name - confirm against the
		 * userspace encoder.
		 */
		aligned_len = ((sizeof_u32 + len + 1) + 7) & ~7;
		gossip_debug(GOSSIP_DIR_DEBUG,
			     "%s: aligned_len:%d:\n",
			     __func__, aligned_len);

		/*
		 * The end of the blob should coincide with the end
		 * of the last sub-blob.
		 */
		if (size < aligned_len + khandle_size) {
			gossip_err("%s: ran off the end of the blob.\n",
				   __func__);
			ret = -EINVAL;
			goto free;
		}
		size -= aligned_len + khandle_size;

		buf += aligned_len;

		readdir->dirent_array[i].khandle =
			*(struct orangefs_khandle *) buf;
		buf += khandle_size;
	}

	/* Total bytes consumed, header included. */
	ret = buf - ptr;
	gossip_debug(GOSSIP_DIR_DEBUG, "%s: returning:%ld:\n", __func__, ret);
	goto out;

free:
	kfree(readdir->dirent_array);
	readdir->dirent_array = NULL;

out:
	return ret;
}

/*
 * Read directory entries from an instance of an open directory.
*/ static int orangefs_readdir(struct file *file, struct dir_context *ctx) { struct orangefs_bufmap *bufmap = NULL; int ret = 0; int buffer_index; /* * ptoken supports Orangefs' distributed directory logic, added * in 2.9.2. */ __u64 *ptoken = file->private_data; __u64 pos = 0; ino_t ino = 0; struct dentry *dentry = file->f_path.dentry; struct orangefs_kernel_op_s *new_op = NULL; struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(dentry->d_inode); int buffer_full = 0; struct orangefs_readdir_response_s readdir_response; void *dents_buf; int i = 0; int len = 0; ino_t current_ino = 0; char *current_entry = NULL; long bytes_decoded; gossip_debug(GOSSIP_DIR_DEBUG, "%s: ctx->pos:%lld, ptoken = %llu\n", __func__, lld(ctx->pos), llu(*ptoken)); pos = (__u64) ctx->pos; /* are we done? */ if (pos == ORANGEFS_READDIR_END) { gossip_debug(GOSSIP_DIR_DEBUG, "Skipping to termination path\n"); return 0; } gossip_debug(GOSSIP_DIR_DEBUG, "orangefs_readdir called on %s (pos=%llu)\n", dentry->d_name.name, llu(pos)); memset(&readdir_response, 0, sizeof(readdir_response)); new_op = op_alloc(ORANGEFS_VFS_OP_READDIR); if (!new_op) return -ENOMEM; /* * Only the indices are shared. No memory is actually shared, but the * mechanism is used. */ new_op->uses_shared_memory = 1; new_op->upcall.req.readdir.refn = orangefs_inode->refn; new_op->upcall.req.readdir.max_dirent_count = ORANGEFS_MAX_DIRENT_COUNT_READDIR; gossip_debug(GOSSIP_DIR_DEBUG, "%s: upcall.req.readdir.refn.khandle: %pU\n", __func__, &new_op->upcall.req.readdir.refn.khandle); new_op->upcall.req.readdir.token = *ptoken; get_new_buffer_index: ret = orangefs_readdir_index_get(&bufmap, &buffer_index); if (ret < 0) { gossip_lerr("orangefs_readdir: orangefs_readdir_index_get() failure (%d)\n", ret); goto out_free_op; } new_op->upcall.req.readdir.buf_index = buffer_index; ret = service_operation(new_op, "orangefs_readdir", get_interruptible_flag(dentry->d_inode)); gossip_debug(GOSSIP_DIR_DEBUG, "Readdir downcall status is %d. 
ret:%d\n", new_op->downcall.status, ret); orangefs_readdir_index_put(buffer_index); if (ret == -EAGAIN && op_state_purged(new_op)) { /* Client-core indices are invalid after it restarted. */ gossip_debug(GOSSIP_DIR_DEBUG, "%s: Getting new buffer_index for retry of readdir..\n", __func__); goto get_new_buffer_index; } if (ret == -EIO && op_state_purged(new_op)) { gossip_err("%s: Client is down. Aborting readdir call.\n", __func__); goto out_slot; } if (ret < 0 || new_op->downcall.status != 0) { gossip_debug(GOSSIP_DIR_DEBUG, "Readdir request failed. Status:%d\n", new_op->downcall.status); if (ret >= 0) ret = new_op->downcall.status; goto out_slot; } dents_buf = new_op->downcall.trailer_buf; if (dents_buf == NULL) { gossip_err("Invalid NULL buffer in readdir response\n"); ret = -ENOMEM; goto out_slot; } bytes_decoded = decode_dirents(dents_buf, new_op->downcall.trailer_size, &readdir_response); if (bytes_decoded < 0) { ret = bytes_decoded; gossip_err("Could not decode readdir from buffer %d\n", ret); goto out_vfree; } if (bytes_decoded != new_op->downcall.trailer_size) { gossip_err("orangefs_readdir: # bytes decoded (%ld) " "!= trailer size (%ld)\n", bytes_decoded, (long)new_op->downcall.trailer_size); ret = -EINVAL; goto out_destroy_handle; } /* * orangefs doesn't actually store dot and dot-dot, but * we need to have them represented. */ if (pos == 0) { ino = get_ino_from_khandle(dentry->d_inode); gossip_debug(GOSSIP_DIR_DEBUG, "%s: calling dir_emit of \".\" with pos = %llu\n", __func__, llu(pos)); ret = dir_emit(ctx, ".", 1, ino, DT_DIR); pos += 1; } if (pos == 1) { ino = get_parent_ino_from_dentry(dentry); gossip_debug(GOSSIP_DIR_DEBUG, "%s: calling dir_emit of \"..\" with pos = %llu\n", __func__, llu(pos)); ret = dir_emit(ctx, "..", 2, ino, DT_DIR); pos += 1; } /* * we stored ORANGEFS_ITERATE_NEXT in ctx->pos last time around * to prevent "finding" dot and dot-dot on any iteration * other than the first. 
*/ if (ctx->pos == ORANGEFS_ITERATE_NEXT) ctx->pos = 0; gossip_debug(GOSSIP_DIR_DEBUG, "%s: dirent_outcount:%d:\n", __func__, readdir_response.orangefs_dirent_outcount); for (i = ctx->pos; i < readdir_response.orangefs_dirent_outcount; i++) { len = readdir_response.dirent_array[i].d_length; current_entry = readdir_response.dirent_array[i].d_name; current_ino = orangefs_khandle_to_ino( &readdir_response.dirent_array[i].khandle); gossip_debug(GOSSIP_DIR_DEBUG, "calling dir_emit for %s with len %d" ", ctx->pos %ld\n", current_entry, len, (unsigned long)ctx->pos); /* * type is unknown. We don't return object type * in the dirent_array. This leaves getdents * clueless about type. */ ret = dir_emit(ctx, current_entry, len, current_ino, DT_UNKNOWN); if (!ret) break; ctx->pos++; gossip_debug(GOSSIP_DIR_DEBUG, "%s: ctx->pos:%lld\n", __func__, lld(ctx->pos)); } /* * we ran all the way through the last batch, set up for * getting another batch... */ if (ret) { *ptoken = readdir_response.token; ctx->pos = ORANGEFS_ITERATE_NEXT; } /* * Did we hit the end of the directory? 
*/ if (readdir_response.token == ORANGEFS_READDIR_END && !buffer_full) { gossip_debug(GOSSIP_DIR_DEBUG, "End of dir detected; setting ctx->pos to ORANGEFS_READDIR_END.\n"); ctx->pos = ORANGEFS_READDIR_END; } out_destroy_handle: /* kfree(NULL) is safe */ kfree(readdir_response.dirent_array); out_vfree: gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", dents_buf); vfree(dents_buf); out_slot: orangefs_readdir_index_put(buffer_index); out_free_op: op_release(new_op); gossip_debug(GOSSIP_DIR_DEBUG, "orangefs_readdir returning %d\n", ret); return ret; } static int orangefs_dir_open(struct inode *inode, struct file *file) { __u64 *ptoken; file->private_data = kmalloc(sizeof(__u64), GFP_KERNEL); if (!file->private_data) return -ENOMEM; ptoken = file->private_data; *ptoken = ORANGEFS_READDIR_START; return 0; } static int orangefs_dir_release(struct inode *inode, struct file *file) { orangefs_flush_inode(inode); kfree(file->private_data); return 0; } /** ORANGEFS implementation of VFS directory operations */ const struct file_operations orangefs_dir_operations = { .read = generic_read_dir, .iterate = orangefs_readdir, .open = orangefs_dir_open, .release = orangefs_dir_release, };