/* FS-Cache worker operation management routines * * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * * See Documentation/filesystems/caching/operations.txt */ #define FSCACHE_DEBUG_LEVEL OPERATION #include <linux/module.h> #include <linux/seq_file.h> #include <linux/slab.h> #include "internal.h" atomic_t fscache_op_debug_id; EXPORT_SYMBOL(fscache_op_debug_id); static void fscache_operation_dummy_cancel(struct fscache_operation *op) { } /** * fscache_operation_init - Do basic initialisation of an operation * @op: The operation to initialise * @release: The release function to assign * * Do basic initialisation of an operation. The caller must still set flags, * object and processor if needed. */ void fscache_operation_init(struct fscache_operation *op, fscache_operation_processor_t processor, fscache_operation_cancel_t cancel, fscache_operation_release_t release) { INIT_WORK(&op->work, fscache_op_work_func); atomic_set(&op->usage, 1); op->state = FSCACHE_OP_ST_INITIALISED; op->debug_id = atomic_inc_return(&fscache_op_debug_id); op->processor = processor; op->cancel = cancel ?: fscache_operation_dummy_cancel; op->release = release; INIT_LIST_HEAD(&op->pend_link); fscache_stat(&fscache_n_op_initialised); } EXPORT_SYMBOL(fscache_operation_init); /** * fscache_enqueue_operation - Enqueue an operation for processing * @op: The operation to enqueue * * Enqueue an operation for processing by the FS-Cache thread pool. * * This will get its own ref on the object. */ void fscache_enqueue_operation(struct fscache_operation *op) { _enter("{OBJ%x OP%x,%u}", op->object->debug_id, op->debug_id, atomic_read(&op->usage)); ASSERT(list_empty(&op->pend_link)); ASSERT(op->processor != NULL); ASSERT(fscache_object_is_available(op->object)); ASSERTCMP(atomic_read(&op->usage), >, 0); ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS); fscache_stat(&fscache_n_op_enqueue); switch (op->flags & FSCACHE_OP_TYPE) { case FSCACHE_OP_ASYNC: _debug("queue async"); atomic_inc(&op->usage); if (!queue_work(fscache_op_wq, &op->work)) fscache_put_operation(op); break; case FSCACHE_OP_MYTHREAD: _debug("queue for caller's attention"); break; default: pr_err("Unexpected op type %lx", op->flags); BUG(); break; } } EXPORT_SYMBOL(fscache_enqueue_operation); /* * start an op running */ static void fscache_run_op(struct fscache_object *object, struct fscache_operation *op) { ASSERTCMP(op->state, ==, FSCACHE_OP_ST_PENDING); op->state = FSCACHE_OP_ST_IN_PROGRESS; object->n_in_progress++; if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) wake_up_bit(&op->flags, FSCACHE_OP_WAITING); if (op->processor) fscache_enqueue_operation(op); fscache_stat(&fscache_n_op_run); } /* * report an unexpected submission */ static void fscache_report_unexpected_submission(struct fscache_object *object, struct fscache_operation *op, const struct fscache_state *ostate) { static bool once_only; struct fscache_operation *p; unsigned n; if (once_only) return; once_only = true; kdebug("unexpected submission OP%x [OBJ%x %s]", op->debug_id, object->debug_id, object->state->name); kdebug("objstate=%s [%s]", object->state->name, ostate->name); kdebug("objflags=%lx", object->flags); kdebug("objevent=%lx [%lx]", object->events, object->event_mask); kdebug("ops=%u inp=%u exc=%u", object->n_ops, object->n_in_progress, object->n_exclusive); if (!list_empty(&object->pending_ops)) { n = 0; list_for_each_entry(p, &object->pending_ops, pend_link) { ASSERTCMP(p->object, ==, object); kdebug("%p %p", op->processor, op->release); n++; } kdebug("n=%u", n); } dump_stack(); } /* * submit an exclusive operation for an object * - other ops are excluded from running simultaneously with this one * - this gets any extra refs it needs on an op */ int fscache_submit_exclusive_op(struct fscache_object *object, struct fscache_operation *op) { const struct fscache_state *ostate; unsigned long flags; int ret; _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id); ASSERTCMP(op->state, ==, FSCACHE_OP_ST_INITIALISED); ASSERTCMP(atomic_read(&op->usage), >, 0); spin_lock(&object->lock); ASSERTCMP(object->n_ops, >=, object->n_in_progress); ASSERTCMP(object->n_ops, >=, object->n_exclusive); ASSERT(list_empty(&op->pend_link)); ostate = object->state; smp_rmb(); op->state = FSCACHE_OP_ST_PENDING; flags = READ_ONCE(object->flags); if (unlikely(!(flags & BIT(FSCACHE_OBJECT_IS_LIVE)))) { fscache_stat(&fscache_n_op_rejected); op->cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; ret = -ENOBUFS; } else if (unlikely(fscache_cache_is_broken(object))) { op->cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; ret = -EIO; } else if (flags & BIT(FSCACHE_OBJECT_IS_AVAILABLE)) { op->object = object; object->n_ops++; object->n_exclusive++; /* reads and writes must wait */ if (object->n_in_progress > 0) { atomic_inc(&op->usage); list_add_tail(&op->pend_link, &object->pending_ops); fscache_stat(&fscache_n_op_pend); } else if (!list_empty(&object->pending_ops)) { atomic_inc(&op->usage); list_add_tail(&op->pend_link, &object->pending_ops); fscache_stat(&fscache_n_op_pend); fscache_start_operations(object); } else { ASSERTCMP(object->n_in_progress, ==, 0); fscache_run_op(object, op); } /* need to issue a new write op after this */ clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); ret = 0; } else if (flags & BIT(FSCACHE_OBJECT_IS_LOOKED_UP)) { op->object = object; object->n_ops++; object->n_exclusive++; /* reads and writes must wait */ atomic_inc(&op->usage); list_add_tail(&op->pend_link, &object->pending_ops); fscache_stat(&fscache_n_op_pend); ret = 0; } else if (flags & BIT(FSCACHE_OBJECT_KILLED_BY_CACHE)) { op->cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; ret = -ENOBUFS; } else { fscache_report_unexpected_submission(object, op, ostate); op->cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; ret = -ENOBUFS; } spin_unlock(&object->lock); return ret; } /* * submit an operation for an object * - objects may be submitted only in the following states: * - during object creation (write ops may be submitted) * - whilst the object is active * - after an I/O error incurred in one of the two above states (op rejected) * - this gets any extra refs it needs on an op */ int fscache_submit_op(struct fscache_object *object, struct fscache_operation *op) { const struct fscache_state *ostate; unsigned long flags; int ret; _enter("{OBJ%x OP%x},{%u}", object->debug_id, op->debug_id, atomic_read(&op->usage)); ASSERTCMP(op->state, ==, FSCACHE_OP_ST_INITIALISED); ASSERTCMP(atomic_read(&op->usage), >, 0); spin_lock(&object->lock); ASSERTCMP(object->n_ops, >=, object->n_in_progress); ASSERTCMP(object->n_ops, >=, object->n_exclusive); ASSERT(list_empty(&op->pend_link)); ostate = object->state; smp_rmb(); op->state = FSCACHE_OP_ST_PENDING; flags = READ_ONCE(object->flags); if (unlikely(!(flags & BIT(FSCACHE_OBJECT_IS_LIVE)))) { fscache_stat(&fscache_n_op_rejected); op->cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; ret = -ENOBUFS; } else if (unlikely(fscache_cache_is_broken(object))) { op->cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; ret = -EIO; } else if (flags & BIT(FSCACHE_OBJECT_IS_AVAILABLE)) { op->object = object; object->n_ops++; if (object->n_exclusive > 0) { atomic_inc(&op->usage); list_add_tail(&op->pend_link, &object->pending_ops); fscache_stat(&fscache_n_op_pend); } else if (!list_empty(&object->pending_ops)) { atomic_inc(&op->usage); list_add_tail(&op->pend_link, &object->pending_ops); fscache_stat(&fscache_n_op_pend); fscache_start_operations(object); } else { ASSERTCMP(object->n_exclusive, ==, 0); fscache_run_op(object, op); } ret = 0; } else if (flags & BIT(FSCACHE_OBJECT_IS_LOOKED_UP)) { op->object = object; object->n_ops++; atomic_inc(&op->usage); list_add_tail(&op->pend_link, &object->pending_ops); fscache_stat(&fscache_n_op_pend); ret = 0; } else if (flags & BIT(FSCACHE_OBJECT_KILLED_BY_CACHE)) { op->cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; ret = -ENOBUFS; } else { fscache_report_unexpected_submission(object, op, ostate); ASSERT(!fscache_object_is_active(object)); op->cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; ret = -ENOBUFS; } spin_unlock(&object->lock); return ret; } /* * queue an object for withdrawal on error, aborting all following asynchronous * operations */ void fscache_abort_object(struct fscache_object *object) { _enter("{OBJ%x}", object->debug_id); fscache_raise_event(object, FSCACHE_OBJECT_EV_ERROR); } /* * Jump start the operation processing on an object. The caller must hold * object->lock. */ void fscache_start_operations(struct fscache_object *object) { struct fscache_operation *op; bool stop = false; while (!list_empty(&object->pending_ops) && !stop) { op = list_entry(object->pending_ops.next, struct fscache_operation, pend_link); if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) { if (object->n_in_progress > 0) break; stop = true; } list_del_init(&op->pend_link); fscache_run_op(object, op); /* the pending queue was holding a ref on the object */ fscache_put_operation(op); } ASSERTCMP(object->n_in_progress, <=, object->n_ops); _debug("woke %d ops on OBJ%x", object->n_in_progress, object->debug_id); } /* * cancel an operation that's pending on an object */ int fscache_cancel_op(struct fscache_operation *op, bool cancel_in_progress_op) { struct fscache_object *object = op->object; bool put = false; int ret; _enter("OBJ%x OP%x}", op->object->debug_id, op->debug_id); ASSERTCMP(op->state, >=, FSCACHE_OP_ST_PENDING); ASSERTCMP(op->state, !=, FSCACHE_OP_ST_CANCELLED); ASSERTCMP(atomic_read(&op->usage), >, 0); spin_lock(&object->lock); ret = -EBUSY; if (op->state == FSCACHE_OP_ST_PENDING) { ASSERT(!list_empty(&op->pend_link)); list_del_init(&op->pend_link); put = true; fscache_stat(&fscache_n_op_cancelled); op->cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) object->n_exclusive--; if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) wake_up_bit(&op->flags, FSCACHE_OP_WAITING); ret = 0; } else if (op->state == FSCACHE_OP_ST_IN_PROGRESS && cancel_in_progress_op) { ASSERTCMP(object->n_in_progress, >, 0); if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) object->n_exclusive--; object->n_in_progress--; if (object->n_in_progress == 0) fscache_start_operations(object); fscache_stat(&fscache_n_op_cancelled); op->cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) object->n_exclusive--; if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) wake_up_bit(&op->flags, FSCACHE_OP_WAITING); ret = 0; } if (put) fscache_put_operation(op); spin_unlock(&object->lock); _leave(" = %d", ret); return ret; } /* * Cancel all pending operations on an object */ void fscache_cancel_all_ops(struct fscache_object *object) { struct fscache_operation *op; _enter("OBJ%x", object->debug_id); spin_lock(&object->lock); while (!list_empty(&object->pending_ops)) { op = list_entry(object->pending_ops.next, struct fscache_operation, pend_link); fscache_stat(&fscache_n_op_cancelled); list_del_init(&op->pend_link); ASSERTCMP(op->state, ==, FSCACHE_OP_ST_PENDING); op->cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) object->n_exclusive--; if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) wake_up_bit(&op->flags, FSCACHE_OP_WAITING); fscache_put_operation(op); cond_resched_lock(&object->lock); } spin_unlock(&object->lock); _leave(""); } /* * Record the completion or cancellation of an in-progress operation. */ void fscache_op_complete(struct fscache_operation *op, bool cancelled) { struct fscache_object *object = op->object; _enter("OBJ%x", object->debug_id); ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS); ASSERTCMP(object->n_in_progress, >, 0); ASSERTIFCMP(test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags), object->n_exclusive, >, 0); ASSERTIFCMP(test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags), object->n_in_progress, ==, 1); spin_lock(&object->lock); if (!cancelled) { op->state = FSCACHE_OP_ST_COMPLETE; } else { op->cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; } if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) object->n_exclusive--; object->n_in_progress--; if (object->n_in_progress == 0) fscache_start_operations(object); spin_unlock(&object->lock); _leave(""); } EXPORT_SYMBOL(fscache_op_complete); /* * release an operation * - queues pending ops if this is the last in-progress op */ void fscache_put_operation(struct fscache_operation *op) { struct fscache_object *object; struct fscache_cache *cache; _enter("{OBJ%x OP%x,%d}", op->object->debug_id, op->debug_id, atomic_read(&op->usage)); ASSERTCMP(atomic_read(&op->usage), >, 0); if (!atomic_dec_and_test(&op->usage)) return; _debug("PUT OP"); ASSERTIFCMP(op->state != FSCACHE_OP_ST_INITIALISED && op->state != FSCACHE_OP_ST_COMPLETE, op->state, ==, FSCACHE_OP_ST_CANCELLED); fscache_stat(&fscache_n_op_release); if (op->release) { op->release(op); op->release = NULL; } op->state = FSCACHE_OP_ST_DEAD; object = op->object; if (likely(object)) { if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags)) atomic_dec(&object->n_reads); if (test_bit(FSCACHE_OP_UNUSE_COOKIE, &op->flags)) fscache_unuse_cookie(object); /* now... we may get called with the object spinlock held, so we * complete the cleanup here only if we can immediately acquire the * lock, and defer it otherwise */ if (!spin_trylock(&object->lock)) { _debug("defer put"); fscache_stat(&fscache_n_op_deferred_release); cache = object->cache; spin_lock(&cache->op_gc_list_lock); list_add_tail(&op->pend_link, &cache->op_gc_list); spin_unlock(&cache->op_gc_list_lock); schedule_work(&cache->op_gc); _leave(" [defer]"); return; } ASSERTCMP(object->n_ops, >, 0); object->n_ops--; if (object->n_ops == 0) fscache_raise_event(object, FSCACHE_OBJECT_EV_CLEARED); spin_unlock(&object->lock); } kfree(op); _leave(" [done]"); } EXPORT_SYMBOL(fscache_put_operation); /* * garbage collect operations that have had their release deferred */ void fscache_operation_gc(struct work_struct *work) { struct fscache_operation *op; struct fscache_object *object; struct fscache_cache *cache = container_of(work, struct fscache_cache, op_gc); int count = 0; _enter(""); do { spin_lock(&cache->op_gc_list_lock); if (list_empty(&cache->op_gc_list)) { spin_unlock(&cache->op_gc_list_lock); break; } op = list_entry(cache->op_gc_list.next, struct fscache_operation, pend_link); list_del(&op->pend_link); spin_unlock(&cache->op_gc_list_lock); object = op->object; spin_lock(&object->lock); _debug("GC DEFERRED REL OBJ%x OP%x", object->debug_id, op->debug_id); fscache_stat(&fscache_n_op_gc); ASSERTCMP(atomic_read(&op->usage), ==, 0); ASSERTCMP(op->state, ==, FSCACHE_OP_ST_DEAD); ASSERTCMP(object->n_ops, >, 0); object->n_ops--; if (object->n_ops == 0) fscache_raise_event(object, FSCACHE_OBJECT_EV_CLEARED); spin_unlock(&object->lock); kfree(op); } while (count++ < 20); if (!list_empty(&cache->op_gc_list)) schedule_work(&cache->op_gc); _leave(""); } /* * execute an operation using fs_op_wq to provide processing context - * the caller holds a ref to this object, so we don't need to hold one */ void fscache_op_work_func(struct work_struct *work) { struct fscache_operation *op = container_of(work, struct fscache_operation, work); unsigned long start; _enter("{OBJ%x OP%x,%d}", op->object->debug_id, op->debug_id, atomic_read(&op->usage)); ASSERT(op->processor != NULL); start = jiffies; op->processor(op); fscache_hist(fscache_ops_histogram, start); fscache_put_operation(op); _leave(""); }