/*
 * Wound/Wait Mutexes: blocking mutual exclusion locks with deadlock avoidance
 *
 * Original mutex implementation started by Ingo Molnar:
 *
 *  Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar
 *
 * Wound/wait implementation:
 *  Copyright (C) 2013 Canonical Ltd.
 *
 * Wound/wait drop-in, actual wound-wait semantics and batching:
 *  Copyright (C) 2016-2018 VMWare Inc.
 */

#include "ww_mutex.h"
#include
#include

#ifndef WW_BUILTIN

#undef EXPORT_SYMBOL_GPL
#define EXPORT_SYMBOL_GPL(_a)
#define MUTEX_FLAGS 0x07

#ifndef CONFIG_DEBUG_MUTEXES
#define debug_ww_mutex_add_waiter(_a, _b, _c)
#define debug_ww_mutex_wake_waiter(_a, _b)
#define debug_ww_mutex_lock_common(_a, _b)
#define mutex_remove_waiter(__lock, __waiter, __task) \
	__list_del((__waiter)->list.prev, (__waiter)->list.next)
#else
static void debug_ww_mutex_add_waiter(struct ww_mutex *ww,
				      struct mutex_waiter *waiter,
				      struct task_struct *task)
{
	SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&ww->my_class->lock));
	task->blocked_on = waiter;
}

static void debug_ww_mutex_wake_waiter(struct ww_mutex *ww,
				       struct mutex_waiter *waiter)
{
	struct mutex *lock = &ww->base;

	SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&ww->my_class->lock));
	DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list));
	DEBUG_LOCKS_WARN_ON(waiter->magic != waiter);
	DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
}

static void debug_ww_mutex_lock_common(struct ww_mutex *ww,
				       struct mutex_waiter *waiter)
{
	memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));
	waiter->magic = waiter;
	INIT_LIST_HEAD(&waiter->list);
}

static void mutex_remove_waiter(struct mutex *lock,
				struct mutex_waiter *waiter,
				struct task_struct *task)
{
	DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
	DEBUG_LOCKS_WARN_ON(waiter->task != task);
	DEBUG_LOCKS_WARN_ON(task->blocked_on != waiter);
	task->blocked_on = NULL;

	list_del_init(&waiter->list);
	waiter->task = NULL;
}
#endif /* CONFIG_DEBUG_MUTEXES */

/**
 * ww_acquire_class_lock - Lock the global class spinlock unless batching
 *
 * @ww: The ww mutex.
 * @ww_ctx: The acquire context.
 *
 * Take the global class spinlock unless we're batching, in which case the
 * global class spinlock was taken at batch begin.
 */
static void ww_acquire_class_lock(struct ww_mutex *ww,
				  struct ww_acquire_ctx *ww_ctx)
{
	if (!ww_ctx || !ww_ctx->batched)
		spin_lock(&ww->my_class->lock);
}

/**
 * ww_acquire_class_unlock - Unlock the global class spinlock unless batching
 *
 * @ww: The ww mutex.
 * @ww_ctx: The acquire context.
 *
 * Release the global class spinlock unless we're batching, in which case the
 * global class spinlock is released at batch end.
 */
static void ww_acquire_class_unlock(struct ww_mutex *ww,
				    struct ww_acquire_ctx *ww_ctx)
{
	if (!ww_ctx || !ww_ctx->batched)
		spin_unlock(&ww->my_class->lock);
}

static bool __mutex_waiter_is_first(struct mutex *lock,
				    struct mutex_waiter *waiter)
{
	return list_first_entry(&lock->wait_list, struct mutex_waiter, list) ==
		waiter;
}

/**
 * __ww_mutex_trylock - Trylock a ww_mutex
 *
 * @ww: The mutex to trylock.
 * @ww_ctx: The acquire_ctx to register as locker, or NULL.
 * @waiter: The waiter if a waiter is trying to lock, or NULL.
 * Return: true if the lock was acquired, false otherwise.
 */
static bool __ww_mutex_trylock(struct ww_mutex *ww,
			       struct ww_acquire_ctx *ww_ctx,
			       struct mutex_waiter *waiter)
{
	struct mutex *lock = &ww->base;

	lockdep_assert_held(&ww->my_class->lock);

	if (atomic_long_read(&lock->owner))
		return false;

	/*
	 * No lock stealing for now. If there are waiters, only the first
	 * waiter is allowed to lock.
	 */
	if (!list_empty(&lock->wait_list) &&
	    !__mutex_waiter_is_first(lock, waiter))
		return false;

	atomic_long_set(&lock->owner, (unsigned long)current);
	ww->ctx = ww_ctx;
	if (ww_ctx)
		ww_ctx->acquired++;

	return true;
}

static bool __ww_ctx_stamp_after(struct ww_acquire_ctx *a,
				 struct ww_acquire_ctx *b)
{
	return a->stamp - b->stamp <= LONG_MAX &&
	       (a->stamp != b->stamp || a > b);
}

static struct task_struct *__owner_task(unsigned long owner)
{
	return (struct task_struct *)(owner & ~MUTEX_FLAGS);
}

/**
 * ww_mutex_waiter_backoff - Whether to back off if younger
 *
 * @us: Our acquire_ctx.
 * @other: The other waiter's or the lock owner's acquire_ctx.
 * Return: Whether to back off for another waiter or the lock owner if we're
 * younger than that waiter or owner.
 */
static bool ww_mutex_waiter_backoff(struct ww_acquire_ctx *us,
				    struct ww_acquire_ctx *other)
{
	return (us->my_class->is_wait_die || us->wounded) &&
		us->acquired > 0 && !other->done_acquire;
}

/**
 * ww_mutex_backoff - Whether to back off
 *
 * @us: Our acquire_ctx.
 * @other: The other waiter's or the lock owner's acquire_ctx.
 * Return: Whether to back off for another waiter or the lock owner.
 */
static bool ww_mutex_backoff(struct ww_acquire_ctx *us,
			     struct ww_acquire_ctx *other)
{
	return other && ww_mutex_waiter_backoff(us, other) &&
		us != other && __ww_ctx_stamp_after(us, other);
}

/**
 * ww_mutex_lock_backoff - Check backoff for the lock owner and all other waiters
 *
 * @us: Our acquire_ctx.
 * @ww: The ww_mutex considered.
 * Return: Whether to back off for any other waiter or the lock owner.
 */
static bool ww_mutex_lock_backoff(struct ww_acquire_ctx *us,
				  struct ww_mutex *ww)
{
	struct mutex_waiter *cur;

	if (!us)
		return false;

	/*
	 * Wounded contexts lazy-preempt before the first wounded lock wait,
	 * so that we have a lock to wait on after backing off.
	 */
	if (us->wounded)
		return true;

	/* Back off for the lock owner? */
	if (ww_mutex_backoff(us, ww->ctx))
		return true;

	/* Back off for other waiters? */
	list_for_each_entry(cur, &ww->base.wait_list, list) {
		if (ww_mutex_backoff(us, cur->ww_ctx))
			return true;
	}

	return false;
}

static int __ww_mutex_add_waiter(struct mutex_waiter *waiter,
				 struct ww_mutex *ww,
				 struct ww_acquire_ctx *ww_ctx)
{
	struct mutex *lock = &ww->base;
	struct mutex_waiter *cur;
	struct list_head *pos;
	bool is_wait_die = ww->my_class->is_wait_die;

	waiter->task = current;
	waiter->ww_ctx = ww_ctx;

	if (!ww_ctx) {
		list_add_tail(&waiter->list, &lock->wait_list);
		return 0;
	}

	/*
	 * Add the waiter before the first waiter with a higher stamp.
	 * Waiters without a context are skipped to avoid starving
	 * them.
	 */
	pos = &lock->wait_list;
	list_for_each_entry_reverse(cur, &lock->wait_list, list) {
		if (!cur->ww_ctx)
			continue;

		/* Do we need to back off early for this older waiter? */
		if (__ww_ctx_stamp_after(ww_ctx, cur->ww_ctx)) {
			if (ww_mutex_waiter_backoff(ww_ctx, cur->ww_ctx))
				return -EDEADLK;

			break;
		}

		pos = &cur->list;

		/*
		 * The other waiter needs to back off for us.
		 * Wake it up so that it gets a chance to do so.
		 */
		if (ww_mutex_waiter_backoff(cur->ww_ctx, ww_ctx)) {
			debug_ww_mutex_wake_waiter(ww, cur);
			wake_up_process(cur->task);
		}
	}

	list_add_tail(&waiter->list, pos);

	/* Need to wound the lock owner? */
	if (!is_wait_die && ww->ctx && __ww_ctx_stamp_after(ww->ctx, ww_ctx) &&
	    ww_ctx->acquired > 0) {
		ww->ctx->wounded = true;

		/*
		 * Wake up the lock owner in case it's sleeping on
		 * another ww_mutex.
		 */
		wake_up_process(__owner_task(atomic_long_read(&lock->owner)));
	}

	return 0;
}

/**
 * ww_mutex_wake_first_waiter - Wake the first waiter, if any.
 *
 * @ww: The ww_mutex on which to wake the first waiter.
 */
static void ww_mutex_wake_first_waiter(struct ww_mutex *ww)
{
	struct mutex_waiter *cur;
	struct mutex *lock = &ww->base;

	if (!list_empty(&lock->wait_list)) {
		cur = list_first_entry(&lock->wait_list, struct mutex_waiter,
				       list);
		debug_ww_mutex_wake_waiter(ww, cur);
		wake_up_process(cur->task);
	}
}

static int __ww_mutex_lock(struct ww_mutex *ww, long state,
			   unsigned int subclass,
			   struct lockdep_map *nest_lock,
			   unsigned long ip,
			   struct ww_acquire_ctx *ww_ctx)
{
	struct mutex_waiter waiter;
	int ret = 0;

	lockdep_assert_held(&ww->my_class->lock);

	if (ww_ctx) {
		/*
		 * If we've backed off when wounded, there are no more
		 * acquired locks, and we can clear the wounded flag.
		 */
		if (ww_ctx->acquired == 0)
			ww_ctx->wounded = false;

		if (ww->ctx == ww_ctx) {
			ret = -EALREADY;
			goto err_early_backoff;
		}
	}

	if (__ww_mutex_trylock(ww, ww_ctx, &waiter))
		goto skip_wait;

	debug_ww_mutex_lock_common(ww, &waiter);
	debug_ww_mutex_add_waiter(ww, &waiter, current);

	lock_contended(&ww->base.dep_map, ip);

	ret = __ww_mutex_add_waiter(&waiter, ww, ww_ctx);
	if (ret)
		goto err_early_backoff;

	set_current_state(state);
	for (;;) {
		if (__ww_mutex_trylock(ww, ww_ctx, &waiter))
			break;

		if (unlikely(signal_pending_state(state, current))) {
			ret = -EINTR;
			goto err;
		}

		if (ww_mutex_lock_backoff(ww_ctx, ww)) {
			ret = -EDEADLK;
			goto err;
		}

		spin_unlock(&ww->my_class->lock);
		schedule();
		spin_lock(&ww->my_class->lock);
		set_current_state(state);
	}
	set_current_state(TASK_RUNNING);
	mutex_remove_waiter(&ww->base, &waiter, current);

skip_wait:
	lock_acquired(&ww->base.dep_map, ip);

	return 0;

err:
	set_current_state(TASK_RUNNING);
	mutex_remove_waiter(&ww->base, &waiter, current);
	ww_mutex_wake_first_waiter(ww);
err_early_backoff:
	lock_release(&ww->base.dep_map, !!ww_ctx, ip);

	return ret;
}

static void __ww_mutex_unlock(struct ww_mutex *ww, unsigned long ip)
{
	struct mutex *lock = &ww->base;

	lockdep_assert_held(&ww->my_class->lock);

	lock_release(&lock->dep_map, !!ww->ctx, ip);
	if (ww->ctx)
		ww->ctx->acquired--;
	ww->ctx = NULL;
	atomic_long_set(&lock->owner, 0);
	ww_mutex_wake_first_waiter(ww);
}
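/*
 * Illustrative sketch (not part of the original drop-in): the caller-side
 * backoff loop that the -EDEADLK return above is designed for. It assumes
 * ww_mutex.h mirrors the stock kernel ww_mutex interface and provides
 * ww_acquire_init(), ww_acquire_done() and ww_acquire_fini(); the guard
 * macro WW_MUTEX_USAGE_EXAMPLE is hypothetical and only keeps the sketch
 * out of the build.
 */
#ifdef WW_MUTEX_USAGE_EXAMPLE
static int example_lock_pair(struct ww_mutex *a, struct ww_mutex *b,
			     struct ww_class *ww_class)
{
	struct ww_acquire_ctx ctx;
	int ret;

	ww_acquire_init(&ctx, ww_class);	/* assumed helper */
retry:
	ret = ww_mutex_lock(a, &ctx);
	if (ret)
		goto out;

	ret = ww_mutex_lock(b, &ctx);
	if (ret == -EDEADLK) {
		/*
		 * We were wounded (wound-wait) or lost against an older
		 * context (wait-die): drop everything we hold and retry
		 * with the same context, keeping our stamp.
		 */
		ww_mutex_unlock(a);
		goto retry;
	}
	if (ret) {
		ww_mutex_unlock(a);
		goto out;
	}

	ww_acquire_done(&ctx);			/* assumed helper */
	/* ... use the resources protected by @a and @b ... */
	ww_mutex_unlock(b);
	ww_mutex_unlock(a);
out:
	ww_acquire_fini(&ctx);			/* assumed helper */
	return ret;
}
#endif /* WW_MUTEX_USAGE_EXAMPLE */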
#ifdef CONFIG_DEBUG_LOCK_ALLOC

static int ww_mutex_deadlock_injection(struct ww_mutex *ww,
				       struct ww_acquire_ctx *ww_ctx)
{
#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
	unsigned tmp;

	if (ww_ctx->deadlock_inject_countdown-- == 0) {
		tmp = ww_ctx->deadlock_inject_interval;
		if (tmp > UINT_MAX / 4)
			tmp = UINT_MAX;
		else
			tmp = tmp * 2 + tmp + tmp / 2;

		ww_ctx->deadlock_inject_interval = tmp;
		ww_ctx->deadlock_inject_countdown = tmp;

		ww_mutex_unlock(ww);

		return -EDEADLK;
	}
#endif /* CONFIG_DEBUG_WW_MUTEX_SLOWPATH */

	return 0;
}

/**
 * ww_class_lock_annotate - lockdep annotation at locking time.
 *
 * @ww: The ww_mutex.
 * @ww_ctx: The acquire ctx, or NULL.
 * @ip: The caller's instruction pointer.
 */
static void ww_class_lock_annotate(struct ww_mutex *ww,
				   struct ww_acquire_ctx *ww_ctx,
				   unsigned long ip)
{
	if (!ww_ctx || !ww_ctx->batched) {
		/* Annotate the wait lock before the spinlock. */
		lock_acquire(&ww->base.dep_map, 0, 0, 0, 1,
			     ww_ctx ? &ww_ctx->dep_map : NULL, ip);
	} else {
		/*
		 * OK, we'd like to annotate ww trylocks under the class
		 * spinlock, but since each trylock becomes a lockdep level,
		 * we'd quickly run out of levels. And we can't annotate
		 * nested trylocks, since apparently lockdep can't cope
		 * with that. So cheat lockdep and fake a class spinlock
		 * release, then annotate a waiting ww lock...
		 */
		lock_release(&ww->my_class->lock.dep_map, 0, ip);
		lock_acquire(&ww->base.dep_map, 0, 0, 0, 1,
			     &ww_ctx->dep_map, ip);
		lock_acquire(&ww->my_class->lock.dep_map, 0, 1, 0, 1,
			     NULL, ip);
	}
}

int ww_mutex_lock(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
{
	int ret;

	ww_class_lock_annotate(ww, ww_ctx, _RET_IP_);
	ww_acquire_class_lock(ww, ww_ctx);
	ret = __ww_mutex_lock(ww, TASK_UNINTERRUPTIBLE, 0,
			      ww_ctx ? &ww_ctx->dep_map : NULL, _RET_IP_,
			      ww_ctx);
	ww_acquire_class_unlock(ww, ww_ctx);
	if (!ret && ww_ctx && ww_ctx->acquired > 1)
		return ww_mutex_deadlock_injection(ww, ww_ctx);

	return ret;
}
EXPORT_SYMBOL_GPL(ww_mutex_lock);

int ww_mutex_lock_interruptible(struct ww_mutex *ww,
				struct ww_acquire_ctx *ww_ctx)
{
	int ret;

	ww_class_lock_annotate(ww, ww_ctx, _RET_IP_);
	ww_acquire_class_lock(ww, ww_ctx);
	ret = __ww_mutex_lock(ww, TASK_INTERRUPTIBLE, 0,
			      ww_ctx ? &ww_ctx->dep_map : NULL, _RET_IP_,
			      ww_ctx);
	ww_acquire_class_unlock(ww, ww_ctx);
	if (!ret && ww_ctx && ww_ctx->acquired > 1)
		return ww_mutex_deadlock_injection(ww, ww_ctx);

	return ret;
}
EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible);

#else /* CONFIG_DEBUG_LOCK_ALLOC */

int ww_mutex_lock(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
{
	int ret;

	ww_acquire_class_lock(ww, ww_ctx);
	ret = __ww_mutex_lock(ww, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_,
			      ww_ctx);
	ww_acquire_class_unlock(ww, ww_ctx);

	return ret;
}
EXPORT_SYMBOL_GPL(ww_mutex_lock);

int ww_mutex_lock_interruptible(struct ww_mutex *ww,
				struct ww_acquire_ctx *ww_ctx)
{
	int ret;

	ww_acquire_class_lock(ww, ww_ctx);
	ret = __ww_mutex_lock(ww, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_,
			      ww_ctx);
	ww_acquire_class_unlock(ww, ww_ctx);

	return ret;
}
EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible);

#endif /* CONFIG_DEBUG_LOCK_ALLOC */

/**
 * ww_mutex_unlock_batched - Replacement to be used for batched unlock
 *
 * @ww: The mutex to unlock.
 */
void ww_mutex_unlock_batched(struct ww_mutex *ww)
{
	__ww_mutex_unlock(ww, _RET_IP_);
}
EXPORT_SYMBOL_GPL(ww_mutex_unlock_batched);

void ww_mutex_unlock(struct ww_mutex *ww)
{
	struct ww_acquire_ctx *ww_ctx = ww->ctx;

	ww_acquire_class_lock(ww, ww_ctx);
	__ww_mutex_unlock(ww, _RET_IP_);
	ww_acquire_class_unlock(ww, ww_ctx);
}
EXPORT_SYMBOL_GPL(ww_mutex_unlock);

#ifdef WW_BATCHING
void ww_acquire_batch_begin(struct ww_acquire_ctx *ww_ctx)
{
#ifdef CONFIG_DEBUG_MUTEXES
	WARN_ON(ww_ctx->batched);
#endif
	spin_lock(&ww_ctx->my_class->lock);
	ww_ctx->batched = true;
}
EXPORT_SYMBOL_GPL(ww_acquire_batch_begin);

void ww_acquire_batch_end(struct ww_acquire_ctx *ww_ctx)
{
#ifdef CONFIG_DEBUG_MUTEXES
	WARN_ON(!ww_ctx->batched);
#endif
	ww_ctx->batched = false;
	spin_unlock(&ww_ctx->my_class->lock);
}
EXPORT_SYMBOL_GPL(ww_acquire_batch_end);
#endif /* WW_BATCHING */

int ww_mutex_trylock(struct ww_mutex *ww)
{
	bool locked;

	spin_lock(&ww->my_class->lock);
	locked = __ww_mutex_trylock(ww, NULL, NULL);
	spin_unlock(&ww->my_class->lock);

	if (locked)
		lock_acquire(&ww->base.dep_map, 0, 1, 0, 1, NULL, _RET_IP_);

	return locked;
}
EXPORT_SYMBOL_GPL(ww_mutex_trylock);

#endif /* WW_BUILTIN */
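/*
 * Illustrative sketch (not part of the original drop-in): batched
 * acquisition. ww_acquire_batch_begin() takes the class spinlock once, so
 * the ww_mutex_lock() / ww_mutex_unlock_batched() calls inside the batch
 * section skip the per-lock class spinlock round trips. As above,
 * ww_acquire_init() / ww_acquire_fini() are assumed to come from
 * ww_mutex.h, and the WW_MUTEX_USAGE_EXAMPLE guard is hypothetical.
 */
#if defined(WW_BATCHING) && defined(WW_MUTEX_USAGE_EXAMPLE)
static int example_lock_many(struct ww_mutex **locks, int num_locks,
			     struct ww_class *ww_class)
{
	struct ww_acquire_ctx ctx;
	int i, ret;

	ww_acquire_init(&ctx, ww_class);	/* assumed helper */
restart:
	ww_acquire_batch_begin(&ctx);
	for (i = 0; i < num_locks; i++) {
		ret = ww_mutex_lock(locks[i], &ctx);
		if (ret) {
			/* Roll back everything taken so far. */
			while (i--)
				ww_mutex_unlock_batched(locks[i]);
			ww_acquire_batch_end(&ctx);
			if (ret == -EDEADLK)
				goto restart;
			goto out;
		}
	}
	ww_acquire_batch_end(&ctx);

	/* ... use the locked resources, then drop them in a batch ... */
	ww_acquire_batch_begin(&ctx);
	for (i = 0; i < num_locks; i++)
		ww_mutex_unlock_batched(locks[i]);
	ww_acquire_batch_end(&ctx);
	ret = 0;
out:
	ww_acquire_fini(&ctx);			/* assumed helper */
	return ret;
}
#endif /* WW_BATCHING && WW_MUTEX_USAGE_EXAMPLE */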