/*
 * Wound/Wait Mutexes: blocking mutual exclusion locks with deadlock avoidance
 *
 * Original mutex implementation started by Ingo Molnar:
 *
 *  Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar
 *
 * Wound/wait implementation:
 *  Copyright (C) 2013 Canonical Ltd.
 *
 * Wound/wait drop-in, actual wound-wait semantics and batching:
 *  Copyright (C) 2016-2018 VMWare Inc.
 */

#include "ww_mutex.h"
#include
#include

#ifndef WW_BUILTIN

#undef EXPORT_SYMBOL_GPL
#define EXPORT_SYMBOL_GPL(_a)
#define MUTEX_FLAGS 0x07

#ifndef CONFIG_DEBUG_MUTEXES
#define debug_ww_mutex_add_waiter(_a, _b, _c)
#define debug_ww_mutex_wake_waiter(_a, _b)
#define debug_ww_mutex_lock_common(_a, _b)
#define mutex_remove_waiter(__lock, __waiter, __task) \
	__list_del((__waiter)->list.prev, (__waiter)->list.next)
#else
static void debug_ww_mutex_add_waiter(struct ww_mutex *ww,
				      struct mutex_waiter *waiter,
				      struct task_struct *task)
{
	SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&ww->my_class->lock));
	task->blocked_on = waiter;
}

static void debug_ww_mutex_wake_waiter(struct ww_mutex *ww,
				       struct mutex_waiter *waiter)
{
	struct mutex *lock = &ww->base;

	SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&ww->my_class->lock));
	DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list));
	DEBUG_LOCKS_WARN_ON(waiter->magic != waiter);
	DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
}

static void debug_ww_mutex_lock_common(struct ww_mutex *ww,
				       struct mutex_waiter *waiter)
{
	memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));
	waiter->magic = waiter;
	INIT_LIST_HEAD(&waiter->list);
}

static void mutex_remove_waiter(struct mutex *lock,
				struct mutex_waiter *waiter,
				struct task_struct *task)
{
	DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
	DEBUG_LOCKS_WARN_ON(waiter->task != task);
	DEBUG_LOCKS_WARN_ON(task->blocked_on != waiter);
	task->blocked_on = NULL;

	list_del_init(&waiter->list);
	waiter->task = NULL;
}
#endif /* CONFIG_DEBUG_MUTEXES */

/**
 * ww_acquire_class_lock - Lock the global class spinlock unless batching
 *
 * @ww: The ww mutex.
 * @ww_ctx: The acquire context.
 *
 * Take the global class spinlock unless we're batching, in which case the
 * global class spinlock was taken at batch begin.
 */
static void ww_acquire_class_lock(struct ww_mutex *ww,
				  struct ww_acquire_ctx *ww_ctx)
{
	if (!ww_ctx || !ww_ctx->batched)
		spin_lock(&ww->my_class->lock);
}

/**
 * ww_acquire_class_unlock - Unlock the global class spinlock unless batching
 *
 * @ww: The ww mutex.
 * @ww_ctx: The acquire context.
 *
 * Release the global class spinlock unless we're batching, in which case the
 * global class spinlock is released at batch end.
 */
static void ww_acquire_class_unlock(struct ww_mutex *ww,
				    struct ww_acquire_ctx *ww_ctx)
{
	if (!ww_ctx || !ww_ctx->batched)
		spin_unlock(&ww->my_class->lock);
}

static bool __mutex_waiter_is_first(struct mutex *lock,
				    struct mutex_waiter *waiter)
{
	return list_first_entry(&lock->wait_list, struct mutex_waiter, list) ==
		waiter;
}

/**
 * __ww_mutex_trylock - Trylock a ww_mutex
 *
 * @ww: The mutex to trylock.
 * @ww_ctx: The acquire_ctx to register as locker, or NULL.
 * @waiter: The waiter if a waiter is trying to lock, or NULL.
 * Return: true if the lock was acquired, false otherwise.
 */
static bool __ww_mutex_trylock(struct ww_mutex *ww,
			       struct ww_acquire_ctx *ww_ctx,
			       struct mutex_waiter *waiter)
{
	struct mutex *lock = &ww->base;

	lockdep_assert_held(&ww->my_class->lock);

	if (atomic_long_read(&lock->owner))
		return false;

	/*
	 * No lock stealing for now. If there are waiters, only the first
	 * waiter is allowed to lock.
	 */
	if (!list_empty(&lock->wait_list) &&
	    !__mutex_waiter_is_first(lock, waiter))
		return false;

	atomic_long_set(&lock->owner, (unsigned long)current);
	ww->ctx = ww_ctx;
	if (ww_ctx)
		ww_ctx->acquired++;

	return true;
}

static bool __ww_ctx_stamp_after(struct ww_acquire_ctx *a,
				 struct ww_acquire_ctx *b)
{
	return a->stamp - b->stamp <= LONG_MAX &&
	       (a->stamp != b->stamp || a > b);
}

static struct task_struct *__owner_task(unsigned long owner)
{
	return (struct task_struct *)(owner & ~MUTEX_FLAGS);
}

/**
 * ww_mutex_waiter_backoff - Whether to back off if younger
 *
 * @us: Our acquire_ctx.
 * @other: The other waiter's or the lock owner's acquire_ctx.
 * Return: Whether to back off for another waiter or the lock owner if we're
 * younger than that waiter or owner.
 */
static bool ww_mutex_waiter_backoff(struct ww_acquire_ctx *us,
				    struct ww_acquire_ctx *other)
{
	return (us->my_class->is_wait_die || us->wounded) &&
		us->acquired > 0 && !other->done_acquire;
}

/**
 * ww_mutex_backoff - Whether to back off
 *
 * @us: Our acquire_ctx.
 * @other: The other waiter's or the lock owner's acquire_ctx.
 * Return: Whether to back off for another waiter or the lock owner.
 */
static bool ww_mutex_backoff(struct ww_acquire_ctx *us,
			     struct ww_acquire_ctx *other)
{
	return other && ww_mutex_waiter_backoff(us, other) &&
		us != other && __ww_ctx_stamp_after(us, other);
}

/**
 * ww_mutex_lock_backoff - Check backoff for the lock owner and all other waiters
 *
 * @us: Our acquire_ctx.
 * @ww: The ww_mutex considered.
 * Return: Whether to back off for any other waiter or the lock owner.
 */
static bool ww_mutex_lock_backoff(struct ww_acquire_ctx *us,
				  struct ww_mutex *ww)
{
	struct mutex_waiter *cur;

	if (!us)
		return false;

	/*
	 * Wounded contexts lazy-preempt before the first wounded lock wait,
	 * so that we have a lock to wait on after backing off.
	 */
	if (us->wounded)
		return true;

	/* Back off for the lock owner? */
	if (ww_mutex_backoff(us, ww->ctx))
		return true;

	/* Back off for other waiters? */
	list_for_each_entry(cur, &ww->base.wait_list, list) {
		if (ww_mutex_backoff(us, cur->ww_ctx))
			return true;
	}

	return false;
}

static int __ww_mutex_add_waiter(struct mutex_waiter *waiter,
				 struct ww_mutex *ww,
				 struct ww_acquire_ctx *ww_ctx)
{
	struct mutex *lock = &ww->base;
	struct mutex_waiter *cur;
	struct list_head *pos;
	bool is_wait_die = ww->my_class->is_wait_die;

	waiter->task = current;
	waiter->ww_ctx = ww_ctx;

	if (!ww_ctx) {
		list_add_tail(&waiter->list, &lock->wait_list);
		return 0;
	}

	/*
	 * Add the waiter before the first waiter with a higher stamp.
	 * Waiters without a context are skipped to avoid starving
	 * them.
	 */
	pos = &lock->wait_list;
	list_for_each_entry_reverse(cur, &lock->wait_list, list) {
		if (!cur->ww_ctx)
			continue;

		/* Do we need to back off early for this older waiter? */
		if (__ww_ctx_stamp_after(ww_ctx, cur->ww_ctx)) {
			if (ww_mutex_waiter_backoff(ww_ctx, cur->ww_ctx))
				return -EDEADLK;

			break;
		}

		pos = &cur->list;

		/*
		 * The other waiter needs to back off for us.
		 * Wake it up so that it gets a chance to do so.
		 */
		if (ww_mutex_waiter_backoff(cur->ww_ctx, ww_ctx)) {
			debug_ww_mutex_wake_waiter(ww, cur);
			wake_up_process(cur->task);
		}
	}

	list_add_tail(&waiter->list, pos);

	/* Need to wound the lock owner? */
	if (!is_wait_die && ww->ctx && __ww_ctx_stamp_after(ww->ctx, ww_ctx) &&
	    ww_ctx->acquired > 0) {
		ww->ctx->wounded = true;

		/*
		 * Wake up the lock owner in case it's sleeping on
		 * another ww_mutex.
		 */
		wake_up_process(__owner_task(atomic_long_read(&lock->owner)));
	}

	return 0;
}

/**
 * ww_mutex_wake_first_waiter - Wake the first waiter, if any.
 *
 * @ww: The ww_mutex on which to wake the first waiter.
 */
static void ww_mutex_wake_first_waiter(struct ww_mutex *ww)
{
	struct mutex_waiter *cur;
	struct mutex *lock = &ww->base;

	if (!list_empty(&lock->wait_list)) {
		cur = list_first_entry(&lock->wait_list, struct mutex_waiter,
				       list);
		debug_ww_mutex_wake_waiter(ww, cur);
		wake_up_process(cur->task);
	}
}

static int __ww_mutex_lock(struct ww_mutex *ww, long state,
			   unsigned int subclass,
			   struct lockdep_map *nest_lock,
			   unsigned long ip,
			   struct ww_acquire_ctx *ww_ctx)
{
	struct mutex_waiter waiter;
	int ret = 0;

	lockdep_assert_held(&ww->my_class->lock);

	if (ww_ctx) {
		/*
		 * If we've backed off when wounded, there are no more
		 * acquired locks, and we can clear the wounded flag.
		 */
		if (ww_ctx->acquired == 0)
			ww_ctx->wounded = false;

		if (ww->ctx == ww_ctx) {
			ret = -EALREADY;
			goto err_early_backoff;
		}
	}

	if (__ww_mutex_trylock(ww, ww_ctx, &waiter))
		goto skip_wait;

	debug_ww_mutex_lock_common(ww, &waiter);
	debug_ww_mutex_add_waiter(ww, &waiter, current);

	lock_contended(&ww->base.dep_map, ip);

	ret = __ww_mutex_add_waiter(&waiter, ww, ww_ctx);
	if (ret)
		goto err_early_backoff;

	set_current_state(state);
	for (;;) {
		if (__ww_mutex_trylock(ww, ww_ctx, &waiter))
			break;

		if (unlikely(signal_pending_state(state, current))) {
			ret = -EINTR;
			goto err;
		}

		if (ww_mutex_lock_backoff(ww_ctx, ww)) {
			ret = -EDEADLK;
			goto err;
		}

		spin_unlock(&ww->my_class->lock);
		schedule();
		spin_lock(&ww->my_class->lock);
		set_current_state(state);
	}
	set_current_state(TASK_RUNNING);
	mutex_remove_waiter(&ww->base, &waiter, current);

skip_wait:
	lock_acquired(&ww->base.dep_map, ip);

	return 0;

err:
	set_current_state(TASK_RUNNING);
	mutex_remove_waiter(&ww->base, &waiter, current);
	ww_mutex_wake_first_waiter(ww);
err_early_backoff:
	lock_release(&ww->base.dep_map, !!ww_ctx, ip);

	return ret;
}

static void __ww_mutex_unlock(struct ww_mutex *ww, unsigned long ip)
{
	struct mutex *lock = &ww->base;

	lockdep_assert_held(&ww->my_class->lock);

	lock_release(&lock->dep_map, !!ww->ctx, ip);
	if (ww->ctx)
		ww->ctx->acquired--;
	ww->ctx = NULL;
	atomic_long_set(&lock->owner, 0);
	ww_mutex_wake_first_waiter(ww);
}
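/*
 * Illustrative sketch (not part of the original drop-in): the caller-side
 * backoff loop that the -EDEADLK return above is designed for. It assumes
 * ww_mutex.h mirrors the stock kernel ww_mutex interface and provides
 * ww_acquire_init(), ww_acquire_done() and ww_acquire_fini(); the guard
 * macro WW_MUTEX_USAGE_EXAMPLE is hypothetical and only keeps the sketch
 * out of the build.
 */
#ifdef WW_MUTEX_USAGE_EXAMPLE
static int example_lock_pair(struct ww_mutex *a, struct ww_mutex *b,
			     struct ww_class *ww_class)
{
	struct ww_acquire_ctx ctx;
	int ret;

	ww_acquire_init(&ctx, ww_class);	/* assumed helper */
retry:
	ret = ww_mutex_lock(a, &ctx);
	if (ret)
		goto out;

	ret = ww_mutex_lock(b, &ctx);
	if (ret == -EDEADLK) {
		/*
		 * We were wounded (wound-wait) or lost against an older
		 * context (wait-die): drop everything we hold and retry
		 * with the same context, keeping our stamp.
		 */
		ww_mutex_unlock(a);
		goto retry;
	}
	if (ret) {
		ww_mutex_unlock(a);
		goto out;
	}

	ww_acquire_done(&ctx);			/* assumed helper */
	/* ... use the resources protected by @a and @b ... */
	ww_mutex_unlock(b);
	ww_mutex_unlock(a);
out:
	ww_acquire_fini(&ctx);			/* assumed helper */
	return ret;
}
#endif /* WW_MUTEX_USAGE_EXAMPLE */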
#ifdef CONFIG_DEBUG_LOCK_ALLOC

static int ww_mutex_deadlock_injection(struct ww_mutex *ww,
				       struct ww_acquire_ctx *ww_ctx)
{
#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
	unsigned tmp;

	if (ww_ctx->deadlock_inject_countdown-- == 0) {
		tmp = ww_ctx->deadlock_inject_interval;
		if (tmp > UINT_MAX / 4)
			tmp = UINT_MAX;
		else
			tmp = tmp * 2 + tmp + tmp / 2;

		ww_ctx->deadlock_inject_interval = tmp;
		ww_ctx->deadlock_inject_countdown = tmp;

		ww_mutex_unlock(ww);

		return -EDEADLK;
	}
#endif /* CONFIG_DEBUG_WW_MUTEX_SLOWPATH */

	return 0;
}

/**
 * ww_class_lock_annotate - lockdep annotation at locking time.
 *
 * @ww: The ww_mutex.
 * @ww_ctx: The acquire ctx, or NULL.
 * @ip: The caller's instruction pointer.
 */
static void ww_class_lock_annotate(struct ww_mutex *ww,
				   struct ww_acquire_ctx *ww_ctx,
				   unsigned long ip)
{
	if (!ww_ctx || !ww_ctx->batched) {
		/* Annotate the wait lock before the spinlock. */
		lock_acquire(&ww->base.dep_map, 0, 0, 0, 1,
			     ww_ctx ? &ww_ctx->dep_map : NULL, ip);
	} else {
		/*
		 * OK, we'd like to annotate ww trylocks under the class
		 * spinlock, but since each trylock becomes a lockdep level,
		 * we'd quickly run out of levels. And we can't annotate
		 * nested trylocks, since apparently lockdep can't cope
		 * with that. So cheat lockdep and fake a class spinlock
		 * release, then annotate a waiting ww lock...
		 */
		lock_release(&ww->my_class->lock.dep_map, 0, ip);
		lock_acquire(&ww->base.dep_map, 0, 0, 0, 1,
			     &ww_ctx->dep_map, ip);
		lock_acquire(&ww->my_class->lock.dep_map, 0, 1, 0, 1,
			     NULL, ip);
	}
}

int ww_mutex_lock(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
{
	int ret;

	ww_class_lock_annotate(ww, ww_ctx, _RET_IP_);
	ww_acquire_class_lock(ww, ww_ctx);
	ret = __ww_mutex_lock(ww, TASK_UNINTERRUPTIBLE, 0,
			      ww_ctx ? &ww_ctx->dep_map : NULL, _RET_IP_,
			      ww_ctx);
	ww_acquire_class_unlock(ww, ww_ctx);
	if (!ret && ww_ctx && ww_ctx->acquired > 1)
		return ww_mutex_deadlock_injection(ww, ww_ctx);

	return ret;
}
EXPORT_SYMBOL_GPL(ww_mutex_lock);

int ww_mutex_lock_interruptible(struct ww_mutex *ww,
				struct ww_acquire_ctx *ww_ctx)
{
	int ret;

	ww_class_lock_annotate(ww, ww_ctx, _RET_IP_);
	ww_acquire_class_lock(ww, ww_ctx);
	ret = __ww_mutex_lock(ww, TASK_INTERRUPTIBLE, 0,
			      ww_ctx ? &ww_ctx->dep_map : NULL, _RET_IP_,
			      ww_ctx);
	ww_acquire_class_unlock(ww, ww_ctx);
	if (!ret && ww_ctx && ww_ctx->acquired > 1)
		return ww_mutex_deadlock_injection(ww, ww_ctx);

	return ret;
}
EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible);

#else /* CONFIG_DEBUG_LOCK_ALLOC */

int ww_mutex_lock(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
{
	int ret;

	ww_acquire_class_lock(ww, ww_ctx);
	ret = __ww_mutex_lock(ww, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_,
			      ww_ctx);
	ww_acquire_class_unlock(ww, ww_ctx);

	return ret;
}
EXPORT_SYMBOL_GPL(ww_mutex_lock);

int ww_mutex_lock_interruptible(struct ww_mutex *ww,
				struct ww_acquire_ctx *ww_ctx)
{
	int ret;

	ww_acquire_class_lock(ww, ww_ctx);
	ret = __ww_mutex_lock(ww, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_,
			      ww_ctx);
	ww_acquire_class_unlock(ww, ww_ctx);

	return ret;
}
EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible);

#endif /* CONFIG_DEBUG_LOCK_ALLOC */

/**
 * ww_mutex_unlock_batched - Replacement to be used for batched unlock
 *
 * @ww: The mutex to unlock.
 */
void ww_mutex_unlock_batched(struct ww_mutex *ww)
{
	__ww_mutex_unlock(ww, _RET_IP_);
}
EXPORT_SYMBOL_GPL(ww_mutex_unlock_batched);

void ww_mutex_unlock(struct ww_mutex *ww)
{
	struct ww_acquire_ctx *ww_ctx = ww->ctx;

	ww_acquire_class_lock(ww, ww_ctx);
	__ww_mutex_unlock(ww, _RET_IP_);
	ww_acquire_class_unlock(ww, ww_ctx);
}
EXPORT_SYMBOL_GPL(ww_mutex_unlock);

#ifdef WW_BATCHING
void ww_acquire_batch_begin(struct ww_acquire_ctx *ww_ctx)
{
#ifdef CONFIG_DEBUG_MUTEXES
	WARN_ON(ww_ctx->batched);
#endif
	spin_lock(&ww_ctx->my_class->lock);
	ww_ctx->batched = true;
}
EXPORT_SYMBOL_GPL(ww_acquire_batch_begin);

void ww_acquire_batch_end(struct ww_acquire_ctx *ww_ctx)
{
#ifdef CONFIG_DEBUG_MUTEXES
	WARN_ON(!ww_ctx->batched);
#endif
	ww_ctx->batched = false;
	spin_unlock(&ww_ctx->my_class->lock);
}
EXPORT_SYMBOL_GPL(ww_acquire_batch_end);
#endif /* WW_BATCHING */

int ww_mutex_trylock(struct ww_mutex *ww)
{
	bool locked;

	spin_lock(&ww->my_class->lock);
	locked = __ww_mutex_trylock(ww, NULL, NULL);
	spin_unlock(&ww->my_class->lock);

	if (locked)
		lock_acquire(&ww->base.dep_map, 0, 1, 0, 1, NULL, _RET_IP_);

	return locked;
}
EXPORT_SYMBOL_GPL(ww_mutex_trylock);

#endif /* WW_BUILTIN */
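/*
 * Illustrative sketch (not part of the original drop-in): batched
 * acquisition. ww_acquire_batch_begin() takes the class spinlock once, so
 * the ww_mutex_lock() / ww_mutex_unlock_batched() calls inside the batch
 * section skip the per-lock class spinlock round trips. As above,
 * ww_acquire_init() / ww_acquire_fini() are assumed to come from
 * ww_mutex.h, and the WW_MUTEX_USAGE_EXAMPLE guard is hypothetical.
 */
#if defined(WW_BATCHING) && defined(WW_MUTEX_USAGE_EXAMPLE)
static int example_lock_many(struct ww_mutex **locks, int num_locks,
			     struct ww_class *ww_class)
{
	struct ww_acquire_ctx ctx;
	int i, ret;

	ww_acquire_init(&ctx, ww_class);	/* assumed helper */
restart:
	ww_acquire_batch_begin(&ctx);
	for (i = 0; i < num_locks; i++) {
		ret = ww_mutex_lock(locks[i], &ctx);
		if (ret) {
			/* Roll back everything taken so far. */
			while (i--)
				ww_mutex_unlock_batched(locks[i]);
			ww_acquire_batch_end(&ctx);
			if (ret == -EDEADLK)
				goto restart;
			goto out;
		}
	}
	ww_acquire_batch_end(&ctx);

	/* ... use the locked resources, then drop them in a batch ... */
	ww_acquire_batch_begin(&ctx);
	for (i = 0; i < num_locks; i++)
		ww_mutex_unlock_batched(locks[i]);
	ww_acquire_batch_end(&ctx);
	ret = 0;
out:
	ww_acquire_fini(&ctx);			/* assumed helper */
	return ret;
}
#endif /* WW_BATCHING && WW_MUTEX_USAGE_EXAMPLE */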