Diffstat (limited to 'fs/xfs/xfs_log_recover.c')
-rw-r--r--  fs/xfs/xfs_log_recover.c  |  50
1 file changed, 32 insertions(+), 18 deletions(-)
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 13d1d3e95b88..508319039dce 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -97,6 +97,8 @@ xlog_alloc_buffer(
struct xlog *log,
int nbblks)
{
+ int align_mask = xfs_buftarg_dma_alignment(log->l_targ);
+
/*
* Pass log block 0 since we don't have an addr yet, buffer will be
* verified on read.
@@ -125,7 +127,7 @@ xlog_alloc_buffer(
if (nbblks > 1 && log->l_sectBBsize > 1)
nbblks += log->l_sectBBsize;
nbblks = round_up(nbblks, log->l_sectBBsize);
- return kmem_alloc_large(BBTOB(nbblks), KM_MAYFAIL);
+ return kmem_alloc_io(BBTOB(nbblks), align_mask, KM_MAYFAIL);
}
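
Note on the hunk above: the new align_mask comes from the buffer target's DMA alignment, and kmem_alloc_io() is expected to return memory whose address satisfies (addr & align_mask) == 0, so the log buffer can be handed straight to the block layer for I/O. A minimal user-space sketch of that allocate/check/fall-back pattern (illustrative only, not the kernel implementation):

#include <stdint.h>
#include <stdlib.h>

/*
 * Sketch of an I/O-safe allocation: return 'size' bytes whose address
 * honours align_mask (e.g. 511 for 512-byte DMA alignment).
 */
static void *alloc_io_aligned(size_t size, uintptr_t align_mask)
{
	void *ptr = malloc(size);

	/* Fast path: the ordinary allocation already satisfies the mask. */
	if (ptr && !((uintptr_t)ptr & align_mask))
		return ptr;

	/* Slow path: retry with an explicitly aligned allocation. */
	free(ptr);
	if (posix_memalign(&ptr, align_mask + 1, size))
		return NULL;
	return ptr;
}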
/*
@@ -1960,7 +1962,7 @@ xlog_recover_buffer_pass1(
}
}
- bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), KM_SLEEP);
+ bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), 0);
bcp->bc_blkno = buf_f->blf_blkno;
bcp->bc_len = buf_f->blf_len;
bcp->bc_refcount = 1;
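
The remaining allocation hunks drop the explicit KM_SLEEP flag in favour of 0: sleeping is the default behaviour, so only modifiers such as KM_NOFS, KM_MAYFAIL or KM_ZERO still need to be spelled out. A rough, illustrative sketch of how such flags typically map onto gfp flags (hypothetical helper name, not the exact XFS code):

/* Hypothetical mapping of XFS-style allocation flags to gfp_t. */
static inline gfp_t km_flags_to_gfp(unsigned int km_flags)
{
	gfp_t gfp = GFP_KERNEL;			/* flags == 0: may sleep */

	if (km_flags & KM_NOFS)			/* avoid fs reclaim recursion */
		gfp = GFP_NOFS;
	if (km_flags & KM_MAYFAIL)		/* fail rather than retry forever */
		gfp |= __GFP_RETRY_MAYFAIL;
	if (km_flags & KM_ZERO)			/* return zeroed memory */
		gfp |= __GFP_ZERO;
	return gfp;
}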
@@ -2930,7 +2932,7 @@ xlog_recover_inode_pass2(
if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
in_f = item->ri_buf[0].i_addr;
} else {
- in_f = kmem_alloc(sizeof(struct xfs_inode_log_format), KM_SLEEP);
+ in_f = kmem_alloc(sizeof(struct xfs_inode_log_format), 0);
need_free = 1;
error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
if (error)
@@ -4161,7 +4163,7 @@ xlog_recover_add_item(
{
xlog_recover_item_t *item;
- item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP);
+ item = kmem_zalloc(sizeof(xlog_recover_item_t), 0);
INIT_LIST_HEAD(&item->ri_list);
list_add_tail(&item->ri_list, head);
}
@@ -4201,7 +4203,7 @@ xlog_recover_add_to_cont_trans(
old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
old_len = item->ri_buf[item->ri_cnt-1].i_len;
- ptr = kmem_realloc(old_ptr, len + old_len, KM_SLEEP);
+ ptr = kmem_realloc(old_ptr, len + old_len, 0);
memcpy(&ptr[old_len], dp, len);
item->ri_buf[item->ri_cnt-1].i_len += len;
item->ri_buf[item->ri_cnt-1].i_addr = ptr;
@@ -4261,7 +4263,7 @@ xlog_recover_add_to_trans(
return 0;
}
- ptr = kmem_alloc(len, KM_SLEEP);
+ ptr = kmem_alloc(len, 0);
memcpy(ptr, dp, len);
in_f = (struct xfs_inode_log_format *)ptr;
@@ -4289,7 +4291,7 @@ xlog_recover_add_to_trans(
item->ri_total = in_f->ilf_size;
item->ri_buf =
kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t),
- KM_SLEEP);
+ 0);
}
ASSERT(item->ri_total > item->ri_cnt);
/* Description region is ri_buf[0] */
@@ -4423,7 +4425,7 @@ xlog_recover_ophdr_to_trans(
* This is a new transaction so allocate a new recovery container to
* hold the recovery ops that will follow.
*/
- trans = kmem_zalloc(sizeof(struct xlog_recover), KM_SLEEP);
+ trans = kmem_zalloc(sizeof(struct xlog_recover), 0);
trans->r_log_tid = tid;
trans->r_lsn = be64_to_cpu(rhead->h_lsn);
INIT_LIST_HEAD(&trans->r_itemq);
@@ -5022,16 +5024,27 @@ xlog_recover_process_one_iunlink(
}
/*
- * xlog_iunlink_recover
+ * Recover AGI unlinked lists
+ *
+ * This is called during recovery to process any inodes which we unlinked but
+ * not freed when the system crashed. These inodes will be on the lists in the
+ * AGI blocks. What we do here is scan all the AGIs and fully truncate and free
+ * any inodes found on the lists. Each inode is removed from the lists when it
+ * has been fully truncated and is freed. The freeing of the inode and its
+ * removal from the list must be atomic.
+ *
+ * If everything we touch in the agi processing loop is already in memory, this
+ * loop can hold the cpu for a long time. It runs without lock contention,
+ * memory allocation contention, the need to wait for IO, etc, and so will run
+ * until we either run out of inodes to process, run low on memory, or run out
+ * of log space.
*
- * This is called during recovery to process any inodes which
- * we unlinked but not freed when the system crashed. These
- * inodes will be on the lists in the AGI blocks. What we do
- * here is scan all the AGIs and fully truncate and free any
- * inodes found on the lists. Each inode is removed from the
- * lists when it has been fully truncated and is freed. The
- * freeing of the inode and its removal from the list must be
- * atomic.
+ * This behaviour is bad for latency on single CPU and non-preemptible kernels,
+ * and can prevent other filesystem work (such as CIL pushes) from running. This
+ * can lead to deadlocks if the recovery process runs out of log reservation
+ * space. Hence we need to yield the CPU when there is other kernel work
+ * scheduled on this CPU to ensure other scheduled work can run without undue
+ * latency.
*/
STATIC void
xlog_recover_process_iunlinks(
@@ -5078,6 +5091,7 @@ xlog_recover_process_iunlinks(
while (agino != NULLAGINO) {
agino = xlog_recover_process_one_iunlink(mp,
agno, agino, bucket);
+ cond_resched();
}
}
xfs_buf_rele(agibp);
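
The added cond_resched() is cheap on the fast path: it only actually schedules when another task is waiting for this CPU, which is exactly the "yield when there is other kernel work scheduled" behaviour the comment asks for. Conceptually (a simplified model of the idea, not the scheduler implementation):

/* Conceptual model of cond_resched(): yield only when someone is waiting. */
static inline void cond_resched_sketch(void)
{
	if (need_resched())	/* another task wants this CPU */
		schedule();	/* voluntary preemption point */
}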
@@ -5527,7 +5541,7 @@ xlog_do_log_recovery(
*/
log->l_buf_cancel_table = kmem_zalloc(XLOG_BC_TABLE_SIZE *
sizeof(struct list_head),
- KM_SLEEP);
+ 0);
for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
INIT_LIST_HEAD(&log->l_buf_cancel_table[i]);
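
For context on the last hunk: the buffer-cancel table is a small hash of list heads, indexed by block number, that pass 2 consults to skip replay of buffers whose cancellation was logged. A sketch of the lookup pattern (hypothetical helper name; fields match the xfs_buf_cancel entries set up earlier in this file):

static struct xfs_buf_cancel *
find_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len)
{
	struct list_head	*bucket;
	struct xfs_buf_cancel	*bcp;

	/* Hash the block number into one of XLOG_BC_TABLE_SIZE buckets. */
	bucket = &log->l_buf_cancel_table[(uint64_t)blkno % XLOG_BC_TABLE_SIZE];

	list_for_each_entry(bcp, bucket, bc_list) {
		if (bcp->bc_blkno == blkno && bcp->bc_len == len)
			return bcp;	/* replay of this buffer is cancelled */
	}
	return NULL;
}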