summaryrefslogtreecommitdiff
path: root/fs/ceph/mds_client.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph/mds_client.c')
-rw-r--r--fs/ceph/mds_client.c110
1 files changed, 82 insertions, 28 deletions
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 920e9f048bd8..a8a8f84f3bbf 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -639,7 +639,6 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
s->s_renew_seq = 0;
INIT_LIST_HEAD(&s->s_caps);
s->s_nr_caps = 0;
- s->s_trim_caps = 0;
refcount_set(&s->s_ref, 1);
INIT_LIST_HEAD(&s->s_waiting);
INIT_LIST_HEAD(&s->s_unsafe);
@@ -1270,6 +1269,7 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc,
{
struct ceph_mds_request *req;
struct rb_node *p;
+ struct ceph_inode_info *ci;
dout("cleanup_session_requests mds%d\n", session->s_mds);
mutex_lock(&mdsc->mutex);
@@ -1278,6 +1278,16 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc,
struct ceph_mds_request, r_unsafe_item);
pr_warn_ratelimited(" dropping unsafe request %llu\n",
req->r_tid);
+ if (req->r_target_inode) {
+ /* dropping unsafe change of inode's attributes */
+ ci = ceph_inode(req->r_target_inode);
+ errseq_set(&ci->i_meta_err, -EIO);
+ }
+ if (req->r_unsafe_dir) {
+ /* dropping unsafe directory operation */
+ ci = ceph_inode(req->r_unsafe_dir);
+ errseq_set(&ci->i_meta_err, -EIO);
+ }
__unregister_request(mdsc, req);
}
/* zero r_attempts, so kick_requests() will re-send requests */
@@ -1370,7 +1380,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg;
struct ceph_inode_info *ci = ceph_inode(inode);
LIST_HEAD(to_remove);
- bool drop = false;
+ bool dirty_dropped = false;
bool invalidate = false;
dout("removing cap %p, ci is %p, inode is %p\n",
@@ -1383,9 +1393,12 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
struct ceph_cap_flush *cf;
struct ceph_mds_client *mdsc = fsc->mdsc;
- if (ci->i_wrbuffer_ref > 0 &&
- READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
- invalidate = true;
+ if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+ if (inode->i_data.nrpages > 0)
+ invalidate = true;
+ if (ci->i_wrbuffer_ref > 0)
+ mapping_set_error(&inode->i_data, -EIO);
+ }
while (!list_empty(&ci->i_cap_flush_list)) {
cf = list_first_entry(&ci->i_cap_flush_list,
@@ -1405,7 +1418,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
inode, ceph_ino(inode));
ci->i_dirty_caps = 0;
list_del_init(&ci->i_dirty_item);
- drop = true;
+ dirty_dropped = true;
}
if (!list_empty(&ci->i_flushing_item)) {
pr_warn_ratelimited(
@@ -1415,10 +1428,22 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
ci->i_flushing_caps = 0;
list_del_init(&ci->i_flushing_item);
mdsc->num_cap_flushing--;
- drop = true;
+ dirty_dropped = true;
}
spin_unlock(&mdsc->cap_dirty_lock);
+ if (dirty_dropped) {
+ errseq_set(&ci->i_meta_err, -EIO);
+
+ if (ci->i_wrbuffer_ref_head == 0 &&
+ ci->i_wr_ref == 0 &&
+ ci->i_dirty_caps == 0 &&
+ ci->i_flushing_caps == 0) {
+ ceph_put_snap_context(ci->i_head_snapc);
+ ci->i_head_snapc = NULL;
+ }
+ }
+
if (atomic_read(&ci->i_filelock_ref) > 0) {
/* make further file lock syscall return -EIO */
ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK;
@@ -1430,15 +1455,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove);
ci->i_prealloc_cap_flush = NULL;
}
-
- if (drop &&
- ci->i_wrbuffer_ref_head == 0 &&
- ci->i_wr_ref == 0 &&
- ci->i_dirty_caps == 0 &&
- ci->i_flushing_caps == 0) {
- ceph_put_snap_context(ci->i_head_snapc);
- ci->i_head_snapc = NULL;
- }
}
spin_unlock(&ci->i_ceph_lock);
while (!list_empty(&to_remove)) {
@@ -1452,7 +1468,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
wake_up_all(&ci->i_cap_wq);
if (invalidate)
ceph_queue_invalidate(inode);
- if (drop)
+ if (dirty_dropped)
iput(inode);
return 0;
}
@@ -1705,11 +1721,11 @@ out:
*/
static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
{
- struct ceph_mds_session *session = arg;
+ int *remaining = arg;
struct ceph_inode_info *ci = ceph_inode(inode);
int used, wanted, oissued, mine;
- if (session->s_trim_caps <= 0)
+ if (*remaining <= 0)
return -1;
spin_lock(&ci->i_ceph_lock);
@@ -1746,7 +1762,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
if (oissued) {
/* we aren't the only cap.. just remove us */
__ceph_remove_cap(cap, true);
- session->s_trim_caps--;
+ (*remaining)--;
} else {
struct dentry *dentry;
/* try dropping referring dentries */
@@ -1758,7 +1774,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
d_prune_aliases(inode);
count = atomic_read(&inode->i_count);
if (count == 1)
- session->s_trim_caps--;
+ (*remaining)--;
dout("trim_caps_cb %p cap %p pruned, count now %d\n",
inode, cap, count);
} else {
@@ -1784,12 +1800,12 @@ int ceph_trim_caps(struct ceph_mds_client *mdsc,
dout("trim_caps mds%d start: %d / %d, trim %d\n",
session->s_mds, session->s_nr_caps, max_caps, trim_caps);
if (trim_caps > 0) {
- session->s_trim_caps = trim_caps;
- ceph_iterate_session_caps(session, trim_caps_cb, session);
+ int remaining = trim_caps;
+
+ ceph_iterate_session_caps(session, trim_caps_cb, &remaining);
dout("trim_caps mds%d done: %d / %d, trimmed %d\n",
session->s_mds, session->s_nr_caps, max_caps,
- trim_caps - session->s_trim_caps);
- session->s_trim_caps = 0;
+ trim_caps - remaining);
}
ceph_flush_cap_releases(mdsc, session);
@@ -3015,18 +3031,23 @@ bad:
pr_err("mdsc_handle_forward decode error err=%d\n", err);
}
-static int __decode_and_drop_session_metadata(void **p, void *end)
+static int __decode_session_metadata(void **p, void *end,
+ bool *blacklisted)
{
/* map<string,string> */
u32 n;
+ bool err_str;
ceph_decode_32_safe(p, end, n, bad);
while (n-- > 0) {
u32 len;
ceph_decode_32_safe(p, end, len, bad);
ceph_decode_need(p, end, len, bad);
+ err_str = !strncmp(*p, "error_string", len);
*p += len;
ceph_decode_32_safe(p, end, len, bad);
ceph_decode_need(p, end, len, bad);
+ if (err_str && strnstr(*p, "blacklisted", len))
+ *blacklisted = true;
*p += len;
}
return 0;
@@ -3050,6 +3071,7 @@ static void handle_session(struct ceph_mds_session *session,
u64 seq;
unsigned long features = 0;
int wake = 0;
+ bool blacklisted = false;
/* decode */
ceph_decode_need(&p, end, sizeof(*h), bad);
@@ -3062,7 +3084,7 @@ static void handle_session(struct ceph_mds_session *session,
if (msg_version >= 3) {
u32 len;
/* version >= 2, metadata */
- if (__decode_and_drop_session_metadata(&p, end) < 0)
+ if (__decode_session_metadata(&p, end, &blacklisted) < 0)
goto bad;
/* version >= 3, feature bits */
ceph_decode_32_safe(&p, end, len, bad);
@@ -3149,6 +3171,8 @@ static void handle_session(struct ceph_mds_session *session,
session->s_state = CEPH_MDS_SESSION_REJECTED;
cleanup_session_requests(mdsc, session);
remove_session_caps(session);
+ if (blacklisted)
+ mdsc->fsc->blacklisted = true;
wake = 2; /* for good measure */
break;
@@ -3998,7 +4022,27 @@ static void lock_unlock_sessions(struct ceph_mds_client *mdsc)
mutex_unlock(&mdsc->mutex);
}
+static void maybe_recover_session(struct ceph_mds_client *mdsc)
+{
+ struct ceph_fs_client *fsc = mdsc->fsc;
+
+ if (!ceph_test_mount_opt(fsc, CLEANRECOVER))
+ return;
+
+ if (READ_ONCE(fsc->mount_state) != CEPH_MOUNT_MOUNTED)
+ return;
+
+ if (!READ_ONCE(fsc->blacklisted))
+ return;
+
+ if (fsc->last_auto_reconnect &&
+ time_before(jiffies, fsc->last_auto_reconnect + HZ * 60 * 30))
+ return;
+ pr_info("auto reconnect after blacklisted\n");
+ fsc->last_auto_reconnect = jiffies;
+ ceph_force_reconnect(fsc->sb);
+}
/*
* delayed work -- periodically trim expired leases, renew caps with mds
@@ -4044,7 +4088,9 @@ static void delayed_work(struct work_struct *work)
pr_info("mds%d hung\n", s->s_mds);
}
}
- if (s->s_state < CEPH_MDS_SESSION_OPEN) {
+ if (s->s_state == CEPH_MDS_SESSION_NEW ||
+ s->s_state == CEPH_MDS_SESSION_RESTARTING ||
+ s->s_state == CEPH_MDS_SESSION_REJECTED) {
/* this mds is failed or recovering, just wait */
ceph_put_mds_session(s);
continue;
@@ -4072,6 +4118,8 @@ static void delayed_work(struct work_struct *work)
ceph_trim_snapid_map(mdsc);
+ maybe_recover_session(mdsc);
+
schedule_delayed(mdsc);
}
@@ -4355,7 +4403,12 @@ void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc)
session = __ceph_lookup_mds_session(mdsc, mds);
if (!session)
continue;
+
+ if (session->s_state == CEPH_MDS_SESSION_REJECTED)
+ __unregister_session(mdsc, session);
+ __wake_requests(mdsc, &session->s_waiting);
mutex_unlock(&mdsc->mutex);
+
mutex_lock(&session->s_mutex);
__close_session(mdsc, session);
if (session->s_state == CEPH_MDS_SESSION_CLOSING) {
@@ -4364,6 +4417,7 @@ void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc)
}
mutex_unlock(&session->s_mutex);
ceph_put_mds_session(session);
+
mutex_lock(&mdsc->mutex);
kick_requests(mdsc, mds);
}