From 5966121241b10a32396d770a0b39a41441511a8c Mon Sep 17 00:00:00 2001 From: "tsutomu.owa@toshiba.co.jp" Date: Tue, 12 Sep 2017 08:56:08 +0000 Subject: [PATCH] DLM: retry rcom when dlm_wait_function is timed out. If a node sends a DLM_RCOM_STATUS command and an error occurs on the receiving side, the DLM_RCOM_STATUS_REPLY response may not be returned. We retransmitted the DLM_RCOM_STATUS command so that we do not wait for an infinite response. Signed-off-by: Tadashi Miyauchi Signed-off-by: Tsutomu Owa Signed-off-by: David Teigland --- fs/dlm/rcom.c | 6 ++++++ fs/dlm/recover.c | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c index f3f5e72a29ba..4ff061de927e 100644 --- a/fs/dlm/rcom.c +++ b/fs/dlm/rcom.c @@ -155,6 +155,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags) goto out; } +retry: error = create_rcom(ls, nodeid, DLM_RCOM_STATUS, sizeof(struct rcom_status), &rc, &mh); if (error) @@ -169,6 +170,8 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags) error = dlm_wait_function(ls, &rcom_response); disallow_sync_reply(ls); + if (error == -ETIMEDOUT) + goto retry; if (error) goto out; @@ -276,6 +279,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len) ls->ls_recover_nodeid = nodeid; +retry: error = create_rcom(ls, nodeid, DLM_RCOM_NAMES, last_len, &rc, &mh); if (error) goto out; @@ -288,6 +292,8 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len) error = dlm_wait_function(ls, &rcom_response); disallow_sync_reply(ls); + if (error == -ETIMEDOUT) + goto retry; out: return error; } diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index eaea789bf97d..ce2aa54ca2e2 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c @@ -52,6 +52,10 @@ int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls)) dlm_config.ci_recover_timer * HZ); if (rv) break; + if (test_bit(LSFL_RCOM_WAIT, &ls->ls_flags)) { + log_debug(ls, "dlm_wait_function timed out"); + return -ETIMEDOUT; + } } if (dlm_recovery_stopped(ls)) {