12522fe45SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
2e7fd4179SDavid Teigland /******************************************************************************
3e7fd4179SDavid Teigland *******************************************************************************
4e7fd4179SDavid Teigland **
5e7fd4179SDavid Teigland ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
660f98d18SDavid Teigland ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
7e7fd4179SDavid Teigland **
8e7fd4179SDavid Teigland **
9e7fd4179SDavid Teigland *******************************************************************************
10e7fd4179SDavid Teigland ******************************************************************************/
11e7fd4179SDavid Teigland
12e7fd4179SDavid Teigland #include "dlm_internal.h"
13e7fd4179SDavid Teigland #include "lockspace.h"
14e7fd4179SDavid Teigland #include "member.h"
15e7fd4179SDavid Teigland #include "dir.h"
16e7fd4179SDavid Teigland #include "ast.h"
17e7fd4179SDavid Teigland #include "recover.h"
18e7fd4179SDavid Teigland #include "lowcomms.h"
19e7fd4179SDavid Teigland #include "lock.h"
20e7fd4179SDavid Teigland #include "requestqueue.h"
21e7fd4179SDavid Teigland #include "recoverd.h"
22e7fd4179SDavid Teigland
23e7fd4179SDavid Teigland
24e7fd4179SDavid Teigland /* If the start for which we're re-enabling locking (seq) has been superseded
25c36258b5SDavid Teigland by a newer stop (ls_recover_seq), we need to leave locking disabled.
26c36258b5SDavid Teigland
27c36258b5SDavid Teigland We suspend dlm_recv threads here to avoid the race where dlm_recv a) sees
28c36258b5SDavid Teigland locking stopped and b) adds a message to the requestqueue, but dlm_recoverd
29c36258b5SDavid Teigland enables locking and clears the requestqueue between a and b. */
30e7fd4179SDavid Teigland
/*
 * Switch locking back on for the recovery identified by @seq.
 *
 * Returns 0 when locking was enabled, or -EINTR when ls->ls_recover_seq no
 * longer equals @seq, in which case no lockspace state is modified.
 */
static int enable_locking(struct dlm_ls *ls, uint64_t seq)
{
	int rc;

	/* ls_recv_active is write-held across the whole check-and-enable step */
	down_write(&ls->ls_recv_active);
	spin_lock(&ls->ls_recover_lock);

	if (ls->ls_recover_seq != seq) {
		/* sequence changed underneath us: leave locking disabled */
		rc = -EINTR;
	} else {
		set_bit(LSFL_RUNNING, &ls->ls_flags);
		/* unblocks processes waiting to enter the dlm */
		up_write(&ls->ls_in_recovery);
		clear_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
		rc = 0;
	}

	spin_unlock(&ls->ls_recover_lock);
	up_write(&ls->ls_recv_active);

	return rc;
}
50e7fd4179SDavid Teigland
ls_recover(struct dlm_ls * ls,struct dlm_recover * rv)51e7fd4179SDavid Teigland static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
52e7fd4179SDavid Teigland {
53e7fd4179SDavid Teigland unsigned long start;
54e7fd4179SDavid Teigland int error, neg = 0;
55e7fd4179SDavid Teigland
56075f0177SDavid Teigland log_rinfo(ls, "dlm_recover %llu", (unsigned long long)rv->seq);
57e7fd4179SDavid Teigland
5890135925SDavid Teigland mutex_lock(&ls->ls_recoverd_active);
59e7fd4179SDavid Teigland
6023e8e1aaSDavid Teigland dlm_callback_suspend(ls);
61e7fd4179SDavid Teigland
62c04fecb4SDavid Teigland dlm_clear_toss(ls);
6385f0379aSDavid Teigland
6485f0379aSDavid Teigland /*
65e7fd4179SDavid Teigland * This list of root rsb's will be the basis of most of the recovery
66e7fd4179SDavid Teigland * routines.
67e7fd4179SDavid Teigland */
68e7fd4179SDavid Teigland
69e7fd4179SDavid Teigland dlm_create_root_list(ls);
70e7fd4179SDavid Teigland
71e7fd4179SDavid Teigland /*
72e7fd4179SDavid Teigland * Add or remove nodes from the lockspace's ls_nodes list.
73ca8031d9SAlexander Aring *
74ca8031d9SAlexander Aring * Due to the fact that we must report all membership changes to lsops
75ca8031d9SAlexander Aring * or midcomms layer, it is not permitted to abort ls_recover() until
76ca8031d9SAlexander Aring * this is done.
77e7fd4179SDavid Teigland */
78e7fd4179SDavid Teigland
79e7fd4179SDavid Teigland error = dlm_recover_members(ls, rv, &neg);
80e7fd4179SDavid Teigland if (error) {
81075f0177SDavid Teigland log_rinfo(ls, "dlm_recover_members error %d", error);
82e7fd4179SDavid Teigland goto fail;
83e7fd4179SDavid Teigland }
84f95a34c6SDavid Teigland
85c04fecb4SDavid Teigland dlm_recover_dir_nodeid(ls);
86c04fecb4SDavid Teigland
87c04fecb4SDavid Teigland ls->ls_recover_dir_sent_res = 0;
88c04fecb4SDavid Teigland ls->ls_recover_dir_sent_msg = 0;
894875647aSDavid Teigland ls->ls_recover_locks_in = 0;
904875647aSDavid Teigland
91f95a34c6SDavid Teigland dlm_set_recover_status(ls, DLM_RS_NODES);
92f95a34c6SDavid Teigland
93*c4f4e135SAlexander Aring error = dlm_recover_members_wait(ls, rv->seq);
94f95a34c6SDavid Teigland if (error) {
95075f0177SDavid Teigland log_rinfo(ls, "dlm_recover_members_wait error %d", error);
96f95a34c6SDavid Teigland goto fail;
97f95a34c6SDavid Teigland }
98f95a34c6SDavid Teigland
99e7fd4179SDavid Teigland start = jiffies;
100e7fd4179SDavid Teigland
101e7fd4179SDavid Teigland /*
102e7fd4179SDavid Teigland * Rebuild our own share of the directory by collecting from all other
103e7fd4179SDavid Teigland * nodes their master rsb names that hash to us.
104e7fd4179SDavid Teigland */
105e7fd4179SDavid Teigland
106*c4f4e135SAlexander Aring error = dlm_recover_directory(ls, rv->seq);
107e7fd4179SDavid Teigland if (error) {
108075f0177SDavid Teigland log_rinfo(ls, "dlm_recover_directory error %d", error);
109e7fd4179SDavid Teigland goto fail;
110e7fd4179SDavid Teigland }
111e7fd4179SDavid Teigland
112f95a34c6SDavid Teigland dlm_set_recover_status(ls, DLM_RS_DIR);
113e7fd4179SDavid Teigland
114*c4f4e135SAlexander Aring error = dlm_recover_directory_wait(ls, rv->seq);
115e7fd4179SDavid Teigland if (error) {
116075f0177SDavid Teigland log_rinfo(ls, "dlm_recover_directory_wait error %d", error);
117e7fd4179SDavid Teigland goto fail;
118e7fd4179SDavid Teigland }
119e7fd4179SDavid Teigland
120075f0177SDavid Teigland log_rinfo(ls, "dlm_recover_directory %u out %u messages",
121c04fecb4SDavid Teigland ls->ls_recover_dir_sent_res, ls->ls_recover_dir_sent_msg);
122c04fecb4SDavid Teigland
123e7fd4179SDavid Teigland /*
124e7fd4179SDavid Teigland * We may have outstanding operations that are waiting for a reply from
125e7fd4179SDavid Teigland * a failed node. Mark these to be resent after recovery. Unlock and
126e7fd4179SDavid Teigland * cancel ops can just be completed.
127e7fd4179SDavid Teigland */
128e7fd4179SDavid Teigland
129e7fd4179SDavid Teigland dlm_recover_waiters_pre(ls);
130e7fd4179SDavid Teigland
131e10249b1SAlexander Aring if (dlm_recovery_stopped(ls)) {
132aee742c9SAlexander Aring error = -EINTR;
133e7fd4179SDavid Teigland goto fail;
134aee742c9SAlexander Aring }
135e7fd4179SDavid Teigland
136e7fd4179SDavid Teigland if (neg || dlm_no_directory(ls)) {
137e7fd4179SDavid Teigland /*
138e7fd4179SDavid Teigland * Clear lkb's for departed nodes.
139e7fd4179SDavid Teigland */
140e7fd4179SDavid Teigland
1414875647aSDavid Teigland dlm_recover_purge(ls);
142e7fd4179SDavid Teigland
143e7fd4179SDavid Teigland /*
144e7fd4179SDavid Teigland * Get new master nodeid's for rsb's that were mastered on
145e7fd4179SDavid Teigland * departed nodes.
146e7fd4179SDavid Teigland */
147e7fd4179SDavid Teigland
148*c4f4e135SAlexander Aring error = dlm_recover_masters(ls, rv->seq);
149e7fd4179SDavid Teigland if (error) {
150075f0177SDavid Teigland log_rinfo(ls, "dlm_recover_masters error %d", error);
151e7fd4179SDavid Teigland goto fail;
152e7fd4179SDavid Teigland }
153e7fd4179SDavid Teigland
154e7fd4179SDavid Teigland /*
155e7fd4179SDavid Teigland * Send our locks on remastered rsb's to the new masters.
156e7fd4179SDavid Teigland */
157e7fd4179SDavid Teigland
158*c4f4e135SAlexander Aring error = dlm_recover_locks(ls, rv->seq);
159e7fd4179SDavid Teigland if (error) {
160075f0177SDavid Teigland log_rinfo(ls, "dlm_recover_locks error %d", error);
161e7fd4179SDavid Teigland goto fail;
162e7fd4179SDavid Teigland }
163e7fd4179SDavid Teigland
164f95a34c6SDavid Teigland dlm_set_recover_status(ls, DLM_RS_LOCKS);
165f95a34c6SDavid Teigland
166*c4f4e135SAlexander Aring error = dlm_recover_locks_wait(ls, rv->seq);
167e7fd4179SDavid Teigland if (error) {
168075f0177SDavid Teigland log_rinfo(ls, "dlm_recover_locks_wait error %d", error);
169e7fd4179SDavid Teigland goto fail;
170e7fd4179SDavid Teigland }
171e7fd4179SDavid Teigland
172075f0177SDavid Teigland log_rinfo(ls, "dlm_recover_locks %u in",
1734875647aSDavid Teigland ls->ls_recover_locks_in);
1744875647aSDavid Teigland
175e7fd4179SDavid Teigland /*
176e7fd4179SDavid Teigland * Finalize state in master rsb's now that all locks can be
177e7fd4179SDavid Teigland * checked. This includes conversion resolution and lvb
178e7fd4179SDavid Teigland * settings.
179e7fd4179SDavid Teigland */
180e7fd4179SDavid Teigland
181e7fd4179SDavid Teigland dlm_recover_rsbs(ls);
18291c0dc93SDavid Teigland } else {
18391c0dc93SDavid Teigland /*
18491c0dc93SDavid Teigland * Other lockspace members may be going through the "neg" steps
18591c0dc93SDavid Teigland * while also adding us to the lockspace, in which case they'll
1864b77f2c9SDavid Teigland * be doing the recover_locks (RS_LOCKS) barrier.
18791c0dc93SDavid Teigland */
18891c0dc93SDavid Teigland dlm_set_recover_status(ls, DLM_RS_LOCKS);
1894b77f2c9SDavid Teigland
190*c4f4e135SAlexander Aring error = dlm_recover_locks_wait(ls, rv->seq);
1914b77f2c9SDavid Teigland if (error) {
192075f0177SDavid Teigland log_rinfo(ls, "dlm_recover_locks_wait error %d", error);
1934b77f2c9SDavid Teigland goto fail;
1944b77f2c9SDavid Teigland }
195e7fd4179SDavid Teigland }
196e7fd4179SDavid Teigland
197e7fd4179SDavid Teigland dlm_release_root_list(ls);
198e7fd4179SDavid Teigland
1992896ee37SDavid Teigland /*
2002896ee37SDavid Teigland * Purge directory-related requests that are saved in requestqueue.
2012896ee37SDavid Teigland * All dir requests from before recovery are invalid now due to the dir
2022896ee37SDavid Teigland * rebuild and will be resent by the requesting nodes.
2032896ee37SDavid Teigland */
2042896ee37SDavid Teigland
2052896ee37SDavid Teigland dlm_purge_requestqueue(ls);
2062896ee37SDavid Teigland
207e7fd4179SDavid Teigland dlm_set_recover_status(ls, DLM_RS_DONE);
208f95a34c6SDavid Teigland
209*c4f4e135SAlexander Aring error = dlm_recover_done_wait(ls, rv->seq);
210e7fd4179SDavid Teigland if (error) {
211075f0177SDavid Teigland log_rinfo(ls, "dlm_recover_done_wait error %d", error);
212e7fd4179SDavid Teigland goto fail;
213e7fd4179SDavid Teigland }
214e7fd4179SDavid Teigland
215e7fd4179SDavid Teigland dlm_clear_members_gone(ls);
216e7fd4179SDavid Teigland
21723e8e1aaSDavid Teigland dlm_callback_resume(ls);
21823e8e1aaSDavid Teigland
219e7fd4179SDavid Teigland error = enable_locking(ls, rv->seq);
220e7fd4179SDavid Teigland if (error) {
221075f0177SDavid Teigland log_rinfo(ls, "enable_locking error %d", error);
222e7fd4179SDavid Teigland goto fail;
223e7fd4179SDavid Teigland }
224e7fd4179SDavid Teigland
225e7fd4179SDavid Teigland error = dlm_process_requestqueue(ls);
226e7fd4179SDavid Teigland if (error) {
227075f0177SDavid Teigland log_rinfo(ls, "dlm_process_requestqueue error %d", error);
228e7fd4179SDavid Teigland goto fail;
229e7fd4179SDavid Teigland }
230e7fd4179SDavid Teigland
231e7fd4179SDavid Teigland error = dlm_recover_waiters_post(ls);
232e7fd4179SDavid Teigland if (error) {
233075f0177SDavid Teigland log_rinfo(ls, "dlm_recover_waiters_post error %d", error);
234e7fd4179SDavid Teigland goto fail;
235e7fd4179SDavid Teigland }
236e7fd4179SDavid Teigland
2374875647aSDavid Teigland dlm_recover_grant(ls);
238e7fd4179SDavid Teigland
239075f0177SDavid Teigland log_rinfo(ls, "dlm_recover %llu generation %u done: %u ms",
24060f98d18SDavid Teigland (unsigned long long)rv->seq, ls->ls_generation,
241e7fd4179SDavid Teigland jiffies_to_msecs(jiffies - start));
24290135925SDavid Teigland mutex_unlock(&ls->ls_recoverd_active);
243e7fd4179SDavid Teigland
244e7fd4179SDavid Teigland return 0;
245e7fd4179SDavid Teigland
246e7fd4179SDavid Teigland fail:
247e7fd4179SDavid Teigland dlm_release_root_list(ls);
24890135925SDavid Teigland mutex_unlock(&ls->ls_recoverd_active);
249682bb91bSAlexander Aring
250e7fd4179SDavid Teigland return error;
251e7fd4179SDavid Teigland }
252e7fd4179SDavid Teigland
2532cdc98aaSDavid Teigland /* The dlm_ls_start() that created the rv we take here may already have been
2542cdc98aaSDavid Teigland stopped via dlm_ls_stop(); in that case we need to leave the RECOVERY_STOP
2552cdc98aaSDavid Teigland flag set. */
2562cdc98aaSDavid Teigland
do_ls_recovery(struct dlm_ls * ls)257e7fd4179SDavid Teigland static void do_ls_recovery(struct dlm_ls *ls)
258e7fd4179SDavid Teigland {
259e7fd4179SDavid Teigland struct dlm_recover *rv = NULL;
2603182599fSAlexander Aring int error;
261e7fd4179SDavid Teigland
262e7fd4179SDavid Teigland spin_lock(&ls->ls_recover_lock);
263e7fd4179SDavid Teigland rv = ls->ls_recover_args;
264e7fd4179SDavid Teigland ls->ls_recover_args = NULL;
2652cdc98aaSDavid Teigland if (rv && ls->ls_recover_seq == rv->seq)
266475f230cSDavid Teigland clear_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
267e7fd4179SDavid Teigland spin_unlock(&ls->ls_recover_lock);
268e7fd4179SDavid Teigland
269e7fd4179SDavid Teigland if (rv) {
2703182599fSAlexander Aring error = ls_recover(ls, rv);
2713182599fSAlexander Aring switch (error) {
2723182599fSAlexander Aring case 0:
2733182599fSAlexander Aring ls->ls_recovery_result = 0;
2743182599fSAlexander Aring complete(&ls->ls_recovery_done);
2753182599fSAlexander Aring
2763182599fSAlexander Aring dlm_lsop_recover_done(ls);
2773182599fSAlexander Aring break;
2783182599fSAlexander Aring case -EINTR:
2793182599fSAlexander Aring /* if recovery was interrupted -EINTR we wait for the next
2803182599fSAlexander Aring * ls_recover() iteration until it hopefully succeeds.
2813182599fSAlexander Aring */
2823182599fSAlexander Aring log_rinfo(ls, "%s %llu interrupted and should be queued to run again",
2833182599fSAlexander Aring __func__, (unsigned long long)rv->seq);
2843182599fSAlexander Aring break;
2853182599fSAlexander Aring default:
2863182599fSAlexander Aring log_rinfo(ls, "%s %llu error %d", __func__,
2873182599fSAlexander Aring (unsigned long long)rv->seq, error);
2883182599fSAlexander Aring
2893182599fSAlexander Aring /* let new_lockspace() get aware of critical error */
2903182599fSAlexander Aring ls->ls_recovery_result = error;
2913182599fSAlexander Aring complete(&ls->ls_recovery_done);
2923182599fSAlexander Aring break;
2933182599fSAlexander Aring }
2943182599fSAlexander Aring
29560f98d18SDavid Teigland kfree(rv->nodes);
296e7fd4179SDavid Teigland kfree(rv);
297e7fd4179SDavid Teigland }
298e7fd4179SDavid Teigland }
299e7fd4179SDavid Teigland
dlm_recoverd(void * arg)300e7fd4179SDavid Teigland static int dlm_recoverd(void *arg)
301e7fd4179SDavid Teigland {
302e7fd4179SDavid Teigland struct dlm_ls *ls;
303e7fd4179SDavid Teigland
304e7fd4179SDavid Teigland ls = dlm_find_lockspace_local(arg);
3055f88f1eaSDavid Teigland if (!ls) {
3065f88f1eaSDavid Teigland log_print("dlm_recoverd: no lockspace %p", arg);
3075f88f1eaSDavid Teigland return -1;
3085f88f1eaSDavid Teigland }
309e7fd4179SDavid Teigland
310475f230cSDavid Teigland down_write(&ls->ls_in_recovery);
311475f230cSDavid Teigland set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
312475f230cSDavid Teigland wake_up(&ls->ls_recover_lock_wait);
313475f230cSDavid Teigland
314e412f920Stsutomu.owa@toshiba.co.jp while (1) {
315e412f920Stsutomu.owa@toshiba.co.jp /*
316e412f920Stsutomu.owa@toshiba.co.jp * We call kthread_should_stop() after set_current_state().
317e412f920Stsutomu.owa@toshiba.co.jp * This is because it works correctly if kthread_stop() is
318e412f920Stsutomu.owa@toshiba.co.jp * called just before set_current_state().
319e412f920Stsutomu.owa@toshiba.co.jp */
320e7fd4179SDavid Teigland set_current_state(TASK_INTERRUPTIBLE);
321e412f920Stsutomu.owa@toshiba.co.jp if (kthread_should_stop()) {
322e412f920Stsutomu.owa@toshiba.co.jp set_current_state(TASK_RUNNING);
323e412f920Stsutomu.owa@toshiba.co.jp break;
324e412f920Stsutomu.owa@toshiba.co.jp }
325475f230cSDavid Teigland if (!test_bit(LSFL_RECOVER_WORK, &ls->ls_flags) &&
3269e1b0211SGuoqing Jiang !test_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) {
3279e1b0211SGuoqing Jiang if (kthread_should_stop())
3289e1b0211SGuoqing Jiang break;
329e7fd4179SDavid Teigland schedule();
3309e1b0211SGuoqing Jiang }
331e7fd4179SDavid Teigland set_current_state(TASK_RUNNING);
332e7fd4179SDavid Teigland
333475f230cSDavid Teigland if (test_and_clear_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) {
334475f230cSDavid Teigland down_write(&ls->ls_in_recovery);
335475f230cSDavid Teigland set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
336475f230cSDavid Teigland wake_up(&ls->ls_recover_lock_wait);
337475f230cSDavid Teigland }
338475f230cSDavid Teigland
339475f230cSDavid Teigland if (test_and_clear_bit(LSFL_RECOVER_WORK, &ls->ls_flags))
340e7fd4179SDavid Teigland do_ls_recovery(ls);
341e7fd4179SDavid Teigland }
342e7fd4179SDavid Teigland
343475f230cSDavid Teigland if (test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags))
344475f230cSDavid Teigland up_write(&ls->ls_in_recovery);
345475f230cSDavid Teigland
346e7fd4179SDavid Teigland dlm_put_lockspace(ls);
347e7fd4179SDavid Teigland return 0;
348e7fd4179SDavid Teigland }
349e7fd4179SDavid Teigland
dlm_recoverd_start(struct dlm_ls * ls)350e7fd4179SDavid Teigland int dlm_recoverd_start(struct dlm_ls *ls)
351e7fd4179SDavid Teigland {
352e7fd4179SDavid Teigland struct task_struct *p;
353e7fd4179SDavid Teigland int error = 0;
354e7fd4179SDavid Teigland
355e7fd4179SDavid Teigland p = kthread_run(dlm_recoverd, ls, "dlm_recoverd");
356e7fd4179SDavid Teigland if (IS_ERR(p))
357e7fd4179SDavid Teigland error = PTR_ERR(p);
358e7fd4179SDavid Teigland else
359e7fd4179SDavid Teigland ls->ls_recoverd_task = p;
360e7fd4179SDavid Teigland return error;
361e7fd4179SDavid Teigland }
362e7fd4179SDavid Teigland
dlm_recoverd_stop(struct dlm_ls * ls)363e7fd4179SDavid Teigland void dlm_recoverd_stop(struct dlm_ls *ls)
364e7fd4179SDavid Teigland {
365e7fd4179SDavid Teigland kthread_stop(ls->ls_recoverd_task);
366e7fd4179SDavid Teigland }
367e7fd4179SDavid Teigland
dlm_recoverd_suspend(struct dlm_ls * ls)368e7fd4179SDavid Teigland void dlm_recoverd_suspend(struct dlm_ls *ls)
369e7fd4179SDavid Teigland {
370f6db1b8eSDavid Teigland wake_up(&ls->ls_wait_general);
37190135925SDavid Teigland mutex_lock(&ls->ls_recoverd_active);
372e7fd4179SDavid Teigland }
373e7fd4179SDavid Teigland
dlm_recoverd_resume(struct dlm_ls * ls)374e7fd4179SDavid Teigland void dlm_recoverd_resume(struct dlm_ls *ls)
375e7fd4179SDavid Teigland {
37690135925SDavid Teigland mutex_unlock(&ls->ls_recoverd_active);
377e7fd4179SDavid Teigland }
378e7fd4179SDavid Teigland
379