Commit | Line | Data |
---|---|---|
e7fd4179 DT |
1 | /****************************************************************************** |
2 | ******************************************************************************* | |
3 | ** | |
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
60f98d18 | 5 | ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. |
e7fd4179 DT |
6 | ** |
7 | ** This copyrighted material is made available to anyone wishing to use, | |
8 | ** modify, copy, or redistribute it subject to the terms and conditions | |
9 | ** of the GNU General Public License v.2. | |
10 | ** | |
11 | ******************************************************************************* | |
12 | ******************************************************************************/ | |
13 | ||
14 | #include "dlm_internal.h" | |
15 | #include "lockspace.h" | |
16 | #include "member.h" | |
17 | #include "dir.h" | |
18 | #include "ast.h" | |
19 | #include "recover.h" | |
20 | #include "lowcomms.h" | |
21 | #include "lock.h" | |
22 | #include "requestqueue.h" | |
23 | #include "recoverd.h" | |
24 | ||
25 | ||
26 | /* If the start for which we're re-enabling locking (seq) has been superseded | |
c36258b5 DT |
27 | by a newer stop (ls_recover_seq), we need to leave locking disabled. |
28 | ||
29 | We suspend dlm_recv threads here to avoid the race where dlm_recv a) sees | |
30 | locking stopped and b) adds a message to the requestqueue, but dlm_recoverd | |
31 | enables locking and clears the requestqueue between a and b. */ | |
e7fd4179 DT |
32 | |
33 | static int enable_locking(struct dlm_ls *ls, uint64_t seq) | |
34 | { | |
35 | int error = -EINTR; | |
36 | ||
c36258b5 DT |
37 | down_write(&ls->ls_recv_active); |
38 | ||
e7fd4179 DT |
39 | spin_lock(&ls->ls_recover_lock); |
40 | if (ls->ls_recover_seq == seq) { | |
41 | set_bit(LSFL_RUNNING, &ls->ls_flags); | |
c36258b5 | 42 | /* unblocks processes waiting to enter the dlm */ |
e7fd4179 DT |
43 | up_write(&ls->ls_in_recovery); |
44 | error = 0; | |
45 | } | |
46 | spin_unlock(&ls->ls_recover_lock); | |
c36258b5 DT |
47 | |
48 | up_write(&ls->ls_recv_active); | |
e7fd4179 DT |
49 | return error; |
50 | } | |
51 | ||
52 | static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |
53 | { | |
54 | unsigned long start; | |
55 | int error, neg = 0; | |
56 | ||
6d40c4a7 | 57 | log_debug(ls, "dlm_recover %llu", (unsigned long long)rv->seq); |
e7fd4179 | 58 | |
90135925 | 59 | mutex_lock(&ls->ls_recoverd_active); |
e7fd4179 | 60 | |
23e8e1aa | 61 | dlm_callback_suspend(ls); |
e7fd4179 | 62 | |
c04fecb4 | 63 | dlm_clear_toss(ls); |
e7fd4179 DT |
64 | |
65 | /* | |
85f0379a DT |
66 | * This list of root rsb's will be the basis of most of the recovery |
67 | * routines. | |
e7fd4179 DT |
68 | */ |
69 | ||
85f0379a | 70 | dlm_create_root_list(ls); |
e7fd4179 DT |
71 | |
72 | /* | |
73 | * Add or remove nodes from the lockspace's ls_nodes list. | |
e7fd4179 DT |
74 | */ |
75 | ||
76 | error = dlm_recover_members(ls, rv, &neg); | |
77 | if (error) { | |
f95a34c6 | 78 | log_debug(ls, "dlm_recover_members error %d", error); |
e7fd4179 DT |
79 | goto fail; |
80 | } | |
f95a34c6 | 81 | |
c04fecb4 DT |
82 | dlm_recover_dir_nodeid(ls); |
83 | ||
84 | ls->ls_recover_dir_sent_res = 0; | |
85 | ls->ls_recover_dir_sent_msg = 0; | |
4875647a DT |
86 | ls->ls_recover_locks_in = 0; |
87 | ||
f95a34c6 DT |
88 | dlm_set_recover_status(ls, DLM_RS_NODES); |
89 | ||
90 | error = dlm_recover_members_wait(ls); | |
91 | if (error) { | |
92 | log_debug(ls, "dlm_recover_members_wait error %d", error); | |
93 | goto fail; | |
94 | } | |
95 | ||
e7fd4179 DT |
96 | start = jiffies; |
97 | ||
98 | /* | |
99 | * Rebuild our own share of the directory by collecting from all other | |
100 | * nodes their master rsb names that hash to us. | |
101 | */ | |
102 | ||
103 | error = dlm_recover_directory(ls); | |
104 | if (error) { | |
f95a34c6 | 105 | log_debug(ls, "dlm_recover_directory error %d", error); |
e7fd4179 DT |
106 | goto fail; |
107 | } | |
108 | ||
f95a34c6 | 109 | dlm_set_recover_status(ls, DLM_RS_DIR); |
e7fd4179 DT |
110 | |
111 | error = dlm_recover_directory_wait(ls); | |
112 | if (error) { | |
f95a34c6 | 113 | log_debug(ls, "dlm_recover_directory_wait error %d", error); |
e7fd4179 DT |
114 | goto fail; |
115 | } | |
116 | ||
c04fecb4 DT |
117 | log_debug(ls, "dlm_recover_directory %u out %u messages", |
118 | ls->ls_recover_dir_sent_res, ls->ls_recover_dir_sent_msg); | |
119 | ||
e7fd4179 DT |
120 | /* |
121 | * We may have outstanding operations that are waiting for a reply from | |
122 | * a failed node. Mark these to be resent after recovery. Unlock and | |
123 | * cancel ops can just be completed. | |
124 | */ | |
125 | ||
126 | dlm_recover_waiters_pre(ls); | |
127 | ||
128 | error = dlm_recovery_stopped(ls); | |
129 | if (error) | |
130 | goto fail; | |
131 | ||
132 | if (neg || dlm_no_directory(ls)) { | |
133 | /* | |
134 | * Clear lkb's for departed nodes. | |
135 | */ | |
136 | ||
4875647a | 137 | dlm_recover_purge(ls); |
e7fd4179 DT |
138 | |
139 | /* | |
140 | * Get new master nodeid's for rsb's that were mastered on | |
141 | * departed nodes. | |
142 | */ | |
143 | ||
144 | error = dlm_recover_masters(ls); | |
145 | if (error) { | |
f95a34c6 | 146 | log_debug(ls, "dlm_recover_masters error %d", error); |
e7fd4179 DT |
147 | goto fail; |
148 | } | |
149 | ||
150 | /* | |
151 | * Send our locks on remastered rsb's to the new masters. | |
152 | */ | |
153 | ||
154 | error = dlm_recover_locks(ls); | |
155 | if (error) { | |
f95a34c6 | 156 | log_debug(ls, "dlm_recover_locks error %d", error); |
e7fd4179 DT |
157 | goto fail; |
158 | } | |
159 | ||
f95a34c6 DT |
160 | dlm_set_recover_status(ls, DLM_RS_LOCKS); |
161 | ||
e7fd4179 DT |
162 | error = dlm_recover_locks_wait(ls); |
163 | if (error) { | |
f95a34c6 | 164 | log_debug(ls, "dlm_recover_locks_wait error %d", error); |
e7fd4179 DT |
165 | goto fail; |
166 | } | |
167 | ||
4875647a DT |
168 | log_debug(ls, "dlm_recover_locks %u in", |
169 | ls->ls_recover_locks_in); | |
170 | ||
e7fd4179 DT |
171 | /* |
172 | * Finalize state in master rsb's now that all locks can be | |
173 | * checked. This includes conversion resolution and lvb | |
174 | * settings. | |
175 | */ | |
176 | ||
177 | dlm_recover_rsbs(ls); | |
91c0dc93 DT |
178 | } else { |
179 | /* | |
180 | * Other lockspace members may be going through the "neg" steps | |
181 | * while also adding us to the lockspace, in which case they'll | |
4b77f2c9 | 182 | * be doing the recover_locks (RS_LOCKS) barrier. |
91c0dc93 DT |
183 | */ |
184 | dlm_set_recover_status(ls, DLM_RS_LOCKS); | |
4b77f2c9 DT |
185 | |
186 | error = dlm_recover_locks_wait(ls); | |
187 | if (error) { | |
f95a34c6 | 188 | log_debug(ls, "dlm_recover_locks_wait error %d", error); |
4b77f2c9 DT |
189 | goto fail; |
190 | } | |
e7fd4179 DT |
191 | } |
192 | ||
193 | dlm_release_root_list(ls); | |
194 | ||
2896ee37 DT |
195 | /* |
196 | * Purge directory-related requests that are saved in requestqueue. | |
197 | * All dir requests from before recovery are invalid now due to the dir | |
198 | * rebuild and will be resent by the requesting nodes. | |
199 | */ | |
200 | ||
201 | dlm_purge_requestqueue(ls); | |
202 | ||
e7fd4179 | 203 | dlm_set_recover_status(ls, DLM_RS_DONE); |
f95a34c6 | 204 | |
e7fd4179 DT |
205 | error = dlm_recover_done_wait(ls); |
206 | if (error) { | |
f95a34c6 | 207 | log_debug(ls, "dlm_recover_done_wait error %d", error); |
e7fd4179 DT |
208 | goto fail; |
209 | } | |
210 | ||
211 | dlm_clear_members_gone(ls); | |
212 | ||
3ae1acf9 DT |
213 | dlm_adjust_timeouts(ls); |
214 | ||
23e8e1aa DT |
215 | dlm_callback_resume(ls); |
216 | ||
e7fd4179 DT |
217 | error = enable_locking(ls, rv->seq); |
218 | if (error) { | |
f95a34c6 | 219 | log_debug(ls, "enable_locking error %d", error); |
e7fd4179 DT |
220 | goto fail; |
221 | } | |
222 | ||
223 | error = dlm_process_requestqueue(ls); | |
224 | if (error) { | |
f95a34c6 | 225 | log_debug(ls, "dlm_process_requestqueue error %d", error); |
e7fd4179 DT |
226 | goto fail; |
227 | } | |
228 | ||
229 | error = dlm_recover_waiters_post(ls); | |
230 | if (error) { | |
f95a34c6 | 231 | log_debug(ls, "dlm_recover_waiters_post error %d", error); |
e7fd4179 DT |
232 | goto fail; |
233 | } | |
234 | ||
4875647a | 235 | dlm_recover_grant(ls); |
e7fd4179 | 236 | |
6d40c4a7 | 237 | log_debug(ls, "dlm_recover %llu generation %u done: %u ms", |
60f98d18 | 238 | (unsigned long long)rv->seq, ls->ls_generation, |
e7fd4179 | 239 | jiffies_to_msecs(jiffies - start)); |
90135925 | 240 | mutex_unlock(&ls->ls_recoverd_active); |
e7fd4179 | 241 | |
60f98d18 | 242 | dlm_lsop_recover_done(ls); |
e7fd4179 DT |
243 | return 0; |
244 | ||
245 | fail: | |
246 | dlm_release_root_list(ls); | |
6d40c4a7 | 247 | log_debug(ls, "dlm_recover %llu error %d", |
57adf7ee | 248 | (unsigned long long)rv->seq, error); |
90135925 | 249 | mutex_unlock(&ls->ls_recoverd_active); |
e7fd4179 DT |
250 | return error; |
251 | } | |
252 | ||
2cdc98aa DT |
253 | /* The dlm_ls_start() that created the rv we take here may already have been |
254 | stopped via dlm_ls_stop(); in that case we need to leave the RECOVERY_STOP | |
255 | flag set. */ | |
256 | ||
e7fd4179 DT |
257 | static void do_ls_recovery(struct dlm_ls *ls) |
258 | { | |
259 | struct dlm_recover *rv = NULL; | |
260 | ||
261 | spin_lock(&ls->ls_recover_lock); | |
262 | rv = ls->ls_recover_args; | |
263 | ls->ls_recover_args = NULL; | |
2cdc98aa DT |
264 | if (rv && ls->ls_recover_seq == rv->seq) |
265 | clear_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); | |
e7fd4179 DT |
266 | spin_unlock(&ls->ls_recover_lock); |
267 | ||
268 | if (rv) { | |
269 | ls_recover(ls, rv); | |
60f98d18 | 270 | kfree(rv->nodes); |
e7fd4179 DT |
271 | kfree(rv); |
272 | } | |
273 | } | |
274 | ||
275 | static int dlm_recoverd(void *arg) | |
276 | { | |
277 | struct dlm_ls *ls; | |
278 | ||
279 | ls = dlm_find_lockspace_local(arg); | |
5f88f1ea DT |
280 | if (!ls) { |
281 | log_print("dlm_recoverd: no lockspace %p", arg); | |
282 | return -1; | |
283 | } | |
e7fd4179 DT |
284 | |
285 | while (!kthread_should_stop()) { | |
286 | set_current_state(TASK_INTERRUPTIBLE); | |
287 | if (!test_bit(LSFL_WORK, &ls->ls_flags)) | |
288 | schedule(); | |
289 | set_current_state(TASK_RUNNING); | |
290 | ||
291 | if (test_and_clear_bit(LSFL_WORK, &ls->ls_flags)) | |
292 | do_ls_recovery(ls); | |
293 | } | |
294 | ||
295 | dlm_put_lockspace(ls); | |
296 | return 0; | |
297 | } | |
298 | ||
299 | void dlm_recoverd_kick(struct dlm_ls *ls) | |
300 | { | |
301 | set_bit(LSFL_WORK, &ls->ls_flags); | |
302 | wake_up_process(ls->ls_recoverd_task); | |
303 | } | |
304 | ||
305 | int dlm_recoverd_start(struct dlm_ls *ls) | |
306 | { | |
307 | struct task_struct *p; | |
308 | int error = 0; | |
309 | ||
310 | p = kthread_run(dlm_recoverd, ls, "dlm_recoverd"); | |
311 | if (IS_ERR(p)) | |
312 | error = PTR_ERR(p); | |
313 | else | |
314 | ls->ls_recoverd_task = p; | |
315 | return error; | |
316 | } | |
317 | ||
318 | void dlm_recoverd_stop(struct dlm_ls *ls) | |
319 | { | |
320 | kthread_stop(ls->ls_recoverd_task); | |
321 | } | |
322 | ||
323 | void dlm_recoverd_suspend(struct dlm_ls *ls) | |
324 | { | |
f6db1b8e | 325 | wake_up(&ls->ls_wait_general); |
90135925 | 326 | mutex_lock(&ls->ls_recoverd_active); |
e7fd4179 DT |
327 | } |
328 | ||
329 | void dlm_recoverd_resume(struct dlm_ls *ls) | |
330 | { | |
90135925 | 331 | mutex_unlock(&ls->ls_recoverd_active); |
e7fd4179 DT |
332 | } |
333 |