Commit | Line | Data |
---|---|---|
8e854e9c GR |
1 | /* |
2 | * Copyright (C) 2015, SUSE | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License as published by | |
6 | * the Free Software Foundation; either version 2, or (at your option) | |
7 | * any later version. | |
8 | * | |
9 | */ | |
10 | ||
11 | ||
12 | #include <linux/module.h> | |
47741b7c GR |
13 | #include <linux/dlm.h> |
14 | #include <linux/sched.h> | |
15 | #include "md.h" | |
edb39c9d | 16 | #include "md-cluster.h" |
47741b7c GR |
17 | |
/*
 * Size in bytes of a lock value block: used both for the per-resource
 * sb_lvbptr allocation and as the LVB length given to dlm_new_lockspace().
 */
#define LVB_SIZE	64
19 | ||
20 | struct dlm_lock_resource { | |
21 | dlm_lockspace_t *ls; | |
22 | struct dlm_lksb lksb; | |
23 | char *name; /* lock name. */ | |
24 | uint32_t flags; /* flags to pass to dlm_lock() */ | |
47741b7c | 25 | struct completion completion; /* completion for synchronized locking */ |
c4ce867f GR |
26 | void (*bast)(void *arg, int mode); /* blocking AST function pointer*/ |
27 | struct mddev *mddev; /* pointing back to mddev. */ | |
28 | }; | |
29 | ||
30 | struct md_cluster_info { | |
31 | /* dlm lock space and resources for clustered raid. */ | |
32 | dlm_lockspace_t *lockspace; | |
cf921cc1 GR |
33 | int slot_number; |
34 | struct completion completion; | |
c4ce867f GR |
35 | struct dlm_lock_resource *sb_lock; |
36 | struct mutex sb_mutex; | |
47741b7c GR |
37 | }; |
38 | ||
39 | static void sync_ast(void *arg) | |
40 | { | |
41 | struct dlm_lock_resource *res; | |
42 | ||
43 | res = (struct dlm_lock_resource *) arg; | |
44 | complete(&res->completion); | |
45 | } | |
46 | ||
47 | static int dlm_lock_sync(struct dlm_lock_resource *res, int mode) | |
48 | { | |
49 | int ret = 0; | |
50 | ||
51 | init_completion(&res->completion); | |
52 | ret = dlm_lock(res->ls, mode, &res->lksb, | |
53 | res->flags, res->name, strlen(res->name), | |
54 | 0, sync_ast, res, res->bast); | |
55 | if (ret) | |
56 | return ret; | |
57 | wait_for_completion(&res->completion); | |
58 | return res->lksb.sb_status; | |
59 | } | |
60 | ||
61 | static int dlm_unlock_sync(struct dlm_lock_resource *res) | |
62 | { | |
63 | return dlm_lock_sync(res, DLM_LOCK_NL); | |
64 | } | |
65 | ||
c4ce867f | 66 | static struct dlm_lock_resource *lockres_init(struct mddev *mddev, |
47741b7c GR |
67 | char *name, void (*bastfn)(void *arg, int mode), int with_lvb) |
68 | { | |
69 | struct dlm_lock_resource *res = NULL; | |
70 | int ret, namelen; | |
c4ce867f | 71 | struct md_cluster_info *cinfo = mddev->cluster_info; |
47741b7c GR |
72 | |
73 | res = kzalloc(sizeof(struct dlm_lock_resource), GFP_KERNEL); | |
74 | if (!res) | |
75 | return NULL; | |
c4ce867f GR |
76 | res->ls = cinfo->lockspace; |
77 | res->mddev = mddev; | |
47741b7c GR |
78 | namelen = strlen(name); |
79 | res->name = kzalloc(namelen + 1, GFP_KERNEL); | |
80 | if (!res->name) { | |
81 | pr_err("md-cluster: Unable to allocate resource name for resource %s\n", name); | |
82 | goto out_err; | |
83 | } | |
84 | strlcpy(res->name, name, namelen + 1); | |
85 | if (with_lvb) { | |
86 | res->lksb.sb_lvbptr = kzalloc(LVB_SIZE, GFP_KERNEL); | |
87 | if (!res->lksb.sb_lvbptr) { | |
88 | pr_err("md-cluster: Unable to allocate LVB for resource %s\n", name); | |
89 | goto out_err; | |
90 | } | |
91 | res->flags = DLM_LKF_VALBLK; | |
92 | } | |
93 | ||
94 | if (bastfn) | |
95 | res->bast = bastfn; | |
96 | ||
97 | res->flags |= DLM_LKF_EXPEDITE; | |
98 | ||
99 | ret = dlm_lock_sync(res, DLM_LOCK_NL); | |
100 | if (ret) { | |
101 | pr_err("md-cluster: Unable to lock NL on new lock resource %s\n", name); | |
102 | goto out_err; | |
103 | } | |
104 | res->flags &= ~DLM_LKF_EXPEDITE; | |
105 | res->flags |= DLM_LKF_CONVERT; | |
106 | ||
107 | return res; | |
108 | out_err: | |
109 | kfree(res->lksb.sb_lvbptr); | |
110 | kfree(res->name); | |
111 | kfree(res); | |
112 | return NULL; | |
113 | } | |
114 | ||
115 | static void lockres_free(struct dlm_lock_resource *res) | |
116 | { | |
117 | if (!res) | |
118 | return; | |
119 | ||
120 | init_completion(&res->completion); | |
121 | dlm_unlock(res->ls, res->lksb.sb_lkid, 0, &res->lksb, res); | |
122 | wait_for_completion(&res->completion); | |
123 | ||
124 | kfree(res->name); | |
125 | kfree(res->lksb.sb_lvbptr); | |
126 | kfree(res); | |
127 | } | |
8e854e9c | 128 | |
c4ce867f GR |
/*
 * Render the 16 raw bytes at @src as lowercase hex in the usual
 * 8-4-4-4-12 UUID layout and store the NUL-terminated result in @dest.
 *
 * @dest must have room for at least 37 bytes (36 characters plus NUL).
 * Returns @dest.
 */
static char *pretty_uuid(char *dest, char *src)
{
	char *cursor = dest;
	int idx;

	for (idx = 0; idx < 16; idx++) {
		/* A dash precedes bytes 4, 6, 8 and 10. */
		switch (idx) {
		case 4:
		case 6:
		case 8:
		case 10:
			cursor += sprintf(cursor, "-");
			break;
		default:
			break;
		}
		/* Go through an unsigned byte so high values don't sign-extend. */
		cursor += sprintf(cursor, "%02x", (unsigned char)src[idx]);
	}

	return dest;
}
140 | ||
cf921cc1 GR |
/* recover_prep callback for md_ls_ops: intentionally does nothing. */
static void recover_prep(void *arg)
{
	/* No preparation is performed. */
}
144 | ||
145 | static void recover_slot(void *arg, struct dlm_slot *slot) | |
146 | { | |
147 | struct mddev *mddev = arg; | |
148 | struct md_cluster_info *cinfo = mddev->cluster_info; | |
149 | ||
150 | pr_info("md-cluster: %s Node %d/%d down. My slot: %d. Initiating recovery.\n", | |
151 | mddev->bitmap_info.cluster_name, | |
152 | slot->nodeid, slot->slot, | |
153 | cinfo->slot_number); | |
154 | } | |
155 | ||
156 | static void recover_done(void *arg, struct dlm_slot *slots, | |
157 | int num_slots, int our_slot, | |
158 | uint32_t generation) | |
159 | { | |
160 | struct mddev *mddev = arg; | |
161 | struct md_cluster_info *cinfo = mddev->cluster_info; | |
162 | ||
163 | cinfo->slot_number = our_slot; | |
164 | complete(&cinfo->completion); | |
165 | } | |
166 | ||
167 | static const struct dlm_lockspace_ops md_ls_ops = { | |
168 | .recover_prep = recover_prep, | |
169 | .recover_slot = recover_slot, | |
170 | .recover_done = recover_done, | |
171 | }; | |
172 | ||
edb39c9d GR |
173 | static int join(struct mddev *mddev, int nodes) |
174 | { | |
c4ce867f | 175 | struct md_cluster_info *cinfo; |
cf921cc1 | 176 | int ret, ops_rv; |
c4ce867f GR |
177 | char str[64]; |
178 | ||
179 | if (!try_module_get(THIS_MODULE)) | |
180 | return -ENOENT; | |
181 | ||
182 | cinfo = kzalloc(sizeof(struct md_cluster_info), GFP_KERNEL); | |
183 | if (!cinfo) | |
184 | return -ENOMEM; | |
185 | ||
cf921cc1 GR |
186 | init_completion(&cinfo->completion); |
187 | ||
188 | mutex_init(&cinfo->sb_mutex); | |
189 | mddev->cluster_info = cinfo; | |
190 | ||
c4ce867f GR |
191 | memset(str, 0, 64); |
192 | pretty_uuid(str, mddev->uuid); | |
cf921cc1 GR |
193 | ret = dlm_new_lockspace(str, mddev->bitmap_info.cluster_name, |
194 | DLM_LSFL_FS, LVB_SIZE, | |
195 | &md_ls_ops, mddev, &ops_rv, &cinfo->lockspace); | |
c4ce867f GR |
196 | if (ret) |
197 | goto err; | |
cf921cc1 | 198 | wait_for_completion(&cinfo->completion); |
b97e9257 GR |
199 | if (nodes <= cinfo->slot_number) { |
200 | pr_err("md-cluster: Slot allotted(%d) greater than available slots(%d)", cinfo->slot_number - 1, | |
201 | nodes); | |
202 | ret = -ERANGE; | |
203 | goto err; | |
204 | } | |
c4ce867f GR |
205 | cinfo->sb_lock = lockres_init(mddev, "cmd-super", |
206 | NULL, 0); | |
207 | if (!cinfo->sb_lock) { | |
208 | ret = -ENOMEM; | |
209 | goto err; | |
210 | } | |
edb39c9d | 211 | return 0; |
c4ce867f GR |
212 | err: |
213 | if (cinfo->lockspace) | |
214 | dlm_release_lockspace(cinfo->lockspace, 2); | |
cf921cc1 | 215 | mddev->cluster_info = NULL; |
c4ce867f GR |
216 | kfree(cinfo); |
217 | module_put(THIS_MODULE); | |
218 | return ret; | |
edb39c9d GR |
219 | } |
220 | ||
221 | static int leave(struct mddev *mddev) | |
222 | { | |
c4ce867f GR |
223 | struct md_cluster_info *cinfo = mddev->cluster_info; |
224 | ||
225 | if (!cinfo) | |
226 | return 0; | |
227 | lockres_free(cinfo->sb_lock); | |
228 | dlm_release_lockspace(cinfo->lockspace, 2); | |
edb39c9d GR |
229 | return 0; |
230 | } | |
231 | ||
cf921cc1 GR |
232 | /* slot_number(): Returns the MD slot number to use |
233 | * DLM starts the slot numbers from 1, wheras cluster-md | |
234 | * wants the number to be from zero, so we deduct one | |
235 | */ | |
236 | static int slot_number(struct mddev *mddev) | |
237 | { | |
238 | struct md_cluster_info *cinfo = mddev->cluster_info; | |
239 | ||
240 | return cinfo->slot_number - 1; | |
241 | } | |
242 | ||
edb39c9d GR |
243 | static struct md_cluster_operations cluster_ops = { |
244 | .join = join, | |
245 | .leave = leave, | |
cf921cc1 | 246 | .slot_number = slot_number, |
edb39c9d GR |
247 | }; |
248 | ||
8e854e9c GR |
249 | static int __init cluster_init(void) |
250 | { | |
251 | pr_warn("md-cluster: EXPERIMENTAL. Use with caution\n"); | |
252 | pr_info("Registering Cluster MD functions\n"); | |
edb39c9d | 253 | register_md_cluster_operations(&cluster_ops, THIS_MODULE); |
8e854e9c GR |
254 | return 0; |
255 | } | |
256 | ||
/* Module exit point: withdraw the operations registered by cluster_init(). */
static void cluster_exit(void)
{
	unregister_md_cluster_operations();
}
261 | ||
262 | module_init(cluster_init); | |
263 | module_exit(cluster_exit); | |
264 | MODULE_LICENSE("GPL"); | |
265 | MODULE_DESCRIPTION("Clustering support for MD"); |