Commit | Line | Data |
---|---|---|
d7e09d03 PT |
1 | /* |
2 | * GPL HEADER START | |
3 | * | |
4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 only, | |
8 | * as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * General Public License version 2 for more details (a copy is included | |
14 | * in the LICENSE file that accompanied this code). | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * version 2 along with this program; If not, see | |
6a5b99a4 | 18 | * http://www.gnu.org/licenses/gpl-2.0.html |
d7e09d03 | 19 | * |
d7e09d03 PT |
20 | * GPL HEADER END |
21 | */ | |
22 | /* | |
23 | * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. | |
24 | * Use is subject to license terms. | |
25 | * | |
1dc563a6 | 26 | * Copyright (c) 2011, 2015, Intel Corporation. |
d7e09d03 PT |
27 | */ |
28 | /* | |
29 | * This file is part of Lustre, http://www.lustre.org/ | |
30 | * Lustre is a trademark of Sun Microsystems, Inc. | |
31 | * | |
32 | * lustre/fld/fld_request.c | |
33 | * | |
34 | * FLD (Fids Location Database) | |
35 | * | |
36 | * Author: Yury Umanets <umka@clusterfs.com> | |
37 | */ | |
38 | ||
39 | #define DEBUG_SUBSYSTEM S_FLD | |
40 | ||
9fdaf8c0 | 41 | #include "../../include/linux/libcfs/libcfs.h" |
0e9ad0ef GKH |
42 | #include <linux/module.h> |
43 | #include <asm/div64.h> | |
d7e09d03 | 44 | |
0e9ad0ef GKH |
45 | #include "../include/obd.h" |
46 | #include "../include/obd_class.h" | |
47 | #include "../include/lustre_ver.h" | |
48 | #include "../include/obd_support.h" | |
49 | #include "../include/lprocfs_status.h" | |
d7e09d03 | 50 | |
0e9ad0ef GKH |
51 | #include "../include/lustre_req_layout.h" |
52 | #include "../include/lustre_fld.h" | |
53 | #include "../include/lustre_mdc.h" | |
d7e09d03 PT |
54 | #include "fld_internal.h" |
55 | ||
114acca8 | 56 | static int fld_rrb_hash(struct lu_client_fld *fld, u64 seq) |
d7e09d03 PT |
57 | { |
58 | LASSERT(fld->lcf_count > 0); | |
59 | return do_div(seq, fld->lcf_count); | |
60 | } | |
61 | ||
62 | static struct lu_fld_target * | |
114acca8 | 63 | fld_rrb_scan(struct lu_client_fld *fld, u64 seq) |
d7e09d03 PT |
64 | { |
65 | struct lu_fld_target *target; | |
66 | int hash; | |
d7e09d03 PT |
67 | |
68 | /* Because almost all of special sequence located in MDT0, | |
69 | * it should go to index 0 directly, instead of calculating | |
70 | * hash again, and also if other MDTs is not being connected, | |
71 | * the fld lookup requests(for seq on MDT0) should not be | |
52581b89 OD |
72 | * blocked because of other MDTs |
73 | */ | |
d7e09d03 PT |
74 | if (fid_seq_is_norm(seq)) |
75 | hash = fld_rrb_hash(fld, seq); | |
76 | else | |
77 | hash = 0; | |
78 | ||
b7fb222b | 79 | again: |
d7e09d03 PT |
80 | list_for_each_entry(target, &fld->lcf_targets, ft_chain) { |
81 | if (target->ft_idx == hash) | |
0a3bdb00 | 82 | return target; |
d7e09d03 PT |
83 | } |
84 | ||
b7fb222b | 85 | if (hash != 0) { |
86 | /* It is possible the remote target(MDT) are not connected to | |
87 | * with client yet, so we will refer this to MDT0, which should | |
52581b89 OD |
88 | * be connected during mount |
89 | */ | |
b7fb222b | 90 | hash = 0; |
91 | goto again; | |
92 | } | |
93 | ||
55f5a824 | 94 | CERROR("%s: Can't find target by hash %d (seq %#llx). Targets (%d):\n", |
cf677593 | 95 | fld->lcf_name, hash, seq, fld->lcf_count); |
d7e09d03 PT |
96 | |
97 | list_for_each_entry(target, &fld->lcf_targets, ft_chain) { | |
6ac49ca5 | 98 | const char *srv_name = target->ft_srv ? |
d7e09d03 | 99 | target->ft_srv->lsf_name : "<null>"; |
6ac49ca5 | 100 | const char *exp_name = target->ft_exp ? |
d7e09d03 PT |
101 | (char *)target->ft_exp->exp_obd->obd_uuid.uuid : |
102 | "<null>"; | |
103 | ||
b0f5aad5 | 104 | CERROR(" exp: 0x%p (%s), srv: 0x%p (%s), idx: %llu\n", |
d7e09d03 PT |
105 | target->ft_exp, exp_name, target->ft_srv, |
106 | srv_name, target->ft_idx); | |
107 | } | |
108 | ||
109 | /* | |
110 | * If target is not found, there is logical error anyway, so here is | |
111 | * LBUG() to catch this situation. | |
112 | */ | |
113 | LBUG(); | |
0a3bdb00 | 114 | return NULL; |
d7e09d03 PT |
115 | } |
116 | ||
117 | struct lu_fld_hash fld_hash[] = { | |
118 | { | |
119 | .fh_name = "RRB", | |
120 | .fh_hash_func = fld_rrb_hash, | |
121 | .fh_scan_func = fld_rrb_scan | |
122 | }, | |
123 | { | |
ea7893bb | 124 | NULL, |
d7e09d03 PT |
125 | } |
126 | }; | |
127 | ||
128 | static struct lu_fld_target * | |
114acca8 | 129 | fld_client_get_target(struct lu_client_fld *fld, u64 seq) |
d7e09d03 PT |
130 | { |
131 | struct lu_fld_target *target; | |
d7e09d03 | 132 | |
6ac49ca5 | 133 | LASSERT(fld->lcf_hash); |
d7e09d03 PT |
134 | |
135 | spin_lock(&fld->lcf_lock); | |
136 | target = fld->lcf_hash->fh_scan_func(fld, seq); | |
137 | spin_unlock(&fld->lcf_lock); | |
138 | ||
6ac49ca5 | 139 | if (target) { |
55f5a824 | 140 | CDEBUG(D_INFO, "%s: Found target (idx %llu) by seq %#llx\n", |
b0f5aad5 | 141 | fld->lcf_name, target->ft_idx, seq); |
d7e09d03 PT |
142 | } |
143 | ||
0a3bdb00 | 144 | return target; |
d7e09d03 PT |
145 | } |
146 | ||
147 | /* | |
148 | * Add export to FLD. This is usually done by CMM and LMV as they are main users | |
149 | * of FLD module. | |
150 | */ | |
151 | int fld_client_add_target(struct lu_client_fld *fld, | |
152 | struct lu_fld_target *tar) | |
153 | { | |
154 | const char *name; | |
155 | struct lu_fld_target *target, *tmp; | |
d7e09d03 | 156 | |
6ac49ca5 | 157 | LASSERT(tar); |
d7e09d03 | 158 | name = fld_target_name(tar); |
6ac49ca5 OD |
159 | LASSERT(name); |
160 | LASSERT(tar->ft_srv || tar->ft_exp); | |
d7e09d03 PT |
161 | |
162 | if (fld->lcf_flags != LUSTRE_FLD_INIT) { | |
b0f5aad5 | 163 | CERROR("%s: Attempt to add target %s (idx %llu) on fly - skip it\n", |
cf677593 | 164 | fld->lcf_name, name, tar->ft_idx); |
0a3bdb00 | 165 | return 0; |
d7e09d03 | 166 | } |
cb6ec7f6 | 167 | CDEBUG(D_INFO, "%s: Adding target %s (idx %llu)\n", |
cf677593 | 168 | fld->lcf_name, name, tar->ft_idx); |
d7e09d03 | 169 | |
2e651013 | 170 | target = kzalloc(sizeof(*target), GFP_NOFS); |
812f2059 | 171 | if (!target) |
0a3bdb00 | 172 | return -ENOMEM; |
d7e09d03 PT |
173 | |
174 | spin_lock(&fld->lcf_lock); | |
175 | list_for_each_entry(tmp, &fld->lcf_targets, ft_chain) { | |
176 | if (tmp->ft_idx == tar->ft_idx) { | |
177 | spin_unlock(&fld->lcf_lock); | |
2e651013 | 178 | kfree(target); |
b0f5aad5 | 179 | CERROR("Target %s exists in FLD and known as %s:#%llu\n", |
d7e09d03 | 180 | name, fld_target_name(tmp), tmp->ft_idx); |
0a3bdb00 | 181 | return -EEXIST; |
d7e09d03 PT |
182 | } |
183 | } | |
184 | ||
185 | target->ft_exp = tar->ft_exp; | |
6ac49ca5 | 186 | if (target->ft_exp) |
d7e09d03 PT |
187 | class_export_get(target->ft_exp); |
188 | target->ft_srv = tar->ft_srv; | |
189 | target->ft_idx = tar->ft_idx; | |
190 | ||
cf677593 | 191 | list_add_tail(&target->ft_chain, &fld->lcf_targets); |
d7e09d03 PT |
192 | |
193 | fld->lcf_count++; | |
194 | spin_unlock(&fld->lcf_lock); | |
195 | ||
0a3bdb00 | 196 | return 0; |
d7e09d03 PT |
197 | } |
198 | EXPORT_SYMBOL(fld_client_add_target); | |
199 | ||
200 | /* Remove export from FLD */ | |
201 | int fld_client_del_target(struct lu_client_fld *fld, __u64 idx) | |
202 | { | |
203 | struct lu_fld_target *target, *tmp; | |
d7e09d03 PT |
204 | |
205 | spin_lock(&fld->lcf_lock); | |
cf677593 | 206 | list_for_each_entry_safe(target, tmp, &fld->lcf_targets, ft_chain) { |
d7e09d03 PT |
207 | if (target->ft_idx == idx) { |
208 | fld->lcf_count--; | |
209 | list_del(&target->ft_chain); | |
210 | spin_unlock(&fld->lcf_lock); | |
211 | ||
6ac49ca5 | 212 | if (target->ft_exp) |
d7e09d03 PT |
213 | class_export_put(target->ft_exp); |
214 | ||
2e651013 | 215 | kfree(target); |
0a3bdb00 | 216 | return 0; |
d7e09d03 PT |
217 | } |
218 | } | |
219 | spin_unlock(&fld->lcf_lock); | |
0a3bdb00 | 220 | return -ENOENT; |
d7e09d03 PT |
221 | } |
222 | EXPORT_SYMBOL(fld_client_del_target); | |
223 | ||
82765049 | 224 | static struct dentry *fld_debugfs_dir; |
e62e5d92 | 225 | |
82765049 | 226 | static int fld_client_debugfs_init(struct lu_client_fld *fld) |
d7e09d03 PT |
227 | { |
228 | int rc; | |
d7e09d03 | 229 | |
82765049 DE |
230 | fld->lcf_debugfs_entry = ldebugfs_register(fld->lcf_name, |
231 | fld_debugfs_dir, | |
232 | NULL, NULL); | |
d7e09d03 | 233 | |
82765049 DE |
234 | if (IS_ERR_OR_NULL(fld->lcf_debugfs_entry)) { |
235 | CERROR("%s: LdebugFS failed in fld-init\n", fld->lcf_name); | |
236 | rc = fld->lcf_debugfs_entry ? PTR_ERR(fld->lcf_debugfs_entry) | |
237 | : -ENOMEM; | |
238 | fld->lcf_debugfs_entry = NULL; | |
0a3bdb00 | 239 | return rc; |
d7e09d03 PT |
240 | } |
241 | ||
82765049 DE |
242 | rc = ldebugfs_add_vars(fld->lcf_debugfs_entry, |
243 | fld_client_debugfs_list, fld); | |
d7e09d03 | 244 | if (rc) { |
82765049 | 245 | CERROR("%s: Can't init FLD debufs, rc %d\n", fld->lcf_name, rc); |
89180ca7 | 246 | goto out_cleanup; |
d7e09d03 PT |
247 | } |
248 | ||
0a3bdb00 | 249 | return 0; |
d7e09d03 PT |
250 | |
251 | out_cleanup: | |
82765049 | 252 | fld_client_debugfs_fini(fld); |
d7e09d03 PT |
253 | return rc; |
254 | } | |
255 | ||
82765049 | 256 | void fld_client_debugfs_fini(struct lu_client_fld *fld) |
d7e09d03 | 257 | { |
82765049 DE |
258 | if (!IS_ERR_OR_NULL(fld->lcf_debugfs_entry)) |
259 | ldebugfs_remove(&fld->lcf_debugfs_entry); | |
d7e09d03 | 260 | } |
82765049 | 261 | EXPORT_SYMBOL(fld_client_debugfs_fini); |
d7e09d03 PT |
262 | |
263 | static inline int hash_is_sane(int hash) | |
264 | { | |
265 | return (hash >= 0 && hash < ARRAY_SIZE(fld_hash)); | |
266 | } | |
267 | ||
268 | int fld_client_init(struct lu_client_fld *fld, | |
269 | const char *prefix, int hash) | |
270 | { | |
271 | int cache_size, cache_threshold; | |
272 | int rc; | |
d7e09d03 | 273 | |
d7e09d03 PT |
274 | snprintf(fld->lcf_name, sizeof(fld->lcf_name), |
275 | "cli-%s", prefix); | |
276 | ||
277 | if (!hash_is_sane(hash)) { | |
278 | CERROR("%s: Wrong hash function %#x\n", | |
279 | fld->lcf_name, hash); | |
0a3bdb00 | 280 | return -EINVAL; |
d7e09d03 PT |
281 | } |
282 | ||
283 | fld->lcf_count = 0; | |
284 | spin_lock_init(&fld->lcf_lock); | |
285 | fld->lcf_hash = &fld_hash[hash]; | |
286 | fld->lcf_flags = LUSTRE_FLD_INIT; | |
287 | INIT_LIST_HEAD(&fld->lcf_targets); | |
288 | ||
289 | cache_size = FLD_CLIENT_CACHE_SIZE / | |
290 | sizeof(struct fld_cache_entry); | |
291 | ||
292 | cache_threshold = cache_size * | |
293 | FLD_CLIENT_CACHE_THRESHOLD / 100; | |
294 | ||
295 | fld->lcf_cache = fld_cache_init(fld->lcf_name, | |
296 | cache_size, cache_threshold); | |
297 | if (IS_ERR(fld->lcf_cache)) { | |
298 | rc = PTR_ERR(fld->lcf_cache); | |
299 | fld->lcf_cache = NULL; | |
89180ca7 | 300 | goto out; |
d7e09d03 PT |
301 | } |
302 | ||
82765049 | 303 | rc = fld_client_debugfs_init(fld); |
d7e09d03 | 304 | if (rc) |
89180ca7 | 305 | goto out; |
d7e09d03 PT |
306 | out: |
307 | if (rc) | |
308 | fld_client_fini(fld); | |
309 | else | |
310 | CDEBUG(D_INFO, "%s: Using \"%s\" hash\n", | |
311 | fld->lcf_name, fld->lcf_hash->fh_name); | |
312 | return rc; | |
313 | } | |
314 | EXPORT_SYMBOL(fld_client_init); | |
315 | ||
316 | void fld_client_fini(struct lu_client_fld *fld) | |
317 | { | |
318 | struct lu_fld_target *target, *tmp; | |
d7e09d03 PT |
319 | |
320 | spin_lock(&fld->lcf_lock); | |
cf677593 | 321 | list_for_each_entry_safe(target, tmp, &fld->lcf_targets, ft_chain) { |
d7e09d03 PT |
322 | fld->lcf_count--; |
323 | list_del(&target->ft_chain); | |
6ac49ca5 | 324 | if (target->ft_exp) |
d7e09d03 | 325 | class_export_put(target->ft_exp); |
2e651013 | 326 | kfree(target); |
d7e09d03 PT |
327 | } |
328 | spin_unlock(&fld->lcf_lock); | |
329 | ||
6ac49ca5 | 330 | if (fld->lcf_cache) { |
d7e09d03 PT |
331 | if (!IS_ERR(fld->lcf_cache)) |
332 | fld_cache_fini(fld->lcf_cache); | |
333 | fld->lcf_cache = NULL; | |
334 | } | |
d7e09d03 PT |
335 | } |
336 | EXPORT_SYMBOL(fld_client_fini); | |
337 | ||
338 | int fld_client_rpc(struct obd_export *exp, | |
b78c2b9b | 339 | struct lu_seq_range *range, __u32 fld_op, |
340 | struct ptlrpc_request **reqp) | |
d7e09d03 | 341 | { |
b78c2b9b | 342 | struct ptlrpc_request *req = NULL; |
d7e09d03 PT |
343 | struct lu_seq_range *prange; |
344 | __u32 *op; | |
b78c2b9b | 345 | int rc = 0; |
d7e09d03 | 346 | struct obd_import *imp; |
d7e09d03 | 347 | |
6ac49ca5 | 348 | LASSERT(exp); |
d7e09d03 PT |
349 | |
350 | imp = class_exp2cliimp(exp); | |
b78c2b9b | 351 | switch (fld_op) { |
352 | case FLD_QUERY: | |
353 | req = ptlrpc_request_alloc_pack(imp, &RQF_FLD_QUERY, | |
354 | LUSTRE_MDS_VERSION, FLD_QUERY); | |
355 | if (!req) | |
356 | return -ENOMEM; | |
357 | ||
358 | /* | |
359 | * XXX: only needed when talking to old server(< 2.6), it should | |
360 | * be removed when < 2.6 server is not supported | |
361 | */ | |
362 | op = req_capsule_client_get(&req->rq_pill, &RMF_FLD_OPC); | |
363 | *op = FLD_LOOKUP; | |
364 | ||
365 | if (imp->imp_connect_flags_orig & OBD_CONNECT_MDS_MDS) | |
366 | req->rq_allow_replay = 1; | |
367 | break; | |
368 | case FLD_READ: | |
369 | req = ptlrpc_request_alloc_pack(imp, &RQF_FLD_READ, | |
370 | LUSTRE_MDS_VERSION, FLD_READ); | |
371 | if (!req) | |
372 | return -ENOMEM; | |
373 | ||
374 | req_capsule_set_size(&req->rq_pill, &RMF_GENERIC_DATA, | |
375 | RCL_SERVER, PAGE_SIZE); | |
376 | break; | |
377 | default: | |
378 | rc = -EINVAL; | |
379 | break; | |
380 | } | |
381 | if (rc) | |
382 | return rc; | |
d7e09d03 PT |
383 | |
384 | prange = req_capsule_client_get(&req->rq_pill, &RMF_FLD_MDFLD); | |
385 | *prange = *range; | |
d7e09d03 PT |
386 | ptlrpc_request_set_replen(req); |
387 | req->rq_request_portal = FLD_REQUEST_PORTAL; | |
a3310525 | 388 | req->rq_reply_portal = MDC_REPLY_PORTAL; |
d7e09d03 PT |
389 | ptlrpc_at_set_req_timeout(req); |
390 | ||
1d5d5ec1 | 391 | obd_get_request_slot(&exp->exp_obd->u.cli); |
d7e09d03 | 392 | rc = ptlrpc_queue_wait(req); |
1d5d5ec1 | 393 | obd_put_request_slot(&exp->exp_obd->u.cli); |
d7e09d03 | 394 | if (rc) |
89180ca7 | 395 | goto out_req; |
d7e09d03 | 396 | |
b78c2b9b | 397 | if (fld_op == FLD_QUERY) { |
398 | prange = req_capsule_server_get(&req->rq_pill, &RMF_FLD_MDFLD); | |
399 | if (!prange) { | |
400 | rc = -EFAULT; | |
401 | goto out_req; | |
402 | } | |
403 | *range = *prange; | |
89180ca7 | 404 | } |
b78c2b9b | 405 | |
d7e09d03 | 406 | out_req: |
b78c2b9b | 407 | if (rc || !reqp) { |
408 | ptlrpc_req_finished(req); | |
409 | req = NULL; | |
410 | } | |
411 | ||
412 | if (reqp) | |
413 | *reqp = req; | |
414 | ||
d7e09d03 PT |
415 | return rc; |
416 | } | |
417 | ||
114acca8 | 418 | int fld_client_lookup(struct lu_client_fld *fld, u64 seq, u32 *mds, |
d7e09d03 PT |
419 | __u32 flags, const struct lu_env *env) |
420 | { | |
421 | struct lu_seq_range res = { 0 }; | |
422 | struct lu_fld_target *target; | |
423 | int rc; | |
d7e09d03 PT |
424 | |
425 | fld->lcf_flags |= LUSTRE_FLD_RUN; | |
426 | ||
427 | rc = fld_cache_lookup(fld->lcf_cache, seq, &res); | |
428 | if (rc == 0) { | |
429 | *mds = res.lsr_index; | |
0a3bdb00 | 430 | return 0; |
d7e09d03 PT |
431 | } |
432 | ||
433 | /* Can not find it in the cache */ | |
434 | target = fld_client_get_target(fld, seq); | |
6ac49ca5 | 435 | LASSERT(target); |
d7e09d03 | 436 | |
55f5a824 | 437 | CDEBUG(D_INFO, "%s: Lookup fld entry (seq: %#llx) on target %s (idx %llu)\n", |
cf677593 | 438 | fld->lcf_name, seq, fld_target_name(target), target->ft_idx); |
d7e09d03 PT |
439 | |
440 | res.lsr_start = seq; | |
441 | fld_range_set_type(&res, flags); | |
b78c2b9b | 442 | rc = fld_client_rpc(target->ft_exp, &res, FLD_QUERY, NULL); |
d7e09d03 PT |
443 | |
444 | if (rc == 0) { | |
445 | *mds = res.lsr_index; | |
446 | ||
447 | fld_cache_insert(fld->lcf_cache, &res); | |
448 | } | |
0a3bdb00 | 449 | return rc; |
d7e09d03 PT |
450 | } |
451 | EXPORT_SYMBOL(fld_client_lookup); | |
452 | ||
453 | void fld_client_flush(struct lu_client_fld *fld) | |
454 | { | |
455 | fld_cache_flush(fld->lcf_cache); | |
456 | } | |
457 | EXPORT_SYMBOL(fld_client_flush); | |
e62e5d92 | 458 | |
e0f94113 | 459 | static int __init fld_init(void) |
e62e5d92 | 460 | { |
82765049 DE |
461 | fld_debugfs_dir = ldebugfs_register(LUSTRE_FLD_NAME, |
462 | debugfs_lustre_root, | |
463 | NULL, NULL); | |
464 | return PTR_ERR_OR_ZERO(fld_debugfs_dir); | |
e62e5d92 LX |
465 | } |
466 | ||
e0f94113 | 467 | static void __exit fld_exit(void) |
e62e5d92 | 468 | { |
82765049 DE |
469 | if (!IS_ERR_OR_NULL(fld_debugfs_dir)) |
470 | ldebugfs_remove(&fld_debugfs_dir); | |
e62e5d92 LX |
471 | } |
472 | ||
a0455471 | 473 | MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>"); |
57878e17 | 474 | MODULE_DESCRIPTION("Lustre FID Location Database"); |
5b0e50b9 | 475 | MODULE_VERSION(LUSTRE_VERSION_STRING); |
e62e5d92 LX |
476 | MODULE_LICENSE("GPL"); |
477 | ||
e0f94113 AD |
478 | module_init(fld_init) |
479 | module_exit(fld_exit) |