Commit | Line | Data |
---|---|---|
d7e09d03 PT |
1 | /* |
2 | * GPL HEADER START | |
3 | * | |
4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 only, | |
8 | * as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * General Public License version 2 for more details (a copy is included | |
14 | * in the LICENSE file that accompanied this code). | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * version 2 along with this program; If not, see | |
6a5b99a4 | 18 | * http://www.gnu.org/licenses/gpl-2.0.html |
d7e09d03 | 19 | * |
d7e09d03 PT |
20 | * GPL HEADER END |
21 | */ | |
22 | /* | |
23 | * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. | |
24 | * Use is subject to license terms. | |
25 | * | |
26 | * Copyright (c) 2012, Intel Corporation. | |
27 | */ | |
28 | /* | |
29 | * This file is part of Lustre, http://www.lustre.org/ | |
30 | * Lustre is a trademark of Sun Microsystems, Inc. | |
31 | * | |
32 | * lnet/lnet/lib-md.c | |
33 | * | |
34 | * Memory Descriptor management routines | |
35 | */ | |
36 | ||
37 | #define DEBUG_SUBSYSTEM S_LNET | |
38 | ||
9fdaf8c0 | 39 | #include "../../include/linux/lnet/lib-lnet.h" |
d7e09d03 PT |
40 | |
41 | /* must be called with lnet_res_lock held */ | |
42 | void | |
43 | lnet_md_unlink(lnet_libmd_t *md) | |
44 | { | |
5fd88337 | 45 | if (!(md->md_flags & LNET_MD_FLAG_ZOMBIE)) { |
d7e09d03 PT |
46 | /* first unlink attempt... */ |
47 | lnet_me_t *me = md->md_me; | |
48 | ||
49 | md->md_flags |= LNET_MD_FLAG_ZOMBIE; | |
50 | ||
4420cfd3 JS |
51 | /* |
52 | * Disassociate from ME (if any), | |
242c7b52 | 53 | * and unlink it if it was created |
4420cfd3 JS |
54 | * with LNET_UNLINK |
55 | */ | |
06ace26e | 56 | if (me) { |
d7e09d03 PT |
57 | /* detach MD from portal */ |
58 | lnet_ptl_detach_md(me, md); | |
59 | if (me->me_unlink == LNET_UNLINK) | |
60 | lnet_me_unlink(me); | |
61 | } | |
62 | ||
63 | /* ensure all future handle lookups fail */ | |
64 | lnet_res_lh_invalidate(&md->md_lh); | |
65 | } | |
66 | ||
5fd88337 | 67 | if (md->md_refcount) { |
d7e09d03 PT |
68 | CDEBUG(D_NET, "Queueing unlink of md %p\n", md); |
69 | return; | |
70 | } | |
71 | ||
72 | CDEBUG(D_NET, "Unlinking md %p\n", md); | |
73 | ||
06ace26e | 74 | if (md->md_eq) { |
7e7ab095 | 75 | int cpt = lnet_cpt_of_cookie(md->md_lh.lh_cookie); |
d7e09d03 PT |
76 | |
77 | LASSERT(*md->md_eq->eq_refs[cpt] > 0); | |
78 | (*md->md_eq->eq_refs[cpt])--; | |
79 | } | |
80 | ||
81 | LASSERT(!list_empty(&md->md_list)); | |
82 | list_del_init(&md->md_list); | |
d9c90615 | 83 | lnet_md_free(md); |
d7e09d03 PT |
84 | } |
85 | ||
86 | static int | |
87 | lnet_md_build(lnet_libmd_t *lmd, lnet_md_t *umd, int unlink) | |
88 | { | |
7e7ab095 | 89 | int i; |
d7e09d03 | 90 | unsigned int niov; |
7e7ab095 | 91 | int total_length = 0; |
d7e09d03 PT |
92 | |
93 | lmd->md_me = NULL; | |
94 | lmd->md_start = umd->start; | |
95 | lmd->md_offset = 0; | |
96 | lmd->md_max_size = umd->max_size; | |
97 | lmd->md_options = umd->options; | |
98 | lmd->md_user_ptr = umd->user_ptr; | |
99 | lmd->md_eq = NULL; | |
100 | lmd->md_threshold = umd->threshold; | |
101 | lmd->md_refcount = 0; | |
102 | lmd->md_flags = (unlink == LNET_UNLINK) ? LNET_MD_FLAG_AUTO_UNLINK : 0; | |
103 | ||
5fd88337 JS |
104 | if (umd->options & LNET_MD_IOVEC) { |
105 | if (umd->options & LNET_MD_KIOV) /* Can't specify both */ | |
d7e09d03 PT |
106 | return -EINVAL; |
107 | ||
d3d3d37a JS |
108 | niov = umd->length; |
109 | lmd->md_niov = umd->length; | |
d7e09d03 | 110 | memcpy(lmd->md_iov.iov, umd->start, |
fc8b040d | 111 | niov * sizeof(lmd->md_iov.iov[0])); |
d7e09d03 PT |
112 | |
113 | for (i = 0; i < (int)niov; i++) { | |
114 | /* We take the base address on trust */ | |
242c7b52 JL |
115 | /* invalid length */ |
116 | if (lmd->md_iov.iov[i].iov_len <= 0) | |
d7e09d03 PT |
117 | return -EINVAL; |
118 | ||
119 | total_length += lmd->md_iov.iov[i].iov_len; | |
120 | } | |
121 | ||
122 | lmd->md_length = total_length; | |
123 | ||
5fd88337 | 124 | if ((umd->options & LNET_MD_MAX_SIZE) && /* use max size */ |
d7e09d03 | 125 | (umd->max_size < 0 || |
be82d9b2 | 126 | umd->max_size > total_length)) /* illegal max_size */ |
d7e09d03 PT |
127 | return -EINVAL; |
128 | ||
5fd88337 | 129 | } else if (umd->options & LNET_MD_KIOV) { |
d3d3d37a JS |
130 | niov = umd->length; |
131 | lmd->md_niov = umd->length; | |
d7e09d03 | 132 | memcpy(lmd->md_iov.kiov, umd->start, |
fc8b040d | 133 | niov * sizeof(lmd->md_iov.kiov[0])); |
d7e09d03 PT |
134 | |
135 | for (i = 0; i < (int)niov; i++) { | |
136 | /* We take the page pointer on trust */ | |
65ffc679 AV |
137 | if (lmd->md_iov.kiov[i].bv_offset + |
138 | lmd->md_iov.kiov[i].bv_len > PAGE_SIZE) | |
d7e09d03 PT |
139 | return -EINVAL; /* invalid length */ |
140 | ||
65ffc679 | 141 | total_length += lmd->md_iov.kiov[i].bv_len; |
d7e09d03 PT |
142 | } |
143 | ||
144 | lmd->md_length = total_length; | |
145 | ||
5fd88337 | 146 | if ((umd->options & LNET_MD_MAX_SIZE) && /* max size used */ |
d7e09d03 | 147 | (umd->max_size < 0 || |
be82d9b2 | 148 | umd->max_size > total_length)) /* illegal max_size */ |
d7e09d03 PT |
149 | return -EINVAL; |
150 | } else { /* contiguous */ | |
151 | lmd->md_length = umd->length; | |
d3d3d37a JS |
152 | niov = 1; |
153 | lmd->md_niov = 1; | |
d7e09d03 PT |
154 | lmd->md_iov.iov[0].iov_base = umd->start; |
155 | lmd->md_iov.iov[0].iov_len = umd->length; | |
156 | ||
5fd88337 | 157 | if ((umd->options & LNET_MD_MAX_SIZE) && /* max size used */ |
d7e09d03 | 158 | (umd->max_size < 0 || |
be82d9b2 | 159 | umd->max_size > (int)umd->length)) /* illegal max_size */ |
d7e09d03 PT |
160 | return -EINVAL; |
161 | } | |
162 | ||
163 | return 0; | |
164 | } | |
165 | ||
166 | /* must be called with resource lock held */ | |
167 | static int | |
168 | lnet_md_link(lnet_libmd_t *md, lnet_handle_eq_t eq_handle, int cpt) | |
169 | { | |
170 | struct lnet_res_container *container = the_lnet.ln_md_containers[cpt]; | |
171 | ||
4420cfd3 JS |
172 | /* |
173 | * NB we are passed an allocated, but inactive md. | |
d7e09d03 PT |
174 | * if we return success, caller may lnet_md_unlink() it. |
175 | * otherwise caller may only lnet_md_free() it. | |
176 | */ | |
4420cfd3 JS |
177 | /* |
178 | * This implementation doesn't know how to create START events or | |
d7e09d03 | 179 | * disable END events. Best to LASSERT our caller is compliant so |
4420cfd3 JS |
180 | * we find out quickly... |
181 | */ | |
182 | /* | |
183 | * TODO - reevaluate what should be here in light of | |
d7e09d03 PT |
184 | * the removal of the start and end events |
185 | * maybe there we shouldn't even allow LNET_EQ_NONE!) | |
06ace26e | 186 | * LASSERT(!eq); |
d7e09d03 PT |
187 | */ |
188 | if (!LNetHandleIsInvalid(eq_handle)) { | |
189 | md->md_eq = lnet_handle2eq(&eq_handle); | |
190 | ||
1f01063f | 191 | if (!md->md_eq) |
d7e09d03 PT |
192 | return -ENOENT; |
193 | ||
194 | (*md->md_eq->eq_refs[cpt])++; | |
195 | } | |
196 | ||
197 | lnet_res_lh_initialize(container, &md->md_lh); | |
198 | ||
199 | LASSERT(list_empty(&md->md_list)); | |
200 | list_add(&md->md_list, &container->rec_active); | |
201 | ||
202 | return 0; | |
203 | } | |
204 | ||
205 | /* must be called with lnet_res_lock held */ | |
206 | void | |
207 | lnet_md_deconstruct(lnet_libmd_t *lmd, lnet_md_t *umd) | |
208 | { | |
209 | /* NB this doesn't copy out all the iov entries so when a | |
210 | * discontiguous MD is copied out, the target gets to know the | |
211 | * original iov pointer (in start) and the number of entries it had | |
212 | * and that's all. | |
213 | */ | |
214 | umd->start = lmd->md_start; | |
5fd88337 JS |
215 | umd->length = !(lmd->md_options & |
216 | (LNET_MD_IOVEC | LNET_MD_KIOV)) ? | |
d7e09d03 PT |
217 | lmd->md_length : lmd->md_niov; |
218 | umd->threshold = lmd->md_threshold; | |
219 | umd->max_size = lmd->md_max_size; | |
220 | umd->options = lmd->md_options; | |
221 | umd->user_ptr = lmd->md_user_ptr; | |
222 | lnet_eq2handle(&umd->eq_handle, lmd->md_eq); | |
223 | } | |
224 | ||
f526b20a | 225 | static int |
d7e09d03 PT |
226 | lnet_md_validate(lnet_md_t *umd) |
227 | { | |
5fd88337 | 228 | if (!umd->start && umd->length) { |
d7e09d03 PT |
229 | CERROR("MD start pointer can not be NULL with length %u\n", |
230 | umd->length); | |
231 | return -EINVAL; | |
232 | } | |
233 | ||
5fd88337 | 234 | if ((umd->options & (LNET_MD_KIOV | LNET_MD_IOVEC)) && |
d7e09d03 PT |
235 | umd->length > LNET_MAX_IOV) { |
236 | CERROR("Invalid option: too many fragments %u, %d max\n", | |
237 | umd->length, LNET_MAX_IOV); | |
238 | return -EINVAL; | |
239 | } | |
240 | ||
241 | return 0; | |
242 | } | |
243 | ||
244 | /** | |
245 | * Create a memory descriptor and attach it to a ME | |
246 | * | |
247 | * \param meh A handle for a ME to associate the new MD with. | |
248 | * \param umd Provides initial values for the user-visible parts of a MD. | |
249 | * Other than its use for initialization, there is no linkage between this | |
250 | * structure and the MD maintained by the LNet. | |
251 | * \param unlink A flag to indicate whether the MD is automatically unlinked | |
252 | * when it becomes inactive, either because the operation threshold drops to | |
253 | * zero or because the available memory becomes less than \a umd.max_size. | |
254 | * (Note that the check for unlinking a MD only occurs after the completion | |
255 | * of a successful operation on the MD.) The value LNET_UNLINK enables auto | |
256 | * unlinking; the value LNET_RETAIN disables it. | |
257 | * \param handle On successful returns, a handle to the newly created MD is | |
258 | * saved here. This handle can be used later in LNetMDUnlink(). | |
259 | * | |
260 | * \retval 0 On success. | |
261 | * \retval -EINVAL If \a umd is not valid. | |
262 | * \retval -ENOMEM If new MD cannot be allocated. | |
263 | * \retval -ENOENT Either \a meh or \a umd.eq_handle does not point to a | |
264 | * valid object. Note that it's OK to supply a NULL \a umd.eq_handle by | |
265 | * calling LNetInvalidateHandle() on it. | |
266 | * \retval -EBUSY If the ME pointed to by \a meh is already associated with | |
267 | * a MD. | |
268 | */ | |
269 | int | |
270 | LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd, | |
271 | lnet_unlink_t unlink, lnet_handle_md_t *handle) | |
272 | { | |
fc8b040d JL |
273 | LIST_HEAD(matches); |
274 | LIST_HEAD(drops); | |
7e7ab095 MS |
275 | struct lnet_me *me; |
276 | struct lnet_libmd *md; | |
277 | int cpt; | |
278 | int rc; | |
d7e09d03 | 279 | |
fc8b040d | 280 | LASSERT(the_lnet.ln_refcount > 0); |
d7e09d03 | 281 | |
5fd88337 | 282 | if (lnet_md_validate(&umd)) |
d7e09d03 PT |
283 | return -EINVAL; |
284 | ||
5fd88337 | 285 | if (!(umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT))) { |
d7e09d03 PT |
286 | CERROR("Invalid option: no MD_OP set\n"); |
287 | return -EINVAL; | |
288 | } | |
289 | ||
290 | md = lnet_md_alloc(&umd); | |
1f01063f | 291 | if (!md) |
d7e09d03 PT |
292 | return -ENOMEM; |
293 | ||
294 | rc = lnet_md_build(md, &umd, unlink); | |
295 | cpt = lnet_cpt_of_cookie(meh.cookie); | |
296 | ||
297 | lnet_res_lock(cpt); | |
5fd88337 | 298 | if (rc) |
d7e09d03 PT |
299 | goto failed; |
300 | ||
301 | me = lnet_handle2me(&meh); | |
1f01063f | 302 | if (!me) |
d7e09d03 | 303 | rc = -ENOENT; |
06ace26e | 304 | else if (me->me_md) |
d7e09d03 PT |
305 | rc = -EBUSY; |
306 | else | |
307 | rc = lnet_md_link(md, umd.eq_handle, cpt); | |
308 | ||
5fd88337 | 309 | if (rc) |
d7e09d03 PT |
310 | goto failed; |
311 | ||
4420cfd3 JS |
312 | /* |
313 | * attach this MD to portal of ME and check if it matches any | |
314 | * blocked msgs on this portal | |
315 | */ | |
d7e09d03 PT |
316 | lnet_ptl_attach_md(me, md, &matches, &drops); |
317 | ||
318 | lnet_md2handle(handle, md); | |
319 | ||
320 | lnet_res_unlock(cpt); | |
321 | ||
322 | lnet_drop_delayed_msg_list(&drops, "Bad match"); | |
323 | lnet_recv_delayed_msg_list(&matches); | |
324 | ||
325 | return 0; | |
326 | ||
327 | failed: | |
d9c90615 | 328 | lnet_md_free(md); |
d7e09d03 PT |
329 | |
330 | lnet_res_unlock(cpt); | |
331 | return rc; | |
332 | } | |
333 | EXPORT_SYMBOL(LNetMDAttach); | |
334 | ||
335 | /** | |
336 | * Create a "free floating" memory descriptor - a MD that is not associated | |
337 | * with a ME. Such MDs are usually used in LNetPut() and LNetGet() operations. | |
338 | * | |
339 | * \param umd,unlink See the discussion for LNetMDAttach(). | |
340 | * \param handle On successful returns, a handle to the newly created MD is | |
341 | * saved here. This handle can be used later in LNetMDUnlink(), LNetPut(), | |
342 | * and LNetGet() operations. | |
343 | * | |
344 | * \retval 0 On success. | |
345 | * \retval -EINVAL If \a umd is not valid. | |
346 | * \retval -ENOMEM If new MD cannot be allocated. | |
347 | * \retval -ENOENT \a umd.eq_handle does not point to a valid EQ. Note that | |
348 | * it's OK to supply a NULL \a umd.eq_handle by calling | |
349 | * LNetInvalidateHandle() on it. | |
350 | */ | |
351 | int | |
352 | LNetMDBind(lnet_md_t umd, lnet_unlink_t unlink, lnet_handle_md_t *handle) | |
353 | { | |
7e7ab095 MS |
354 | lnet_libmd_t *md; |
355 | int cpt; | |
356 | int rc; | |
d7e09d03 | 357 | |
fc8b040d | 358 | LASSERT(the_lnet.ln_refcount > 0); |
d7e09d03 | 359 | |
5fd88337 | 360 | if (lnet_md_validate(&umd)) |
d7e09d03 PT |
361 | return -EINVAL; |
362 | ||
5fd88337 | 363 | if ((umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT))) { |
d7e09d03 PT |
364 | CERROR("Invalid option: GET|PUT illegal on active MDs\n"); |
365 | return -EINVAL; | |
366 | } | |
367 | ||
368 | md = lnet_md_alloc(&umd); | |
1f01063f | 369 | if (!md) |
d7e09d03 PT |
370 | return -ENOMEM; |
371 | ||
372 | rc = lnet_md_build(md, &umd, unlink); | |
373 | ||
374 | cpt = lnet_res_lock_current(); | |
5fd88337 | 375 | if (rc) |
d7e09d03 PT |
376 | goto failed; |
377 | ||
378 | rc = lnet_md_link(md, umd.eq_handle, cpt); | |
5fd88337 | 379 | if (rc) |
d7e09d03 PT |
380 | goto failed; |
381 | ||
382 | lnet_md2handle(handle, md); | |
383 | ||
384 | lnet_res_unlock(cpt); | |
385 | return 0; | |
386 | ||
387 | failed: | |
d9c90615 | 388 | lnet_md_free(md); |
d7e09d03 PT |
389 | |
390 | lnet_res_unlock(cpt); | |
391 | return rc; | |
392 | } | |
393 | EXPORT_SYMBOL(LNetMDBind); | |
394 | ||
395 | /** | |
396 | * Unlink the memory descriptor from any ME it may be linked to and release | |
dee2857e IH |
397 | * the internal resources associated with it. As a result, active messages |
398 | * associated with the MD may get aborted. | |
d7e09d03 PT |
399 | * |
400 | * This function does not free the memory region associated with the MD; | |
401 | * i.e., the memory the user allocated for this MD. If the ME associated with | |
402 | * this MD is not NULL and was created with auto unlink enabled, the ME is | |
403 | * unlinked as well (see LNetMEAttach()). | |
404 | * | |
405 | * Explicitly unlinking a MD via this function call has the same behavior as | |
406 | * a MD that has been automatically unlinked, except that no LNET_EVENT_UNLINK | |
407 | * is generated in the latter case. | |
408 | * | |
409 | * An unlinked event can be reported in two ways: | |
410 | * - If there's no pending operations on the MD, it's unlinked immediately | |
411 | * and an LNET_EVENT_UNLINK event is logged before this function returns. | |
412 | * - Otherwise, the MD is only marked for deletion when this function | |
413 | * returns, and the unlinked event will be piggybacked on the event of | |
414 | * the completion of the last operation by setting the unlinked field of | |
415 | * the event. No dedicated LNET_EVENT_UNLINK event is generated. | |
416 | * | |
417 | * Note that in both cases the unlinked field of the event is always set; no | |
418 | * more event will happen on the MD after such an event is logged. | |
419 | * | |
420 | * \param mdh A handle for the MD to be unlinked. | |
421 | * | |
422 | * \retval 0 On success. | |
423 | * \retval -ENOENT If \a mdh does not point to a valid MD object. | |
424 | */ | |
425 | int | |
fc8b040d | 426 | LNetMDUnlink(lnet_handle_md_t mdh) |
d7e09d03 | 427 | { |
7e7ab095 MS |
428 | lnet_event_t ev; |
429 | lnet_libmd_t *md; | |
430 | int cpt; | |
d7e09d03 | 431 | |
d7e09d03 PT |
432 | LASSERT(the_lnet.ln_refcount > 0); |
433 | ||
434 | cpt = lnet_cpt_of_cookie(mdh.cookie); | |
435 | lnet_res_lock(cpt); | |
436 | ||
437 | md = lnet_handle2md(&mdh); | |
1f01063f | 438 | if (!md) { |
d7e09d03 PT |
439 | lnet_res_unlock(cpt); |
440 | return -ENOENT; | |
441 | } | |
442 | ||
dee2857e | 443 | md->md_flags |= LNET_MD_FLAG_ABORTED; |
4420cfd3 JS |
444 | /* |
445 | * If the MD is busy, lnet_md_unlink just marks it for deletion, and | |
dee2857e | 446 | * when the LND is done, the completion event flags that the MD was |
4420cfd3 JS |
447 | * unlinked. Otherwise, we enqueue an event now... |
448 | */ | |
5fd88337 | 449 | if (md->md_eq && !md->md_refcount) { |
d7e09d03 PT |
450 | lnet_build_unlink_event(md, &ev); |
451 | lnet_eq_enqueue_event(md->md_eq, &ev); | |
452 | } | |
453 | ||
454 | lnet_md_unlink(md); | |
455 | ||
456 | lnet_res_unlock(cpt); | |
457 | return 0; | |
458 | } | |
459 | EXPORT_SYMBOL(LNetMDUnlink); |