/*
 * Copyright (c) 2015 Oracle. All rights reserved.
 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
 */

/* Lightweight memory registration using Fast Registration Work
 * Requests (FRWR). Sometimes also referred to as FRMR mode.
 *
 * FRWR features ordered asynchronous registration and deregistration
 * of arbitrarily sized memory regions. This is the fastest and safest
 * but most complex memory registration mode.
 */

/* Normal operation
 *
 * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG
 * Work Request (frwr_op_map). When the RDMA operation is finished, this
 * Memory Region is invalidated using a LOCAL_INV Work Request
 * (frwr_op_unmap).
 *
 * Typically these Work Requests are not signaled, and neither are RDMA
 * SEND Work Requests (with the exception of signaling occasionally to
 * prevent provider work queue overflows). This greatly reduces HCA
 * interrupt workload.
 *
 * As an optimization, frwr_op_unmap marks MRs INVALID before the
 * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on
 * rb_mws immediately so that no work (like managing a linked list
 * under a spinlock) is needed in the completion upcall.
 *
 * But this means that frwr_op_map() can occasionally encounter an MR
 * that is INVALID but the LOCAL_INV WR has not completed. Work Queue
 * ordering prevents a subsequent FAST_REG WR from executing against
 * that MR while it is still being invalidated.
 */
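
/* Illustrative sketch (not part of this file) of the occasional
 * signaling described above: a per-endpoint countdown decides when a
 * send WR asks for a completion so the provider can reclaim send
 * queue slots. The field names here are assumptions for the sake of
 * the example.
 *
 *	if (--ep->rep_cqcount <= 0) {
 *		wr->send_flags |= IB_SEND_SIGNALED;
 *		ep->rep_cqcount = ep->rep_cqinit;
 *	}
 *	rc = ib_post_send(ia->ri_id->qp, wr, &bad_wr);
 */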

/* Transport recovery
 *
 * ->op_map and the transport connect worker cannot run at the same
 * time, but ->op_unmap can fire while the transport connect worker
 * is running. Thus MR recovery is handled in ->op_map, to guarantee
 * that recovered MRs are owned by a sending RPC, and not one where
 * ->op_unmap could fire at the same time transport reconnect is
 * being done.
 *
 * When the underlying transport disconnects, MRs are left in one of
 * three states:
 *
 * INVALID:	The MR was not in use before the QP entered ERROR state.
 *		(Or, the LOCAL_INV WR has not completed or flushed yet).
 *
 * STALE:	The MR was being registered or unregistered when the QP
 *		entered ERROR state, and the pending WR was flushed.
 *
 * VALID:	The MR was registered before the QP entered ERROR state.
 *
 * When frwr_op_map encounters STALE and VALID MRs, they are recovered
 * with ib_dereg_mr and then are re-initialized. Because MR recovery
 * allocates fresh resources, it is deferred to a workqueue, and the
 * recovered MRs are placed back on the rb_mws list when recovery is
 * complete. frwr_op_map allocates another MR for the current RPC while
 * the broken MR is reset.
 *
 * To ensure that frwr_op_map doesn't encounter an MR that is marked
 * INVALID but that is about to be flushed due to a previous transport
 * disconnect, the transport connect worker attempts to drain all
 * pending send queue WRs before the transport is reconnected.
 */

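/* For reference, a minimal sketch of the MR states named above, as
 * reflected by the fr_state values used throughout this file. The
 * enum is assumed to be defined in xprt_rdma.h:
 *
 *	enum rpcrdma_frmr_state {
 *		FRMR_IS_INVALID,	-- ready to be used
 *		FRMR_IS_VALID,		-- in use
 *		FRMR_IS_STALE,		-- failed completion
 *	};
 */
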
#include "xprt_rdma.h"

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

bool
frwr_is_supported(struct rpcrdma_ia *ia)
{
	struct ib_device_attr *attrs = &ia->ri_device->attrs;

	if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
		goto out_not_supported;
	if (attrs->max_fast_reg_page_list_len == 0)
		goto out_not_supported;
	return true;

out_not_supported:
	pr_info("rpcrdma: 'frwr' mode is not supported by device %s\n",
		ia->ri_device->name);
	return false;
}

static int
frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
{
	unsigned int depth = ia->ri_max_frmr_depth;
	struct rpcrdma_frmr *f = &r->frmr;
	int rc;

	f->fr_mr = ib_alloc_mr(ia->ri_pd, IB_MR_TYPE_MEM_REG, depth);
	if (IS_ERR(f->fr_mr))
		goto out_mr_err;

	r->mw_sg = kcalloc(depth, sizeof(*r->mw_sg), GFP_KERNEL);
	if (!r->mw_sg)
		goto out_list_err;

	sg_init_table(r->mw_sg, depth);
	init_completion(&f->fr_linv_done);
	return 0;

out_mr_err:
	rc = PTR_ERR(f->fr_mr);
	dprintk("RPC: %s: ib_alloc_mr status %i\n",
		__func__, rc);
	return rc;

out_list_err:
	rc = -ENOMEM;
	dprintk("RPC: %s: sg allocation failure\n",
		__func__);
	ib_dereg_mr(f->fr_mr);
	return rc;
}

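/* A sketch of how a caller might create and initialize one MR with
 * the ro_init_mr method; the caller shown here is hypothetical and
 * included only to illustrate the contract (the MR is freed by the
 * caller if initialization fails):
 *
 *	struct rpcrdma_mw *r = kzalloc(sizeof(*r), GFP_KERNEL);
 *
 *	if (!r)
 *		return -ENOMEM;
 *	rc = ia->ri_ops->ro_init_mr(ia, r);
 *	if (rc) {
 *		kfree(r);
 *		return rc;
 *	}
 */
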
static void
frwr_op_release_mr(struct rpcrdma_mw *r)
{
	int rc;

	/* Ensure MW is not on any rl_registered list */
	if (!list_empty(&r->mw_list))
		list_del(&r->mw_list);

	rc = ib_dereg_mr(r->frmr.fr_mr);
	if (rc)
		pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
		       r, rc);
	kfree(r->mw_sg);
	kfree(r);
}

static int
__frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
{
	struct rpcrdma_frmr *f = &r->frmr;
	int rc;

	rc = ib_dereg_mr(f->fr_mr);
	if (rc) {
		pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n",
			rc, r);
		return rc;
	}

	f->fr_mr = ib_alloc_mr(ia->ri_pd, IB_MR_TYPE_MEM_REG,
			       ia->ri_max_frmr_depth);
	if (IS_ERR(f->fr_mr)) {
		pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
			PTR_ERR(f->fr_mr), r);
		return PTR_ERR(f->fr_mr);
	}

	dprintk("RPC: %s: recovered FRMR %p\n", __func__, r);
	f->fr_state = FRMR_IS_INVALID;
	return 0;
}

/* Reset of a single FRMR. Generate a fresh rkey by replacing the MR.
 *
 * There's no recovery if this fails. The FRMR is abandoned, but
 * remains in rb_all. It will be cleaned up when the transport is
 * destroyed.
 */
static void
frwr_op_recover_mr(struct rpcrdma_mw *mw)
{
	struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	int rc;

	rc = __frwr_reset_mr(ia, mw);
	ib_dma_unmap_sg(ia->ri_device, mw->mw_sg, mw->mw_nents, mw->mw_dir);
	if (rc)
		goto out_release;

	rpcrdma_put_mw(r_xprt, mw);
	r_xprt->rx_stats.mrs_recovered++;
	return;

out_release:
	pr_err("rpcrdma: FRMR reset failed %d, %p release\n", rc, mw);
	r_xprt->rx_stats.mrs_orphaned++;

	spin_lock(&r_xprt->rx_buf.rb_mwlock);
	list_del(&mw->mw_all);
	spin_unlock(&r_xprt->rx_buf.rb_mwlock);

	frwr_op_release_mr(mw);
}

static int
frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
	     struct rpcrdma_create_data_internal *cdata)
{
	int depth, delta;

	ia->ri_max_frmr_depth =
			min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
			      ia->ri_device->attrs.max_fast_reg_page_list_len);
	dprintk("RPC: %s: device's max FR page list len = %u\n",
		__func__, ia->ri_max_frmr_depth);

	/* Add room for frmr register and invalidate WRs.
	 * 1. FRMR reg WR for head
	 * 2. FRMR invalidate WR for head
	 * 3. N FRMR reg WRs for pagelist
	 * 4. N FRMR invalidate WRs for pagelist
	 * 5. FRMR reg WR for tail
	 * 6. FRMR invalidate WR for tail
	 * 7. The RDMA_SEND WR
	 */
	depth = 7;

	/* Calculate N if the device max FRMR depth is smaller than
	 * RPCRDMA_MAX_DATA_SEGS.
	 */
	if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
		delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frmr_depth;
		do {
			depth += 2; /* FRMR reg + invalidate */
			delta -= ia->ri_max_frmr_depth;
		} while (delta > 0);
	}
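
	/* Worked example with illustrative numbers (not from any
	 * particular device): if the HCA caps its fast_reg page list
	 * at 30 pages and RPCRDMA_MAX_DATA_SEGS is 64, delta starts
	 * at 34 and the loop runs twice: depth goes 7 -> 9 -> 11.
	 * Each RPC can then consume up to 11 send queue entries.
	 */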

	ep->rep_attr.cap.max_send_wr *= depth;
	if (ep->rep_attr.cap.max_send_wr > ia->ri_device->attrs.max_qp_wr) {
		cdata->max_requests = ia->ri_device->attrs.max_qp_wr / depth;
		if (!cdata->max_requests)
			return -EINVAL;
		ep->rep_attr.cap.max_send_wr = cdata->max_requests *
					       depth;
	}

	rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1,
						      RPCRDMA_MAX_DATA_SEGS /
						      ia->ri_max_frmr_depth));
	return 0;
}

/* FRWR mode conveys a list of pages per chunk segment. The
 * maximum length of that list is the FRWR page list depth.
 */
static size_t
frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;

	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
		     RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frmr_depth);
}

static void
__frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_frmr *frmr,
			    const char *wr)
{
	frmr->fr_state = FRMR_IS_STALE;
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("rpcrdma: %s: %s (%u/0x%x)\n",
		       wr, ib_wc_status_msg(wc->status),
		       wc->status, wc->vendor_err);
}

/**
 * frwr_wc_fastreg - Invoked by RDMA provider for each polled FastReg WC
 * @cq: completion queue (ignored)
 * @wc: completed WR
 *
 */
static void
frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
{
	struct rpcrdma_frmr *frmr;
	struct ib_cqe *cqe;

	/* WARNING: Only wr_cqe and status are reliable at this point */
	if (wc->status != IB_WC_SUCCESS) {
		cqe = wc->wr_cqe;
		frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
		__frwr_sendcompletion_flush(wc, frmr, "fastreg");
	}
}

/**
 * frwr_wc_localinv - Invoked by RDMA provider for each polled LocalInv WC
 * @cq: completion queue (ignored)
 * @wc: completed WR
 *
 */
static void
frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
{
	struct rpcrdma_frmr *frmr;
	struct ib_cqe *cqe;

	/* WARNING: Only wr_cqe and status are reliable at this point */
	if (wc->status != IB_WC_SUCCESS) {
		cqe = wc->wr_cqe;
		frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
		__frwr_sendcompletion_flush(wc, frmr, "localinv");
	}
}

/**
 * frwr_wc_localinv_wake - Invoked by RDMA provider for each polled LocalInv WC
 * @cq: completion queue (ignored)
 * @wc: completed WR
 *
 * Awaken anyone waiting for an MR to finish being fenced.
 */
static void
frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
{
	struct rpcrdma_frmr *frmr;
	struct ib_cqe *cqe;

	/* WARNING: Only wr_cqe and status are reliable at this point */
	cqe = wc->wr_cqe;
	frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
	if (wc->status != IB_WC_SUCCESS)
		__frwr_sendcompletion_flush(wc, frmr, "localinv");
	complete_all(&frmr->fr_linv_done);
}

/* Post a REG_MR Work Request to register a memory region
 * for remote access via RDMA READ or RDMA WRITE.
 */
static int
frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
	    int nsegs, bool writing, struct rpcrdma_mw **out)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_mw *mw;
	struct rpcrdma_frmr *frmr;
	struct ib_mr *mr;
	struct ib_reg_wr *reg_wr;
	struct ib_send_wr *bad_wr;
	int rc, i, n, dma_nents;
	u8 key;

	mw = NULL;
	do {
		if (mw)
			rpcrdma_defer_mr_recovery(mw);
		mw = rpcrdma_get_mw(r_xprt);
		if (!mw)
			return -ENOBUFS;
	} while (mw->frmr.fr_state != FRMR_IS_INVALID);
	frmr = &mw->frmr;
	frmr->fr_state = FRMR_IS_VALID;
	mr = frmr->fr_mr;
	reg_wr = &frmr->fr_regwr;

	if (nsegs > ia->ri_max_frmr_depth)
		nsegs = ia->ri_max_frmr_depth;
	for (i = 0; i < nsegs;) {
		if (seg->mr_page)
			sg_set_page(&mw->mw_sg[i],
				    seg->mr_page,
				    seg->mr_len,
				    offset_in_page(seg->mr_offset));
		else
			sg_set_buf(&mw->mw_sg[i], seg->mr_offset,
				   seg->mr_len);

		++seg;
		++i;

		/* Check for holes */
		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	mw->mw_nents = i;
	mw->mw_dir = rpcrdma_data_dir(writing);
	if (i == 0)
		goto out_dmamap_err;

	dma_nents = ib_dma_map_sg(ia->ri_device,
				  mw->mw_sg, mw->mw_nents, mw->mw_dir);
	if (!dma_nents)
		goto out_dmamap_err;

	n = ib_map_mr_sg(mr, mw->mw_sg, mw->mw_nents, NULL, PAGE_SIZE);
	if (unlikely(n != mw->mw_nents))
		goto out_mapmr_err;

	dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n",
		__func__, mw, mw->mw_nents, mr->length);

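	/* The low-order byte of the rkey is a generation "key" that is
	 * advanced before each registration, so that a remote peer
	 * holding an rkey from an earlier use of this MR cannot access
	 * the newly registered memory.
	 */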
	key = (u8)(mr->rkey & 0x000000FF);
	ib_update_fast_reg_key(mr, ++key);

	reg_wr->wr.next = NULL;
	reg_wr->wr.opcode = IB_WR_REG_MR;
	frmr->fr_cqe.done = frwr_wc_fastreg;
	reg_wr->wr.wr_cqe = &frmr->fr_cqe;
	reg_wr->wr.num_sge = 0;
	reg_wr->wr.send_flags = 0;
	reg_wr->mr = mr;
	reg_wr->key = mr->rkey;
	reg_wr->access = writing ?
			 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
			 IB_ACCESS_REMOTE_READ;

	DECR_CQCOUNT(&r_xprt->rx_ep);
	rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
	if (rc)
		goto out_senderr;

	mw->mw_handle = mr->rkey;
	mw->mw_length = mr->length;
	mw->mw_offset = mr->iova;

	*out = mw;
	return mw->mw_nents;

out_dmamap_err:
	pr_err("rpcrdma: failed to dma map sg %p sg_nents %u\n",
	       mw->mw_sg, mw->mw_nents);
	rpcrdma_defer_mr_recovery(mw);
	return -EIO;

out_mapmr_err:
	pr_err("rpcrdma: failed to map mr %p (%u/%u)\n",
	       frmr->fr_mr, n, mw->mw_nents);
	rpcrdma_defer_mr_recovery(mw);
	return -EIO;

out_senderr:
	pr_err("rpcrdma: FRMR registration ib_post_send returned %i\n", rc);
	rpcrdma_defer_mr_recovery(mw);
	return -ENOTCONN;
}

static struct ib_send_wr *
__frwr_prepare_linv_wr(struct rpcrdma_mw *mw)
{
	struct rpcrdma_frmr *f = &mw->frmr;
	struct ib_send_wr *invalidate_wr;

	f->fr_state = FRMR_IS_INVALID;
	invalidate_wr = &f->fr_invwr;

	memset(invalidate_wr, 0, sizeof(*invalidate_wr));
	f->fr_cqe.done = frwr_wc_localinv;
	invalidate_wr->wr_cqe = &f->fr_cqe;
	invalidate_wr->opcode = IB_WR_LOCAL_INV;
	invalidate_wr->ex.invalidate_rkey = f->fr_mr->rkey;

	return invalidate_wr;
}

/* Invalidate all memory regions that were registered for "req".
 *
 * Sleeps until it is safe for the host CPU to access the
 * previously mapped memory regions.
 *
 * Caller ensures that req->rl_registered is not empty.
 */
static void
frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
	struct ib_send_wr *invalidate_wrs, *pos, *prev, *bad_wr;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_mw *mw, *tmp;
	struct rpcrdma_frmr *f;
	int rc;

	dprintk("RPC: %s: req %p\n", __func__, req);

	/* ORDER: Invalidate all of the req's MRs first
	 *
	 * Chain the LOCAL_INV Work Requests and post them with
	 * a single ib_post_send() call.
	 */
	f = NULL;
	invalidate_wrs = pos = prev = NULL;
	list_for_each_entry(mw, &req->rl_registered, mw_list) {
		pos = __frwr_prepare_linv_wr(mw);

		if (!invalidate_wrs)
			invalidate_wrs = pos;
		else
			prev->next = pos;
		prev = pos;
		f = &mw->frmr;
	}

	/* Strong send queue ordering guarantees that when the
	 * last WR in the chain completes, all WRs in the chain
	 * are complete.
	 */
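	/* Sketch (added for illustration): the chain posted below looks
	 * like
	 *
	 *	LOCAL_INV -> LOCAL_INV -> ... -> LOCAL_INV [SIGNALED]
	 *
	 * so the single frwr_wc_localinv_wake completion covers every
	 * WR in the chain.
	 */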
	f->fr_invwr.send_flags = IB_SEND_SIGNALED;
	f->fr_cqe.done = frwr_wc_localinv_wake;
	reinit_completion(&f->fr_linv_done);
	INIT_CQCOUNT(&r_xprt->rx_ep);

	/* Transport disconnect drains the receive CQ before it
	 * replaces the QP. The RPC reply handler won't call us
	 * unless ri_id->qp is a valid pointer.
	 */
	rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr);
	if (rc)
		goto reset_mrs;

	wait_for_completion(&f->fr_linv_done);

	/* ORDER: Now DMA unmap all of the req's MRs, and return
	 * them to the free MW list.
	 */
unmap:
	list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) {
		list_del_init(&mw->mw_list);
		ib_dma_unmap_sg(ia->ri_device,
				mw->mw_sg, mw->mw_nents, mw->mw_dir);
		rpcrdma_put_mw(r_xprt, mw);
	}
	return;

reset_mrs:
	pr_err("rpcrdma: FRMR invalidate ib_post_send returned %i\n", rc);
	rdma_disconnect(ia->ri_id);

	/* Find and reset the MRs in the LOCAL_INV WRs that did not
	 * get posted. This is synchronous, and slow.
	 */
	list_for_each_entry(mw, &req->rl_registered, mw_list) {
		f = &mw->frmr;
		if (mw->frmr.fr_mr->rkey == bad_wr->ex.invalidate_rkey) {
			__frwr_reset_mr(ia, mw);
			bad_wr = bad_wr->next;
		}
	}
	goto unmap;
}

/* Use a slow, safe mechanism to invalidate all memory regions
 * that were registered for "req".
 */
static void
frwr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
		   bool sync)
{
	struct rpcrdma_mw *mw;

	while (!list_empty(&req->rl_registered)) {
		mw = list_first_entry(&req->rl_registered,
				      struct rpcrdma_mw, mw_list);
		list_del_init(&mw->mw_list);

		if (sync)
			frwr_op_recover_mr(mw);
		else
			rpcrdma_defer_mr_recovery(mw);
	}
}

const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
	.ro_map				= frwr_op_map,
	.ro_unmap_sync			= frwr_op_unmap_sync,
	.ro_unmap_safe			= frwr_op_unmap_safe,
	.ro_recover_mr			= frwr_op_recover_mr,
	.ro_open			= frwr_op_open,
	.ro_maxpages			= frwr_op_maxpages,
	.ro_init_mr			= frwr_op_init_mr,
	.ro_release_mr			= frwr_op_release_mr,
	.ro_displayname			= "frwr",
};
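
/* A sketch of how a transport setup path might select this method
 * vector, using the frwr_is_supported() probe defined above. The
 * FMR fallback named here is an assumption for illustration:
 *
 *	if (frwr_is_supported(ia))
 *		ia->ri_ops = &rpcrdma_frwr_memreg_ops;
 *	else if (fmr_is_supported(ia))
 *		ia->ri_ops = &rpcrdma_fmr_memreg_ops;
 *	else
 *		return -EINVAL;
 */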