/*
 * Copyright (c) 2015 Oracle. All rights reserved.
 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
 */

/* Lightweight memory registration using Fast Registration Work
 * Requests (FRWR). Also referred to sometimes as FRMR mode.
 *
 * FRWR features ordered asynchronous registration and deregistration
 * of arbitrarily sized memory regions. This is the fastest and safest
 * but most complex memory registration mode.
 */

/* Normal operation
 *
 * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG
 * Work Request (frwr_op_map). When the RDMA operation is finished, this
 * Memory Region is invalidated using a LOCAL_INV Work Request
 * (frwr_op_unmap).
 *
 * Typically these Work Requests are not signaled, and neither are RDMA
 * SEND Work Requests (with the exception of signaling occasionally to
 * prevent provider work queue overflows). This greatly reduces HCA
 * interrupt workload.
 *
 * As an optimization, frwr_op_unmap marks MRs INVALID before the
 * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on
 * rb_mws immediately so that no work (like managing a linked list
 * under a spinlock) is needed in the completion upcall.
 *
 * But this means that frwr_op_map() can occasionally encounter an MR
 * that is INVALID but the LOCAL_INV WR has not completed. Work Queue
 * ordering prevents a subsequent FAST_REG WR from executing against
 * that MR while it is still being invalidated.
 */

/* Transport recovery
 *
 * ->op_map and the transport connect worker cannot run at the same
 * time, but ->op_unmap can fire while the transport connect worker
 * is running. Thus MR recovery is handled in ->op_map, to guarantee
 * that recovered MRs are owned by a sending RPC, and not one where
 * ->op_unmap could fire at the same time transport reconnect is
 * being done.
 *
 * When the underlying transport disconnects, MRs are left in one of
 * three states:
 *
 * INVALID:	The MR was not in use before the QP entered ERROR state.
 *		(Or, the LOCAL_INV WR has not completed or flushed yet).
 *
 * STALE:	The MR was being registered or unregistered when the QP
 *		entered ERROR state, and the pending WR was flushed.
 *
 * VALID:	The MR was registered before the QP entered ERROR state.
 *
 * When frwr_op_map encounters STALE and VALID MRs, they are recovered
 * with ib_dereg_mr and then are re-initialized. Because MR recovery
 * allocates fresh resources, it is deferred to a workqueue, and the
 * recovered MRs are placed back on the rb_mws list when recovery is
 * complete. frwr_op_map allocates another MR for the current RPC while
 * the broken MR is reset.
 *
 * To ensure that frwr_op_map doesn't encounter an MR that is marked
 * INVALID but that is about to be flushed due to a previous transport
 * disconnect, the transport connect worker attempts to drain all
 * pending send queue WRs before the transport is reconnected.
 */

#include "xprt_rdma.h"

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

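/* Report whether the device advertises the capabilities FRWR relies
 * on: memory management extensions and a non-zero fast-registration
 * page list limit.
 */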
bool
frwr_is_supported(struct rpcrdma_ia *ia)
{
	struct ib_device_attr *attrs = &ia->ri_device->attrs;

	if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
		goto out_not_supported;
	if (attrs->max_fast_reg_page_list_len == 0)
		goto out_not_supported;
	return true;

out_not_supported:
	pr_info("rpcrdma: 'frwr' mode is not supported by device %s\n",
		ia->ri_device->name);
	return false;
}

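/* Allocate the MR and the scatterlist ("depth" entries) backing one
 * rpcrdma_mw.
 */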
static int
__frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, unsigned int depth)
{
	struct rpcrdma_frmr *f = &r->frmr;
	int rc;

	f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth);
	if (IS_ERR(f->fr_mr))
		goto out_mr_err;

	r->mw_sg = kcalloc(depth, sizeof(*r->mw_sg), GFP_KERNEL);
	if (!r->mw_sg)
		goto out_list_err;

	sg_init_table(r->mw_sg, depth);
	init_completion(&f->fr_linv_done);
	return 0;

out_mr_err:
	rc = PTR_ERR(f->fr_mr);
	dprintk("RPC: %s: ib_alloc_mr status %i\n",
		__func__, rc);
	return rc;

out_list_err:
	rc = -ENOMEM;
	dprintk("RPC: %s: sg allocation failure\n",
		__func__);
	ib_dereg_mr(f->fr_mr);
	return rc;
}

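/* Release the MR and scatterlist owned by one rpcrdma_mw. */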
static void
__frwr_release(struct rpcrdma_mw *r)
{
	int rc;

	rc = ib_dereg_mr(r->frmr.fr_mr);
	if (rc)
		pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
		       r, rc);
	kfree(r->mw_sg);
}

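/* Replace a broken MR with a freshly allocated one. Deregistering and
 * re-allocating the MR also yields a fresh rkey.
 */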
static int
__frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
{
	struct rpcrdma_frmr *f = &r->frmr;
	int rc;

	rc = ib_dereg_mr(f->fr_mr);
	if (rc) {
		pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n",
			rc, r);
		return rc;
	}

	f->fr_mr = ib_alloc_mr(ia->ri_pd, IB_MR_TYPE_MEM_REG,
			       ia->ri_max_frmr_depth);
	if (IS_ERR(f->fr_mr)) {
		pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
			PTR_ERR(f->fr_mr), r);
		return PTR_ERR(f->fr_mr);
	}

	dprintk("RPC: %s: recovered FRMR %p\n", __func__, r);
	f->fr_state = FRMR_IS_INVALID;
	return 0;
}

/* Reset of a single FRMR. Generate a fresh rkey by replacing the MR.
 *
 * There's no recovery if this fails. The FRMR is abandoned, but
 * remains in rb_all. It will be cleaned up when the transport is
 * destroyed.
 */
static void
frwr_op_recover_mr(struct rpcrdma_mw *mw)
{
	struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	int rc;

	rc = __frwr_reset_mr(ia, mw);
	ib_dma_unmap_sg(ia->ri_device, mw->mw_sg, mw->mw_nents, mw->mw_dir);
	if (rc) {
		pr_err("rpcrdma: FRMR reset status %d, %p orphaned\n",
		       rc, mw);
		r_xprt->rx_stats.mrs_orphaned++;
		return;
	}

	rpcrdma_put_mw(r_xprt, mw);
	r_xprt->rx_stats.mrs_recovered++;
}

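/* Size the connection's send queue for FRWR: each RPC may need several
 * FastReg and LocalInv WRs, so scale the requested send queue depth and
 * clamp it to what the device's QP can support.
 */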
static int
frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
	     struct rpcrdma_create_data_internal *cdata)
{
	int depth, delta;

	ia->ri_max_frmr_depth =
			min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
			      ia->ri_device->attrs.max_fast_reg_page_list_len);
	dprintk("RPC: %s: device's max FR page list len = %u\n",
		__func__, ia->ri_max_frmr_depth);

	/* Add room for frmr register and invalidate WRs.
	 * 1. FRMR reg WR for head
	 * 2. FRMR invalidate WR for head
	 * 3. N FRMR reg WRs for pagelist
	 * 4. N FRMR invalidate WRs for pagelist
	 * 5. FRMR reg WR for tail
	 * 6. FRMR invalidate WR for tail
	 * 7. The RDMA_SEND WR
	 */
	depth = 7;

	/* Calculate N if the device max FRMR depth is smaller than
	 * RPCRDMA_MAX_DATA_SEGS.
	 */
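	/* Worked example (hypothetical values): with RPCRDMA_MAX_DATA_SEGS
	 * of 64 and a device FRMR depth of 16, delta starts at 48 and the
	 * loop below adds three more reg + invalidate pairs, so depth
	 * ends up at 13 WRs per RPC.
	 */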
	if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
		delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frmr_depth;
		do {
			depth += 2; /* FRMR reg + invalidate */
			delta -= ia->ri_max_frmr_depth;
		} while (delta > 0);
	}

	ep->rep_attr.cap.max_send_wr *= depth;
	if (ep->rep_attr.cap.max_send_wr > ia->ri_device->attrs.max_qp_wr) {
		cdata->max_requests = ia->ri_device->attrs.max_qp_wr / depth;
		if (!cdata->max_requests)
			return -EINVAL;
		ep->rep_attr.cap.max_send_wr = cdata->max_requests *
					       depth;
	}

	rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1,
						      RPCRDMA_MAX_DATA_SEGS /
						      ia->ri_max_frmr_depth));
	return 0;
}

/* FRWR mode conveys a list of pages per chunk segment. The
 * maximum length of that list is the FRWR page list depth.
 */
static size_t
frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;

	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
		     RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frmr_depth);
}

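/* Handle a failed or flushed send completion: mark the MR STALE so it
 * will be recovered, and report any status other than a flush.
 */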
static void
__frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_frmr *frmr,
			    const char *wr)
{
	frmr->fr_state = FRMR_IS_STALE;
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("rpcrdma: %s: %s (%u/0x%x)\n",
		       wr, ib_wc_status_msg(wc->status),
		       wc->status, wc->vendor_err);
}

/**
 * frwr_wc_fastreg - Invoked by RDMA provider for each polled FastReg WC
 * @cq: completion queue (ignored)
 * @wc: completed WR
 *
 */
static void
frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
{
	struct rpcrdma_frmr *frmr;
	struct ib_cqe *cqe;

	/* WARNING: Only wr_cqe and status are reliable at this point */
	if (wc->status != IB_WC_SUCCESS) {
		cqe = wc->wr_cqe;
		frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
		__frwr_sendcompletion_flush(wc, frmr, "fastreg");
	}
}

/**
 * frwr_wc_localinv - Invoked by RDMA provider for each polled LocalInv WC
 * @cq: completion queue (ignored)
 * @wc: completed WR
 *
 */
static void
frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
{
	struct rpcrdma_frmr *frmr;
	struct ib_cqe *cqe;

	/* WARNING: Only wr_cqe and status are reliable at this point */
	if (wc->status != IB_WC_SUCCESS) {
		cqe = wc->wr_cqe;
		frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
		__frwr_sendcompletion_flush(wc, frmr, "localinv");
	}
}

/**
 * frwr_wc_localinv_wake - Invoked by RDMA provider for each polled LocalInv WC
 * @cq: completion queue (ignored)
 * @wc: completed WR
 *
 * Awaken anyone waiting for an MR to finish being fenced.
 */
static void
frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
{
	struct rpcrdma_frmr *frmr;
	struct ib_cqe *cqe;

	/* WARNING: Only wr_cqe and status are reliable at this point */
	cqe = wc->wr_cqe;
	frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
	if (wc->status != IB_WC_SUCCESS)
		__frwr_sendcompletion_flush(wc, frmr, "localinv");
	complete_all(&frmr->fr_linv_done);
}

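/* Allocate the transport's pool of rpcrdma_mw's: enough MRs to cover
 * RPCRDMA_MAX_DATA_SEGS plus head and tail for each RPC slot.
 */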
static int
frwr_op_init(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
	struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
	int i;

	spin_lock_init(&buf->rb_mwlock);
	INIT_LIST_HEAD(&buf->rb_mws);
	INIT_LIST_HEAD(&buf->rb_all);

	i = max_t(int, RPCRDMA_MAX_DATA_SEGS / depth, 1);
	i += 2;				/* head + tail */
	i *= buf->rb_max_requests;	/* one set for each RPC slot */
	dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i);

	while (i--) {
		struct rpcrdma_mw *r;
		int rc;

		r = kzalloc(sizeof(*r), GFP_KERNEL);
		if (!r)
			return -ENOMEM;

		rc = __frwr_init(r, pd, depth);
		if (rc) {
			kfree(r);
			return rc;
		}

		r->mw_xprt = r_xprt;
		list_add(&r->mw_list, &buf->rb_mws);
		list_add(&r->mw_all, &buf->rb_all);
	}

	return 0;
}

/* Post a REG_MR Work Request to register a memory region
 * for remote access via RDMA READ or RDMA WRITE.
 */
static int
frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
	    int nsegs, bool writing)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_mr_seg *seg1 = seg;
	struct rpcrdma_mw *mw;
	struct rpcrdma_frmr *frmr;
	struct ib_mr *mr;
	struct ib_reg_wr *reg_wr;
	struct ib_send_wr *bad_wr;
	int rc, i, n, dma_nents;
	u8 key;

	mw = seg1->rl_mw;
	seg1->rl_mw = NULL;
	do {
		if (mw)
			rpcrdma_defer_mr_recovery(mw);
		mw = rpcrdma_get_mw(r_xprt);
		if (!mw)
			return -ENOBUFS;
	} while (mw->frmr.fr_state != FRMR_IS_INVALID);
	frmr = &mw->frmr;
	frmr->fr_state = FRMR_IS_VALID;
	mr = frmr->fr_mr;
	reg_wr = &frmr->fr_regwr;

	if (nsegs > ia->ri_max_frmr_depth)
		nsegs = ia->ri_max_frmr_depth;
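	/* Build the scatterlist for this chunk's segments, stopping
	 * early if a hole is found.
	 */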
	for (i = 0; i < nsegs;) {
		if (seg->mr_page)
			sg_set_page(&mw->mw_sg[i],
				    seg->mr_page,
				    seg->mr_len,
				    offset_in_page(seg->mr_offset));
		else
			sg_set_buf(&mw->mw_sg[i], seg->mr_offset,
				   seg->mr_len);

		++seg;
		++i;

		/* Check for holes */
		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	mw->mw_nents = i;
	mw->mw_dir = rpcrdma_data_dir(writing);
	if (i == 0)
		goto out_dmamap_err;

	dma_nents = ib_dma_map_sg(ia->ri_device,
				  mw->mw_sg, mw->mw_nents, mw->mw_dir);
	if (!dma_nents)
		goto out_dmamap_err;

	n = ib_map_mr_sg(mr, mw->mw_sg, mw->mw_nents, NULL, PAGE_SIZE);
	if (unlikely(n != mw->mw_nents))
		goto out_mapmr_err;

	dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n",
		__func__, mw, mw->mw_nents, mr->length);

	key = (u8)(mr->rkey & 0x000000FF);
	ib_update_fast_reg_key(mr, ++key);

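	/* Prepare the FastReg WR. Writing grants remote (and local) write
	 * access; reading grants only remote read access.
	 */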
	reg_wr->wr.next = NULL;
	reg_wr->wr.opcode = IB_WR_REG_MR;
	frmr->fr_cqe.done = frwr_wc_fastreg;
	reg_wr->wr.wr_cqe = &frmr->fr_cqe;
	reg_wr->wr.num_sge = 0;
	reg_wr->wr.send_flags = 0;
	reg_wr->mr = mr;
	reg_wr->key = mr->rkey;
	reg_wr->access = writing ?
			 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
			 IB_ACCESS_REMOTE_READ;

	DECR_CQCOUNT(&r_xprt->rx_ep);
	rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
	if (rc)
		goto out_senderr;

	seg1->rl_mw = mw;
	seg1->mr_rkey = mr->rkey;
	seg1->mr_base = mr->iova;
	seg1->mr_nsegs = mw->mw_nents;
	seg1->mr_len = mr->length;

	return mw->mw_nents;

out_dmamap_err:
	pr_err("rpcrdma: failed to dma map sg %p sg_nents %u\n",
	       mw->mw_sg, mw->mw_nents);
	rpcrdma_defer_mr_recovery(mw);
	return -EIO;

out_mapmr_err:
	pr_err("rpcrdma: failed to map mr %p (%u/%u)\n",
	       frmr->fr_mr, n, mw->mw_nents);
	rpcrdma_defer_mr_recovery(mw);
	return -EIO;

out_senderr:
	pr_err("rpcrdma: FRMR registration ib_post_send returned %i\n", rc);
	rpcrdma_defer_mr_recovery(mw);
	return -ENOTCONN;
}

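/* Mark one MR INVALID and set up the LOCAL_INV Work Request that will
 * fence it. The caller chains these WRs and posts the chain.
 */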
static struct ib_send_wr *
__frwr_prepare_linv_wr(struct rpcrdma_mr_seg *seg)
{
	struct rpcrdma_mw *mw = seg->rl_mw;
	struct rpcrdma_frmr *f = &mw->frmr;
	struct ib_send_wr *invalidate_wr;

	f->fr_state = FRMR_IS_INVALID;
	invalidate_wr = &f->fr_invwr;

	memset(invalidate_wr, 0, sizeof(*invalidate_wr));
	f->fr_cqe.done = frwr_wc_localinv;
	invalidate_wr->wr_cqe = &f->fr_cqe;
	invalidate_wr->opcode = IB_WR_LOCAL_INV;
	invalidate_wr->ex.invalidate_rkey = f->fr_mr->rkey;

	return invalidate_wr;
}

/* Invalidate all memory regions that were registered for "req".
 *
 * Sleeps until it is safe for the host CPU to access the
 * previously mapped memory regions.
 */
static void
frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
	struct ib_send_wr *invalidate_wrs, *pos, *prev, *bad_wr;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_mr_seg *seg;
	unsigned int i, nchunks;
	struct rpcrdma_frmr *f;
	struct rpcrdma_mw *mw;
	int rc;

	dprintk("RPC: %s: req %p\n", __func__, req);

	/* ORDER: Invalidate all of the req's MRs first
	 *
	 * Chain the LOCAL_INV Work Requests and post them with
	 * a single ib_post_send() call.
	 */
	invalidate_wrs = pos = prev = NULL;
	seg = NULL;
	for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
		seg = &req->rl_segments[i];

		pos = __frwr_prepare_linv_wr(seg);

		if (!invalidate_wrs)
			invalidate_wrs = pos;
		else
			prev->next = pos;
		prev = pos;

		i += seg->mr_nsegs;
	}
	f = &seg->rl_mw->frmr;

	/* Strong send queue ordering guarantees that when the
	 * last WR in the chain completes, all WRs in the chain
	 * are complete.
	 */
	f->fr_invwr.send_flags = IB_SEND_SIGNALED;
	f->fr_cqe.done = frwr_wc_localinv_wake;
	reinit_completion(&f->fr_linv_done);
	INIT_CQCOUNT(&r_xprt->rx_ep);

	/* Transport disconnect drains the receive CQ before it
	 * replaces the QP. The RPC reply handler won't call us
	 * unless ri_id->qp is a valid pointer.
	 */
	rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr);
	if (rc)
		goto reset_mrs;

	wait_for_completion(&f->fr_linv_done);

	/* ORDER: Now DMA unmap all of the req's MRs, and return
	 * them to the free MW list.
	 */
unmap:
	for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
		seg = &req->rl_segments[i];
		mw = seg->rl_mw;
		seg->rl_mw = NULL;

		ib_dma_unmap_sg(ia->ri_device,
				mw->mw_sg, mw->mw_nents, mw->mw_dir);
		rpcrdma_put_mw(r_xprt, mw);

		i += seg->mr_nsegs;
		seg->mr_nsegs = 0;
	}

	req->rl_nchunks = 0;
	return;

reset_mrs:
	pr_err("rpcrdma: FRMR invalidate ib_post_send returned %i\n", rc);
	rdma_disconnect(ia->ri_id);

	/* Find and reset the MRs in the LOCAL_INV WRs that did not
	 * get posted. This is synchronous, and slow.
	 */
	for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
		seg = &req->rl_segments[i];
		mw = seg->rl_mw;
		f = &mw->frmr;

		if (mw->frmr.fr_mr->rkey == bad_wr->ex.invalidate_rkey) {
			__frwr_reset_mr(ia, mw);
			bad_wr = bad_wr->next;
		}

		i += seg->mr_nsegs;
	}
	goto unmap;
}

/* Use a slow, safe mechanism to invalidate all memory regions
 * that were registered for "req".
 */
static void
frwr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
		   bool sync)
{
	struct rpcrdma_mr_seg *seg;
	struct rpcrdma_mw *mw;
	unsigned int i;

	for (i = 0; req->rl_nchunks; req->rl_nchunks--) {
		seg = &req->rl_segments[i];
		mw = seg->rl_mw;

		if (sync)
			frwr_op_recover_mr(mw);
		else
			rpcrdma_defer_mr_recovery(mw);

		i += seg->mr_nsegs;
		seg->mr_nsegs = 0;
		seg->rl_mw = NULL;
	}
}

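/* Release all MRs on rb_all when the transport's buffers are
 * destroyed.
 */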
static void
frwr_op_destroy(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *r;

	while (!list_empty(&buf->rb_all)) {
		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
		list_del(&r->mw_all);
		__frwr_release(r);
		kfree(r);
	}
}

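/* The FRWR flavor of the memory registration operations used by the
 * generic xprtrdma code.
 */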
const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
	.ro_map				= frwr_op_map,
	.ro_unmap_sync			= frwr_op_unmap_sync,
	.ro_unmap_safe			= frwr_op_unmap_safe,
	.ro_recover_mr			= frwr_op_recover_mr,
	.ro_open			= frwr_op_open,
	.ro_maxpages			= frwr_op_maxpages,
	.ro_init			= frwr_op_init,
	.ro_destroy			= frwr_op_destroy,
	.ro_displayname			= "frwr",
};