Commit | Line | Data |
---|---|---|
d5b31be6 | 1 | /* |
0bf48289 | 2 | * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved. |
d5b31be6 TT |
3 | * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. |
4 | * | |
5 | * This software is available to you under a choice of one of two | |
6 | * licenses. You may choose to be licensed under the terms of the GNU | |
7 | * General Public License (GPL) Version 2, available from the file | |
8 | * COPYING in the main directory of this source tree, or the BSD-type | |
9 | * license below: | |
10 | * | |
11 | * Redistribution and use in source and binary forms, with or without | |
12 | * modification, are permitted provided that the following conditions | |
13 | * are met: | |
14 | * | |
15 | * Redistributions of source code must retain the above copyright | |
16 | * notice, this list of conditions and the following disclaimer. | |
17 | * | |
18 | * Redistributions in binary form must reproduce the above | |
19 | * copyright notice, this list of conditions and the following | |
20 | * disclaimer in the documentation and/or other materials provided | |
21 | * with the distribution. | |
22 | * | |
23 | * Neither the name of the Network Appliance, Inc. nor the names of | |
24 | * its contributors may be used to endorse or promote products | |
25 | * derived from this software without specific prior written | |
26 | * permission. | |
27 | * | |
28 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
29 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
30 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
31 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
32 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
33 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
34 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
35 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
36 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
37 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
38 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
39 | * | |
40 | * Author: Tom Tucker <tom@opengridcomputing.com> | |
41 | */ | |
42 | ||
43 | #include <linux/sunrpc/debug.h> | |
44 | #include <linux/sunrpc/rpc_rdma.h> | |
45 | #include <linux/spinlock.h> | |
46 | #include <asm/unaligned.h> | |
47 | #include <rdma/ib_verbs.h> | |
48 | #include <rdma/rdma_cm.h> | |
49 | #include <linux/sunrpc/svc_rdma.h> | |
50 | ||
51 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT | |
52 | ||
53 | /* | |
54 | * Replace the pages in the rq_argpages array with the pages from the SGE in | |
55 | * the RDMA_RECV completion. The SGL should contain full pages up until the | |
56 | * last one. | |
57 | */ | |
58 | static void rdma_build_arg_xdr(struct svc_rqst *rqstp, | |
59 | struct svc_rdma_op_ctxt *ctxt, | |
60 | u32 byte_count) | |
61 | { | |
0b056c22 | 62 | struct rpcrdma_msg *rmsgp; |
d5b31be6 TT |
63 | struct page *page; |
64 | u32 bc; | |
65 | int sge_no; | |
66 | ||
67 | /* Swap the page in the SGE with the page in argpages */ | |
68 | page = ctxt->pages[0]; | |
69 | put_page(rqstp->rq_pages[0]); | |
70 | rqstp->rq_pages[0] = page; | |
71 | ||
72 | /* Set up the XDR head */ | |
73 | rqstp->rq_arg.head[0].iov_base = page_address(page); | |
0bf48289 SW |
74 | rqstp->rq_arg.head[0].iov_len = |
75 | min_t(size_t, byte_count, ctxt->sge[0].length); | |
d5b31be6 TT |
76 | rqstp->rq_arg.len = byte_count; |
77 | rqstp->rq_arg.buflen = byte_count; | |
78 | ||
79 | /* Compute bytes past head in the SGL */ | |
80 | bc = byte_count - rqstp->rq_arg.head[0].iov_len; | |
81 | ||
82 | /* If data remains, store it in the pagelist */ | |
83 | rqstp->rq_arg.page_len = bc; | |
84 | rqstp->rq_arg.page_base = 0; | |
0b056c22 CL |
85 | |
86 | /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */ | |
87 | rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; | |
30b7e246 | 88 | if (rmsgp->rm_type == rdma_nomsg) |
0b056c22 CL |
89 | rqstp->rq_arg.pages = &rqstp->rq_pages[0]; |
90 | else | |
91 | rqstp->rq_arg.pages = &rqstp->rq_pages[1]; | |
92 | ||
d5b31be6 TT |
93 | sge_no = 1; |
94 | while (bc && sge_no < ctxt->count) { | |
95 | page = ctxt->pages[sge_no]; | |
96 | put_page(rqstp->rq_pages[sge_no]); | |
97 | rqstp->rq_pages[sge_no] = page; | |
0bf48289 | 98 | bc -= min_t(u32, bc, ctxt->sge[sge_no].length); |
d5b31be6 TT |
99 | rqstp->rq_arg.buflen += ctxt->sge[sge_no].length; |
100 | sge_no++; | |
101 | } | |
102 | rqstp->rq_respages = &rqstp->rq_pages[sge_no]; | |
7e4359e2 | 103 | rqstp->rq_next_page = rqstp->rq_respages + 1; |
d5b31be6 | 104 | |
d5b31be6 TT |
105 | /* If not all pages were used from the SGL, free the remaining ones */ |
106 | bc = sge_no; | |
107 | while (sge_no < ctxt->count) { | |
108 | page = ctxt->pages[sge_no++]; | |
109 | put_page(page); | |
110 | } | |
111 | ctxt->count = bc; | |
112 | ||
113 | /* Set up tail */ | |
114 | rqstp->rq_arg.tail[0].iov_base = NULL; | |
115 | rqstp->rq_arg.tail[0].iov_len = 0; | |
116 | } | |
117 | ||
0bf48289 | 118 | /* Issue an RDMA_READ using the local lkey to map the data sink */ |
e5452411 CL |
119 | int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, |
120 | struct svc_rqst *rqstp, | |
121 | struct svc_rdma_op_ctxt *head, | |
122 | int *page_no, | |
123 | u32 *page_offset, | |
124 | u32 rs_handle, | |
125 | u32 rs_length, | |
126 | u64 rs_offset, | |
127 | bool last) | |
0bf48289 | 128 | { |
e622f2f4 | 129 | struct ib_rdma_wr read_wr; |
0bf48289 SW |
130 | int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; |
131 | struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); | |
132 | int ret, read, pno; | |
133 | u32 pg_off = *page_offset; | |
134 | u32 pg_no = *page_no; | |
135 | ||
136 | ctxt->direction = DMA_FROM_DEVICE; | |
137 | ctxt->read_hdr = head; | |
bc3fe2e3 | 138 | pages_needed = min_t(int, pages_needed, xprt->sc_max_sge_rd); |
c91aed98 SW |
139 | read = min_t(int, (pages_needed << PAGE_SHIFT) - *page_offset, |
140 | rs_length); | |
0bf48289 SW |
141 | |
142 | for (pno = 0; pno < pages_needed; pno++) { | |
143 | int len = min_t(int, rs_length, PAGE_SIZE - pg_off); | |
144 | ||
145 | head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; | |
146 | head->arg.page_len += len; | |
5fe1043d | 147 | |
0bf48289 SW |
148 | head->arg.len += len; |
149 | if (!pg_off) | |
150 | head->count++; | |
151 | rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; | |
7e4359e2 | 152 | rqstp->rq_next_page = rqstp->rq_respages + 1; |
0bf48289 SW |
153 | ctxt->sge[pno].addr = |
154 | ib_dma_map_page(xprt->sc_cm_id->device, | |
155 | head->arg.pages[pg_no], pg_off, | |
156 | PAGE_SIZE - pg_off, | |
157 | DMA_FROM_DEVICE); | |
158 | ret = ib_dma_mapping_error(xprt->sc_cm_id->device, | |
159 | ctxt->sge[pno].addr); | |
160 | if (ret) | |
161 | goto err; | |
162 | atomic_inc(&xprt->sc_dma_used); | |
d5b31be6 | 163 | |
5fe1043d | 164 | ctxt->sge[pno].lkey = xprt->sc_pd->local_dma_lkey; |
0bf48289 SW |
165 | ctxt->sge[pno].length = len; |
166 | ctxt->count++; | |
167 | ||
168 | /* adjust offset and wrap to next page if needed */ | |
169 | pg_off += len; | |
170 | if (pg_off == PAGE_SIZE) { | |
171 | pg_off = 0; | |
172 | pg_no++; | |
d5b31be6 | 173 | } |
0bf48289 | 174 | rs_length -= len; |
d5b31be6 | 175 | } |
0bf48289 SW |
176 | |
177 | if (last && rs_length == 0) | |
178 | set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | |
179 | else | |
180 | clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | |
181 | ||
182 | memset(&read_wr, 0, sizeof(read_wr)); | |
be99bb11 CL |
183 | ctxt->cqe.done = svc_rdma_wc_read; |
184 | read_wr.wr.wr_cqe = &ctxt->cqe; | |
e622f2f4 | 185 | read_wr.wr.opcode = IB_WR_RDMA_READ; |
e622f2f4 CH |
186 | read_wr.wr.send_flags = IB_SEND_SIGNALED; |
187 | read_wr.rkey = rs_handle; | |
188 | read_wr.remote_addr = rs_offset; | |
189 | read_wr.wr.sg_list = ctxt->sge; | |
190 | read_wr.wr.num_sge = pages_needed; | |
191 | ||
192 | ret = svc_rdma_send(xprt, &read_wr.wr); | |
0bf48289 SW |
193 | if (ret) { |
194 | pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); | |
195 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); | |
196 | goto err; | |
197 | } | |
198 | ||
199 | /* return current location in page array */ | |
200 | *page_no = pg_no; | |
201 | *page_offset = pg_off; | |
202 | ret = read; | |
203 | atomic_inc(&rdma_stat_read); | |
204 | return ret; | |
205 | err: | |
206 | svc_rdma_unmap_dma(ctxt); | |
207 | svc_rdma_put_context(ctxt, 0); | |
208 | return ret; | |
d5b31be6 TT |
209 | } |
210 | ||
0bf48289 | 211 | /* Issue an RDMA_READ using an FRMR to map the data sink */ |
e5452411 CL |
212 | int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, |
213 | struct svc_rqst *rqstp, | |
214 | struct svc_rdma_op_ctxt *head, | |
215 | int *page_no, | |
216 | u32 *page_offset, | |
217 | u32 rs_handle, | |
218 | u32 rs_length, | |
219 | u64 rs_offset, | |
220 | bool last) | |
146b6df6 | 221 | { |
e622f2f4 | 222 | struct ib_rdma_wr read_wr; |
0bf48289 | 223 | struct ib_send_wr inv_wr; |
412a15c0 | 224 | struct ib_reg_wr reg_wr; |
0bf48289 | 225 | u8 key; |
412a15c0 | 226 | int nents = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; |
0bf48289 SW |
227 | struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); |
228 | struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt); | |
412a15c0 | 229 | int ret, read, pno, dma_nents, n; |
0bf48289 SW |
230 | u32 pg_off = *page_offset; |
231 | u32 pg_no = *page_no; | |
146b6df6 | 232 | |
146b6df6 TT |
233 | if (IS_ERR(frmr)) |
234 | return -ENOMEM; | |
235 | ||
0bf48289 SW |
236 | ctxt->direction = DMA_FROM_DEVICE; |
237 | ctxt->frmr = frmr; | |
412a15c0 | 238 | nents = min_t(unsigned int, nents, xprt->sc_frmr_pg_list_len); |
ab9f2faf | 239 | read = min_t(int, (nents << PAGE_SHIFT) - *page_offset, rs_length); |
146b6df6 | 240 | |
146b6df6 TT |
241 | frmr->direction = DMA_FROM_DEVICE; |
242 | frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE); | |
412a15c0 | 243 | frmr->sg_nents = nents; |
0bf48289 | 244 | |
412a15c0 | 245 | for (pno = 0; pno < nents; pno++) { |
0bf48289 SW |
246 | int len = min_t(int, rs_length, PAGE_SIZE - pg_off); |
247 | ||
248 | head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; | |
249 | head->arg.page_len += len; | |
250 | head->arg.len += len; | |
251 | if (!pg_off) | |
252 | head->count++; | |
412a15c0 SG |
253 | |
254 | sg_set_page(&frmr->sg[pno], rqstp->rq_arg.pages[pg_no], | |
255 | len, pg_off); | |
256 | ||
0bf48289 SW |
257 | rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; |
258 | rqstp->rq_next_page = rqstp->rq_respages + 1; | |
146b6df6 | 259 | |
0bf48289 SW |
260 | /* adjust offset and wrap to next page if needed */ |
261 | pg_off += len; | |
262 | if (pg_off == PAGE_SIZE) { | |
263 | pg_off = 0; | |
264 | pg_no++; | |
265 | } | |
266 | rs_length -= len; | |
146b6df6 TT |
267 | } |
268 | ||
0bf48289 SW |
269 | if (last && rs_length == 0) |
270 | set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | |
271 | else | |
272 | clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | |
d5b31be6 | 273 | |
412a15c0 SG |
274 | dma_nents = ib_dma_map_sg(xprt->sc_cm_id->device, |
275 | frmr->sg, frmr->sg_nents, | |
276 | frmr->direction); | |
277 | if (!dma_nents) { | |
278 | pr_err("svcrdma: failed to dma map sg %p\n", | |
279 | frmr->sg); | |
280 | return -ENOMEM; | |
281 | } | |
282 | atomic_inc(&xprt->sc_dma_used); | |
283 | ||
9aa8b321 | 284 | n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, NULL, PAGE_SIZE); |
412a15c0 SG |
285 | if (unlikely(n != frmr->sg_nents)) { |
286 | pr_err("svcrdma: failed to map mr %p (%d/%d elements)\n", | |
287 | frmr->mr, n, frmr->sg_nents); | |
288 | return n < 0 ? n : -EINVAL; | |
289 | } | |
290 | ||
0bf48289 SW |
291 | /* Bump the key */ |
292 | key = (u8)(frmr->mr->lkey & 0x000000FF); | |
293 | ib_update_fast_reg_key(frmr->mr, ++key); | |
294 | ||
412a15c0 | 295 | ctxt->sge[0].addr = frmr->mr->iova; |
0bf48289 | 296 | ctxt->sge[0].lkey = frmr->mr->lkey; |
412a15c0 | 297 | ctxt->sge[0].length = frmr->mr->length; |
0bf48289 SW |
298 | ctxt->count = 1; |
299 | ctxt->read_hdr = head; | |
300 | ||
412a15c0 | 301 | /* Prepare REG WR */ |
be99bb11 CL |
302 | ctxt->reg_cqe.done = svc_rdma_wc_reg; |
303 | reg_wr.wr.wr_cqe = &ctxt->reg_cqe; | |
412a15c0 | 304 | reg_wr.wr.opcode = IB_WR_REG_MR; |
412a15c0 SG |
305 | reg_wr.wr.send_flags = IB_SEND_SIGNALED; |
306 | reg_wr.wr.num_sge = 0; | |
307 | reg_wr.mr = frmr->mr; | |
308 | reg_wr.key = frmr->mr->lkey; | |
309 | reg_wr.access = frmr->access_flags; | |
310 | reg_wr.wr.next = &read_wr.wr; | |
0bf48289 SW |
311 | |
312 | /* Prepare RDMA_READ */ | |
313 | memset(&read_wr, 0, sizeof(read_wr)); | |
be99bb11 CL |
314 | ctxt->cqe.done = svc_rdma_wc_read; |
315 | read_wr.wr.wr_cqe = &ctxt->cqe; | |
e622f2f4 CH |
316 | read_wr.wr.send_flags = IB_SEND_SIGNALED; |
317 | read_wr.rkey = rs_handle; | |
318 | read_wr.remote_addr = rs_offset; | |
319 | read_wr.wr.sg_list = ctxt->sge; | |
320 | read_wr.wr.num_sge = 1; | |
0bf48289 | 321 | if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) { |
e622f2f4 | 322 | read_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV; |
e622f2f4 | 323 | read_wr.wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey; |
0bf48289 | 324 | } else { |
e622f2f4 CH |
325 | read_wr.wr.opcode = IB_WR_RDMA_READ; |
326 | read_wr.wr.next = &inv_wr; | |
0bf48289 SW |
327 | /* Prepare invalidate */ |
328 | memset(&inv_wr, 0, sizeof(inv_wr)); | |
be99bb11 CL |
329 | ctxt->inv_cqe.done = svc_rdma_wc_inv; |
330 | inv_wr.wr_cqe = &ctxt->inv_cqe; | |
0bf48289 | 331 | inv_wr.opcode = IB_WR_LOCAL_INV; |
83710fc7 | 332 | inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE; |
0bf48289 SW |
333 | inv_wr.ex.invalidate_rkey = frmr->mr->lkey; |
334 | } | |
0bf48289 SW |
335 | |
336 | /* Post the chain */ | |
412a15c0 | 337 | ret = svc_rdma_send(xprt, ®_wr.wr); |
0bf48289 SW |
338 | if (ret) { |
339 | pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); | |
340 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); | |
341 | goto err; | |
d5b31be6 | 342 | } |
d5b31be6 | 343 | |
0bf48289 SW |
344 | /* return current location in page array */ |
345 | *page_no = pg_no; | |
346 | *page_offset = pg_off; | |
347 | ret = read; | |
348 | atomic_inc(&rdma_stat_read); | |
349 | return ret; | |
350 | err: | |
412a15c0 SG |
351 | ib_dma_unmap_sg(xprt->sc_cm_id->device, |
352 | frmr->sg, frmr->sg_nents, frmr->direction); | |
0bf48289 SW |
353 | svc_rdma_put_context(ctxt, 0); |
354 | svc_rdma_put_frmr(xprt, frmr); | |
355 | return ret; | |
d5b31be6 TT |
356 | } |
357 | ||
2397aa8b CL |
358 | static unsigned int |
359 | rdma_rcl_chunk_count(struct rpcrdma_read_chunk *ch) | |
360 | { | |
361 | unsigned int count; | |
362 | ||
363 | for (count = 0; ch->rc_discrim != xdr_zero; ch++) | |
364 | count++; | |
365 | return count; | |
366 | } | |
367 | ||
a97c331f CL |
368 | /* If there was additional inline content, append it to the end of arg.pages. |
369 | * Tail copy has to be done after the reader function has determined how many | |
370 | * pages are needed for RDMA READ. | |
371 | */ | |
372 | static int | |
373 | rdma_copy_tail(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head, | |
374 | u32 position, u32 byte_count, u32 page_offset, int page_no) | |
375 | { | |
376 | char *srcp, *destp; | |
377 | int ret; | |
378 | ||
379 | ret = 0; | |
380 | srcp = head->arg.head[0].iov_base + position; | |
381 | byte_count = head->arg.head[0].iov_len - position; | |
382 | if (byte_count > PAGE_SIZE) { | |
383 | dprintk("svcrdma: large tail unsupported\n"); | |
384 | return 0; | |
385 | } | |
386 | ||
387 | /* Fit as much of the tail on the current page as possible */ | |
388 | if (page_offset != PAGE_SIZE) { | |
389 | destp = page_address(rqstp->rq_arg.pages[page_no]); | |
390 | destp += page_offset; | |
391 | while (byte_count--) { | |
392 | *destp++ = *srcp++; | |
393 | page_offset++; | |
394 | if (page_offset == PAGE_SIZE && byte_count) | |
395 | goto more; | |
396 | } | |
397 | goto done; | |
398 | } | |
399 | ||
400 | more: | |
401 | /* Fit the rest on the next page */ | |
402 | page_no++; | |
403 | destp = page_address(rqstp->rq_arg.pages[page_no]); | |
404 | while (byte_count--) | |
405 | *destp++ = *srcp++; | |
406 | ||
407 | rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1]; | |
408 | rqstp->rq_next_page = rqstp->rq_respages + 1; | |
409 | ||
410 | done: | |
411 | byte_count = head->arg.head[0].iov_len - position; | |
412 | head->arg.page_len += byte_count; | |
413 | head->arg.len += byte_count; | |
414 | head->arg.buflen += byte_count; | |
415 | return 1; | |
416 | } | |
417 | ||
0bf48289 SW |
418 | static int rdma_read_chunks(struct svcxprt_rdma *xprt, |
419 | struct rpcrdma_msg *rmsgp, | |
420 | struct svc_rqst *rqstp, | |
421 | struct svc_rdma_op_ctxt *head) | |
d5b31be6 | 422 | { |
2397aa8b | 423 | int page_no, ret; |
d5b31be6 | 424 | struct rpcrdma_read_chunk *ch; |
e5452411 | 425 | u32 handle, page_offset, byte_count; |
61edbcb7 | 426 | u32 position; |
0bf48289 | 427 | u64 rs_offset; |
e5452411 | 428 | bool last; |
d5b31be6 TT |
429 | |
430 | /* If no read list is present, return 0 */ | |
431 | ch = svc_rdma_get_read_chunk(rmsgp); | |
432 | if (!ch) | |
433 | return 0; | |
434 | ||
2397aa8b | 435 | if (rdma_rcl_chunk_count(ch) > RPCSVC_MAXPAGES) |
a6f911c0 | 436 | return -EINVAL; |
146b6df6 | 437 | |
0bf48289 SW |
438 | /* The request is completed when the RDMA_READs complete. The |
439 | * head context keeps all the pages that comprise the | |
440 | * request. | |
441 | */ | |
442 | head->arg.head[0] = rqstp->rq_arg.head[0]; | |
443 | head->arg.tail[0] = rqstp->rq_arg.tail[0]; | |
0bf48289 SW |
444 | head->hdr_count = head->count; |
445 | head->arg.page_base = 0; | |
446 | head->arg.page_len = 0; | |
447 | head->arg.len = rqstp->rq_arg.len; | |
448 | head->arg.buflen = rqstp->rq_arg.buflen; | |
59fb3066 | 449 | |
0b056c22 | 450 | /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */ |
cac7f150 | 451 | position = be32_to_cpu(ch->rc_position); |
0b056c22 CL |
452 | if (position == 0) { |
453 | head->arg.pages = &head->pages[0]; | |
454 | page_offset = head->byte_len; | |
455 | } else { | |
456 | head->arg.pages = &head->pages[head->count]; | |
457 | page_offset = 0; | |
458 | } | |
459 | ||
61edbcb7 CL |
460 | ret = 0; |
461 | page_no = 0; | |
61edbcb7 CL |
462 | for (; ch->rc_discrim != xdr_zero; ch++) { |
463 | if (be32_to_cpu(ch->rc_position) != position) | |
464 | goto err; | |
465 | ||
466 | handle = be32_to_cpu(ch->rc_target.rs_handle), | |
e5452411 | 467 | byte_count = be32_to_cpu(ch->rc_target.rs_length); |
cec56c8f TT |
468 | xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset, |
469 | &rs_offset); | |
0bf48289 SW |
470 | |
471 | while (byte_count > 0) { | |
e5452411 CL |
472 | last = (ch + 1)->rc_discrim == xdr_zero; |
473 | ret = xprt->sc_reader(xprt, rqstp, head, | |
474 | &page_no, &page_offset, | |
475 | handle, byte_count, | |
476 | rs_offset, last); | |
0bf48289 SW |
477 | if (ret < 0) |
478 | goto err; | |
479 | byte_count -= ret; | |
480 | rs_offset += ret; | |
481 | head->arg.buflen += ret; | |
d5b31be6 | 482 | } |
d5b31be6 | 483 | } |
0b056c22 | 484 | |
fcbeced5 CL |
485 | /* Read list may need XDR round-up (see RFC 5666, s. 3.7) */ |
486 | if (page_offset & 3) { | |
487 | u32 pad = 4 - (page_offset & 3); | |
488 | ||
6625d091 | 489 | head->arg.tail[0].iov_len += pad; |
fcbeced5 CL |
490 | head->arg.len += pad; |
491 | head->arg.buflen += pad; | |
a97c331f | 492 | page_offset += pad; |
fcbeced5 CL |
493 | } |
494 | ||
0bf48289 | 495 | ret = 1; |
a97c331f CL |
496 | if (position && position < head->arg.head[0].iov_len) |
497 | ret = rdma_copy_tail(rqstp, head, position, | |
498 | byte_count, page_offset, page_no); | |
499 | head->arg.head[0].iov_len = position; | |
0b056c22 CL |
500 | head->position = position; |
501 | ||
0bf48289 | 502 | err: |
d5b31be6 | 503 | /* Detach arg pages. svc_recv will replenish them */ |
0bf48289 SW |
504 | for (page_no = 0; |
505 | &rqstp->rq_pages[page_no] < rqstp->rq_respages; page_no++) | |
506 | rqstp->rq_pages[page_no] = NULL; | |
d5b31be6 | 507 | |
0bf48289 | 508 | return ret; |
d5b31be6 TT |
509 | } |
510 | ||
84f225c2 CL |
511 | static void rdma_read_complete(struct svc_rqst *rqstp, |
512 | struct svc_rdma_op_ctxt *head) | |
d5b31be6 | 513 | { |
d5b31be6 | 514 | int page_no; |
d5b31be6 | 515 | |
d5b31be6 TT |
516 | /* Copy RPC pages */ |
517 | for (page_no = 0; page_no < head->count; page_no++) { | |
518 | put_page(rqstp->rq_pages[page_no]); | |
519 | rqstp->rq_pages[page_no] = head->pages[page_no]; | |
520 | } | |
0b056c22 CL |
521 | |
522 | /* Adjustments made for RDMA_NOMSG type requests */ | |
523 | if (head->position == 0) { | |
524 | if (head->arg.len <= head->sge[0].length) { | |
525 | head->arg.head[0].iov_len = head->arg.len - | |
526 | head->byte_len; | |
527 | head->arg.page_len = 0; | |
528 | } else { | |
529 | head->arg.head[0].iov_len = head->sge[0].length - | |
530 | head->byte_len; | |
531 | head->arg.page_len = head->arg.len - | |
532 | head->sge[0].length; | |
533 | } | |
534 | } | |
535 | ||
d5b31be6 | 536 | /* Point rq_arg.pages past header */ |
f820c57e | 537 | rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count]; |
d5b31be6 TT |
538 | rqstp->rq_arg.page_len = head->arg.page_len; |
539 | rqstp->rq_arg.page_base = head->arg.page_base; | |
540 | ||
541 | /* rq_respages starts after the last arg page */ | |
3be7f328 | 542 | rqstp->rq_respages = &rqstp->rq_pages[page_no]; |
7e4359e2 | 543 | rqstp->rq_next_page = rqstp->rq_respages + 1; |
d5b31be6 TT |
544 | |
545 | /* Rebuild rq_arg head and tail. */ | |
546 | rqstp->rq_arg.head[0] = head->arg.head[0]; | |
547 | rqstp->rq_arg.tail[0] = head->arg.tail[0]; | |
548 | rqstp->rq_arg.len = head->arg.len; | |
549 | rqstp->rq_arg.buflen = head->arg.buflen; | |
d5b31be6 TT |
550 | } |
551 | ||
5d252f90 CL |
552 | /* By convention, backchannel calls arrive via rdma_msg type |
553 | * messages, and never populate the chunk lists. This makes | |
554 | * the RPC/RDMA header small and fixed in size, so it is | |
555 | * straightforward to check the RPC header's direction field. | |
556 | */ | |
557 | static bool | |
558 | svc_rdma_is_backchannel_reply(struct svc_xprt *xprt, struct rpcrdma_msg *rmsgp) | |
559 | { | |
560 | __be32 *p = (__be32 *)rmsgp; | |
561 | ||
562 | if (!xprt->xpt_bc_xprt) | |
563 | return false; | |
564 | ||
565 | if (rmsgp->rm_type != rdma_msg) | |
566 | return false; | |
567 | if (rmsgp->rm_body.rm_chunks[0] != xdr_zero) | |
568 | return false; | |
569 | if (rmsgp->rm_body.rm_chunks[1] != xdr_zero) | |
570 | return false; | |
571 | if (rmsgp->rm_body.rm_chunks[2] != xdr_zero) | |
572 | return false; | |
573 | ||
574 | /* sanity */ | |
575 | if (p[7] != rmsgp->rm_xid) | |
576 | return false; | |
577 | /* call direction */ | |
578 | if (p[8] == cpu_to_be32(RPC_CALL)) | |
579 | return false; | |
580 | ||
581 | return true; | |
582 | } | |
583 | ||
d5b31be6 TT |
584 | /* |
585 | * Set up the rqstp thread context to point to the RQ buffer. If | |
586 | * necessary, pull additional data from the client with an RDMA_READ | |
587 | * request. | |
588 | */ | |
589 | int svc_rdma_recvfrom(struct svc_rqst *rqstp) | |
590 | { | |
591 | struct svc_xprt *xprt = rqstp->rq_xprt; | |
592 | struct svcxprt_rdma *rdma_xprt = | |
593 | container_of(xprt, struct svcxprt_rdma, sc_xprt); | |
594 | struct svc_rdma_op_ctxt *ctxt = NULL; | |
595 | struct rpcrdma_msg *rmsgp; | |
596 | int ret = 0; | |
d5b31be6 TT |
597 | |
598 | dprintk("svcrdma: rqstp=%p\n", rqstp); | |
599 | ||
24b8b447 | 600 | spin_lock_bh(&rdma_xprt->sc_rq_dto_lock); |
d5b31be6 TT |
601 | if (!list_empty(&rdma_xprt->sc_read_complete_q)) { |
602 | ctxt = list_entry(rdma_xprt->sc_read_complete_q.next, | |
603 | struct svc_rdma_op_ctxt, | |
604 | dto_q); | |
605 | list_del_init(&ctxt->dto_q); | |
24b8b447 | 606 | spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock); |
84f225c2 CL |
607 | rdma_read_complete(rqstp, ctxt); |
608 | goto complete; | |
0bf48289 | 609 | } else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) { |
d5b31be6 TT |
610 | ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next, |
611 | struct svc_rdma_op_ctxt, | |
612 | dto_q); | |
613 | list_del_init(&ctxt->dto_q); | |
614 | } else { | |
615 | atomic_inc(&rdma_stat_rq_starve); | |
616 | clear_bit(XPT_DATA, &xprt->xpt_flags); | |
617 | ctxt = NULL; | |
618 | } | |
619 | spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock); | |
620 | if (!ctxt) { | |
621 | /* This is the EAGAIN path. The svc_recv routine will | |
622 | * return -EAGAIN, the nfsd thread will go to call into | |
623 | * svc_recv again and we shouldn't be on the active | |
624 | * transport list | |
625 | */ | |
626 | if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) | |
ec705fd4 | 627 | goto defer; |
d5b31be6 TT |
628 | goto out; |
629 | } | |
630 | dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n", | |
631 | ctxt, rdma_xprt, rqstp, ctxt->wc_status); | |
d5b31be6 TT |
632 | atomic_inc(&rdma_stat_recv); |
633 | ||
634 | /* Build up the XDR from the receive buffers. */ | |
635 | rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len); | |
636 | ||
637 | /* Decode the RDMA header. */ | |
f3ea53fb | 638 | rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; |
d9e4084f | 639 | ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg); |
a6081b82 CL |
640 | if (ret < 0) |
641 | goto out_err; | |
a0544c94 CL |
642 | if (ret == 0) |
643 | goto out_drop; | |
a6081b82 | 644 | rqstp->rq_xprt_hlen = ret; |
d5b31be6 | 645 | |
5d252f90 CL |
646 | if (svc_rdma_is_backchannel_reply(xprt, rmsgp)) { |
647 | ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, rmsgp, | |
648 | &rqstp->rq_arg); | |
649 | svc_rdma_put_context(ctxt, 0); | |
650 | if (ret) | |
651 | goto repost; | |
652 | return ret; | |
653 | } | |
654 | ||
d16d4009 | 655 | /* Read read-list data. */ |
0bf48289 | 656 | ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt); |
d16d4009 TT |
657 | if (ret > 0) { |
658 | /* read-list posted, defer until data received from client. */ | |
b1721d2b | 659 | goto defer; |
0bf48289 | 660 | } else if (ret < 0) { |
d16d4009 TT |
661 | /* Post of read-list failed, free context. */ |
662 | svc_rdma_put_context(ctxt, 1); | |
663 | return 0; | |
664 | } | |
d5b31be6 | 665 | |
84f225c2 | 666 | complete: |
d5b31be6 TT |
667 | ret = rqstp->rq_arg.head[0].iov_len |
668 | + rqstp->rq_arg.page_len | |
669 | + rqstp->rq_arg.tail[0].iov_len; | |
670 | svc_rdma_put_context(ctxt, 0); | |
671 | out: | |
597561bf CL |
672 | dprintk("svcrdma: ret=%d, rq_arg.len=%u, " |
673 | "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len=%zd\n", | |
d5b31be6 TT |
674 | ret, rqstp->rq_arg.len, |
675 | rqstp->rq_arg.head[0].iov_base, | |
676 | rqstp->rq_arg.head[0].iov_len); | |
677 | rqstp->rq_prot = IPPROTO_MAX; | |
678 | svc_xprt_copy_addrs(rqstp, xprt); | |
d5b31be6 TT |
679 | return ret; |
680 | ||
a6081b82 CL |
681 | out_err: |
682 | svc_rdma_send_error(rdma_xprt, rmsgp, ret); | |
683 | svc_rdma_put_context(ctxt, 0); | |
684 | return 0; | |
685 | ||
b1721d2b | 686 | defer: |
d5b31be6 | 687 | return 0; |
5d252f90 | 688 | |
a0544c94 CL |
689 | out_drop: |
690 | svc_rdma_put_context(ctxt, 1); | |
5d252f90 | 691 | repost: |
bf36387a | 692 | return svc_rdma_repost_recv(rdma_xprt, GFP_KERNEL); |
d5b31be6 | 693 | } |