Commit | Line | Data |
---|---|---|
d5b31be6 | 1 | /* |
0bf48289 | 2 | * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved. |
d5b31be6 TT |
3 | * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. |
4 | * | |
5 | * This software is available to you under a choice of one of two | |
6 | * licenses. You may choose to be licensed under the terms of the GNU | |
7 | * General Public License (GPL) Version 2, available from the file | |
8 | * COPYING in the main directory of this source tree, or the BSD-type | |
9 | * license below: | |
10 | * | |
11 | * Redistribution and use in source and binary forms, with or without | |
12 | * modification, are permitted provided that the following conditions | |
13 | * are met: | |
14 | * | |
15 | * Redistributions of source code must retain the above copyright | |
16 | * notice, this list of conditions and the following disclaimer. | |
17 | * | |
18 | * Redistributions in binary form must reproduce the above | |
19 | * copyright notice, this list of conditions and the following | |
20 | * disclaimer in the documentation and/or other materials provided | |
21 | * with the distribution. | |
22 | * | |
23 | * Neither the name of the Network Appliance, Inc. nor the names of | |
24 | * its contributors may be used to endorse or promote products | |
25 | * derived from this software without specific prior written | |
26 | * permission. | |
27 | * | |
28 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
29 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
30 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
31 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
32 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
33 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
34 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
35 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
36 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
37 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
38 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
39 | * | |
40 | * Author: Tom Tucker <tom@opengridcomputing.com> | |
41 | */ | |
42 | ||
43 | #include <linux/sunrpc/debug.h> | |
44 | #include <linux/sunrpc/rpc_rdma.h> | |
45 | #include <linux/spinlock.h> | |
46 | #include <asm/unaligned.h> | |
47 | #include <rdma/ib_verbs.h> | |
48 | #include <rdma/rdma_cm.h> | |
49 | #include <linux/sunrpc/svc_rdma.h> | |
50 | ||
51 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT | |
52 | ||
53 | /* | |
54 | * Replace the pages in the rq_argpages array with the pages from the SGE in | |
55 | * the RDMA_RECV completion. The SGL should contain full pages up until the | |
56 | * last one. | |
57 | */ | |
58 | static void rdma_build_arg_xdr(struct svc_rqst *rqstp, | |
59 | struct svc_rdma_op_ctxt *ctxt, | |
60 | u32 byte_count) | |
61 | { | |
0b056c22 | 62 | struct rpcrdma_msg *rmsgp; |
d5b31be6 TT |
63 | struct page *page; |
64 | u32 bc; | |
65 | int sge_no; | |
66 | ||
67 | /* Swap the page in the SGE with the page in argpages */ | |
68 | page = ctxt->pages[0]; | |
69 | put_page(rqstp->rq_pages[0]); | |
70 | rqstp->rq_pages[0] = page; | |
71 | ||
72 | /* Set up the XDR head */ | |
73 | rqstp->rq_arg.head[0].iov_base = page_address(page); | |
0bf48289 SW |
74 | rqstp->rq_arg.head[0].iov_len = |
75 | min_t(size_t, byte_count, ctxt->sge[0].length); | |
d5b31be6 TT |
76 | rqstp->rq_arg.len = byte_count; |
77 | rqstp->rq_arg.buflen = byte_count; | |
78 | ||
79 | /* Compute bytes past head in the SGL */ | |
80 | bc = byte_count - rqstp->rq_arg.head[0].iov_len; | |
81 | ||
82 | /* If data remains, store it in the pagelist */ | |
83 | rqstp->rq_arg.page_len = bc; | |
84 | rqstp->rq_arg.page_base = 0; | |
0b056c22 CL |
85 | |
86 | /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */ | |
87 | rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; | |
30b7e246 | 88 | if (rmsgp->rm_type == rdma_nomsg) |
0b056c22 CL |
89 | rqstp->rq_arg.pages = &rqstp->rq_pages[0]; |
90 | else | |
91 | rqstp->rq_arg.pages = &rqstp->rq_pages[1]; | |
92 | ||
d5b31be6 TT |
93 | sge_no = 1; |
94 | while (bc && sge_no < ctxt->count) { | |
95 | page = ctxt->pages[sge_no]; | |
96 | put_page(rqstp->rq_pages[sge_no]); | |
97 | rqstp->rq_pages[sge_no] = page; | |
0bf48289 | 98 | bc -= min_t(u32, bc, ctxt->sge[sge_no].length); |
d5b31be6 TT |
99 | rqstp->rq_arg.buflen += ctxt->sge[sge_no].length; |
100 | sge_no++; | |
101 | } | |
102 | rqstp->rq_respages = &rqstp->rq_pages[sge_no]; | |
7e4359e2 | 103 | rqstp->rq_next_page = rqstp->rq_respages + 1; |
d5b31be6 | 104 | |
d5b31be6 TT |
105 | /* If not all pages were used from the SGL, free the remaining ones */ |
106 | bc = sge_no; | |
107 | while (sge_no < ctxt->count) { | |
108 | page = ctxt->pages[sge_no++]; | |
109 | put_page(page); | |
110 | } | |
111 | ctxt->count = bc; | |
112 | ||
113 | /* Set up tail */ | |
114 | rqstp->rq_arg.tail[0].iov_base = NULL; | |
115 | rqstp->rq_arg.tail[0].iov_len = 0; | |
116 | } | |
117 | ||
0bf48289 | 118 | /* Issue an RDMA_READ using the local lkey to map the data sink */ |
e5452411 CL |
119 | int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, |
120 | struct svc_rqst *rqstp, | |
121 | struct svc_rdma_op_ctxt *head, | |
122 | int *page_no, | |
123 | u32 *page_offset, | |
124 | u32 rs_handle, | |
125 | u32 rs_length, | |
126 | u64 rs_offset, | |
127 | bool last) | |
0bf48289 | 128 | { |
e622f2f4 | 129 | struct ib_rdma_wr read_wr; |
0bf48289 SW |
130 | int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; |
131 | struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); | |
132 | int ret, read, pno; | |
133 | u32 pg_off = *page_offset; | |
134 | u32 pg_no = *page_no; | |
135 | ||
136 | ctxt->direction = DMA_FROM_DEVICE; | |
137 | ctxt->read_hdr = head; | |
bc3fe2e3 | 138 | pages_needed = min_t(int, pages_needed, xprt->sc_max_sge_rd); |
c91aed98 SW |
139 | read = min_t(int, (pages_needed << PAGE_SHIFT) - *page_offset, |
140 | rs_length); | |
0bf48289 SW |
141 | |
142 | for (pno = 0; pno < pages_needed; pno++) { | |
143 | int len = min_t(int, rs_length, PAGE_SIZE - pg_off); | |
144 | ||
145 | head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; | |
146 | head->arg.page_len += len; | |
5fe1043d | 147 | |
0bf48289 SW |
148 | head->arg.len += len; |
149 | if (!pg_off) | |
150 | head->count++; | |
151 | rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; | |
7e4359e2 | 152 | rqstp->rq_next_page = rqstp->rq_respages + 1; |
0bf48289 SW |
153 | ctxt->sge[pno].addr = |
154 | ib_dma_map_page(xprt->sc_cm_id->device, | |
155 | head->arg.pages[pg_no], pg_off, | |
156 | PAGE_SIZE - pg_off, | |
157 | DMA_FROM_DEVICE); | |
158 | ret = ib_dma_mapping_error(xprt->sc_cm_id->device, | |
159 | ctxt->sge[pno].addr); | |
160 | if (ret) | |
161 | goto err; | |
162 | atomic_inc(&xprt->sc_dma_used); | |
d5b31be6 | 163 | |
5fe1043d | 164 | ctxt->sge[pno].lkey = xprt->sc_pd->local_dma_lkey; |
0bf48289 SW |
165 | ctxt->sge[pno].length = len; |
166 | ctxt->count++; | |
167 | ||
168 | /* adjust offset and wrap to next page if needed */ | |
169 | pg_off += len; | |
170 | if (pg_off == PAGE_SIZE) { | |
171 | pg_off = 0; | |
172 | pg_no++; | |
d5b31be6 | 173 | } |
0bf48289 | 174 | rs_length -= len; |
d5b31be6 | 175 | } |
0bf48289 SW |
176 | |
177 | if (last && rs_length == 0) | |
178 | set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | |
179 | else | |
180 | clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | |
181 | ||
182 | memset(&read_wr, 0, sizeof(read_wr)); | |
e622f2f4 CH |
183 | read_wr.wr.wr_id = (unsigned long)ctxt; |
184 | read_wr.wr.opcode = IB_WR_RDMA_READ; | |
185 | ctxt->wr_op = read_wr.wr.opcode; | |
186 | read_wr.wr.send_flags = IB_SEND_SIGNALED; | |
187 | read_wr.rkey = rs_handle; | |
188 | read_wr.remote_addr = rs_offset; | |
189 | read_wr.wr.sg_list = ctxt->sge; | |
190 | read_wr.wr.num_sge = pages_needed; | |
191 | ||
192 | ret = svc_rdma_send(xprt, &read_wr.wr); | |
0bf48289 SW |
193 | if (ret) { |
194 | pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); | |
195 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); | |
196 | goto err; | |
197 | } | |
198 | ||
199 | /* return current location in page array */ | |
200 | *page_no = pg_no; | |
201 | *page_offset = pg_off; | |
202 | ret = read; | |
203 | atomic_inc(&rdma_stat_read); | |
204 | return ret; | |
205 | err: | |
206 | svc_rdma_unmap_dma(ctxt); | |
207 | svc_rdma_put_context(ctxt, 0); | |
208 | return ret; | |
d5b31be6 TT |
209 | } |
210 | ||
0bf48289 | 211 | /* Issue an RDMA_READ using an FRMR to map the data sink */ |
e5452411 CL |
212 | int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, |
213 | struct svc_rqst *rqstp, | |
214 | struct svc_rdma_op_ctxt *head, | |
215 | int *page_no, | |
216 | u32 *page_offset, | |
217 | u32 rs_handle, | |
218 | u32 rs_length, | |
219 | u64 rs_offset, | |
220 | bool last) | |
146b6df6 | 221 | { |
e622f2f4 | 222 | struct ib_rdma_wr read_wr; |
0bf48289 | 223 | struct ib_send_wr inv_wr; |
412a15c0 | 224 | struct ib_reg_wr reg_wr; |
0bf48289 | 225 | u8 key; |
412a15c0 | 226 | int nents = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; |
0bf48289 SW |
227 | struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); |
228 | struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt); | |
412a15c0 | 229 | int ret, read, pno, dma_nents, n; |
0bf48289 SW |
230 | u32 pg_off = *page_offset; |
231 | u32 pg_no = *page_no; | |
146b6df6 | 232 | |
146b6df6 TT |
233 | if (IS_ERR(frmr)) |
234 | return -ENOMEM; | |
235 | ||
0bf48289 SW |
236 | ctxt->direction = DMA_FROM_DEVICE; |
237 | ctxt->frmr = frmr; | |
412a15c0 | 238 | nents = min_t(unsigned int, nents, xprt->sc_frmr_pg_list_len); |
ab9f2faf | 239 | read = min_t(int, (nents << PAGE_SHIFT) - *page_offset, rs_length); |
146b6df6 | 240 | |
146b6df6 TT |
241 | frmr->direction = DMA_FROM_DEVICE; |
242 | frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE); | |
412a15c0 | 243 | frmr->sg_nents = nents; |
0bf48289 | 244 | |
412a15c0 | 245 | for (pno = 0; pno < nents; pno++) { |
0bf48289 SW |
246 | int len = min_t(int, rs_length, PAGE_SIZE - pg_off); |
247 | ||
248 | head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; | |
249 | head->arg.page_len += len; | |
250 | head->arg.len += len; | |
251 | if (!pg_off) | |
252 | head->count++; | |
412a15c0 SG |
253 | |
254 | sg_set_page(&frmr->sg[pno], rqstp->rq_arg.pages[pg_no], | |
255 | len, pg_off); | |
256 | ||
0bf48289 SW |
257 | rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; |
258 | rqstp->rq_next_page = rqstp->rq_respages + 1; | |
146b6df6 | 259 | |
0bf48289 SW |
260 | /* adjust offset and wrap to next page if needed */ |
261 | pg_off += len; | |
262 | if (pg_off == PAGE_SIZE) { | |
263 | pg_off = 0; | |
264 | pg_no++; | |
265 | } | |
266 | rs_length -= len; | |
146b6df6 TT |
267 | } |
268 | ||
0bf48289 SW |
269 | if (last && rs_length == 0) |
270 | set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | |
271 | else | |
272 | clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | |
d5b31be6 | 273 | |
412a15c0 SG |
274 | dma_nents = ib_dma_map_sg(xprt->sc_cm_id->device, |
275 | frmr->sg, frmr->sg_nents, | |
276 | frmr->direction); | |
277 | if (!dma_nents) { | |
278 | pr_err("svcrdma: failed to dma map sg %p\n", | |
279 | frmr->sg); | |
280 | return -ENOMEM; | |
281 | } | |
282 | atomic_inc(&xprt->sc_dma_used); | |
283 | ||
284 | n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, PAGE_SIZE); | |
285 | if (unlikely(n != frmr->sg_nents)) { | |
286 | pr_err("svcrdma: failed to map mr %p (%d/%d elements)\n", | |
287 | frmr->mr, n, frmr->sg_nents); | |
288 | return n < 0 ? n : -EINVAL; | |
289 | } | |
290 | ||
0bf48289 SW |
291 | /* Bump the key */ |
292 | key = (u8)(frmr->mr->lkey & 0x000000FF); | |
293 | ib_update_fast_reg_key(frmr->mr, ++key); | |
294 | ||
412a15c0 | 295 | ctxt->sge[0].addr = frmr->mr->iova; |
0bf48289 | 296 | ctxt->sge[0].lkey = frmr->mr->lkey; |
412a15c0 | 297 | ctxt->sge[0].length = frmr->mr->length; |
0bf48289 SW |
298 | ctxt->count = 1; |
299 | ctxt->read_hdr = head; | |
300 | ||
412a15c0 SG |
301 | /* Prepare REG WR */ |
302 | reg_wr.wr.opcode = IB_WR_REG_MR; | |
303 | reg_wr.wr.wr_id = 0; | |
304 | reg_wr.wr.send_flags = IB_SEND_SIGNALED; | |
305 | reg_wr.wr.num_sge = 0; | |
306 | reg_wr.mr = frmr->mr; | |
307 | reg_wr.key = frmr->mr->lkey; | |
308 | reg_wr.access = frmr->access_flags; | |
309 | reg_wr.wr.next = &read_wr.wr; | |
0bf48289 SW |
310 | |
311 | /* Prepare RDMA_READ */ | |
312 | memset(&read_wr, 0, sizeof(read_wr)); | |
e622f2f4 CH |
313 | read_wr.wr.send_flags = IB_SEND_SIGNALED; |
314 | read_wr.rkey = rs_handle; | |
315 | read_wr.remote_addr = rs_offset; | |
316 | read_wr.wr.sg_list = ctxt->sge; | |
317 | read_wr.wr.num_sge = 1; | |
0bf48289 | 318 | if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) { |
e622f2f4 CH |
319 | read_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV; |
320 | read_wr.wr.wr_id = (unsigned long)ctxt; | |
321 | read_wr.wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey; | |
0bf48289 | 322 | } else { |
e622f2f4 CH |
323 | read_wr.wr.opcode = IB_WR_RDMA_READ; |
324 | read_wr.wr.next = &inv_wr; | |
0bf48289 SW |
325 | /* Prepare invalidate */ |
326 | memset(&inv_wr, 0, sizeof(inv_wr)); | |
327 | inv_wr.wr_id = (unsigned long)ctxt; | |
328 | inv_wr.opcode = IB_WR_LOCAL_INV; | |
83710fc7 | 329 | inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE; |
0bf48289 SW |
330 | inv_wr.ex.invalidate_rkey = frmr->mr->lkey; |
331 | } | |
e622f2f4 | 332 | ctxt->wr_op = read_wr.wr.opcode; |
0bf48289 SW |
333 | |
334 | /* Post the chain */ | |
412a15c0 | 335 | ret = svc_rdma_send(xprt, ®_wr.wr); |
0bf48289 SW |
336 | if (ret) { |
337 | pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); | |
338 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); | |
339 | goto err; | |
d5b31be6 | 340 | } |
d5b31be6 | 341 | |
0bf48289 SW |
342 | /* return current location in page array */ |
343 | *page_no = pg_no; | |
344 | *page_offset = pg_off; | |
345 | ret = read; | |
346 | atomic_inc(&rdma_stat_read); | |
347 | return ret; | |
348 | err: | |
412a15c0 SG |
349 | ib_dma_unmap_sg(xprt->sc_cm_id->device, |
350 | frmr->sg, frmr->sg_nents, frmr->direction); | |
0bf48289 SW |
351 | svc_rdma_put_context(ctxt, 0); |
352 | svc_rdma_put_frmr(xprt, frmr); | |
353 | return ret; | |
d5b31be6 TT |
354 | } |
355 | ||
2397aa8b CL |
356 | static unsigned int |
357 | rdma_rcl_chunk_count(struct rpcrdma_read_chunk *ch) | |
358 | { | |
359 | unsigned int count; | |
360 | ||
361 | for (count = 0; ch->rc_discrim != xdr_zero; ch++) | |
362 | count++; | |
363 | return count; | |
364 | } | |
365 | ||
a97c331f CL |
366 | /* If there was additional inline content, append it to the end of arg.pages. |
367 | * Tail copy has to be done after the reader function has determined how many | |
368 | * pages are needed for RDMA READ. | |
369 | */ | |
370 | static int | |
371 | rdma_copy_tail(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head, | |
372 | u32 position, u32 byte_count, u32 page_offset, int page_no) | |
373 | { | |
374 | char *srcp, *destp; | |
375 | int ret; | |
376 | ||
377 | ret = 0; | |
378 | srcp = head->arg.head[0].iov_base + position; | |
379 | byte_count = head->arg.head[0].iov_len - position; | |
380 | if (byte_count > PAGE_SIZE) { | |
381 | dprintk("svcrdma: large tail unsupported\n"); | |
382 | return 0; | |
383 | } | |
384 | ||
385 | /* Fit as much of the tail on the current page as possible */ | |
386 | if (page_offset != PAGE_SIZE) { | |
387 | destp = page_address(rqstp->rq_arg.pages[page_no]); | |
388 | destp += page_offset; | |
389 | while (byte_count--) { | |
390 | *destp++ = *srcp++; | |
391 | page_offset++; | |
392 | if (page_offset == PAGE_SIZE && byte_count) | |
393 | goto more; | |
394 | } | |
395 | goto done; | |
396 | } | |
397 | ||
398 | more: | |
399 | /* Fit the rest on the next page */ | |
400 | page_no++; | |
401 | destp = page_address(rqstp->rq_arg.pages[page_no]); | |
402 | while (byte_count--) | |
403 | *destp++ = *srcp++; | |
404 | ||
405 | rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1]; | |
406 | rqstp->rq_next_page = rqstp->rq_respages + 1; | |
407 | ||
408 | done: | |
409 | byte_count = head->arg.head[0].iov_len - position; | |
410 | head->arg.page_len += byte_count; | |
411 | head->arg.len += byte_count; | |
412 | head->arg.buflen += byte_count; | |
413 | return 1; | |
414 | } | |
415 | ||
0bf48289 SW |
416 | static int rdma_read_chunks(struct svcxprt_rdma *xprt, |
417 | struct rpcrdma_msg *rmsgp, | |
418 | struct svc_rqst *rqstp, | |
419 | struct svc_rdma_op_ctxt *head) | |
d5b31be6 | 420 | { |
2397aa8b | 421 | int page_no, ret; |
d5b31be6 | 422 | struct rpcrdma_read_chunk *ch; |
e5452411 | 423 | u32 handle, page_offset, byte_count; |
61edbcb7 | 424 | u32 position; |
0bf48289 | 425 | u64 rs_offset; |
e5452411 | 426 | bool last; |
d5b31be6 TT |
427 | |
428 | /* If no read list is present, return 0 */ | |
429 | ch = svc_rdma_get_read_chunk(rmsgp); | |
430 | if (!ch) | |
431 | return 0; | |
432 | ||
2397aa8b | 433 | if (rdma_rcl_chunk_count(ch) > RPCSVC_MAXPAGES) |
a6f911c0 | 434 | return -EINVAL; |
146b6df6 | 435 | |
0bf48289 SW |
436 | /* The request is completed when the RDMA_READs complete. The |
437 | * head context keeps all the pages that comprise the | |
438 | * request. | |
439 | */ | |
440 | head->arg.head[0] = rqstp->rq_arg.head[0]; | |
441 | head->arg.tail[0] = rqstp->rq_arg.tail[0]; | |
0bf48289 SW |
442 | head->hdr_count = head->count; |
443 | head->arg.page_base = 0; | |
444 | head->arg.page_len = 0; | |
445 | head->arg.len = rqstp->rq_arg.len; | |
446 | head->arg.buflen = rqstp->rq_arg.buflen; | |
59fb3066 | 447 | |
61edbcb7 CL |
448 | ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; |
449 | position = be32_to_cpu(ch->rc_position); | |
450 | ||
0b056c22 CL |
451 | /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */ |
452 | if (position == 0) { | |
453 | head->arg.pages = &head->pages[0]; | |
454 | page_offset = head->byte_len; | |
455 | } else { | |
456 | head->arg.pages = &head->pages[head->count]; | |
457 | page_offset = 0; | |
458 | } | |
459 | ||
61edbcb7 CL |
460 | ret = 0; |
461 | page_no = 0; | |
61edbcb7 CL |
462 | for (; ch->rc_discrim != xdr_zero; ch++) { |
463 | if (be32_to_cpu(ch->rc_position) != position) | |
464 | goto err; | |
465 | ||
466 | handle = be32_to_cpu(ch->rc_target.rs_handle), | |
e5452411 | 467 | byte_count = be32_to_cpu(ch->rc_target.rs_length); |
cec56c8f TT |
468 | xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset, |
469 | &rs_offset); | |
0bf48289 SW |
470 | |
471 | while (byte_count > 0) { | |
e5452411 CL |
472 | last = (ch + 1)->rc_discrim == xdr_zero; |
473 | ret = xprt->sc_reader(xprt, rqstp, head, | |
474 | &page_no, &page_offset, | |
475 | handle, byte_count, | |
476 | rs_offset, last); | |
0bf48289 SW |
477 | if (ret < 0) |
478 | goto err; | |
479 | byte_count -= ret; | |
480 | rs_offset += ret; | |
481 | head->arg.buflen += ret; | |
d5b31be6 | 482 | } |
d5b31be6 | 483 | } |
0b056c22 | 484 | |
fcbeced5 CL |
485 | /* Read list may need XDR round-up (see RFC 5666, s. 3.7) */ |
486 | if (page_offset & 3) { | |
487 | u32 pad = 4 - (page_offset & 3); | |
488 | ||
489 | head->arg.page_len += pad; | |
490 | head->arg.len += pad; | |
491 | head->arg.buflen += pad; | |
a97c331f | 492 | page_offset += pad; |
fcbeced5 CL |
493 | } |
494 | ||
0bf48289 | 495 | ret = 1; |
a97c331f CL |
496 | if (position && position < head->arg.head[0].iov_len) |
497 | ret = rdma_copy_tail(rqstp, head, position, | |
498 | byte_count, page_offset, page_no); | |
499 | head->arg.head[0].iov_len = position; | |
0b056c22 CL |
500 | head->position = position; |
501 | ||
0bf48289 | 502 | err: |
d5b31be6 | 503 | /* Detach arg pages. svc_recv will replenish them */ |
0bf48289 SW |
504 | for (page_no = 0; |
505 | &rqstp->rq_pages[page_no] < rqstp->rq_respages; page_no++) | |
506 | rqstp->rq_pages[page_no] = NULL; | |
d5b31be6 | 507 | |
0bf48289 | 508 | return ret; |
d5b31be6 TT |
509 | } |
510 | ||
511 | static int rdma_read_complete(struct svc_rqst *rqstp, | |
02e7452d | 512 | struct svc_rdma_op_ctxt *head) |
d5b31be6 | 513 | { |
d5b31be6 TT |
514 | int page_no; |
515 | int ret; | |
516 | ||
d5b31be6 TT |
517 | /* Copy RPC pages */ |
518 | for (page_no = 0; page_no < head->count; page_no++) { | |
519 | put_page(rqstp->rq_pages[page_no]); | |
520 | rqstp->rq_pages[page_no] = head->pages[page_no]; | |
521 | } | |
0b056c22 CL |
522 | |
523 | /* Adjustments made for RDMA_NOMSG type requests */ | |
524 | if (head->position == 0) { | |
525 | if (head->arg.len <= head->sge[0].length) { | |
526 | head->arg.head[0].iov_len = head->arg.len - | |
527 | head->byte_len; | |
528 | head->arg.page_len = 0; | |
529 | } else { | |
530 | head->arg.head[0].iov_len = head->sge[0].length - | |
531 | head->byte_len; | |
532 | head->arg.page_len = head->arg.len - | |
533 | head->sge[0].length; | |
534 | } | |
535 | } | |
536 | ||
d5b31be6 | 537 | /* Point rq_arg.pages past header */ |
f820c57e | 538 | rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count]; |
d5b31be6 TT |
539 | rqstp->rq_arg.page_len = head->arg.page_len; |
540 | rqstp->rq_arg.page_base = head->arg.page_base; | |
541 | ||
542 | /* rq_respages starts after the last arg page */ | |
3be7f328 | 543 | rqstp->rq_respages = &rqstp->rq_pages[page_no]; |
7e4359e2 | 544 | rqstp->rq_next_page = rqstp->rq_respages + 1; |
d5b31be6 TT |
545 | |
546 | /* Rebuild rq_arg head and tail. */ | |
547 | rqstp->rq_arg.head[0] = head->arg.head[0]; | |
548 | rqstp->rq_arg.tail[0] = head->arg.tail[0]; | |
549 | rqstp->rq_arg.len = head->arg.len; | |
550 | rqstp->rq_arg.buflen = head->arg.buflen; | |
551 | ||
02e7452d TT |
552 | /* Free the context */ |
553 | svc_rdma_put_context(head, 0); | |
554 | ||
d5b31be6 TT |
555 | /* XXX: What should this be? */ |
556 | rqstp->rq_prot = IPPROTO_MAX; | |
69500c43 | 557 | svc_xprt_copy_addrs(rqstp, rqstp->rq_xprt); |
d5b31be6 | 558 | |
d5b31be6 TT |
559 | ret = rqstp->rq_arg.head[0].iov_len |
560 | + rqstp->rq_arg.page_len | |
561 | + rqstp->rq_arg.tail[0].iov_len; | |
597561bf CL |
562 | dprintk("svcrdma: deferred read ret=%d, rq_arg.len=%u, " |
563 | "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len=%zu\n", | |
d5b31be6 TT |
564 | ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base, |
565 | rqstp->rq_arg.head[0].iov_len); | |
566 | ||
d5b31be6 TT |
567 | return ret; |
568 | } | |
569 | ||
5d252f90 CL |
570 | /* By convention, backchannel calls arrive via rdma_msg type |
571 | * messages, and never populate the chunk lists. This makes | |
572 | * the RPC/RDMA header small and fixed in size, so it is | |
573 | * straightforward to check the RPC header's direction field. | |
574 | */ | |
575 | static bool | |
576 | svc_rdma_is_backchannel_reply(struct svc_xprt *xprt, struct rpcrdma_msg *rmsgp) | |
577 | { | |
578 | __be32 *p = (__be32 *)rmsgp; | |
579 | ||
580 | if (!xprt->xpt_bc_xprt) | |
581 | return false; | |
582 | ||
583 | if (rmsgp->rm_type != rdma_msg) | |
584 | return false; | |
585 | if (rmsgp->rm_body.rm_chunks[0] != xdr_zero) | |
586 | return false; | |
587 | if (rmsgp->rm_body.rm_chunks[1] != xdr_zero) | |
588 | return false; | |
589 | if (rmsgp->rm_body.rm_chunks[2] != xdr_zero) | |
590 | return false; | |
591 | ||
592 | /* sanity */ | |
593 | if (p[7] != rmsgp->rm_xid) | |
594 | return false; | |
595 | /* call direction */ | |
596 | if (p[8] == cpu_to_be32(RPC_CALL)) | |
597 | return false; | |
598 | ||
599 | return true; | |
600 | } | |
601 | ||
d5b31be6 TT |
602 | /* |
603 | * Set up the rqstp thread context to point to the RQ buffer. If | |
604 | * necessary, pull additional data from the client with an RDMA_READ | |
605 | * request. | |
606 | */ | |
607 | int svc_rdma_recvfrom(struct svc_rqst *rqstp) | |
608 | { | |
609 | struct svc_xprt *xprt = rqstp->rq_xprt; | |
610 | struct svcxprt_rdma *rdma_xprt = | |
611 | container_of(xprt, struct svcxprt_rdma, sc_xprt); | |
612 | struct svc_rdma_op_ctxt *ctxt = NULL; | |
613 | struct rpcrdma_msg *rmsgp; | |
614 | int ret = 0; | |
615 | int len; | |
616 | ||
617 | dprintk("svcrdma: rqstp=%p\n", rqstp); | |
618 | ||
24b8b447 | 619 | spin_lock_bh(&rdma_xprt->sc_rq_dto_lock); |
d5b31be6 TT |
620 | if (!list_empty(&rdma_xprt->sc_read_complete_q)) { |
621 | ctxt = list_entry(rdma_xprt->sc_read_complete_q.next, | |
622 | struct svc_rdma_op_ctxt, | |
623 | dto_q); | |
624 | list_del_init(&ctxt->dto_q); | |
24b8b447 | 625 | spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock); |
d5b31be6 | 626 | return rdma_read_complete(rqstp, ctxt); |
0bf48289 | 627 | } else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) { |
d5b31be6 TT |
628 | ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next, |
629 | struct svc_rdma_op_ctxt, | |
630 | dto_q); | |
631 | list_del_init(&ctxt->dto_q); | |
632 | } else { | |
633 | atomic_inc(&rdma_stat_rq_starve); | |
634 | clear_bit(XPT_DATA, &xprt->xpt_flags); | |
635 | ctxt = NULL; | |
636 | } | |
637 | spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock); | |
638 | if (!ctxt) { | |
639 | /* This is the EAGAIN path. The svc_recv routine will | |
640 | * return -EAGAIN, the nfsd thread will go to call into | |
641 | * svc_recv again and we shouldn't be on the active | |
642 | * transport list | |
643 | */ | |
644 | if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) | |
645 | goto close_out; | |
646 | ||
d5b31be6 TT |
647 | goto out; |
648 | } | |
649 | dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n", | |
650 | ctxt, rdma_xprt, rqstp, ctxt->wc_status); | |
d5b31be6 TT |
651 | atomic_inc(&rdma_stat_recv); |
652 | ||
653 | /* Build up the XDR from the receive buffers. */ | |
654 | rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len); | |
655 | ||
656 | /* Decode the RDMA header. */ | |
657 | len = svc_rdma_xdr_decode_req(&rmsgp, rqstp); | |
658 | rqstp->rq_xprt_hlen = len; | |
659 | ||
660 | /* If the request is invalid, reply with an error */ | |
661 | if (len < 0) { | |
662 | if (len == -ENOSYS) | |
008fdbc5 | 663 | svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS); |
d5b31be6 TT |
664 | goto close_out; |
665 | } | |
666 | ||
5d252f90 CL |
667 | if (svc_rdma_is_backchannel_reply(xprt, rmsgp)) { |
668 | ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, rmsgp, | |
669 | &rqstp->rq_arg); | |
670 | svc_rdma_put_context(ctxt, 0); | |
671 | if (ret) | |
672 | goto repost; | |
673 | return ret; | |
674 | } | |
675 | ||
d16d4009 | 676 | /* Read read-list data. */ |
0bf48289 | 677 | ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt); |
d16d4009 TT |
678 | if (ret > 0) { |
679 | /* read-list posted, defer until data received from client. */ | |
b1721d2b | 680 | goto defer; |
0bf48289 | 681 | } else if (ret < 0) { |
d16d4009 TT |
682 | /* Post of read-list failed, free context. */ |
683 | svc_rdma_put_context(ctxt, 1); | |
684 | return 0; | |
685 | } | |
d5b31be6 | 686 | |
d5b31be6 TT |
687 | ret = rqstp->rq_arg.head[0].iov_len |
688 | + rqstp->rq_arg.page_len | |
689 | + rqstp->rq_arg.tail[0].iov_len; | |
690 | svc_rdma_put_context(ctxt, 0); | |
691 | out: | |
597561bf CL |
692 | dprintk("svcrdma: ret=%d, rq_arg.len=%u, " |
693 | "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len=%zd\n", | |
d5b31be6 TT |
694 | ret, rqstp->rq_arg.len, |
695 | rqstp->rq_arg.head[0].iov_base, | |
696 | rqstp->rq_arg.head[0].iov_len); | |
697 | rqstp->rq_prot = IPPROTO_MAX; | |
698 | svc_xprt_copy_addrs(rqstp, xprt); | |
d5b31be6 TT |
699 | return ret; |
700 | ||
701 | close_out: | |
0e7f011a | 702 | if (ctxt) |
d5b31be6 | 703 | svc_rdma_put_context(ctxt, 1); |
d5b31be6 TT |
704 | dprintk("svcrdma: transport %p is closing\n", xprt); |
705 | /* | |
706 | * Set the close bit and enqueue it. svc_recv will see the | |
707 | * close bit and call svc_xprt_delete | |
708 | */ | |
709 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | |
b1721d2b | 710 | defer: |
d5b31be6 | 711 | return 0; |
5d252f90 CL |
712 | |
713 | repost: | |
bf36387a | 714 | return svc_rdma_repost_recv(rdma_xprt, GFP_KERNEL); |
d5b31be6 | 715 | } |