/*
 * Intel I/OAT DMA Linux driver
 * Copyright(c) 2004 - 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 */
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/gfp.h>
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/prefetch.h>
#include "../dmaengine.h"
#include "registers.h"
#include "hw.h"
#include "dma.h"

/* provide a lookup table for setting the source address in the base or
 * extended descriptor of an xor or pq operation
 */
static const u8 xor_idx_to_desc = 0xe0;
static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 };
static const u8 pq_idx_to_desc = 0xf8;
static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1,
				       2, 2, 2, 2, 2, 2, 2 };
static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 };
static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7,
					0, 1, 2, 3, 4, 5, 6 };
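
/*
 * decode example: xor_idx_to_desc is a bitmap, so (0xe0 >> idx) & 1
 * picks descs[0] (the base descriptor) for sources 0-4 and descs[1]
 * (the extended descriptor) for sources 5-7, while xor_idx_to_field[idx]
 * names the u64 slot within the chosen descriptor; source 5, for
 * example, decodes to descs[1], field 0.  The pq tables follow the same
 * scheme, and the 16-source variants use plain arrays instead of a
 * bitmap because three descriptors are in play.
 */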

static void xor_set_src(struct ioat_raw_descriptor *descs[2],
			dma_addr_t addr, u32 offset, int idx)
{
	struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];

	raw->field[xor_idx_to_field[idx]] = addr + offset;
}

static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
{
	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];

	return raw->field[pq_idx_to_field[idx]];
}

static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx)
{
	struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]];

	return raw->field[pq16_idx_to_field[idx]];
}

static void pq_set_src(struct ioat_raw_descriptor *descs[2],
		       dma_addr_t addr, u32 offset, u8 coef, int idx)
{
	struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0];
	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];

	raw->field[pq_idx_to_field[idx]] = addr + offset;
	pq->coef[idx] = coef;
}

static void pq16_set_src(struct ioat_raw_descriptor *desc[3],
			 dma_addr_t addr, u32 offset, u8 coef, unsigned idx)
{
	struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *)desc[0];
	struct ioat_pq16a_descriptor *pq16 =
		(struct ioat_pq16a_descriptor *)desc[1];
	struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]];

	raw->field[pq16_idx_to_field[idx]] = addr + offset;

	if (idx < 8)
		pq->coef[idx] = coef;
	else
		pq16->coef[idx - 8] = coef;
}

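/*
 * a sed is a super extended descriptor: a separate dma_pool allocation
 * that carries the source addresses the inline descriptor pair cannot
 * hold (used by the 16-source pq path below); allocated GFP_ATOMIC
 * because the prep routines run with the channel ring lock held
 */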
static struct ioat_sed_ent *
ioat3_alloc_sed(struct ioatdma_device *ioat_dma, unsigned int hw_pool)
{
	struct ioat_sed_ent *sed;
	gfp_t flags = __GFP_ZERO | GFP_ATOMIC;

	sed = kmem_cache_alloc(ioat_sed_cache, flags);
	if (!sed)
		return NULL;

	sed->hw_pool = hw_pool;
	sed->hw = dma_pool_alloc(ioat_dma->sed_hw_pool[hw_pool],
				 flags, &sed->dma);
	if (!sed->hw) {
		kmem_cache_free(ioat_sed_cache, sed);
		return NULL;
	}

	return sed;
}

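/*
 * a copy longer than the channel's transfer cap is sliced into
 * consecutive ring descriptors of at most 1 << xfercap_log bytes each;
 * only the final descriptor carries the interrupt/fence flags and the
 * completion write (e.g. with a 1 MB cap, a 3 MB copy takes three ring
 * slots)
 */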
struct dma_async_tx_descriptor *
ioat_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
			  dma_addr_t dma_src, size_t len, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
	struct ioat_dma_descriptor *hw;
	struct ioat_ring_ent *desc;
	dma_addr_t dst = dma_dest;
	dma_addr_t src = dma_src;
	size_t total_len = len;
	int num_descs, idx, i;

	num_descs = ioat_xferlen_to_descs(ioat_chan, len);
	if (likely(num_descs) &&
	    ioat_check_space_lock(ioat_chan, num_descs) == 0)
		idx = ioat_chan->head;
	else
		return NULL;
	i = 0;
	do {
		size_t copy = min_t(size_t, len, 1 << ioat_chan->xfercap_log);

		desc = ioat_get_ring_ent(ioat_chan, idx + i);
		hw = desc->hw;

		hw->size = copy;
		hw->ctl = 0;
		hw->src_addr = src;
		hw->dst_addr = dst;

		len -= copy;
		dst += copy;
		src += copy;
		dump_desc_dbg(ioat_chan, desc);
	} while (++i < num_descs);

	desc->txd.flags = flags;
	desc->len = total_len;
	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
	hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
	hw->ctl_f.compl_write = 1;
	dump_desc_dbg(ioat_chan, desc);
	/* we leave the channel locked to ensure in-order submission */

	return &desc->txd;
}

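/*
 * xor with more than five sources spills into a second, "extended"
 * descriptor, so the ring is walked with a stride of 1 + with_ext and
 * num_descs is doubled up front; the trailing null descriptor exists
 * because raid-engine completion writes may pass legacy-engine ones
 * (see the comment below), and routing the interrupt/completion write
 * through a legacy null descriptor keeps completions ordered
 */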
static struct dma_async_tx_descriptor *
__ioat_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
		     dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt,
		     size_t len, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
	struct ioat_ring_ent *compl_desc;
	struct ioat_ring_ent *desc;
	struct ioat_ring_ent *ext;
	size_t total_len = len;
	struct ioat_xor_descriptor *xor;
	struct ioat_xor_ext_descriptor *xor_ex = NULL;
	struct ioat_dma_descriptor *hw;
	int num_descs, with_ext, idx, i;
	u32 offset = 0;
	u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;

	BUG_ON(src_cnt < 2);

	num_descs = ioat_xferlen_to_descs(ioat_chan, len);
	/* we need 2x the number of descriptors to cover more than 5
	 * sources
	 */
	if (src_cnt > 5) {
		with_ext = 1;
		num_descs *= 2;
	} else
		with_ext = 0;

	/* completion writes from the raid engine may pass completion
	 * writes from the legacy engine, so we need one extra null
	 * (legacy) descriptor to ensure all completion writes arrive in
	 * order.
	 */
	if (likely(num_descs) &&
	    ioat_check_space_lock(ioat_chan, num_descs+1) == 0)
		idx = ioat_chan->head;
	else
		return NULL;
	i = 0;
	do {
		struct ioat_raw_descriptor *descs[2];
		size_t xfer_size = min_t(size_t,
					 len, 1 << ioat_chan->xfercap_log);
		int s;

		desc = ioat_get_ring_ent(ioat_chan, idx + i);
		xor = desc->xor;

		/* save a branch by unconditionally retrieving the
		 * extended descriptor; xor_set_src() knows not to write
		 * to it in the single descriptor case
		 */
		ext = ioat_get_ring_ent(ioat_chan, idx + i + 1);
		xor_ex = ext->xor_ex;

		descs[0] = (struct ioat_raw_descriptor *) xor;
		descs[1] = (struct ioat_raw_descriptor *) xor_ex;
		for (s = 0; s < src_cnt; s++)
			xor_set_src(descs, src[s], offset, s);
		xor->size = xfer_size;
		xor->dst_addr = dest + offset;
		xor->ctl = 0;
		xor->ctl_f.op = op;
		xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt);

		len -= xfer_size;
		offset += xfer_size;
		dump_desc_dbg(ioat_chan, desc);
	} while ((i += 1 + with_ext) < num_descs);

	/* last xor descriptor carries the unmap parameters and fence bit */
	desc->txd.flags = flags;
	desc->len = total_len;
	if (result)
		desc->result = result;
	xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE);

	/* completion descriptor carries interrupt bit */
	compl_desc = ioat_get_ring_ent(ioat_chan, idx + i);
	compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
	hw = compl_desc->hw;
	hw->ctl = 0;
	hw->ctl_f.null = 1;
	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
	hw->ctl_f.compl_write = 1;
	hw->size = NULL_DESC_BUFFER_SIZE;
	dump_desc_dbg(ioat_chan, compl_desc);

	/* we leave the channel locked to ensure in-order submission */
	return &compl_desc->txd;
}

struct dma_async_tx_descriptor *
ioat_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
	      unsigned int src_cnt, size_t len, unsigned long flags)
{
	return __ioat_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags);
}

struct dma_async_tx_descriptor *
ioat_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
		  unsigned int src_cnt, size_t len,
		  enum sum_check_flags *result, unsigned long flags)
{
	/* the cleanup routine only sets bits on validate failure; it
	 * does not clear bits on validate success... so clear it here
	 */
	*result = 0;

	return __ioat_prep_xor_lock(chan, result, src[0], &src[1],
				    src_cnt - 1, len, flags);
}

static void
dump_pq_desc_dbg(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc,
		 struct ioat_ring_ent *ext)
{
	struct device *dev = to_dev(ioat_chan);
	struct ioat_pq_descriptor *pq = desc->pq;
	struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL;
	struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex };
	int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
	int i;

	dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
		" sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'"
		" src_cnt: %d)\n",
		desc_id(desc), (unsigned long long) desc->txd.phys,
		(unsigned long long) (pq_ex ? pq_ex->next : pq->next),
		desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op,
		pq->ctl_f.int_en, pq->ctl_f.compl_write,
		pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
		pq->ctl_f.src_cnt);
	for (i = 0; i < src_cnt; i++)
		dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
			(unsigned long long) pq_get_src(descs, i), pq->coef[i]);
	dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
	dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
	dev_dbg(dev, "\tNEXT: %#llx\n", pq->next);
}

static void dump_pq16_desc_dbg(struct ioatdma_chan *ioat_chan,
			       struct ioat_ring_ent *desc)
{
	struct device *dev = to_dev(ioat_chan);
	struct ioat_pq_descriptor *pq = desc->pq;
	struct ioat_raw_descriptor *descs[] = { (void *)pq,
						(void *)pq,
						(void *)pq };
	int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt);
	int i;

	if (desc->sed) {
		descs[1] = (void *)desc->sed->hw;
		descs[2] = (void *)desc->sed->hw + 64;
	}

	dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
		" sz: %#x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'"
		" src_cnt: %d)\n",
		desc_id(desc), (unsigned long long) desc->txd.phys,
		(unsigned long long) pq->next,
		desc->txd.flags, pq->size, pq->ctl,
		pq->ctl_f.op, pq->ctl_f.int_en,
		pq->ctl_f.compl_write,
		pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
		pq->ctl_f.src_cnt);
	for (i = 0; i < src_cnt; i++) {
		dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
			(unsigned long long) pq16_get_src(descs, i),
			pq->coef[i]);
	}
	dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
	dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
}

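/*
 * pq setup mirrors the xor path with two extra wrinkles: RAID6
 * continuations (DMA_PREP_CONTINUE) feed the previous P/Q results back
 * in as implied sources, and CB 3.2 and earlier (cb32 below) are
 * subject to the raid-engine completion-write ordering problem, so
 * those versions still append a legacy null descriptor for the
 * completion write
 */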
static struct dma_async_tx_descriptor *
__ioat_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
		    const dma_addr_t *dst, const dma_addr_t *src,
		    unsigned int src_cnt, const unsigned char *scf,
		    size_t len, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
	struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
	struct ioat_ring_ent *compl_desc;
	struct ioat_ring_ent *desc;
	struct ioat_ring_ent *ext;
	size_t total_len = len;
	struct ioat_pq_descriptor *pq;
	struct ioat_pq_ext_descriptor *pq_ex = NULL;
	struct ioat_dma_descriptor *hw;
	u32 offset = 0;
	u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
	int i, s, idx, with_ext, num_descs;
	int cb32 = (ioat_dma->version < IOAT_VER_3_3) ? 1 : 0;

	dev_dbg(to_dev(ioat_chan), "%s\n", __func__);
	/* the engine requires at least two sources (we provide
	 * at least 1 implied source in the DMA_PREP_CONTINUE case)
	 */
	BUG_ON(src_cnt + dmaf_continue(flags) < 2);

	num_descs = ioat_xferlen_to_descs(ioat_chan, len);
	/* we need 2x the number of descriptors to cover more than 3
	 * sources (we need 1 extra source in the q-only continuation
	 * case and 3 extra sources in the p+q continuation case).
	 */
	if (src_cnt + dmaf_p_disabled_continue(flags) > 3 ||
	    (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) {
		with_ext = 1;
		num_descs *= 2;
	} else
		with_ext = 0;

	/* completion writes from the raid engine may pass completion
	 * writes from the legacy engine, so we need one extra null
	 * (legacy) descriptor to ensure all completion writes arrive in
	 * order.
	 */
	if (likely(num_descs) &&
	    ioat_check_space_lock(ioat_chan, num_descs + cb32) == 0)
		idx = ioat_chan->head;
	else
		return NULL;
	i = 0;
	do {
		struct ioat_raw_descriptor *descs[2];
		size_t xfer_size = min_t(size_t, len,
					 1 << ioat_chan->xfercap_log);

		desc = ioat_get_ring_ent(ioat_chan, idx + i);
		pq = desc->pq;

		/* save a branch by unconditionally retrieving the
		 * extended descriptor; pq_set_src() knows not to write
		 * to it in the single descriptor case
		 */
		ext = ioat_get_ring_ent(ioat_chan, idx + i + with_ext);
		pq_ex = ext->pq_ex;

		descs[0] = (struct ioat_raw_descriptor *) pq;
		descs[1] = (struct ioat_raw_descriptor *) pq_ex;

		for (s = 0; s < src_cnt; s++)
			pq_set_src(descs, src[s], offset, scf[s], s);

		/* see the comment for dma_maxpq in include/linux/dmaengine.h */
		if (dmaf_p_disabled_continue(flags))
			pq_set_src(descs, dst[1], offset, 1, s++);
		else if (dmaf_continue(flags)) {
			pq_set_src(descs, dst[0], offset, 0, s++);
			pq_set_src(descs, dst[1], offset, 1, s++);
			pq_set_src(descs, dst[1], offset, 0, s++);
		}
		pq->size = xfer_size;
		pq->p_addr = dst[0] + offset;
		pq->q_addr = dst[1] + offset;
		pq->ctl = 0;
		pq->ctl_f.op = op;
		/* we turn on descriptor write back error status */
		if (ioat_dma->cap & IOAT_CAP_DWBES)
			pq->ctl_f.wb_en = result ? 1 : 0;
		pq->ctl_f.src_cnt = src_cnt_to_hw(s);
		pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
		pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);

		len -= xfer_size;
		offset += xfer_size;
	} while ((i += 1 + with_ext) < num_descs);

	/* last pq descriptor carries the unmap parameters and fence bit */
	desc->txd.flags = flags;
	desc->len = total_len;
	if (result)
		desc->result = result;
	pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
	dump_pq_desc_dbg(ioat_chan, desc, ext);

	if (!cb32) {
		pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
		pq->ctl_f.compl_write = 1;
		compl_desc = desc;
	} else {
		/* completion descriptor carries interrupt bit */
		compl_desc = ioat_get_ring_ent(ioat_chan, idx + i);
		compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
		hw = compl_desc->hw;
		hw->ctl = 0;
		hw->ctl_f.null = 1;
		hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
		hw->ctl_f.compl_write = 1;
		hw->size = NULL_DESC_BUFFER_SIZE;
		dump_desc_dbg(ioat_chan, compl_desc);
	}

	/* we leave the channel locked to ensure in-order submission */
	return &compl_desc->txd;
}

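/*
 * 16-source pq: only the first two sources fit in the inline pq
 * descriptor (per pq16_idx_to_desc above); the rest live in a sed
 * (super extended descriptor) block allocated per ring entry and
 * linked in via pq->sed_addr
 */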
static struct dma_async_tx_descriptor *
__ioat_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result,
		      const dma_addr_t *dst, const dma_addr_t *src,
		      unsigned int src_cnt, const unsigned char *scf,
		      size_t len, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
	struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
	struct ioat_ring_ent *desc;
	size_t total_len = len;
	struct ioat_pq_descriptor *pq;
	u32 offset = 0;
	u8 op;
	int i, s, idx, num_descs;

	/* this function is only called with 9-16 sources */
	op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S;

	dev_dbg(to_dev(ioat_chan), "%s\n", __func__);

	num_descs = ioat_xferlen_to_descs(ioat_chan, len);

	/*
	 * 16 source pq is only available on cb3.3 and is not affected by
	 * the completion write hw bug, so no extra null descriptor is
	 * needed.
	 */
	if (num_descs && ioat_check_space_lock(ioat_chan, num_descs) == 0)
		idx = ioat_chan->head;
	else
		return NULL;

	i = 0;

	do {
		struct ioat_raw_descriptor *descs[4];
		size_t xfer_size = min_t(size_t, len,
					 1 << ioat_chan->xfercap_log);

		desc = ioat_get_ring_ent(ioat_chan, idx + i);
		pq = desc->pq;

		descs[0] = (struct ioat_raw_descriptor *) pq;

		desc->sed = ioat3_alloc_sed(ioat_dma, (src_cnt-2) >> 3);
		if (!desc->sed) {
			dev_err(to_dev(ioat_chan),
				"%s: no free sed entries\n", __func__);
			return NULL;
		}

		pq->sed_addr = desc->sed->dma;
		desc->sed->parent = desc;

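		/*
		 * the sed block holds one or two consecutive 64-byte hw
		 * descriptors, mirroring pq16_idx_to_desc above: sources
		 * 2-8 go to the first block and sources 9-15 to the
		 * second
		 */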
		descs[1] = (struct ioat_raw_descriptor *)desc->sed->hw;
		descs[2] = (void *)descs[1] + 64;

		for (s = 0; s < src_cnt; s++)
			pq16_set_src(descs, src[s], offset, scf[s], s);

		/* see the comment for dma_maxpq in include/linux/dmaengine.h */
		if (dmaf_p_disabled_continue(flags))
			pq16_set_src(descs, dst[1], offset, 1, s++);
		else if (dmaf_continue(flags)) {
			pq16_set_src(descs, dst[0], offset, 0, s++);
			pq16_set_src(descs, dst[1], offset, 1, s++);
			pq16_set_src(descs, dst[1], offset, 0, s++);
		}

		pq->size = xfer_size;
		pq->p_addr = dst[0] + offset;
		pq->q_addr = dst[1] + offset;
		pq->ctl = 0;
		pq->ctl_f.op = op;
		pq->ctl_f.src_cnt = src16_cnt_to_hw(s);
		/* we turn on descriptor write back error status */
		if (ioat_dma->cap & IOAT_CAP_DWBES)
			pq->ctl_f.wb_en = result ? 1 : 0;
		pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
		pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);

		len -= xfer_size;
		offset += xfer_size;
	} while (++i < num_descs);

	/* last pq descriptor carries the unmap parameters and fence bit */
	desc->txd.flags = flags;
	desc->len = total_len;
	if (result)
		desc->result = result;
	pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);

	/* with cb3.3 we should be able to do completion w/o a null desc */
	pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
	pq->ctl_f.compl_write = 1;

	dump_pq16_desc_dbg(ioat_chan, desc);

	/* we leave the channel locked to ensure in-order submission */
	return &desc->txd;
}

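/*
 * effective source count: RAID6 continuations carry implied sources
 * (the prior P and/or Q results), per the dma_maxpq() convention in
 * include/linux/dmaengine.h; this decides between the 8- and 16-source
 * prep paths below
 */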
static int src_cnt_flags(unsigned int src_cnt, unsigned long flags)
{
	if (dmaf_p_disabled_continue(flags))
		return src_cnt + 1;
	else if (dmaf_continue(flags))
		return src_cnt + 3;
	else
		return src_cnt;
}

struct dma_async_tx_descriptor *
ioat_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
	     unsigned int src_cnt, const unsigned char *scf, size_t len,
	     unsigned long flags)
{
	/* specify valid address for disabled result */
	if (flags & DMA_PREP_PQ_DISABLE_P)
		dst[0] = dst[1];
	if (flags & DMA_PREP_PQ_DISABLE_Q)
		dst[1] = dst[0];

	/* handle the single source multiply case from the raid6
	 * recovery path
	 */
	if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) {
		dma_addr_t single_source[2];
		unsigned char single_source_coef[2];

		BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q);
		single_source[0] = src[0];
		single_source[1] = src[0];
		single_source_coef[0] = scf[0];
		single_source_coef[1] = 0;
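		/*
		 * the engine wants at least two sources, so the lone
		 * source is doubled and the duplicate gets coefficient
		 * 0; a zero coefficient contributes nothing to Q, so
		 * the result is the plain GF multiply of src[0] by
		 * scf[0]
		 */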

		return src_cnt_flags(src_cnt, flags) > 8 ?
			__ioat_prep_pq16_lock(chan, NULL, dst, single_source,
					      2, single_source_coef, len,
					      flags) :
			__ioat_prep_pq_lock(chan, NULL, dst, single_source, 2,
					    single_source_coef, len, flags);

	} else {
		return src_cnt_flags(src_cnt, flags) > 8 ?
			__ioat_prep_pq16_lock(chan, NULL, dst, src, src_cnt,
					      scf, len, flags) :
			__ioat_prep_pq_lock(chan, NULL, dst, src, src_cnt,
					    scf, len, flags);
	}
}

struct dma_async_tx_descriptor *
ioat_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
		 unsigned int src_cnt, const unsigned char *scf, size_t len,
		 enum sum_check_flags *pqres, unsigned long flags)
{
	/* specify valid address for disabled result */
	if (flags & DMA_PREP_PQ_DISABLE_P)
		pq[0] = pq[1];
	if (flags & DMA_PREP_PQ_DISABLE_Q)
		pq[1] = pq[0];

	/* the cleanup routine only sets bits on validate failure; it
	 * does not clear bits on validate success... so clear it here
	 */
	*pqres = 0;

	return src_cnt_flags(src_cnt, flags) > 8 ?
		__ioat_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len,
				      flags) :
		__ioat_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
				    flags);
}

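/*
 * xor emulated via pq: with Q disabled and every coefficient zeroed,
 * the operation reduces to P = xor of all sources, which lets a
 * channel that exposes pq service xor requests through the same engine
 * (presumably why these helpers exist for hardware without a native
 * xor op)
 */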
struct dma_async_tx_descriptor *
ioat_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
		unsigned int src_cnt, size_t len, unsigned long flags)
{
	unsigned char scf[src_cnt];
	dma_addr_t pq[2];

	memset(scf, 0, src_cnt);
	pq[0] = dst;
	flags |= DMA_PREP_PQ_DISABLE_Q;
	pq[1] = dst; /* specify valid address for disabled result */

	return src_cnt_flags(src_cnt, flags) > 8 ?
		__ioat_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len,
				      flags) :
		__ioat_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
				    flags);
}

struct dma_async_tx_descriptor *
ioat_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
		    unsigned int src_cnt, size_t len,
		    enum sum_check_flags *result, unsigned long flags)
{
	unsigned char scf[src_cnt];
	dma_addr_t pq[2];

	/* the cleanup routine only sets bits on validate failure; it
	 * does not clear bits on validate success... so clear it here
	 */
	*result = 0;

	memset(scf, 0, src_cnt);
	pq[0] = src[0];
	flags |= DMA_PREP_PQ_DISABLE_Q;
	pq[1] = pq[0]; /* specify valid address for disabled result */

	return src_cnt_flags(src_cnt, flags) > 8 ?
		__ioat_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1,
				      scf, len, flags) :
		__ioat_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1,
				    scf, len, flags);
}

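/*
 * an interrupt "transfer" is a null descriptor whose only effects are
 * the completion write and the interrupt, giving clients a callback
 * point in the submission stream without moving any data
 */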
struct dma_async_tx_descriptor *
ioat_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
	struct ioat_ring_ent *desc;
	struct ioat_dma_descriptor *hw;

	if (ioat_check_space_lock(ioat_chan, 1) == 0)
		desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head);
	else
		return NULL;

	hw = desc->hw;
	hw->ctl = 0;
	hw->ctl_f.null = 1;
	hw->ctl_f.int_en = 1;
	hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
	hw->ctl_f.compl_write = 1;
	hw->size = NULL_DESC_BUFFER_SIZE;
	hw->src_addr = 0;
	hw->dst_addr = 0;

	desc->txd.flags = flags;
	desc->len = 1;

	dump_desc_dbg(ioat_chan, desc);

	/* we leave the channel locked to ensure in-order submission */
	return &desc->txd;
}
707 |