/*
 * Intel I/OAT DMA Linux driver
 * Copyright(c) 2004 - 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 */
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/gfp.h>
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/prefetch.h>
#include "../dmaengine.h"
#include "registers.h"
#include "hw.h"
#include "dma.h"

#define MAX_SCF	1024

/* provide a lookup table for setting the source address in the base or
 * extended descriptor of an xor or pq descriptor
 */
static const u8 xor_idx_to_desc = 0xe0;
static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 };
static const u8 pq_idx_to_desc = 0xf8;
static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1,
				       2, 2, 2, 2, 2, 2, 2 };
static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 };
static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7,
					0, 1, 2, 3, 4, 5, 6 };

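/*
 * Worked example of the encoding above (added for illustration): the
 * *_idx_to_desc values are bitmaps indexed by source number, so
 * (xor_idx_to_desc >> idx) & 1 selects the base (0) or extended (1)
 * descriptor.  With 0xe0 == 0b11100000, xor sources 0-4 live in the
 * base descriptor and sources 5-7 in the extended one; the matching
 * *_idx_to_field[] entry then gives the u64 slot within that
 * descriptor, e.g. xor source 5 lands in field 0 of the extended
 * descriptor.
 */
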
static void xor_set_src(struct ioat_raw_descriptor *descs[2],
			dma_addr_t addr, u32 offset, int idx)
{
	struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];

	raw->field[xor_idx_to_field[idx]] = addr + offset;
}

static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
{
	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];

	return raw->field[pq_idx_to_field[idx]];
}

static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx)
{
	struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]];

	return raw->field[pq16_idx_to_field[idx]];
}

static void pq_set_src(struct ioat_raw_descriptor *descs[2],
		       dma_addr_t addr, u32 offset, u8 coef, int idx)
{
	struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0];
	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];

	raw->field[pq_idx_to_field[idx]] = addr + offset;
	pq->coef[idx] = coef;
}

static void pq16_set_src(struct ioat_raw_descriptor *desc[3],
			 dma_addr_t addr, u32 offset, u8 coef, unsigned idx)
{
	struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *)desc[0];
	struct ioat_pq16a_descriptor *pq16 =
		(struct ioat_pq16a_descriptor *)desc[1];
	struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]];

	raw->field[pq16_idx_to_field[idx]] = addr + offset;

	if (idx < 8)
		pq->coef[idx] = coef;
	else
		pq16->coef[idx - 8] = coef;
}

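/*
 * For the 16-source paths above, the source slots span three 64-byte
 * blocks: desc[0] is the base pq descriptor while desc[1] and desc[2]
 * point at the two halves of the super extended descriptor (sed->hw
 * and sed->hw + 64, as set up in __ioat_prep_pq16_lock below).
 * pq16_idx_to_desc[] picks the block and pq16_idx_to_field[] the u64
 * slot within it, e.g. source 9 resolves to block 2, field 0.
 */
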
static struct ioat_sed_ent *
ioat3_alloc_sed(struct ioatdma_device *ioat_dma, unsigned int hw_pool)
{
	struct ioat_sed_ent *sed;
	gfp_t flags = __GFP_ZERO | GFP_ATOMIC;

	sed = kmem_cache_alloc(ioat_sed_cache, flags);
	if (!sed)
		return NULL;

	sed->hw_pool = hw_pool;
	sed->hw = dma_pool_alloc(ioat_dma->sed_hw_pool[hw_pool],
				 flags, &sed->dma);
	if (!sed->hw) {
		kmem_cache_free(ioat_sed_cache, sed);
		return NULL;
	}

	return sed;
}

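/*
 * Note (added for clarity): the super extended descriptor is allocated
 * GFP_ATOMIC since the prep routines below may run in atomic context,
 * and it is zeroed up front, presumably so that unused source slots
 * read back as zero.
 */
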
struct dma_async_tx_descriptor *
ioat_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
			  dma_addr_t dma_src, size_t len, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
	struct ioat_dma_descriptor *hw;
	struct ioat_ring_ent *desc;
	dma_addr_t dst = dma_dest;
	dma_addr_t src = dma_src;
	size_t total_len = len;
	int num_descs, idx, i;

	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
		return NULL;

	num_descs = ioat_xferlen_to_descs(ioat_chan, len);
	if (likely(num_descs) &&
	    ioat_check_space_lock(ioat_chan, num_descs) == 0)
		idx = ioat_chan->head;
	else
		return NULL;
	i = 0;
	do {
		size_t copy = min_t(size_t, len, 1 << ioat_chan->xfercap_log);

		desc = ioat_get_ring_ent(ioat_chan, idx + i);
		hw = desc->hw;

		hw->size = copy;
		hw->ctl = 0;
		hw->src_addr = src;
		hw->dst_addr = dst;

		len -= copy;
		dst += copy;
		src += copy;
		dump_desc_dbg(ioat_chan, desc);
	} while (++i < num_descs);

	desc->txd.flags = flags;
	desc->len = total_len;
	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
	hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
	hw->ctl_f.compl_write = 1;
	dump_desc_dbg(ioat_chan, desc);
	/* we leave the channel locked to ensure in order submission */

	return &desc->txd;
}

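/*
 * A minimal client-side sketch (illustrative, not part of this file) of
 * driving the memcpy prep routine through the dmaengine core:
 *
 *	struct dma_async_tx_descriptor *tx;
 *	dma_cookie_t cookie;
 *
 *	tx = dmaengine_prep_dma_memcpy(chan, dma_dest, dma_src, len,
 *				       DMA_PREP_INTERRUPT);
 *	if (tx) {
 *		cookie = dmaengine_submit(tx);
 *		dma_async_issue_pending(chan);
 *	}
 */
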
static struct dma_async_tx_descriptor *
__ioat_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
		     dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt,
		     size_t len, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
	struct ioat_ring_ent *compl_desc;
	struct ioat_ring_ent *desc;
	struct ioat_ring_ent *ext;
	size_t total_len = len;
	struct ioat_xor_descriptor *xor;
	struct ioat_xor_ext_descriptor *xor_ex = NULL;
	struct ioat_dma_descriptor *hw;
	int num_descs, with_ext, idx, i;
	u32 offset = 0;
	u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;

	BUG_ON(src_cnt < 2);

	num_descs = ioat_xferlen_to_descs(ioat_chan, len);
	/* we need 2x the number of descriptors to cover greater than 5
	 * sources
	 */
	if (src_cnt > 5) {
		with_ext = 1;
		num_descs *= 2;
	} else
		with_ext = 0;

	/* completion writes from the raid engine may pass completion
	 * writes from the legacy engine, so we need one extra null
	 * (legacy) descriptor to ensure all completion writes arrive in
	 * order.
	 */
	if (likely(num_descs) &&
	    ioat_check_space_lock(ioat_chan, num_descs+1) == 0)
		idx = ioat_chan->head;
	else
		return NULL;
	i = 0;
	do {
		struct ioat_raw_descriptor *descs[2];
		size_t xfer_size = min_t(size_t,
					 len, 1 << ioat_chan->xfercap_log);
		int s;

		desc = ioat_get_ring_ent(ioat_chan, idx + i);
		xor = desc->xor;

		/* save a branch by unconditionally retrieving the
		 * extended descriptor; xor_set_src() knows not to write
		 * to it in the single descriptor case
		 */
		ext = ioat_get_ring_ent(ioat_chan, idx + i + 1);
		xor_ex = ext->xor_ex;

		descs[0] = (struct ioat_raw_descriptor *) xor;
		descs[1] = (struct ioat_raw_descriptor *) xor_ex;
		for (s = 0; s < src_cnt; s++)
			xor_set_src(descs, src[s], offset, s);
		xor->size = xfer_size;
		xor->dst_addr = dest + offset;
		xor->ctl = 0;
		xor->ctl_f.op = op;
		xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt);

		len -= xfer_size;
		offset += xfer_size;
		dump_desc_dbg(ioat_chan, desc);
	} while ((i += 1 + with_ext) < num_descs);

	/* last xor descriptor carries the unmap parameters and fence bit */
	desc->txd.flags = flags;
	desc->len = total_len;
	if (result)
		desc->result = result;
	xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE);

	/* completion descriptor carries interrupt bit */
	compl_desc = ioat_get_ring_ent(ioat_chan, idx + i);
	compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
	hw = compl_desc->hw;
	hw->ctl = 0;
	hw->ctl_f.null = 1;
	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
	hw->ctl_f.compl_write = 1;
	hw->size = NULL_DESC_BUFFER_SIZE;
	dump_desc_dbg(ioat_chan, compl_desc);

	/* we leave the channel locked to ensure in order submission */
	return &compl_desc->txd;
}

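/*
 * Note (added for clarity): when with_ext is set, ring entries
 * alternate base/extended descriptors, which is why num_descs is
 * doubled up front and why the loop above advances i by 1 + with_ext;
 * ioat_get_ring_ent(ioat_chan, idx + i + 1) is then the extended
 * descriptor paired with the base entry at idx + i.
 */
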
struct dma_async_tx_descriptor *
ioat_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
	      unsigned int src_cnt, size_t len, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);

	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
		return NULL;

	return __ioat_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags);
}

struct dma_async_tx_descriptor *
ioat_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
		  unsigned int src_cnt, size_t len,
		  enum sum_check_flags *result, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);

	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
		return NULL;

	/* the cleanup routine only sets bits on validate failure; it
	 * does not clear bits on validate success... so clear it here
	 */
	*result = 0;

	return __ioat_prep_xor_lock(chan, result, src[0], &src[1],
				    src_cnt - 1, len, flags);
}

static void
dump_pq_desc_dbg(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc,
		 struct ioat_ring_ent *ext)
{
	struct device *dev = to_dev(ioat_chan);
	struct ioat_pq_descriptor *pq = desc->pq;
	struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL;
	struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex };
	int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
	int i;

	dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
		" sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'"
		" src_cnt: %d)\n",
		desc_id(desc), (unsigned long long) desc->txd.phys,
		(unsigned long long) (pq_ex ? pq_ex->next : pq->next),
		desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op,
		pq->ctl_f.int_en, pq->ctl_f.compl_write,
		pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
		pq->ctl_f.src_cnt);
	for (i = 0; i < src_cnt; i++)
		dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
			(unsigned long long) pq_get_src(descs, i), pq->coef[i]);
	dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
	dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
	dev_dbg(dev, "\tNEXT: %#llx\n", pq->next);
}

static void dump_pq16_desc_dbg(struct ioatdma_chan *ioat_chan,
			       struct ioat_ring_ent *desc)
{
	struct device *dev = to_dev(ioat_chan);
	struct ioat_pq_descriptor *pq = desc->pq;
	struct ioat_raw_descriptor *descs[] = { (void *)pq,
						(void *)pq,
						(void *)pq };
	int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt);
	int i;

	if (desc->sed) {
		descs[1] = (void *)desc->sed->hw;
		descs[2] = (void *)desc->sed->hw + 64;
	}

	dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
		" sz: %#x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'"
		" src_cnt: %d)\n",
		desc_id(desc), (unsigned long long) desc->txd.phys,
		(unsigned long long) pq->next,
		desc->txd.flags, pq->size, pq->ctl,
		pq->ctl_f.op, pq->ctl_f.int_en,
		pq->ctl_f.compl_write,
		pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
		pq->ctl_f.src_cnt);
	for (i = 0; i < src_cnt; i++) {
		dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
			(unsigned long long) pq16_get_src(descs, i),
			pq->coef[i]);
	}
	dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
	dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
}

static struct dma_async_tx_descriptor *
__ioat_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
		    const dma_addr_t *dst, const dma_addr_t *src,
		    unsigned int src_cnt, const unsigned char *scf,
		    size_t len, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
	struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
	struct ioat_ring_ent *compl_desc;
	struct ioat_ring_ent *desc;
	struct ioat_ring_ent *ext;
	size_t total_len = len;
	struct ioat_pq_descriptor *pq;
	struct ioat_pq_ext_descriptor *pq_ex = NULL;
	struct ioat_dma_descriptor *hw;
	u32 offset = 0;
	u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
	int i, s, idx, with_ext, num_descs;
	int cb32 = (ioat_dma->version < IOAT_VER_3_3) ? 1 : 0;

	dev_dbg(to_dev(ioat_chan), "%s\n", __func__);
	/* the engine requires at least two sources (we provide
	 * at least 1 implied source in the DMA_PREP_CONTINUE case)
	 */
	BUG_ON(src_cnt + dmaf_continue(flags) < 2);

	num_descs = ioat_xferlen_to_descs(ioat_chan, len);
	/* we need 2x the number of descriptors to cover greater than 3
	 * sources (we need 1 extra source in the q-only continuation
	 * case and 3 extra sources in the p+q continuation case)
	 */
	if (src_cnt + dmaf_p_disabled_continue(flags) > 3 ||
	    (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) {
		with_ext = 1;
		num_descs *= 2;
	} else
		with_ext = 0;

	/* completion writes from the raid engine may pass completion
	 * writes from the legacy engine, so we need one extra null
	 * (legacy) descriptor to ensure all completion writes arrive in
	 * order.
	 */
	if (likely(num_descs) &&
	    ioat_check_space_lock(ioat_chan, num_descs + cb32) == 0)
		idx = ioat_chan->head;
	else
		return NULL;
	i = 0;
	do {
		struct ioat_raw_descriptor *descs[2];
		size_t xfer_size = min_t(size_t, len,
					 1 << ioat_chan->xfercap_log);

		desc = ioat_get_ring_ent(ioat_chan, idx + i);
		pq = desc->pq;

		/* save a branch by unconditionally retrieving the
		 * extended descriptor; pq_set_src() knows not to write
		 * to it in the single descriptor case
		 */
		ext = ioat_get_ring_ent(ioat_chan, idx + i + with_ext);
		pq_ex = ext->pq_ex;

		descs[0] = (struct ioat_raw_descriptor *) pq;
		descs[1] = (struct ioat_raw_descriptor *) pq_ex;

		for (s = 0; s < src_cnt; s++)
			pq_set_src(descs, src[s], offset, scf[s], s);

		/* see the comment for dma_maxpq in include/linux/dmaengine.h */
		if (dmaf_p_disabled_continue(flags))
			pq_set_src(descs, dst[1], offset, 1, s++);
		else if (dmaf_continue(flags)) {
			pq_set_src(descs, dst[0], offset, 0, s++);
			pq_set_src(descs, dst[1], offset, 1, s++);
			pq_set_src(descs, dst[1], offset, 0, s++);
		}
		pq->size = xfer_size;
		pq->p_addr = dst[0] + offset;
		pq->q_addr = dst[1] + offset;
		pq->ctl = 0;
		pq->ctl_f.op = op;
		/* we turn on descriptor write back error status */
		if (ioat_dma->cap & IOAT_CAP_DWBES)
			pq->ctl_f.wb_en = result ? 1 : 0;
		pq->ctl_f.src_cnt = src_cnt_to_hw(s);
		pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
		pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);

		len -= xfer_size;
		offset += xfer_size;
	} while ((i += 1 + with_ext) < num_descs);

	/* last pq descriptor carries the unmap parameters and fence bit */
	desc->txd.flags = flags;
	desc->len = total_len;
	if (result)
		desc->result = result;
	pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
	dump_pq_desc_dbg(ioat_chan, desc, ext);

	if (!cb32) {
		pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
		pq->ctl_f.compl_write = 1;
		compl_desc = desc;
	} else {
		/* completion descriptor carries interrupt bit */
		compl_desc = ioat_get_ring_ent(ioat_chan, idx + i);
		compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
		hw = compl_desc->hw;
		hw->ctl = 0;
		hw->ctl_f.null = 1;
		hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
		hw->ctl_f.compl_write = 1;
		hw->size = NULL_DESC_BUFFER_SIZE;
		dump_desc_dbg(ioat_chan, compl_desc);
	}

	/* we leave the channel locked to ensure in order submission */
	return &compl_desc->txd;
}

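/*
 * Note (added for clarity): cb32 above flags pre-CB3.3 hardware
 * (ioat_dma->version < IOAT_VER_3_3), where raid completion writes can
 * pass legacy ones; those devices get the extra trailing null
 * descriptor for the interrupt/completion write, while CB3.3+
 * completes directly from the pq descriptor itself.
 */
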
static struct dma_async_tx_descriptor *
__ioat_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result,
		      const dma_addr_t *dst, const dma_addr_t *src,
		      unsigned int src_cnt, const unsigned char *scf,
		      size_t len, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
	struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
	struct ioat_ring_ent *desc;
	size_t total_len = len;
	struct ioat_pq_descriptor *pq;
	u32 offset = 0;
	u8 op;
	int i, s, idx, num_descs;

	/* this function is only called with 9-16 sources */
	op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S;

	dev_dbg(to_dev(ioat_chan), "%s\n", __func__);

	num_descs = ioat_xferlen_to_descs(ioat_chan, len);

	/*
	 * 16 source pq is only available on cb3.3 and has no completion
	 * write hw bug.
	 */
	if (num_descs && ioat_check_space_lock(ioat_chan, num_descs) == 0)
		idx = ioat_chan->head;
	else
		return NULL;

	i = 0;

	do {
		struct ioat_raw_descriptor *descs[4];
		size_t xfer_size = min_t(size_t, len,
					 1 << ioat_chan->xfercap_log);

		desc = ioat_get_ring_ent(ioat_chan, idx + i);
		pq = desc->pq;

		descs[0] = (struct ioat_raw_descriptor *) pq;

		desc->sed = ioat3_alloc_sed(ioat_dma, (src_cnt-2) >> 3);
		if (!desc->sed) {
			dev_err(to_dev(ioat_chan),
				"%s: no free sed entries\n", __func__);
			return NULL;
		}

		pq->sed_addr = desc->sed->dma;
		desc->sed->parent = desc;

		descs[1] = (struct ioat_raw_descriptor *)desc->sed->hw;
		descs[2] = (void *)descs[1] + 64;

		for (s = 0; s < src_cnt; s++)
			pq16_set_src(descs, src[s], offset, scf[s], s);

		/* see the comment for dma_maxpq in include/linux/dmaengine.h */
		if (dmaf_p_disabled_continue(flags))
			pq16_set_src(descs, dst[1], offset, 1, s++);
		else if (dmaf_continue(flags)) {
			pq16_set_src(descs, dst[0], offset, 0, s++);
			pq16_set_src(descs, dst[1], offset, 1, s++);
			pq16_set_src(descs, dst[1], offset, 0, s++);
		}

		pq->size = xfer_size;
		pq->p_addr = dst[0] + offset;
		pq->q_addr = dst[1] + offset;
		pq->ctl = 0;
		pq->ctl_f.op = op;
		pq->ctl_f.src_cnt = src16_cnt_to_hw(s);
		/* we turn on descriptor write back error status */
		if (ioat_dma->cap & IOAT_CAP_DWBES)
			pq->ctl_f.wb_en = result ? 1 : 0;
		pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
		pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);

		len -= xfer_size;
		offset += xfer_size;
	} while (++i < num_descs);

	/* last pq descriptor carries the unmap parameters and fence bit */
	desc->txd.flags = flags;
	desc->len = total_len;
	if (result)
		desc->result = result;
	pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);

	/* with cb3.3 we should be able to do completion w/o a null desc */
	pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
	pq->ctl_f.compl_write = 1;

	dump_pq16_desc_dbg(ioat_chan, desc);

	/* we leave the channel locked to ensure in order submission */
	return &desc->txd;
}

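/*
 * Note (added for clarity): the (src_cnt - 2) >> 3 expression above
 * selects the sed hardware pool by source count, e.g. 9 sources give
 * pool 0 and 10-16 sources give pool 1, so only the first two pool
 * sizes come into play on this path.
 */
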
static int src_cnt_flags(unsigned int src_cnt, unsigned long flags)
{
	if (dmaf_p_disabled_continue(flags))
		return src_cnt + 1;
	else if (dmaf_continue(flags))
		return src_cnt + 3;
	else
		return src_cnt;
}

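/*
 * Worked example (added for illustration): src_cnt_flags() counts the
 * implied continuation sources that the prep loops add via
 * pq_set_src()/pq16_set_src(), i.e. +1 for the q-only (p-disabled)
 * continuation and +3 (p, q, q) for the full p+q continuation.  An
 * 8-source DMA_PREP_CONTINUE request therefore counts as 11 sources
 * and is routed to the 16-source path below.
 */
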
struct dma_async_tx_descriptor *
ioat_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
	     unsigned int src_cnt, const unsigned char *scf, size_t len,
	     unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);

	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
		return NULL;

	/* specify valid address for disabled result */
	if (flags & DMA_PREP_PQ_DISABLE_P)
		dst[0] = dst[1];
	if (flags & DMA_PREP_PQ_DISABLE_Q)
		dst[1] = dst[0];

	/* handle the single source multiply case from the raid6
	 * recovery path
	 */
	if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) {
		dma_addr_t single_source[2];
		unsigned char single_source_coef[2];

		BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q);
		single_source[0] = src[0];
		single_source[1] = src[0];
		single_source_coef[0] = scf[0];
		single_source_coef[1] = 0;

		return src_cnt_flags(src_cnt, flags) > 8 ?
			__ioat_prep_pq16_lock(chan, NULL, dst, single_source,
					      2, single_source_coef, len,
					      flags) :
			__ioat_prep_pq_lock(chan, NULL, dst, single_source, 2,
					    single_source_coef, len, flags);

	} else {
		return src_cnt_flags(src_cnt, flags) > 8 ?
			__ioat_prep_pq16_lock(chan, NULL, dst, src, src_cnt,
					      scf, len, flags) :
			__ioat_prep_pq_lock(chan, NULL, dst, src, src_cnt,
					    scf, len, flags);
	}
}

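/*
 * Note (added for clarity): the single source multiply case satisfies
 * the engine's two-source minimum by submitting src[0] twice, the
 * second time with a zero coefficient so it does not perturb the
 * result; with P disabled the operation reduces to Q = scf[0] * src.
 */
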
struct dma_async_tx_descriptor *
ioat_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
		 unsigned int src_cnt, const unsigned char *scf, size_t len,
		 enum sum_check_flags *pqres, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);

	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
		return NULL;

	/* specify valid address for disabled result */
	if (flags & DMA_PREP_PQ_DISABLE_P)
		pq[0] = pq[1];
	if (flags & DMA_PREP_PQ_DISABLE_Q)
		pq[1] = pq[0];

	/* the cleanup routine only sets bits on validate failure; it
	 * does not clear bits on validate success... so clear it here
	 */
	*pqres = 0;

	return src_cnt_flags(src_cnt, flags) > 8 ?
		__ioat_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len,
				      flags) :
		__ioat_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
				    flags);
}

struct dma_async_tx_descriptor *
ioat_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
		unsigned int src_cnt, size_t len, unsigned long flags)
{
	unsigned char scf[MAX_SCF];
	dma_addr_t pq[2];
	struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);

	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
		return NULL;

	if (src_cnt > MAX_SCF)
		return NULL;

	memset(scf, 0, src_cnt);
	pq[0] = dst;
	flags |= DMA_PREP_PQ_DISABLE_Q;
	pq[1] = dst; /* specify valid address for disabled result */

	return src_cnt_flags(src_cnt, flags) > 8 ?
		__ioat_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len,
				      flags) :
		__ioat_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
				    flags);
}

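/*
 * Note (added for clarity): with DMA_PREP_PQ_DISABLE_Q set and every
 * coefficient zeroed, the pq engine's P result is just the xor of the
 * sources, so the functions above and below reuse the pq paths to
 * provide xor and xor-validate operations.
 */
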
struct dma_async_tx_descriptor *
ioat_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
		    unsigned int src_cnt, size_t len,
		    enum sum_check_flags *result, unsigned long flags)
{
	unsigned char scf[MAX_SCF];
	dma_addr_t pq[2];
	struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);

	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
		return NULL;

	if (src_cnt > MAX_SCF)
		return NULL;

	/* the cleanup routine only sets bits on validate failure; it
	 * does not clear bits on validate success... so clear it here
	 */
	*result = 0;

	memset(scf, 0, src_cnt);
	pq[0] = src[0];
	flags |= DMA_PREP_PQ_DISABLE_Q;
	pq[1] = pq[0]; /* specify valid address for disabled result */

	return src_cnt_flags(src_cnt, flags) > 8 ?
		__ioat_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1,
				      scf, len, flags) :
		__ioat_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1,
				    scf, len, flags);
}

struct dma_async_tx_descriptor *
ioat_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
	struct ioat_ring_ent *desc;
	struct ioat_dma_descriptor *hw;

	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
		return NULL;

	if (ioat_check_space_lock(ioat_chan, 1) == 0)
		desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head);
	else
		return NULL;

	hw = desc->hw;
	hw->ctl = 0;
	hw->ctl_f.null = 1;
	hw->ctl_f.int_en = 1;
	hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
	hw->ctl_f.compl_write = 1;
	hw->size = NULL_DESC_BUFFER_SIZE;
	hw->src_addr = 0;
	hw->dst_addr = 0;

	desc->txd.flags = flags;
	desc->len = 1;

	dump_desc_dbg(ioat_chan, desc);

	/* we leave the channel locked to ensure in order submission */
	return &desc->txd;
}