Commit | Line | Data |
---|---|---|
77241056 MM |
/*
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include "hfi.h"

/* additive distance between non-SOP and SOP space */
#define SOP_DISTANCE (TXE_PIO_SIZE / 2)
/* mask of the byte offset within a PIO block */
#define PIO_BLOCK_MASK (PIO_BLOCK_SIZE - 1)
/* number of QUADWORDs in a block */
#define PIO_BLOCK_QWS (PIO_BLOCK_SIZE / sizeof(u64))

/**
 * pio_copy - copy data block to MMIO space
 * @dd: hfi1 driver device data (not referenced by this routine)
 * @pbuf: a number of blocks allocated within a PIO send context
 * @pbc: PBC to send
 * @from: source, must be 8 byte aligned
 * @count: number of DWORD (32-bit) quantities to copy from source
 *
 * Copy data from source to PIO Send Buffer memory, 8 bytes at a time.
 * Must always write full BLOCK_SIZE byte blocks.  The first block must
 * be written to the corresponding SOP=1 address.
 *
 * Known:
 * o pbuf->start always starts on a block boundary
 * o pbuf can wrap only at a block boundary
 */
void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
	      const void *from, size_t count)
{
	void __iomem *dest = pbuf->start + SOP_DISTANCE;
	void __iomem *send = dest + PIO_BLOCK_SIZE;
	void __iomem *dend;			/* 8-byte data end */

	/* write the PBC */
	writeq(pbc, dest);
	dest += sizeof(u64);

	/*
	 * Calculate where the QWORD data ends - in SOP=1 space.
	 * count is in DWORDs, so count >> 1 is the whole-QWORD count.
	 */
	dend = dest + ((count >> 1) * sizeof(u64));

	if (dend < send) {
		/*
		 * All QWORD data is within the SOP block, does *not*
		 * reach the end of the SOP block.
		 */

		while (dest < dend) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}
		/*
		 * No boundary checks are needed here:
		 * 0. We're not on the SOP block boundary
		 * 1. The possible DWORD dangle will still be within
		 *    the SOP block
		 * 2. We cannot wrap except on a block boundary.
		 */
	} else {
		/* QWORD data extends _to_ or beyond the SOP block */

		/* write 8-byte SOP chunk data */
		while (dest < send) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}
		/* drop out of the SOP range */
		dest -= SOP_DISTANCE;
		dend -= SOP_DISTANCE;

		/*
		 * If the wrap comes before or matches the data end,
		 * copy until the wrap, then wrap.
		 *
		 * If the data ends at the end of the SOP above and
		 * the buffer wraps, then pbuf->end == dend == dest
		 * and nothing will get written, but we will wrap in
		 * case there is a dangling DWORD.
		 */
		if (pbuf->end <= dend) {
			while (dest < pbuf->end) {
				writeq(*(u64 *)from, dest);
				from += sizeof(u64);
				dest += sizeof(u64);
			}

			dest -= pbuf->size;
			dend -= pbuf->size;
		}

		/* write 8-byte non-SOP, non-wrap chunk data */
		while (dest < dend) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}
	}
	/* at this point we have wrapped if we are going to wrap */

	/* write dangling u32, if any, zero-padded to a full QWORD */
	if (count & 1) {
		union mix val;

		val.val64 = 0;
		val.val32[0] = *(u32 *)from;
		writeq(val.val64, dest);
		dest += sizeof(u64);
	}
	/*
	 * Zero-fill the rest of the block; no need to check pbuf->end
	 * as we only wrap on a block boundary.
	 */
	while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) {
		writeq(0, dest);
		dest += sizeof(u64);
	}

	/* finished with this buffer */
	this_cpu_dec(*pbuf->sc->buffers_allocated);
	preempt_enable();
}
166 | ||
/* USE_SHIFTS is faster in user-space tests on a Xeon X5570 @ 2.93GHz */
#define USE_SHIFTS 1
#ifdef USE_SHIFTS
/*
 * Handle carry bytes using shifts and masks.
 *
 * NOTE: the value the unused portion of carry is expected to always be zero.
 */

/*
 * "zero" shift - bit shift used to zero out upper bytes.  Input is
 * the count of LSB bytes to preserve.
 * NOTE: zshift(0) is a 64-bit shift of a u64 (undefined behavior in C);
 * callers guard against a zero byte count.
 */
#define zshift(x) (8 * (8 - (x)))

/*
 * "merge" shift - bit shift used to merge with carry bytes.  Input is
 * the LSB byte count to move beyond.
 */
#define mshift(x) (8 * (x))
187 | ||
/*
 * Read nbytes bytes from "from" and return them in the LSB bytes
 * of pbuf->carry.  Other bytes are zeroed.  Any previous value in
 * pbuf->carry is lost.
 *
 * NOTES:
 * o do not read from "from" if nbytes is zero
 * o "from" may _not_ be u64 aligned
 * o nbytes must not span a QW boundary (i.e. offset + nbytes <= 8)
 */
static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
				  unsigned int nbytes)
{
	unsigned long off;

	if (nbytes == 0) {
		pbuf->carry.val64 = 0;
	} else {
		/* align our pointer, remembering the byte offset */
		off = (unsigned long)from & 0x7;
		from = (void *)((unsigned long)from & ~0x7l);
		/*
		 * Shift left to drop bytes above the wanted range, then
		 * right to land the wanted bytes at the bottom.
		 * NOTE(review): assumes little-endian byte layout of
		 * carry.val64 relative to carry.val8[] - confirm on BE.
		 */
		pbuf->carry.val64 = ((*(u64 *)from)
				<< zshift(nbytes + off))/* zero upper bytes */
				>> zshift(nbytes);	/* place at bottom */
	}
	pbuf->carry_bytes = nbytes;
}
215 | ||
/*
 * Read nbytes bytes from "from" and put them at the next significant bytes
 * of pbuf->carry.  Unused bytes are zeroed.  It is expected that the extra
 * read does not overfill carry.
 *
 * NOTES:
 * o "from" may _not_ be u64 aligned
 * o nbytes may span a QW boundary
 */
static inline void read_extra_bytes(struct pio_buf *pbuf,
				    const void *from, unsigned int nbytes)
{
	unsigned long off = (unsigned long)from & 0x7;
	unsigned int room, xbytes;

	/* align our pointer */
	from = (void *)((unsigned long)from & ~0x7l);

	/* check count first - don't read anything if count is zero */
	while (nbytes) {
		/* find the number of bytes in this u64 */
		room = 8 - off;	/* this u64 has room for this many bytes */
		xbytes = nbytes > room ? room : nbytes;

		/*
		 * shift down to zero lower bytes, shift up to zero upper
		 * bytes, shift back down to move into place above the
		 * bytes already held in carry
		 */
		pbuf->carry.val64 |= (((*(u64 *)from)
					>> mshift(off))
					<< zshift(xbytes))
					>> zshift(xbytes + pbuf->carry_bytes);
		/* subsequent u64s start at offset 0 */
		off = 0;
		pbuf->carry_bytes += xbytes;
		nbytes -= xbytes;
		from += sizeof(u64);
	}
}
254 | ||
255 | /* | |
256 | * Zero extra bytes from the end of pbuf->carry. | |
257 | * | |
258 | * NOTES: | |
259 | * o zbytes <= old_bytes | |
260 | */ | |
261 | static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes) | |
262 | { | |
263 | unsigned int remaining; | |
264 | ||
265 | if (zbytes == 0) /* nothing to do */ | |
266 | return; | |
267 | ||
268 | remaining = pbuf->carry_bytes - zbytes; /* remaining bytes */ | |
269 | ||
270 | /* NOTE: zshift only guaranteed to work if remaining != 0 */ | |
271 | if (remaining) | |
272 | pbuf->carry.val64 = (pbuf->carry.val64 << zshift(remaining)) | |
273 | >> zshift(remaining); | |
274 | else | |
275 | pbuf->carry.val64 = 0; | |
276 | pbuf->carry_bytes = remaining; | |
277 | } | |
278 | ||
/*
 * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
 * Put the unused part of the next 8 bytes of src into the LSB bytes of
 * pbuf->carry with the upper bytes zeroed.
 *
 * NOTES:
 * o result must keep unused bytes zeroed
 * o src must be u64 aligned
 * o NOTE(review): expects 0 < pbuf->carry_bytes < 8; mshift(8) or
 *   zshift(0) would be 64-bit shifts (undefined behavior).  Callers
 *   appear to guarantee this - confirm before reusing elsewhere.
 */
static inline void merge_write8(
	struct pio_buf *pbuf,
	void __iomem *dest,
	const void *src)
{
	u64 new, temp;

	new = *(u64 *)src;
	/* merged word: carry in the low bytes, head of src filling the rest */
	temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes));
	writeq(temp, dest);
	/* the unwritten top of src becomes the new carry, upper bytes zero */
	pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes);
}
300 | ||
301 | /* | |
302 | * Write a quad word using all bytes of carry. | |
303 | */ | |
304 | static inline void carry8_write8(union mix carry, void __iomem *dest) | |
305 | { | |
306 | writeq(carry.val64, dest); | |
307 | } | |
308 | ||
309 | /* | |
310 | * Write a quad word using all the valid bytes of carry. If carry | |
311 | * has zero valid bytes, nothing is written. | |
312 | * Returns 0 on nothing written, non-zero on quad word written. | |
313 | */ | |
314 | static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest) | |
315 | { | |
316 | if (pbuf->carry_bytes) { | |
317 | /* unused bytes are always kept zeroed, so just write */ | |
318 | writeq(pbuf->carry.val64, dest); | |
319 | return 1; | |
320 | } | |
321 | ||
322 | return 0; | |
323 | } | |
324 | ||
#else /* USE_SHIFTS */
/*
 * Handle carry bytes using byte copies.
 *
 * NOTE: the value of the unused portion of carry is left uninitialized.
 */

/*
 * Jump copy - no-loop copy for < 8 bytes.  Each case falls through,
 * copying one byte per level until n bytes have been copied.
 */
static inline void jcopy(u8 *dest, const u8 *src, u32 n)
{
	switch (n) {
	case 7:
		*dest++ = *src++;
		/* fall through */
	case 6:
		*dest++ = *src++;
		/* fall through */
	case 5:
		*dest++ = *src++;
		/* fall through */
	case 4:
		*dest++ = *src++;
		/* fall through */
	case 3:
		*dest++ = *src++;
		/* fall through */
	case 2:
		*dest++ = *src++;
		/* fall through */
	case 1:
		*dest++ = *src++;
	}
}
354 | ||
/*
 * Read nbytes from "from" and place them in the low bytes
 * of pbuf->carry.  Other bytes are left as-is.  Any previous
 * value in pbuf->carry is lost.
 *
 * NOTES:
 * o do not read from "from" if nbytes is zero
 * o "from" may _not_ be u64 aligned.
 */
static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
				  unsigned int nbytes)
{
	jcopy(&pbuf->carry.val8[0], from, nbytes);
	pbuf->carry_bytes = nbytes;
}
370 | ||
/*
 * Read nbytes bytes from "from" and append them to the valid bytes already
 * in pbuf->carry.  It is expected that the extra read does not overfill
 * carry.
 *
 * NOTES:
 * o "from" may _not_ be u64 aligned
 * o nbytes may span a QW boundary
 */
static inline void read_extra_bytes(struct pio_buf *pbuf,
				    const void *from, unsigned int nbytes)
{
	jcopy(&pbuf->carry.val8[pbuf->carry_bytes], from, nbytes);
	pbuf->carry_bytes += nbytes;
}
385 | ||
/*
 * Zero extra bytes from the end of pbuf->carry.
 *
 * We do not care about the value of unused bytes in carry in this
 * byte-copy implementation, so just reduce the byte count.
 *
 * NOTES:
 * o zbytes <= old_bytes
 */
static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes)
{
	pbuf->carry_bytes -= zbytes;
}
399 | ||
400 | /* | |
401 | * Write a quad word using parts of pbuf->carry and the next 8 bytes of src. | |
402 | * Put the unused part of the next 8 bytes of src into the low bytes of | |
403 | * pbuf->carry. | |
404 | */ | |
405 | static inline void merge_write8( | |
406 | struct pio_buf *pbuf, | |
407 | void *dest, | |
408 | const void *src) | |
409 | { | |
410 | u32 remainder = 8 - pbuf->carry_bytes; | |
411 | ||
412 | jcopy(&pbuf->carry.val8[pbuf->carry_bytes], src, remainder); | |
413 | writeq(pbuf->carry.val64, dest); | |
8638b77f | 414 | jcopy(&pbuf->carry.val8[0], src + remainder, pbuf->carry_bytes); |
77241056 MM |
415 | } |
416 | ||
417 | /* | |
418 | * Write a quad word using all bytes of carry. | |
419 | */ | |
420 | static inline void carry8_write8(union mix carry, void *dest) | |
421 | { | |
422 | writeq(carry.val64, dest); | |
423 | } | |
424 | ||
425 | /* | |
426 | * Write a quad word using all the valid bytes of carry. If carry | |
427 | * has zero valid bytes, nothing is written. | |
428 | * Returns 0 on nothing written, non-zero on quad word written. | |
429 | */ | |
430 | static inline int carry_write8(struct pio_buf *pbuf, void *dest) | |
431 | { | |
432 | if (pbuf->carry_bytes) { | |
433 | u64 zero = 0; | |
434 | ||
435 | jcopy(&pbuf->carry.val8[pbuf->carry_bytes], (u8 *)&zero, | |
436 | 8 - pbuf->carry_bytes); | |
437 | writeq(pbuf->carry.val64, dest); | |
438 | return 1; | |
439 | } | |
440 | ||
441 | return 0; | |
442 | } | |
443 | #endif /* USE_SHIFTS */ | |
444 | ||
/*
 * Segmented PIO Copy - start
 *
 * Start a PIO copy: write the PBC and all whole quad words of the source,
 * then stash any dangling (< 8) bytes in pbuf->carry for the next segment.
 *
 * @pbuf: destination buffer
 * @pbc: the PBC for the PIO buffer
 * @from: data source, QWORD aligned
 * @nbytes: bytes to copy
 */
void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
			const void *from, size_t nbytes)
{
	void __iomem *dest = pbuf->start + SOP_DISTANCE;
	void __iomem *send = dest + PIO_BLOCK_SIZE;
	void __iomem *dend;			/* 8-byte data end */

	/* write the PBC */
	writeq(pbc, dest);
	dest += sizeof(u64);

	/* calculate where the QWORD data ends - in SOP=1 space */
	dend = dest + ((nbytes >> 3) * sizeof(u64));

	if (dend < send) {
		/*
		 * All QWORD data is within the SOP block, does *not*
		 * reach the end of the SOP block.
		 */

		while (dest < dend) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}
		/*
		 * No boundary checks are needed here:
		 * 0. We're not on the SOP block boundary
		 * 1. The possible DWORD dangle will still be within
		 *    the SOP block
		 * 2. We cannot wrap except on a block boundary.
		 */
	} else {
		/* QWORD data extends _to_ or beyond the SOP block */

		/* write 8-byte SOP chunk data */
		while (dest < send) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}
		/* drop out of the SOP range */
		dest -= SOP_DISTANCE;
		dend -= SOP_DISTANCE;

		/*
		 * If the wrap comes before or matches the data end,
		 * copy until the wrap, then wrap.
		 *
		 * If the data ends at the end of the SOP above and
		 * the buffer wraps, then pbuf->end == dend == dest
		 * and nothing will get written, but we will wrap in
		 * case there is a dangling DWORD.
		 */
		if (pbuf->end <= dend) {
			while (dest < pbuf->end) {
				writeq(*(u64 *)from, dest);
				from += sizeof(u64);
				dest += sizeof(u64);
			}

			dest -= pbuf->size;
			dend -= pbuf->size;
		}

		/* write 8-byte non-SOP, non-wrap chunk data */
		while (dest < dend) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}
	}
	/* at this point we have wrapped if we are going to wrap */

	/* ...but it doesn't matter as we're done writing */

	/* save dangling bytes, if any */
	read_low_bytes(pbuf, from, nbytes & 0x7);

	pbuf->qw_written = 1 /*PBC*/ + (nbytes >> 3);
}
533 | ||
/*
 * Mid copy helper, "mixed case" - source is 64-bit aligned but carry
 * bytes are non-zero.
 *
 * Whole u64s must be written to the chip, so bytes must be manually merged
 * with the pending carry bytes before each MMIO write.
 *
 * @pbuf: destination buffer
 * @from: data source, is QWORD aligned.
 * @nbytes: bytes to copy
 *
 * Must handle nbytes < 8.
 */
static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
{
	void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
	void __iomem *dend;			/* 8-byte data end */
	/* whole QWs available once carry and the new bytes are combined */
	unsigned long qw_to_write = (pbuf->carry_bytes + nbytes) >> 3;
	/* bytes that will remain in carry afterwards */
	unsigned long bytes_left = (pbuf->carry_bytes + nbytes) & 0x7;

	/* calculate 8-byte data end */
	dend = dest + (qw_to_write * sizeof(u64));

	if (pbuf->qw_written < PIO_BLOCK_QWS) {
		/*
		 * Still within SOP block.  We don't need to check for
		 * wrap because we are still in the first block and
		 * can only wrap on block boundaries.
		 */
		void __iomem *send;		/* SOP end */
		void __iomem *xend;

		/*
		 * calculate the end of data or end of block, whichever
		 * comes first
		 */
		send = pbuf->start + PIO_BLOCK_SIZE;
		xend = send < dend ? send : dend;

		/* shift up to SOP=1 space */
		dest += SOP_DISTANCE;
		xend += SOP_DISTANCE;

		/* write 8-byte chunk data */
		while (dest < xend) {
			merge_write8(pbuf, dest, from);
			from += sizeof(u64);
			dest += sizeof(u64);
		}

		/* shift down to SOP=0 space */
		dest -= SOP_DISTANCE;
	}
	/*
	 * At this point dest could be (either, both, or neither):
	 * - at dend
	 * - at the wrap
	 */

	/*
	 * If the wrap comes before or matches the data end,
	 * copy until the wrap, then wrap.
	 *
	 * If dest is at the wrap, we will fall into the if,
	 * not do the loop, when wrap.
	 *
	 * If the data ends at the end of the SOP above and
	 * the buffer wraps, then pbuf->end == dend == dest
	 * and nothing will get written.
	 */
	if (pbuf->end <= dend) {
		while (dest < pbuf->end) {
			merge_write8(pbuf, dest, from);
			from += sizeof(u64);
			dest += sizeof(u64);
		}

		dest -= pbuf->size;
		dend -= pbuf->size;
	}

	/* write 8-byte non-SOP, non-wrap chunk data */
	while (dest < dend) {
		merge_write8(pbuf, dest, from);
		from += sizeof(u64);
		dest += sizeof(u64);
	}

	/* adjust carry to hold exactly bytes_left valid bytes */
	if (pbuf->carry_bytes < bytes_left) {
		/* need to read more */
		read_extra_bytes(pbuf, from, bytes_left - pbuf->carry_bytes);
	} else {
		/* remove invalid bytes */
		zero_extra_bytes(pbuf, pbuf->carry_bytes - bytes_left);
	}

	pbuf->qw_written += qw_to_write;
}
630 | ||
/*
 * Mid copy helper, "straight case" - source pointer is 64-bit aligned
 * with no carry bytes, so whole QWs can be written directly.
 *
 * @pbuf: destination buffer
 * @from: data source, is QWORD aligned
 * @nbytes: bytes to copy
 *
 * Must handle nbytes < 8.
 */
static void mid_copy_straight(struct pio_buf *pbuf,
			      const void *from, size_t nbytes)
{
	void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
	void __iomem *dend;			/* 8-byte data end */

	/* calculate 8-byte data end */
	dend = dest + ((nbytes >> 3) * sizeof(u64));

	if (pbuf->qw_written < PIO_BLOCK_QWS) {
		/*
		 * Still within SOP block.  We don't need to check for
		 * wrap because we are still in the first block and
		 * can only wrap on block boundaries.
		 */
		void __iomem *send;		/* SOP end */
		void __iomem *xend;

		/*
		 * calculate the end of data or end of block, whichever
		 * comes first
		 */
		send = pbuf->start + PIO_BLOCK_SIZE;
		xend = send < dend ? send : dend;

		/* shift up to SOP=1 space */
		dest += SOP_DISTANCE;
		xend += SOP_DISTANCE;

		/* write 8-byte chunk data */
		while (dest < xend) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}

		/* shift down to SOP=0 space */
		dest -= SOP_DISTANCE;
	}
	/*
	 * At this point dest could be (either, both, or neither):
	 * - at dend
	 * - at the wrap
	 */

	/*
	 * If the wrap comes before or matches the data end,
	 * copy until the wrap, then wrap.
	 *
	 * If dest is at the wrap, we will fall into the if,
	 * not do the loop, when wrap.
	 *
	 * If the data ends at the end of the SOP above and
	 * the buffer wraps, then pbuf->end == dend == dest
	 * and nothing will get written.
	 */
	if (pbuf->end <= dend) {
		while (dest < pbuf->end) {
			writeq(*(u64 *)from, dest);
			from += sizeof(u64);
			dest += sizeof(u64);
		}

		dest -= pbuf->size;
		dend -= pbuf->size;
	}

	/* write 8-byte non-SOP, non-wrap chunk data */
	while (dest < dend) {
		writeq(*(u64 *)from, dest);
		from += sizeof(u64);
		dest += sizeof(u64);
	}

	/* we know carry_bytes was zero on entry to this routine */
	read_low_bytes(pbuf, from, nbytes & 0x7);

	pbuf->qw_written += nbytes >> 3;
}
718 | ||
/*
 * Segmented PIO Copy - middle
 *
 * Must handle any aligned tail and any aligned source with any byte count:
 * accumulate small fragments in pbuf->carry, realign a misaligned source,
 * then dispatch to the mixed or straight whole-QW copy helper.
 *
 * @pbuf: a number of blocks allocated within a PIO send context
 * @from: data source
 * @nbytes: number of bytes to copy
 */
void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes)
{
	unsigned long from_align = (unsigned long)from & 0x7;

	if (pbuf->carry_bytes + nbytes < 8) {
		/* not enough bytes to fill a QW - just accumulate in carry */
		read_extra_bytes(pbuf, from, nbytes);
		return;
	}

	if (from_align) {
		/* misaligned source pointer - align it */
		unsigned long to_align;

		/* bytes to read to align "from" */
		to_align = 8 - from_align;

		/*
		 * In the advance-to-alignment logic below, we do not need
		 * to check if we are using more than nbytes.  This is because
		 * if we are here, we already know that carry+nbytes will
		 * fill at least one QW.
		 */
		if (pbuf->carry_bytes + to_align < 8) {
			/* not enough align bytes to fill a QW */
			read_extra_bytes(pbuf, from, to_align);
			from += to_align;
			nbytes -= to_align;
		} else {
			/* bytes to fill carry */
			unsigned long to_fill = 8 - pbuf->carry_bytes;
			/* bytes left over to be read */
			unsigned long extra = to_align - to_fill;
			void __iomem *dest;

			/* fill carry... */
			read_extra_bytes(pbuf, from, to_fill);
			from += to_fill;
			nbytes -= to_fill;

			/* ...now write carry */
			dest = pbuf->start + (pbuf->qw_written * sizeof(u64));

			/*
			 * The two checks immediately below cannot both be
			 * true, hence the else.  If we have wrapped, we
			 * cannot still be within the first block.
			 * Conversely, if we are still in the first block, we
			 * cannot have wrapped.  We do the wrap check first
			 * as that is more likely.
			 */
			/* adjust if we've wrapped */
			if (dest >= pbuf->end)
				dest -= pbuf->size;
			/* jump to SOP range if within the first block */
			else if (pbuf->qw_written < PIO_BLOCK_QWS)
				dest += SOP_DISTANCE;

			carry8_write8(pbuf->carry, dest);
			pbuf->qw_written++;

			/* read any extra bytes to do final alignment */
			/* this will overwrite anything in pbuf->carry */
			read_low_bytes(pbuf, from, extra);
			from += extra;
			nbytes -= extra;
		}

		/* at this point, from is QW aligned */
	}

	if (pbuf->carry_bytes)
		mid_copy_mix(pbuf, from, nbytes);
	else
		mid_copy_straight(pbuf, from, nbytes);
}
804 | ||
/*
 * Segmented PIO Copy - end
 *
 * Write any remainder (in pbuf->carry) and zero-fill the rest of the
 * whole block, then release the buffer.
 *
 * @pbuf: a number of blocks allocated within a PIO send context
 */
void seg_pio_copy_end(struct pio_buf *pbuf)
{
	void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));

	/*
	 * The two checks immediately below cannot both be true, hence the
	 * else.  If we have wrapped, we cannot still be within the first
	 * block.  Conversely, if we are still in the first block, we
	 * cannot have wrapped.  We do the wrap check first as that is
	 * more likely.
	 */
	/* adjust if we have wrapped */
	if (dest >= pbuf->end)
		dest -= pbuf->size;
	/* jump to the SOP range if within the first block */
	else if (pbuf->qw_written < PIO_BLOCK_QWS)
		dest += SOP_DISTANCE;

	/* write final bytes, if any */
	if (carry_write8(pbuf, dest)) {
		dest += sizeof(u64);
		/*
		 * NOTE: We do not need to recalculate whether dest needs
		 * SOP_DISTANCE or not.
		 *
		 * If we are in the first block and the dangle write
		 * keeps us in the same block, dest will need
		 * to retain SOP_DISTANCE in the loop below.
		 *
		 * If we are in the first block and the dangle write pushes
		 * us to the next block, then loop below will not run
		 * and dest is not used.  Hence we do not need to update
		 * it.
		 *
		 * If we are past the first block, then SOP_DISTANCE
		 * was never added, so there is nothing to do.
		 */
	}

	/* zero-fill rest of block; we only wrap on a block boundary */
	while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) {
		writeq(0, dest);
		dest += sizeof(u64);
	}

	/* finished with this buffer */
	this_cpu_dec(*pbuf->sc->buffers_allocated);
	preempt_enable();
}