/*
 * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <asm/page.h>
#include <linux/mlx4/cq.h>
#include <linux/slab.h>
#include <linux/mlx4/qp.h>
#include <linux/skbuff.h>
#include <linux/if_vlan.h>
#include <linux/vmalloc.h>
#include <linux/tcp.h>
#include <linux/moduleparam.h>

#include "mlx4_en.h"
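/*
 * MAX_INLINE bounds the payload that may be copied directly into a WQE
 * instead of being DMA mapped: it is what remains of a single 128-byte
 * TXBB after the 16-byte control segment and the inline-segment headers
 * (hence the "128 - 16 - 4 - 4" note below).  MAX_BF (value assumed here)
 * caps the descriptor size that mlx4_en_xmit() will push through a
 * BlueFlame doorbell write.
 */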
enum {
	MAX_INLINE = 104, /* 128 - 16 - 4 - 4 */
	MAX_BF = 256,
};

static int inline_thold __read_mostly = MAX_INLINE;

module_param_named(inline_thold, inline_thold, int, 0444);
MODULE_PARM_DESC(inline_thold, "threshold for using inline data");
int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
			   struct mlx4_en_tx_ring **pring, int qpn, u32 size,
			   u16 stride, int node)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_tx_ring *ring;
	int tmp;
	int err;

	ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node);
	if (!ring) {
		ring = kzalloc(sizeof(*ring), GFP_KERNEL);
		if (!ring) {
			en_err(priv, "Failed allocating TX ring\n");
			return -ENOMEM;
		}
	}

	ring->size = size;
	ring->size_mask = size - 1;
	ring->stride = stride;

	inline_thold = min(inline_thold, MAX_INLINE);

	tmp = size * sizeof(struct mlx4_en_tx_info);
	ring->tx_info = vmalloc_node(tmp, node);
	if (!ring->tx_info) {
		ring->tx_info = vmalloc(tmp);
		if (!ring->tx_info) {
			err = -ENOMEM;
			goto err_ring;
		}
	}

	en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n",
	       ring->tx_info, tmp);

	ring->bounce_buf = kmalloc_node(MAX_DESC_SIZE, GFP_KERNEL, node);
	if (!ring->bounce_buf) {
		ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL);
		if (!ring->bounce_buf) {
			err = -ENOMEM;
			goto err_info;
		}
	}
	ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE);

	/* Allocate HW buffers on provided NUMA node */
	set_dev_node(&mdev->dev->pdev->dev, node);
	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size,
				 2 * PAGE_SIZE);
	set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node);
	if (err) {
		en_err(priv, "Failed allocating hwq resources\n");
		goto err_bounce;
	}

	err = mlx4_en_map_buffer(&ring->wqres.buf);
	if (err) {
		en_err(priv, "Failed to map TX buffer\n");
		goto err_hwq_res;
	}

	ring->buf = ring->wqres.buf.direct.buf;

	en_dbg(DRV, priv,
	       "Allocated TX ring (addr:%p) - buf:%p size:%d buf_size:%d dma:%llx\n",
	       ring, ring->buf, ring->size, ring->buf_size,
	       (unsigned long long) ring->wqres.buf.direct.map);

	ring->qpn = qpn;
	err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp);
	if (err) {
		en_err(priv, "Failed allocating qp %d\n", ring->qpn);
		goto err_map;
	}
	ring->qp.event = mlx4_en_sqp_event;

	err = mlx4_bf_alloc(mdev->dev, &ring->bf, node);
	if (err) {
		en_dbg(DRV, priv, "working without blueflame (%d)\n", err);
		ring->bf.uar = &mdev->priv_uar;
		ring->bf.uar->map = mdev->uar_map;
		ring->bf_enabled = false;
	} else {
		ring->bf_enabled = true;
	}

	ring->hwtstamp_tx_type = priv->hwtstamp_config.tx_type;

	*pring = ring;
	return 0;

err_map:
	mlx4_en_unmap_buffer(&ring->wqres.buf);
err_hwq_res:
	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
err_bounce:
	kfree(ring->bounce_buf);
	ring->bounce_buf = NULL;
err_info:
	vfree(ring->tx_info);
	ring->tx_info = NULL;
err_ring:
	kfree(ring);
	*pring = NULL;
	return err;
}
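/* Release everything that was set up by mlx4_en_create_tx_ring(). */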
void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
			     struct mlx4_en_tx_ring **pring)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_tx_ring *ring = *pring;

	en_dbg(DRV, priv, "Destroying tx ring, qpn: %d\n", ring->qpn);

	if (ring->bf_enabled)
		mlx4_bf_free(mdev->dev, &ring->bf);
	mlx4_qp_remove(mdev->dev, &ring->qp);
	mlx4_qp_free(mdev->dev, &ring->qp);
	mlx4_en_unmap_buffer(&ring->wqres.buf);
	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
	kfree(ring->bounce_buf);
	ring->bounce_buf = NULL;
	vfree(ring->tx_info);
	ring->tx_info = NULL;
	kfree(ring);
	*pring = NULL;
}
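/*
 * Reset the ring indices, rebuild the QP context (ring size, stride, CQ
 * number and user priority) and bring the send QP from RESET to the
 * ready-to-send state so the ring can transmit.
 */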
int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
			     struct mlx4_en_tx_ring *ring,
			     int cq, int user_prio)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	int err;

	ring->cqn = cq;
	ring->prod = 0;
	ring->cons = 0xffffffff;
	ring->last_nr_txbb = 1;
	memset(ring->tx_info, 0, ring->size * sizeof(struct mlx4_en_tx_info));
	memset(ring->buf, 0, ring->buf_size);

	ring->qp_state = MLX4_QP_STATE_RST;
	ring->doorbell_qpn = ring->qp.qpn << 8;

	mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn,
				ring->cqn, user_prio, &ring->context);
	if (ring->bf_enabled)
		ring->context.usr_page = cpu_to_be32(ring->bf.uar->index);

	err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, &ring->context,
			       &ring->qp, &ring->qp_state);

	return err;
}
void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv,
				struct mlx4_en_tx_ring *ring)
{
	struct mlx4_en_dev *mdev = priv->mdev;

	mlx4_qp_modify(mdev->dev, NULL, ring->qp_state,
		       MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp);
}
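/*
 * Re-stamp a descriptor that has just been freed: write STAMP_VAL together
 * with the current ring wrap ("owner") bit into one dword of every
 * STAMP_STRIDE bytes of the descriptor, handling descriptors that wrap
 * around the end of the ring buffer.
 */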
static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv,
			      struct mlx4_en_tx_ring *ring, int index,
			      u8 owner)
{
	__be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT));
	struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE;
	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
	void *end = ring->buf + ring->buf_size;
	__be32 *ptr = (__be32 *)tx_desc;
	int i;

	/* Optimize the common case when there are no wraparounds */
	if (likely((void *)tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) {
		/* Stamp the freed descriptor */
		for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE;
		     i += STAMP_STRIDE) {
			*ptr = stamp;
			ptr += STAMP_DWORDS;
		}
	} else {
		/* Stamp the freed descriptor */
		for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE;
		     i += STAMP_STRIDE) {
			*ptr = stamp;
			ptr += STAMP_DWORDS;
			if ((void *)ptr >= end) {
				ptr = ring->buf;
				stamp ^= cpu_to_be32(0x80000000);
			}
		}
	}
}
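/*
 * Tear down one completed descriptor: deliver the HW TX timestamp if one was
 * requested, unmap the linear part and every page fragment (handling
 * descriptors that wrap around the ring buffer), free the skb and return the
 * number of TXBBs the descriptor occupied.
 */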
static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
				struct mlx4_en_tx_ring *ring,
				int index, u8 owner, u64 timestamp)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
	struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE;
	struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset;
	struct sk_buff *skb = tx_info->skb;
	struct skb_frag_struct *frag;
	void *end = ring->buf + ring->buf_size;
	int frags = skb_shinfo(skb)->nr_frags;
	int i;
	struct skb_shared_hwtstamps hwts;

	if (timestamp) {
		mlx4_en_fill_hwtstamps(mdev, &hwts, timestamp);
		skb_tstamp_tx(skb, &hwts);
	}

	/* Optimize the common case when there are no wraparounds */
	if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) {
		if (!tx_info->inl) {
			if (tx_info->linear) {
				dma_unmap_single(priv->ddev,
					(dma_addr_t) be64_to_cpu(data->addr),
					be32_to_cpu(data->byte_count),
					PCI_DMA_TODEVICE);
				++data;
			}

			for (i = 0; i < frags; i++) {
				frag = &skb_shinfo(skb)->frags[i];
				dma_unmap_page(priv->ddev,
					(dma_addr_t) be64_to_cpu(data[i].addr),
					skb_frag_size(frag), PCI_DMA_TODEVICE);
			}
		}
	} else {
		if (!tx_info->inl) {
			if ((void *) data >= end)
				data = ring->buf + ((void *)data - end);

			if (tx_info->linear) {
				dma_unmap_single(priv->ddev,
					(dma_addr_t) be64_to_cpu(data->addr),
					be32_to_cpu(data->byte_count),
					PCI_DMA_TODEVICE);
				++data;
			}

			for (i = 0; i < frags; i++) {
				/* Check for wraparound before unmapping */
				if ((void *) data >= end)
					data = ring->buf;
				frag = &skb_shinfo(skb)->frags[i];
				dma_unmap_page(priv->ddev,
					(dma_addr_t) be64_to_cpu(data->addr),
					skb_frag_size(frag), PCI_DMA_TODEVICE);
				++data;
			}
		}
	}
	dev_kfree_skb_any(skb);
	return tx_info->nr_txbb;
}
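/*
 * Drain a ring that is going down: free every descriptor between the
 * consumer and the producer without waiting for completions, and reset the
 * netdev TX queue accounting.  Returns the number of descriptors freed.
 */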
int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	int cnt = 0;

	/* Skip last polled descriptor */
	ring->cons += ring->last_nr_txbb;
	en_dbg(DRV, priv, "Freeing Tx buf - cons:0x%x prod:0x%x\n",
	       ring->cons, ring->prod);

	if ((u32) (ring->prod - ring->cons) > ring->size) {
		if (netif_msg_tx_err(priv))
			en_warn(priv, "Tx consumer passed producer!\n");
		return 0;
	}

	while (ring->cons != ring->prod) {
		ring->last_nr_txbb = mlx4_en_free_tx_desc(priv, ring,
						ring->cons & ring->size_mask,
						!!(ring->cons & ring->size), 0);
		ring->cons += ring->last_nr_txbb;
		cnt++;
	}

	netdev_tx_reset_queue(ring->tx_queue);

	if (cnt)
		en_dbg(DRV, priv, "Freed %d uncompleted tx descriptors\n", cnt);

	return cnt;
}
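/*
 * TX completion processing.  Ownership of a CQE is determined by comparing
 * its owner bit against the wrap bit of the consumer index (the XNOR below),
 * so no producer index has to be read from HW.  For every CQE, all
 * descriptors up to the reported wqe_index are freed and re-stamped; the CQ
 * consumer index is updated before the ring consumer to avoid CQ overflow.
 */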
static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_cq *mcq = &cq->mcq;
	struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring];
	struct mlx4_cqe *cqe;
	u16 index;
	u16 new_index, ring_index, stamp_index;
	u32 txbbs_skipped = 0;
	u32 txbbs_stamp = 0;
	u32 cons_index = mcq->cons_index;
	int size = cq->size;
	u32 size_mask = ring->size_mask;
	struct mlx4_cqe *buf = cq->buf;
	u32 packets = 0;
	u32 bytes = 0;
	int factor = priv->cqe_factor;
	u64 timestamp = 0;

	if (!priv->port_up)
		return;

	index = cons_index & size_mask;
	cqe = &buf[(index << factor) + factor];
	ring_index = ring->cons & size_mask;
	stamp_index = ring_index;

	/* Process all completed CQEs */
	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
			cons_index & size)) {
		/*
		 * make sure we read the CQE after we read the
		 * ownership bit
		 */
		rmb();

		if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
			     MLX4_CQE_OPCODE_ERROR)) {
			struct mlx4_err_cqe *cqe_err = (struct mlx4_err_cqe *)cqe;

			en_err(priv, "CQE error - vendor syndrome: 0x%x syndrome: 0x%x\n",
			       cqe_err->vendor_err_syndrome,
			       cqe_err->syndrome);
		}

		/* Skip over last polled CQE */
		new_index = be16_to_cpu(cqe->wqe_index) & size_mask;

		do {
			txbbs_skipped += ring->last_nr_txbb;
			ring_index = (ring_index + ring->last_nr_txbb) & size_mask;
			if (ring->tx_info[ring_index].ts_requested)
				timestamp = mlx4_en_get_cqe_ts(cqe);

			/* free next descriptor */
			ring->last_nr_txbb = mlx4_en_free_tx_desc(
					priv, ring, ring_index,
					!!((ring->cons + txbbs_skipped) &
					ring->size), timestamp);

			mlx4_en_stamp_wqe(priv, ring, stamp_index,
					  !!((ring->cons + txbbs_stamp) &
						ring->size));
			stamp_index = ring_index;
			txbbs_stamp = txbbs_skipped;
			packets++;
			bytes += ring->tx_info[ring_index].nr_bytes;
		} while (ring_index != new_index);

		++cons_index;
		index = cons_index & size_mask;
		cqe = &buf[(index << factor) + factor];
	}

	/*
	 * To prevent CQ overflow we first update CQ consumer and only then
	 * the ring consumer.
	 */
	mcq->cons_index = cons_index;
	mlx4_cq_set_ci(mcq);
	wmb();
	ring->cons += txbbs_skipped;
	netdev_tx_completed_queue(ring->tx_queue, packets, bytes);

	/*
	 * Wakeup Tx queue if this stopped, and at least 1 packet
	 * was completed
	 */
	if (netif_tx_queue_stopped(ring->tx_queue) && txbbs_skipped > 0) {
		netif_tx_wake_queue(ring->tx_queue);
		priv->port_stats.wake_queue++;
	}
}
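/* TX completion interrupt: process the CQ and re-arm it. */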
void mlx4_en_tx_irq(struct mlx4_cq *mcq)
{
	struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq);
	struct mlx4_en_priv *priv = netdev_priv(cq->dev);

	mlx4_en_process_tx_cq(cq->dev, cq);
	mlx4_en_arm_cq(priv, cq);
}
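/*
 * Copy a descriptor that was built in the bounce buffer (because it would
 * wrap past the end of the ring) back into the ring: first the part that
 * wraps to the start of the buffer, then the part at the end of the ring,
 * skipping the very first dword so the owner/opcode word is only written by
 * the caller.  A write barrier is issued at every TXBB boundary.
 */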
static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv,
						      struct mlx4_en_tx_ring *ring,
						      u32 index,
						      unsigned int desc_size)
{
	u32 copy = (ring->size - index) * TXBB_SIZE;
	int i;

	for (i = desc_size - copy - 4; i >= 0; i -= 4) {
		if ((i & (TXBB_SIZE - 1)) == 0)
			wmb();

		*((u32 *) (ring->buf + i)) =
			*((u32 *) (ring->bounce_buf + copy + i));
	}

	for (i = copy - 4; i >= 4 ; i -= 4) {
		if ((i & (TXBB_SIZE - 1)) == 0)
			wmb();

		*((u32 *) (ring->buf + index * TXBB_SIZE + i)) =
			*((u32 *) (ring->bounce_buf + i));
	}

	/* Return real descriptor location */
	return ring->buf + index * TXBB_SIZE;
}
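/*
 * Decide whether a packet is small enough, and simple enough, to be copied
 * inline into the WQE.  When it is, and the data lives in a single page
 * fragment, the fragment address is optionally returned through *pfrag.
 */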
static int is_inline(struct sk_buff *skb, void **pfrag)
{
	void *ptr;

	if (inline_thold && !skb_is_gso(skb) && skb->len <= inline_thold) {
		if (skb_shinfo(skb)->nr_frags == 1) {
			ptr = skb_frag_address_safe(&skb_shinfo(skb)->frags[0]);
			if (unlikely(!ptr))
				return 0;

			if (pfrag)
				*pfrag = ptr;

			return 1;
		} else if (unlikely(skb_shinfo(skb)->nr_frags))
			return 0;
		else
			return 1;
	}

	return 0;
}
static int inline_size(struct sk_buff *skb)
{
	if (skb->len + CTRL_SIZE + sizeof(struct mlx4_wqe_inline_seg)
	    <= MLX4_INLINE_ALIGN)
		return ALIGN(skb->len + CTRL_SIZE +
			     sizeof(struct mlx4_wqe_inline_seg), 16);
	else
		return ALIGN(skb->len + CTRL_SIZE + 2 *
			     sizeof(struct mlx4_wqe_inline_seg), 16);
}
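/*
 * Compute the unpadded WQE size: control segment plus one data segment per
 * fragment (and one for the linear part when it carries payload beyond the
 * LSO header), or the inline size for packets that will be sent inline.
 * Returns 0 for LSO packets whose headers are not in the linear buffer.
 */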
static int get_real_size(struct sk_buff *skb, struct net_device *dev,
			 int *lso_header_size)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	int real_size;

	if (skb_is_gso(skb)) {
		*lso_header_size = skb_transport_offset(skb) + tcp_hdrlen(skb);
		real_size = CTRL_SIZE + skb_shinfo(skb)->nr_frags * DS_SIZE +
			ALIGN(*lso_header_size + 4, DS_SIZE);
		if (unlikely(*lso_header_size != skb_headlen(skb))) {
			/* We add a segment for the skb linear buffer only if
			 * it contains data */
			if (*lso_header_size < skb_headlen(skb))
				real_size += DS_SIZE;
			else {
				if (netif_msg_tx_err(priv))
					en_warn(priv, "Non-linear headers\n");
				return 0;
			}
		}
	} else {
		*lso_header_size = 0;
		if (!is_inline(skb, NULL))
			real_size = CTRL_SIZE + (skb_shinfo(skb)->nr_frags + 1) * DS_SIZE;
		else
			real_size = inline_size(skb);
	}

	return real_size;
}
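/*
 * Copy the packet payload directly into the WQE inline segment(s).  When the
 * data does not fit in the first segment it is split in two, and the byte
 * count of the second segment is made valid only after a write barrier so HW
 * never sees a valid count ahead of the data.
 */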
static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, struct sk_buff *skb,
			     int real_size, u16 *vlan_tag, int tx_ind, void *fragptr)
{
	struct mlx4_wqe_inline_seg *inl = &tx_desc->inl;
	int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof *inl;

	if (skb->len <= spc) {
		inl->byte_count = cpu_to_be32(1 << 31 | skb->len);
		skb_copy_from_linear_data(skb, inl + 1, skb_headlen(skb));
		if (skb_shinfo(skb)->nr_frags)
			memcpy(((void *)(inl + 1)) + skb_headlen(skb), fragptr,
			       skb_frag_size(&skb_shinfo(skb)->frags[0]));

	} else {
		inl->byte_count = cpu_to_be32(1 << 31 | spc);
		if (skb_headlen(skb) <= spc) {
			skb_copy_from_linear_data(skb, inl + 1, skb_headlen(skb));
			if (skb_headlen(skb) < spc) {
				memcpy(((void *)(inl + 1)) + skb_headlen(skb),
				       fragptr, spc - skb_headlen(skb));
				fragptr += spc - skb_headlen(skb);
			}
			inl = (void *) (inl + 1) + spc;
			memcpy(((void *)(inl + 1)), fragptr, skb->len - spc);
		} else {
			skb_copy_from_linear_data(skb, inl + 1, spc);
			inl = (void *) (inl + 1) + spc;
			skb_copy_from_linear_data_offset(skb, spc, inl + 1,
							 skb_headlen(skb) - spc);
			if (skb_shinfo(skb)->nr_frags)
				memcpy(((void *)(inl + 1)) + skb_headlen(skb) - spc,
				       fragptr,
				       skb_frag_size(&skb_shinfo(skb)->frags[0]));
		}

		wmb();
		inl->byte_count = cpu_to_be32(1 << 31 | (skb->len - spc));
	}
}
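/*
 * Pick a TX queue: rings are grouped per user priority, so the VLAN priority
 * selects the group and the stack's flow hash picks a ring within it.
 */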
u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	u16 rings_p_up = priv->num_tx_rings_p_up;
	u8 up = 0;

	if (dev->num_tc)
		return skb_tx_hash(dev, skb);

	if (vlan_tx_tag_present(skb))
		up = vlan_tx_tag_get(skb) >> VLAN_PRIO_SHIFT;

	return __netdev_pick_tx(dev, skb) % rings_p_up + up * rings_p_up;
}
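/* Copy a descriptor into the BlueFlame register area, 64 bits at a time. */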
static void mlx4_bf_copy(void __iomem *dst, unsigned long *src, unsigned bytecnt)
{
	__iowrite64_copy(dst, src, bytecnt / 8);
}
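/*
 * Main transmit routine: size the descriptor, reserve a slot (stopping the
 * queue when fewer than HEADROOM + MAX_DESC_TXBBS free TXBBs remain),
 * DMA-map or inline the payload, build the control/LSO/data segments and
 * finally hand ownership to HW, either by a BlueFlame copy of the whole
 * descriptor or by a doorbell write of the QP number.
 */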
netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_en_dev *mdev = priv->mdev;
	struct device *ddev = priv->ddev;
	struct mlx4_en_tx_ring *ring;
	struct mlx4_en_tx_desc *tx_desc;
	struct mlx4_wqe_data_seg *data;
	struct mlx4_en_tx_info *tx_info;
	int tx_ind = 0;
	int nr_txbb;
	int desc_size;
	int real_size;
	u32 index, bf_index;
	__be32 op_own;
	u16 vlan_tag = 0;
	int i;
	int lso_header_size;
	void *fragptr;
	bool bounce = false;

	real_size = get_real_size(skb, dev, &lso_header_size);
	if (unlikely(!real_size))
		goto tx_drop;

	/* Align descriptor to TXBB size */
	desc_size = ALIGN(real_size, TXBB_SIZE);
	nr_txbb = desc_size / TXBB_SIZE;
	if (unlikely(nr_txbb > MAX_DESC_TXBBS)) {
		if (netif_msg_tx_err(priv))
			en_warn(priv, "Oversized header or SG list\n");
		goto tx_drop;
	}

	tx_ind = skb->queue_mapping;
	ring = priv->tx_ring[tx_ind];
	if (vlan_tx_tag_present(skb))
		vlan_tag = vlan_tx_tag_get(skb);

	/* Check available TXBBs And 2K spare for prefetch */
	if (unlikely(((int)(ring->prod - ring->cons)) >
		     ring->size - HEADROOM - MAX_DESC_TXBBS)) {
		/* every full Tx ring stops queue */
		netif_tx_stop_queue(ring->tx_queue);
		priv->port_stats.queue_stopped++;

		/* If queue was emptied after the if, and before the
		 * stop_queue - need to wake the queue, or else it will remain
		 * stopped forever.
		 * Need a memory barrier to make sure ring->cons was not
		 * updated before queue was stopped.
		 */
		wmb();

		if (unlikely(((int)(ring->prod - ring->cons)) <=
			     ring->size - HEADROOM - MAX_DESC_TXBBS)) {
			netif_tx_wake_queue(ring->tx_queue);
			priv->port_stats.wake_queue++;
		} else {
			return NETDEV_TX_BUSY;
		}
	}

	/* Track current inflight packets for performance analysis */
	AVG_PERF_COUNTER(priv->pstats.inflight_avg,
			 (u32) (ring->prod - ring->cons - 1));

	/* Packet is good - grab an index and transmit it */
	index = ring->prod & ring->size_mask;
	bf_index = ring->prod;

	/* See if we have enough space for whole descriptor TXBB for setting
	 * SW ownership on next descriptor; if not, use a bounce buffer. */
	if (likely(index + nr_txbb <= ring->size))
		tx_desc = ring->buf + index * TXBB_SIZE;
	else {
		tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf;
		bounce = true;
	}

	/* Save skb in tx_info ring */
	tx_info = &ring->tx_info[index];
	tx_info->skb = skb;
	tx_info->nr_txbb = nr_txbb;

	if (lso_header_size)
		data = ((void *)&tx_desc->lso + ALIGN(lso_header_size + 4,
						      DS_SIZE));
	else
		data = &tx_desc->data;

	/* valid only for non-inline segments */
	tx_info->data_offset = (void *)data - (void *)tx_desc;

	tx_info->linear = (lso_header_size < skb_headlen(skb) &&
			   !is_inline(skb, NULL)) ? 1 : 0;

	data += skb_shinfo(skb)->nr_frags + tx_info->linear - 1;

	if (is_inline(skb, &fragptr)) {
		tx_info->inl = 1;
	} else {
		for (i = skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) {
			struct skb_frag_struct *frag;
			dma_addr_t dma;

			frag = &skb_shinfo(skb)->frags[i];
			dma = skb_frag_dma_map(ddev, frag,
					       0, skb_frag_size(frag),
					       DMA_TO_DEVICE);
			if (dma_mapping_error(ddev, dma))
				goto tx_drop_unmap;

			data->addr = cpu_to_be64(dma);
			data->lkey = cpu_to_be32(mdev->mr.key);
			wmb();
			data->byte_count = cpu_to_be32(skb_frag_size(frag));
			--data;
		}

		/* Map linear part */
		if (tx_info->linear) {
			u32 byte_count = skb_headlen(skb) - lso_header_size;
			dma_addr_t dma;

			dma = dma_map_single(ddev, skb->data +
					     lso_header_size, byte_count,
					     PCI_DMA_TODEVICE);
			if (dma_mapping_error(ddev, dma))
				goto tx_drop_unmap;

			data->addr = cpu_to_be64(dma);
			data->lkey = cpu_to_be32(mdev->mr.key);
			wmb();
			data->byte_count = cpu_to_be32(byte_count);
		}
		tx_info->inl = 0;
	}

	/*
	 * For timestamping add flag to skb_shinfo and
	 * set flag for further reference
	 */
	if (ring->hwtstamp_tx_type == HWTSTAMP_TX_ON &&
	    skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
		tx_info->ts_requested = 1;
	}

	/* Prepare ctrl segment apart from opcode+ownership, which depends on
	 * whether LSO is used */
	tx_desc->ctrl.vlan_tag = cpu_to_be16(vlan_tag);
	tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN *
		!!vlan_tx_tag_present(skb);
	tx_desc->ctrl.fence_size = (real_size / 16) & 0x3f;
	tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
		tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
							 MLX4_WQE_CTRL_TCP_UDP_CSUM);
		ring->tx_csum++;
	}

	if (priv->flags & MLX4_EN_FLAG_ENABLE_HW_LOOPBACK) {
		struct ethhdr *ethh;

		/* Copy dst mac address to wqe. This allows loopback in eSwitch,
		 * so that VFs and PF can communicate with each other
		 */
		ethh = (struct ethhdr *)skb->data;
		tx_desc->ctrl.srcrb_flags16[0] = get_unaligned((__be16 *)ethh->h_dest);
		tx_desc->ctrl.imm = get_unaligned((__be32 *)(ethh->h_dest + 2));
	}

	/* Handle LSO (TSO) packets */
	if (lso_header_size) {
		/* Mark opcode as LSO */
		op_own = cpu_to_be32(MLX4_OPCODE_LSO | (1 << 6)) |
			((ring->prod & ring->size) ?
				cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);

		/* Fill in the LSO prefix */
		tx_desc->lso.mss_hdr_size = cpu_to_be32(
			skb_shinfo(skb)->gso_size << 16 | lso_header_size);

		/* Copy headers;
		 * note that we already verified that it is linear */
		memcpy(tx_desc->lso.header, skb->data, lso_header_size);

		priv->port_stats.tso_packets++;
		i = ((skb->len - lso_header_size) / skb_shinfo(skb)->gso_size) +
			!!((skb->len - lso_header_size) % skb_shinfo(skb)->gso_size);
		tx_info->nr_bytes = skb->len + (i - 1) * lso_header_size;
		ring->packets += i;
	} else {
		/* Normal (Non LSO) packet */
		op_own = cpu_to_be32(MLX4_OPCODE_SEND) |
			((ring->prod & ring->size) ?
				cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
		tx_info->nr_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
		ring->packets++;
	}

	ring->bytes += tx_info->nr_bytes;
	netdev_tx_sent_queue(ring->tx_queue, tx_info->nr_bytes);
	AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, skb->len);

	if (tx_info->inl)
		build_inline_wqe(tx_desc, skb, real_size, &vlan_tag, tx_ind, fragptr);

	ring->prod += nr_txbb;

	/* If we used a bounce buffer then copy descriptor back into place */
	if (bounce)
		tx_desc = mlx4_en_bounce_to_desc(priv, ring, index, desc_size);

	skb_tx_timestamp(skb);

	if (ring->bf_enabled && desc_size <= MAX_BF && !bounce && !vlan_tx_tag_present(skb)) {
		*(__be32 *) (&tx_desc->ctrl.vlan_tag) |= cpu_to_be32(ring->doorbell_qpn);
		op_own |= htonl((bf_index & 0xffff) << 8);
		/* Ensure new descriptor hits memory
		 * before setting ownership of this descriptor to HW */
		wmb();
		tx_desc->ctrl.owner_opcode = op_own;

		wmb();

		mlx4_bf_copy(ring->bf.reg + ring->bf.offset, (unsigned long *) &tx_desc->ctrl,
			     desc_size);

		wmb();

		ring->bf.offset ^= ring->bf.buf_size;
	} else {
		/* Ensure new descriptor hits memory
		 * before setting ownership of this descriptor to HW */
		wmb();
		tx_desc->ctrl.owner_opcode = op_own;
		wmb();
		iowrite32be(ring->doorbell_qpn, ring->bf.uar->map + MLX4_SEND_DOORBELL);
	}

	return NETDEV_TX_OK;

tx_drop_unmap:
	en_err(priv, "DMA mapping error\n");

	for (i++; i < skb_shinfo(skb)->nr_frags; i++) {
		data++;
		dma_unmap_page(ddev, (dma_addr_t) be64_to_cpu(data->addr),
			       be32_to_cpu(data->byte_count),
			       PCI_DMA_TODEVICE);
	}

tx_drop:
	dev_kfree_skb_any(skb);
	priv->stats.tx_dropped++;
	return NETDEV_TX_OK;
}