/*
 * Back-end of the driver for virtual network devices. This portion of the
 * driver exports a 'unified' network-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  drivers/net/xen-netfront.c
 *
 * Copyright (c) 2002-2005, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <linux/kthread.h>
#include <linux/if_vlan.h>
#include <linux/udp.h>
#include <linux/highmem.h>

#include <xen/events.h>
#include <xen/interface/memory.h>

#include <asm/xen/hypercall.h>
#include <asm/xen/page.h>
/* Provide an option to disable split event channels at load time as
 * event channels are a limited resource. Split event channels are
 * enabled by default.
 */
bool separate_tx_rx_irq = 1;
module_param(separate_tx_rx_irq, bool, 0644);
/* When the guest ring is filled up, qdisc queues the packets for us, but we
 * have to time them out, otherwise other guests' packets can get stuck there.
 */
unsigned int rx_drain_timeout_msecs = 10000;
module_param(rx_drain_timeout_msecs, uint, 0444);
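/* Jiffies equivalent of rx_drain_timeout_msecs, computed once in netback_init(). */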
unsigned int rx_drain_timeout_jiffies;
/*
 * This is the maximum number of slots a skb can have. If a guest sends a skb
 * which exceeds this limit it is considered malicious.
 */
#define FATAL_SKB_SLOTS_DEFAULT 20
static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT;
module_param(fatal_skb_slots, uint, 0444);
static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
                               u8 status);

static void make_tx_response(struct xenvif *vif,
                             struct xen_netif_tx_request *txp,
                             s8 st);

static inline int tx_work_todo(struct xenvif *vif);
static inline int rx_work_todo(struct xenvif *vif);

static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
                                                      u16 id,
                                                      s8 st,
                                                      u16 offset,
                                                      u16 size,
                                                      u16 flags);
static inline unsigned long idx_to_pfn(struct xenvif *vif,
                                       u16 idx)
{
        return page_to_pfn(vif->mmap_pages[idx]);
}

static inline unsigned long idx_to_kaddr(struct xenvif *vif,
                                         u16 idx)
{
        return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx));
}
/* Find the containing VIF's structure from a pointer in pending_tx_info array
 */
static inline struct xenvif *ubuf_to_vif(struct ubuf_info *ubuf)
{
        u16 pending_idx = ubuf->desc;
        struct pending_tx_info *temp =
                container_of(ubuf, struct pending_tx_info, callback_struct);
        return container_of(temp - pending_idx,
                            struct xenvif,
                            pending_tx_info[0]);
}
/* This is a minimum size for the linear area to avoid lots of
 * calls to __pskb_pull_tail() as we set up checksum offsets. The
 * value 128 was chosen as it covers all IPv4 and most likely
 * IPv6 headers.
 */
#define PKT_PROT_LEN 128
static u16 frag_get_pending_idx(skb_frag_t *frag)
{
        return (u16)frag->page_offset;
}

static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
{
        frag->page_offset = pending_idx;
}
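/* The pending ring has MAX_PENDING_REQS entries (a power of two), so a
 * free-running index is mapped onto a valid slot with a simple mask.
 */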
static inline pending_ring_idx_t pending_index(unsigned i)
{
        return i & (MAX_PENDING_REQS - 1);
}
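/* Check whether the frontend has posted at least 'needed' rx requests. If it
 * has not, arm req_event so the frontend sends an event when more requests
 * arrive, then re-read req_prod to close the race with the frontend.
 */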
bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed)
{
        RING_IDX prod, cons;

        do {
                prod = vif->rx.sring->req_prod;
                cons = vif->rx.req_cons;

                if (prod - cons >= needed)
                        return true;

                vif->rx.sring->req_event = prod + 1;

                /* Make sure event is visible before we check prod
                 * again.
                 */
                mb();
        } while (vif->rx.sring->req_prod != prod);

        return false;
}
/*
 * Returns true if we should start a new receive buffer instead of
 * adding 'size' bytes to a buffer which currently contains 'offset'
 * bytes.
 */
static bool start_new_rx_buffer(int offset, unsigned long size, int head)
{
        /* simple case: we have completely filled the current buffer. */
        if (offset == MAX_BUFFER_OFFSET)
                return true;

        /*
         * complex case: start a fresh buffer if the current frag
         * would overflow the current buffer but only if:
         *     (i)   this frag would fit completely in the next buffer
         * and (ii)  there is already some data in the current buffer
         * and (iii) this is not the head buffer.
         *
         * Where:
         * - (i) stops us splitting a frag into two copies
         *   unless the frag is too large for a single buffer.
         * - (ii) stops us from leaving a buffer pointlessly empty.
         * - (iii) stops us leaving the first buffer
         *   empty. Strictly speaking this is already covered
         *   by (ii) but is explicitly checked because
         *   netfront relies on the first buffer being
         *   non-empty and can crash otherwise.
         *
         * This means we will effectively linearise small
         * frags but do not needlessly split large buffers
         * into multiple copies, tending to give large frags their
         * own buffers as before.
         */
        if ((offset + size > MAX_BUFFER_OFFSET) &&
            (size <= MAX_BUFFER_OFFSET) && offset && !head)
                return true;

        return false;
}
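/* Book-keeping for one batch of guest rx packets: the grant-copy operations
 * being assembled and the meta slots that describe each ring buffer in use.
 */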
struct netrx_pending_operations {
        unsigned copy_prod, copy_cons;
        unsigned meta_prod, meta_cons;
        struct gnttab_copy *copy;
        struct xenvif_rx_meta *meta;
        int copy_off;
        grant_ref_t copy_gref;
};
static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif,
                                                 struct netrx_pending_operations *npo)
{
        struct xenvif_rx_meta *meta;
        struct xen_netif_rx_request *req;

        req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);

        meta = npo->meta + npo->meta_prod++;
        meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
        meta->gso_size = 0;
        meta->size = 0;
        meta->id = req->id;

        npo->copy_off = 0;
        npo->copy_gref = req->gref;

        return meta;
}
/*
 * Set up the grant operations for this fragment. If it's a flipping
 * interface, we also set up the unmap request from here.
 */
static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
                                 struct netrx_pending_operations *npo,
                                 struct page *page, unsigned long size,
                                 unsigned long offset, int *head,
                                 struct xenvif *foreign_vif,
                                 grant_ref_t foreign_gref)
{
        struct gnttab_copy *copy_gop;
        struct xenvif_rx_meta *meta;
        unsigned long bytes;
        int gso_type = XEN_NETIF_GSO_TYPE_NONE;

        /* Data must not cross a page boundary. */
        BUG_ON(size + offset > PAGE_SIZE<<compound_order(page));

        meta = npo->meta + npo->meta_prod - 1;

        /* Skip unused frames from start of page */
        page += offset >> PAGE_SHIFT;
        offset &= ~PAGE_MASK;

        while (size > 0) {
                BUG_ON(offset >= PAGE_SIZE);
                BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);

                bytes = PAGE_SIZE - offset;
                if (bytes > size)
                        bytes = size;

                if (start_new_rx_buffer(npo->copy_off, bytes, *head)) {
                        /*
                         * Netfront requires there to be some data in the head
                         * buffer.
                         */
                        BUG_ON(*head);

                        meta = get_next_rx_buffer(vif, npo);
                }

                if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
                        bytes = MAX_BUFFER_OFFSET - npo->copy_off;

                copy_gop = npo->copy + npo->copy_prod++;
                copy_gop->flags = GNTCOPY_dest_gref;
                copy_gop->len = bytes;

                if (foreign_vif) {
                        copy_gop->source.domid = foreign_vif->domid;
                        copy_gop->source.u.ref = foreign_gref;
                        copy_gop->flags |= GNTCOPY_source_gref;
                } else {
                        copy_gop->source.domid = DOMID_SELF;
                        copy_gop->source.u.gmfn =
                                virt_to_mfn(page_address(page));
                }
                copy_gop->source.offset = offset;

                copy_gop->dest.domid = vif->domid;
                copy_gop->dest.offset = npo->copy_off;
                copy_gop->dest.u.ref = npo->copy_gref;

                npo->copy_off += bytes;
                meta->size += bytes;

                offset += bytes;
                size -= bytes;

                /* Next page, if the data spans a compound page. */
                if (offset == PAGE_SIZE && size) {
                        BUG_ON(!PageCompound(page));
                        page++;
                        offset = 0;
                }

                /* Leave a gap for the GSO descriptor. */
                if (skb_is_gso(skb)) {
                        if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
                                gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
                        else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
                                gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
                }

                if (*head && ((1 << gso_type) & vif->gso_mask))
                        vif->rx.req_cons++;

                *head = 0; /* There must be something in this buffer now. */
        }
}
/*
 * Prepare an SKB to be transmitted to the frontend.
 *
 * This function is responsible for allocating grant operations, meta
 * structures, etc.
 *
 * It returns the number of meta structures consumed. The number of
 * ring slots used is always equal to the number of meta slots used
 * plus the number of GSO descriptors used. Currently, we use either
 * zero GSO descriptors (for non-GSO packets) or one descriptor (for
 * frontend-side LRO).
 */
static int xenvif_gop_skb(struct sk_buff *skb,
                          struct netrx_pending_operations *npo)
{
        struct xenvif *vif = netdev_priv(skb->dev);
        int nr_frags = skb_shinfo(skb)->nr_frags;
        int i;
        struct xen_netif_rx_request *req;
        struct xenvif_rx_meta *meta;
        unsigned char *data;
        int head = 1;
        int old_meta_prod;
        int gso_type;
        struct ubuf_info *ubuf = skb_shinfo(skb)->destructor_arg;
        grant_ref_t foreign_grefs[MAX_SKB_FRAGS];
        struct xenvif *foreign_vif = NULL;

        old_meta_prod = npo->meta_prod;

        gso_type = XEN_NETIF_GSO_TYPE_NONE;
        if (skb_is_gso(skb)) {
                if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
                        gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
                else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
                        gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
        }

        /* Set up a GSO prefix descriptor, if necessary */
        if ((1 << gso_type) & vif->gso_prefix_mask) {
                req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
                meta = npo->meta + npo->meta_prod++;
                meta->gso_type = gso_type;
                meta->gso_size = skb_shinfo(skb)->gso_size;
                meta->size = 0;
                meta->id = req->id;
        }

        req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
        meta = npo->meta + npo->meta_prod++;

        if ((1 << gso_type) & vif->gso_mask) {
                meta->gso_type = gso_type;
                meta->gso_size = skb_shinfo(skb)->gso_size;
        } else {
                meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
                meta->gso_size = 0;
        }

        meta->size = 0;
        meta->id = req->id;
        npo->copy_off = 0;
        npo->copy_gref = req->gref;

        if ((skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) &&
            (ubuf->callback == &xenvif_zerocopy_callback)) {
                int i = 0;
                foreign_vif = ubuf_to_vif(ubuf);

                do {
                        u16 pending_idx = ubuf->desc;
                        foreign_grefs[i++] =
                                foreign_vif->pending_tx_info[pending_idx].req.gref;
                        ubuf = (struct ubuf_info *) ubuf->ctx;
                } while (ubuf);
        }

        data = skb->data;
        while (data < skb_tail_pointer(skb)) {
                unsigned int offset = offset_in_page(data);
                unsigned int len = PAGE_SIZE - offset;

                if (data + len > skb_tail_pointer(skb))
                        len = skb_tail_pointer(skb) - data;

                xenvif_gop_frag_copy(vif, skb, npo,
                                     virt_to_page(data), len, offset, &head,
                                     NULL,
                                     0);
                data += len;
        }

        for (i = 0; i < nr_frags; i++) {
                xenvif_gop_frag_copy(vif, skb, npo,
                                     skb_frag_page(&skb_shinfo(skb)->frags[i]),
                                     skb_frag_size(&skb_shinfo(skb)->frags[i]),
                                     skb_shinfo(skb)->frags[i].page_offset,
                                     &head,
                                     foreign_vif,
                                     foreign_grefs[i]);
        }

        return npo->meta_prod - old_meta_prod;
}
/*
 * This is a twin to xenvif_gop_skb. Assume that xenvif_gop_skb was
 * used to set up the operations on the top of
 * netrx_pending_operations, which have since been done. Check that
 * they didn't give any errors and advance over them.
 */
static int xenvif_check_gop(struct xenvif *vif, int nr_meta_slots,
                            struct netrx_pending_operations *npo)
{
        struct gnttab_copy *copy_op;
        int status = XEN_NETIF_RSP_OKAY;
        int i;

        for (i = 0; i < nr_meta_slots; i++) {
                copy_op = npo->copy + npo->copy_cons++;
                if (copy_op->status != GNTST_okay) {
                        netdev_dbg(vif->dev,
                                   "Bad status %d from copy to DOM%d.\n",
                                   copy_op->status, vif->domid);
                        status = XEN_NETIF_RSP_ERROR;
                }
        }

        return status;
}
static void xenvif_add_frag_responses(struct xenvif *vif, int status,
                                      struct xenvif_rx_meta *meta,
                                      int nr_meta_slots)
{
        int i;
        unsigned long offset;

        /* No fragments used */
        if (nr_meta_slots <= 1)
                return;

        nr_meta_slots--;

        for (i = 0; i < nr_meta_slots; i++) {
                int flags;
                if (i == nr_meta_slots - 1)
                        flags = 0;
                else
                        flags = XEN_NETRXF_more_data;

                offset = 0;
                make_rx_response(vif, meta[i].id, status, offset,
                                 meta[i].size, flags);
        }
}
struct xenvif_rx_cb {
        int meta_slots_used;
};

#define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb)
void xenvif_kick_thread(struct xenvif *vif)
{
        wake_up(&vif->wq);
}
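/* Drain as much of rx_queue into the frontend's rx ring as will fit: build
 * the grant-copy operations for each skb, issue them in one batch, then fill
 * in the rx responses and notify the frontend if it needs an event.
 */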
static void xenvif_rx_action(struct xenvif *vif)
{
        s8 status;
        u16 flags;
        struct xen_netif_rx_response *resp;
        struct sk_buff_head rxq;
        struct sk_buff *skb;
        int ret;
        unsigned long offset;
        bool need_to_notify = false;

        struct netrx_pending_operations npo = {
                .copy  = vif->grant_copy_op,
                .meta  = vif->meta,
        };

        skb_queue_head_init(&rxq);

        while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) {
                RING_IDX max_slots_needed;
                int i;

                /* We need a cheap worst-case estimate for the number of
                 * slots we'll use.
                 */
                max_slots_needed = DIV_ROUND_UP(offset_in_page(skb->data) +
                                                skb_headlen(skb),
                                                PAGE_SIZE);
                for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                        unsigned int size;
                        size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
                        max_slots_needed += DIV_ROUND_UP(size, PAGE_SIZE);
                }
                if (skb_is_gso(skb) &&
                   (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 ||
                    skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6))
                        max_slots_needed++;

                /* If the skb may not fit then bail out now */
                if (!xenvif_rx_ring_slots_available(vif, max_slots_needed)) {
                        skb_queue_head(&vif->rx_queue, skb);
                        need_to_notify = true;
                        vif->rx_last_skb_slots = max_slots_needed;
                        break;
                } else
                        vif->rx_last_skb_slots = 0;

                XENVIF_RX_CB(skb)->meta_slots_used = xenvif_gop_skb(skb, &npo);
                BUG_ON(XENVIF_RX_CB(skb)->meta_slots_used > max_slots_needed);

                __skb_queue_tail(&rxq, skb);
        }

        BUG_ON(npo.meta_prod > ARRAY_SIZE(vif->meta));

        if (!npo.copy_prod)
                goto done;

        BUG_ON(npo.copy_prod > MAX_GRANT_COPY_OPS);
        gnttab_batch_copy(vif->grant_copy_op, npo.copy_prod);

        while ((skb = __skb_dequeue(&rxq)) != NULL) {

                if ((1 << vif->meta[npo.meta_cons].gso_type) &
                    vif->gso_prefix_mask) {
                        resp = RING_GET_RESPONSE(&vif->rx,
                                                 vif->rx.rsp_prod_pvt++);

                        resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;

                        resp->offset = vif->meta[npo.meta_cons].gso_size;
                        resp->id = vif->meta[npo.meta_cons].id;
                        resp->status = XENVIF_RX_CB(skb)->meta_slots_used;

                        npo.meta_cons++;
                        XENVIF_RX_CB(skb)->meta_slots_used--;
                }

                vif->dev->stats.tx_bytes += skb->len;
                vif->dev->stats.tx_packets++;

                status = xenvif_check_gop(vif,
                                          XENVIF_RX_CB(skb)->meta_slots_used,
                                          &npo);

                if (XENVIF_RX_CB(skb)->meta_slots_used == 1)
                        flags = 0;
                else
                        flags = XEN_NETRXF_more_data;

                if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
                        flags |= XEN_NETRXF_csum_blank | XEN_NETRXF_data_validated;
                else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
                        /* remote but checksummed. */
                        flags |= XEN_NETRXF_data_validated;

                offset = 0;
                resp = make_rx_response(vif, vif->meta[npo.meta_cons].id,
                                        status, offset,
                                        vif->meta[npo.meta_cons].size,
                                        flags);

                if ((1 << vif->meta[npo.meta_cons].gso_type) &
                    vif->gso_mask) {
                        struct xen_netif_extra_info *gso =
                                (struct xen_netif_extra_info *)
                                RING_GET_RESPONSE(&vif->rx,
                                                  vif->rx.rsp_prod_pvt++);

                        resp->flags |= XEN_NETRXF_extra_info;

                        gso->u.gso.type = vif->meta[npo.meta_cons].gso_type;
                        gso->u.gso.size = vif->meta[npo.meta_cons].gso_size;
                        gso->u.gso.pad = 0;
                        gso->u.gso.features = 0;

                        gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
                        gso->flags = 0;
                }

                xenvif_add_frag_responses(vif, status,
                                          vif->meta + npo.meta_cons + 1,
                                          XENVIF_RX_CB(skb)->meta_slots_used);

                RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);

                need_to_notify |= !!ret;

                npo.meta_cons += XENVIF_RX_CB(skb)->meta_slots_used;
                dev_kfree_skb(skb);
        }

done:
        if (need_to_notify)
                notify_remote_via_irq(vif->rx_irq);
}
void xenvif_check_rx_xenvif(struct xenvif *vif)
{
        int more_to_do;

        RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);

        if (more_to_do)
                napi_schedule(&vif->napi);
}
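/* Replenish the vif's transmit credit when a new credit window starts. The
 * burst is sized so that even a jumbo-sized request at the head of the ring
 * can make progress.
 */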
static void tx_add_credit(struct xenvif *vif)
{
        unsigned long max_burst, max_credit;

        /*
         * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
         * Otherwise the interface can seize up due to insufficient credit.
         */
        max_burst = RING_GET_REQUEST(&vif->tx, vif->tx.req_cons)->size;
        max_burst = min(max_burst, 131072UL);
        max_burst = max(max_burst, vif->credit_bytes);

        /* Take care that adding a new chunk of credit doesn't wrap to zero. */
        max_credit = vif->remaining_credit + vif->credit_bytes;
        if (max_credit < vif->remaining_credit)
                max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */

        vif->remaining_credit = min(max_credit, max_burst);
}
static void tx_credit_callback(unsigned long data)
{
        struct xenvif *vif = (struct xenvif *)data;
        tx_add_credit(vif);
        xenvif_check_rx_xenvif(vif);
}
static void xenvif_tx_err(struct xenvif *vif,
                          struct xen_netif_tx_request *txp, RING_IDX end)
{
        RING_IDX cons = vif->tx.req_cons;
        unsigned long flags;

        do {
                spin_lock_irqsave(&vif->response_lock, flags);
                make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
                spin_unlock_irqrestore(&vif->response_lock, flags);
                if (cons == end)
                        break;
                txp = RING_GET_REQUEST(&vif->tx, cons++);
        } while (1);
        vif->tx.req_cons = cons;
}
static void xenvif_fatal_tx_err(struct xenvif *vif)
{
        netdev_err(vif->dev, "fatal error; disabling device\n");
        xenvif_carrier_off(vif);
}
static int xenvif_count_requests(struct xenvif *vif,
                                 struct xen_netif_tx_request *first,
                                 struct xen_netif_tx_request *txp,
                                 int work_to_do)
{
        RING_IDX cons = vif->tx.req_cons;
        int slots = 0;
        int drop_err = 0;
        int more_data;

        if (!(first->flags & XEN_NETTXF_more_data))
                return 0;

        do {
                struct xen_netif_tx_request dropped_tx = { 0 };

                if (slots >= work_to_do) {
                        netdev_err(vif->dev,
                                   "Asked for %d slots but exceeds this limit\n",
                                   work_to_do);
                        xenvif_fatal_tx_err(vif);
                        return -ENODATA;
                }

                /* This guest is really using too many slots and
                 * considered malicious.
                 */
                if (unlikely(slots >= fatal_skb_slots)) {
                        netdev_err(vif->dev,
                                   "Malicious frontend using %d slots, threshold %u\n",
                                   slots, fatal_skb_slots);
                        xenvif_fatal_tx_err(vif);
                        return -E2BIG;
                }

                /* Xen network protocol had implicit dependency on
                 * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to
                 * the historical MAX_SKB_FRAGS value 18 to honor the
                 * same behavior as before. Any packet using more than
                 * 18 slots but less than fatal_skb_slots slots is
                 * dropped.
                 */
                if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) {
                        if (net_ratelimit())
                                netdev_dbg(vif->dev,
                                           "Too many slots (%d) exceeding limit (%d), dropping packet\n",
                                           slots, XEN_NETBK_LEGACY_SLOTS_MAX);
                        drop_err = -E2BIG;
                }

                if (drop_err)
                        txp = &dropped_tx;

                memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots),
                       sizeof(*txp));

                /* If the guest submitted a frame >= 64 KiB then
                 * first->size overflowed and following slots will
                 * appear to be larger than the frame.
                 *
                 * This cannot be a fatal error as there are buggy
                 * frontends that do this.
                 *
                 * Consume all slots and drop the packet.
                 */
                if (!drop_err && txp->size > first->size) {
                        if (net_ratelimit())
                                netdev_dbg(vif->dev,
                                           "Invalid tx request, slot size %u > remaining size %u\n",
                                           txp->size, first->size);
                        drop_err = -EIO;
                }

                first->size -= txp->size;
                slots++;

                if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
                        netdev_err(vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n",
                                   txp->offset, txp->size);
                        xenvif_fatal_tx_err(vif);
                        return -EINVAL;
                }

                more_data = txp->flags & XEN_NETTXF_more_data;

                if (!drop_err)
                        txp++;

        } while (more_data);

        if (drop_err) {
                xenvif_tx_err(vif, first, cons + slots);
                return drop_err;
        }

        return slots;
}
struct xenvif_tx_cb {
        u16 pending_idx;
};

#define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
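/* Queue a grant-map operation for one tx request and stash the request in
 * pending_tx_info so a response can be sent once the slot is released.
 */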
static inline void xenvif_tx_create_gop(struct xenvif *vif,
                                        u16 pending_idx,
                                        struct xen_netif_tx_request *txp,
                                        struct gnttab_map_grant_ref *gop)
{
        vif->pages_to_map[gop-vif->tx_map_ops] = vif->mmap_pages[pending_idx];
        gnttab_set_map_op(gop, idx_to_kaddr(vif, pending_idx),
                          GNTMAP_host_map | GNTMAP_readonly,
                          txp->gref, vif->domid);

        memcpy(&vif->pending_tx_info[pending_idx].req, txp,
               sizeof(*txp));
}
static inline struct sk_buff *xenvif_alloc_skb(unsigned int size)
{
        struct sk_buff *skb =
                alloc_skb(size + NET_SKB_PAD + NET_IP_ALIGN,
                          GFP_ATOMIC | __GFP_NOWARN);
        if (unlikely(skb == NULL))
                return NULL;

        /* Packets passed to netif_rx() must have some headroom. */
        skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);

        /* Initialize it here to avoid later surprises */
        skb_shinfo(skb)->destructor_arg = NULL;

        return skb;
}
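/* Set up grant-map operations for every slot after the first one of a packet.
 * Slots that do not fit in the skb's own frag array are carried by an extra
 * skb chained on the frag_list.
 */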
static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif *vif,
                                                        struct sk_buff *skb,
                                                        struct xen_netif_tx_request *txp,
                                                        struct gnttab_map_grant_ref *gop)
{
        struct skb_shared_info *shinfo = skb_shinfo(skb);
        skb_frag_t *frags = shinfo->frags;
        u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
        int start;
        pending_ring_idx_t index;
        unsigned int nr_slots, frag_overflow = 0;

        /* At this point shinfo->nr_frags is in fact the number of
         * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
         */
        if (shinfo->nr_frags > MAX_SKB_FRAGS) {
                frag_overflow = shinfo->nr_frags - MAX_SKB_FRAGS;
                BUG_ON(frag_overflow > MAX_SKB_FRAGS);
                shinfo->nr_frags = MAX_SKB_FRAGS;
        }
        nr_slots = shinfo->nr_frags;

        /* Skip first skb fragment if it is on same page as header fragment. */
        start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);

        for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots;
             shinfo->nr_frags++, txp++, gop++) {
                index = pending_index(vif->pending_cons++);
                pending_idx = vif->pending_ring[index];
                xenvif_tx_create_gop(vif, pending_idx, txp, gop);
                frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
        }

        if (frag_overflow) {
                struct sk_buff *nskb = xenvif_alloc_skb(0);
                if (unlikely(nskb == NULL)) {
                        if (net_ratelimit())
                                netdev_err(vif->dev,
                                           "Can't allocate the frag_list skb.\n");
                        return NULL;
                }

                shinfo = skb_shinfo(nskb);
                frags = shinfo->frags;

                for (shinfo->nr_frags = 0; shinfo->nr_frags < frag_overflow;
                     shinfo->nr_frags++, txp++, gop++) {
                        index = pending_index(vif->pending_cons++);
                        pending_idx = vif->pending_ring[index];
                        xenvif_tx_create_gop(vif, pending_idx, txp, gop);
                        frag_set_pending_idx(&frags[shinfo->nr_frags],
                                             pending_idx);
                }

                skb_shinfo(skb)->frag_list = nskb;
        }

        return gop;
}
static inline void xenvif_grant_handle_set(struct xenvif *vif,
                                           u16 pending_idx,
                                           grant_handle_t handle)
{
        if (unlikely(vif->grant_tx_handle[pending_idx] !=
                     NETBACK_INVALID_HANDLE)) {
                netdev_err(vif->dev,
                           "Trying to overwrite active handle! pending_idx: %x\n",
                           pending_idx);
                BUG();
        }
        vif->grant_tx_handle[pending_idx] = handle;
}

static inline void xenvif_grant_handle_reset(struct xenvif *vif,
                                             u16 pending_idx)
{
        if (unlikely(vif->grant_tx_handle[pending_idx] ==
                     NETBACK_INVALID_HANDLE)) {
                netdev_err(vif->dev,
                           "Trying to unmap invalid handle! pending_idx: %x\n",
                           pending_idx);
                BUG();
        }
        vif->grant_tx_handle[pending_idx] = NETBACK_INVALID_HANDLE;
}
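/* Walk the grant-map results for one skb (and its frag_list skb, if any):
 * record the handles of successful maps and release or invalidate the slots
 * of any that failed, so the frontend always gets a response for each slot.
 */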
static int xenvif_tx_check_gop(struct xenvif *vif,
                               struct sk_buff *skb,
                               struct gnttab_map_grant_ref **gopp)
{
        struct gnttab_map_grant_ref *gop = *gopp;
        u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
        struct skb_shared_info *shinfo = skb_shinfo(skb);
        struct pending_tx_info *tx_info;
        int nr_frags = shinfo->nr_frags;
        int i, err, start;
        struct sk_buff *first_skb = NULL;

        /* Check status of header. */
        err = gop->status;
        if (unlikely(err))
                xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
        else
                xenvif_grant_handle_set(vif, pending_idx, gop->handle);

        /* Skip first skb fragment if it is on same page as header fragment. */
        start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);

check_frags:
        for (i = start; i < nr_frags; i++) {
                int j, newerr;

                pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
                tx_info = &vif->pending_tx_info[pending_idx];

                /* Check error status: if okay then remember grant handle. */
                newerr = (++gop)->status;

                if (likely(!newerr)) {
                        xenvif_grant_handle_set(vif, pending_idx, gop->handle);
                        /* Had a previous error? Invalidate this fragment. */
                        if (unlikely(err))
                                xenvif_idx_unmap(vif, pending_idx);
                        continue;
                }

                /* Error on this fragment: respond to client with an error. */
                xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);

                /* Not the first error? Preceding frags already invalidated. */
                if (err)
                        continue;

                /* First error: invalidate header and preceding fragments. */
                if (!first_skb)
                        pending_idx = XENVIF_TX_CB(skb)->pending_idx;
                else
                        pending_idx = XENVIF_TX_CB(skb)->pending_idx;
                xenvif_idx_unmap(vif, pending_idx);
                for (j = start; j < i; j++) {
                        pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
                        xenvif_idx_unmap(vif, pending_idx);
                }

                /* Remember the error: invalidate all subsequent fragments. */
                err = newerr;
        }

        if (skb_has_frag_list(skb)) {
                first_skb = skb;
                skb = shinfo->frag_list;
                shinfo = skb_shinfo(skb);
                nr_frags = shinfo->nr_frags;
                start = 0;

                goto check_frags;
        }

        /* There was a mapping error in the frag_list skb. We have to unmap
         * the first skb's frags
         */
        if (first_skb && err) {
                int j;
                shinfo = skb_shinfo(first_skb);
                pending_idx = XENVIF_TX_CB(skb)->pending_idx;
                start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
                for (j = start; j < shinfo->nr_frags; j++) {
                        pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
                        xenvif_idx_unmap(vif, pending_idx);
                }
        }

        *gopp = gop + 1;
        return err;
}
static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
{
        struct skb_shared_info *shinfo = skb_shinfo(skb);
        int nr_frags = shinfo->nr_frags;
        int i;
        u16 prev_pending_idx = INVALID_PENDING_IDX;

        if (skb_shinfo(skb)->destructor_arg)
                prev_pending_idx = XENVIF_TX_CB(skb)->pending_idx;

        for (i = 0; i < nr_frags; i++) {
                skb_frag_t *frag = shinfo->frags + i;
                struct xen_netif_tx_request *txp;
                struct page *page;
                u16 pending_idx;

                pending_idx = frag_get_pending_idx(frag);

                /* If this is not the first frag, chain it to the previous */
                if (unlikely(prev_pending_idx == INVALID_PENDING_IDX))
                        skb_shinfo(skb)->destructor_arg =
                                &vif->pending_tx_info[pending_idx].callback_struct;
                else if (likely(pending_idx != prev_pending_idx))
                        vif->pending_tx_info[prev_pending_idx].callback_struct.ctx =
                                &(vif->pending_tx_info[pending_idx].callback_struct);

                vif->pending_tx_info[pending_idx].callback_struct.ctx = NULL;
                prev_pending_idx = pending_idx;

                txp = &vif->pending_tx_info[pending_idx].req;
                page = virt_to_page(idx_to_kaddr(vif, pending_idx));
                __skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
                skb->len += txp->size;
                skb->data_len += txp->size;
                skb->truesize += txp->size;

                /* Take an extra reference to offset network stack's put_page */
                get_page(vif->mmap_pages[pending_idx]);
        }
        /* FIXME: __skb_fill_page_desc set this to true because page->pfmemalloc
         * overlaps with "index", and "mapping" is not set. I think mapping
         * should be set. If delivered to local stack, it would drop this
         * skb in sk_filter unless the socket has the right to use it.
         */
        skb->pfmemalloc = false;
}
static int xenvif_get_extras(struct xenvif *vif,
                             struct xen_netif_extra_info *extras,
                             int work_to_do)
{
        struct xen_netif_extra_info extra;
        RING_IDX cons = vif->tx.req_cons;

        do {
                if (unlikely(work_to_do-- <= 0)) {
                        netdev_err(vif->dev, "Missing extra info\n");
                        xenvif_fatal_tx_err(vif);
                        return -EBADR;
                }

                memcpy(&extra, RING_GET_REQUEST(&vif->tx, cons),
                       sizeof(extra));
                if (unlikely(!extra.type ||
                             extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
                        vif->tx.req_cons = ++cons;
                        netdev_err(vif->dev,
                                   "Invalid extra type: %d\n", extra.type);
                        xenvif_fatal_tx_err(vif);
                        return -EINVAL;
                }

                memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
                vif->tx.req_cons = ++cons;
        } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);

        return work_to_do;
}
static int xenvif_set_skb_gso(struct xenvif *vif,
                              struct sk_buff *skb,
                              struct xen_netif_extra_info *gso)
{
        if (!gso->u.gso.size) {
                netdev_err(vif->dev, "GSO size must not be zero.\n");
                xenvif_fatal_tx_err(vif);
                return -EINVAL;
        }

        switch (gso->u.gso.type) {
        case XEN_NETIF_GSO_TYPE_TCPV4:
                skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
                break;
        case XEN_NETIF_GSO_TYPE_TCPV6:
                skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
                break;
        default:
                netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
                xenvif_fatal_tx_err(vif);
                return -EINVAL;
        }

        skb_shinfo(skb)->gso_size = gso->u.gso.size;
        /* gso_segs will be calculated later */

        return 0;
}
static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
{
        bool recalculate_partial_csum = false;

        /* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
         * peers can fail to set NETRXF_csum_blank when sending a GSO
         * frame. In this case force the SKB to CHECKSUM_PARTIAL and
         * recalculate the partial checksum.
         */
        if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
                vif->rx_gso_checksum_fixup++;
                skb->ip_summed = CHECKSUM_PARTIAL;
                recalculate_partial_csum = true;
        }

        /* A non-CHECKSUM_PARTIAL SKB does not require setup. */
        if (skb->ip_summed != CHECKSUM_PARTIAL)
                return 0;

        return skb_checksum_setup(skb, recalculate_partial_csum);
}
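/* Returns true if this packet must wait: either the credit timer is already
 * armed or the request is still larger than the remaining credit, in which
 * case a timer callback is set up to retry when the window is replenished.
 */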
static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
{
        u64 now = get_jiffies_64();
        u64 next_credit = vif->credit_window_start +
                msecs_to_jiffies(vif->credit_usec / 1000);

        /* Timer could already be pending in rare cases. */
        if (timer_pending(&vif->credit_timeout))
                return true;

        /* Passed the point where we can replenish credit? */
        if (time_after_eq64(now, next_credit)) {
                vif->credit_window_start = now;
                tx_add_credit(vif);
        }

        /* Still too big to send right now? Set a callback. */
        if (size > vif->remaining_credit) {
                vif->credit_timeout.data     =
                        (unsigned long)vif;
                vif->credit_timeout.function =
                        tx_credit_callback;
                mod_timer(&vif->credit_timeout,
                          next_credit);
                vif->credit_window_start = next_credit;

                return true;
        }

        return false;
}
static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
{
        struct gnttab_map_grant_ref *gop = vif->tx_map_ops, *request_gop;
        struct sk_buff *skb;
        int ret;

        while (xenvif_tx_pending_slots_available(vif) &&
               (skb_queue_len(&vif->tx_queue) < budget)) {
                struct xen_netif_tx_request txreq;
                struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
                struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
                u16 pending_idx;
                RING_IDX idx;
                int work_to_do;
                unsigned int data_len;
                pending_ring_idx_t index;

                if (vif->tx.sring->req_prod - vif->tx.req_cons >
                    XEN_NETIF_TX_RING_SIZE) {
                        netdev_err(vif->dev,
                                   "Impossible number of requests. "
                                   "req_prod %d, req_cons %d, size %ld\n",
                                   vif->tx.sring->req_prod, vif->tx.req_cons,
                                   XEN_NETIF_TX_RING_SIZE);
                        xenvif_fatal_tx_err(vif);
                        continue;
                }

                work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&vif->tx);
                if (!work_to_do)
                        break;

                idx = vif->tx.req_cons;
                rmb(); /* Ensure that we see the request before we copy it. */
                memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq));

                /* Credit-based scheduling. */
                if (txreq.size > vif->remaining_credit &&
                    tx_credit_exceeded(vif, txreq.size))
                        break;

                vif->remaining_credit -= txreq.size;

                work_to_do--;
                vif->tx.req_cons = ++idx;

                memset(extras, 0, sizeof(extras));
                if (txreq.flags & XEN_NETTXF_extra_info) {
                        work_to_do = xenvif_get_extras(vif, extras,
                                                       work_to_do);
                        idx = vif->tx.req_cons;
                        if (unlikely(work_to_do < 0))
                                break;
                }

                ret = xenvif_count_requests(vif, &txreq, txfrags, work_to_do);
                if (unlikely(ret < 0))
                        break;

                idx += ret;

                if (unlikely(txreq.size < ETH_HLEN)) {
                        netdev_dbg(vif->dev,
                                   "Bad packet size: %d\n", txreq.size);
                        xenvif_tx_err(vif, &txreq, idx);
                        break;
                }

                /* No crossing a page as the payload mustn't fragment. */
                if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
                        netdev_err(vif->dev,
                                   "txreq.offset: %x, size: %u, end: %lu\n",
                                   txreq.offset, txreq.size,
                                   (txreq.offset&~PAGE_MASK) + txreq.size);
                        xenvif_fatal_tx_err(vif);
                        break;
                }

                index = pending_index(vif->pending_cons);
                pending_idx = vif->pending_ring[index];

                data_len = (txreq.size > PKT_PROT_LEN &&
                            ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
                        PKT_PROT_LEN : txreq.size;

                skb = xenvif_alloc_skb(data_len);
                if (unlikely(skb == NULL)) {
                        netdev_dbg(vif->dev,
                                   "Can't allocate a skb in start_xmit.\n");
                        xenvif_tx_err(vif, &txreq, idx);
                        break;
                }

                if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
                        struct xen_netif_extra_info *gso;
                        gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];

                        if (xenvif_set_skb_gso(vif, skb, gso)) {
                                /* Failure in xenvif_set_skb_gso is fatal. */
                                kfree_skb(skb);
                                break;
                        }
                }

                xenvif_tx_create_gop(vif, pending_idx, &txreq, gop);

                gop++;

                XENVIF_TX_CB(skb)->pending_idx = pending_idx;

                __skb_put(skb, data_len);

                skb_shinfo(skb)->nr_frags = ret;
                if (data_len < txreq.size) {
                        skb_shinfo(skb)->nr_frags++;
                        frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
                                             pending_idx);
                } else {
                        frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
                                             INVALID_PENDING_IDX);
                }

                vif->pending_cons++;

                request_gop = xenvif_get_requests(vif, skb, txfrags, gop);
                if (request_gop == NULL) {
                        kfree_skb(skb);
                        xenvif_tx_err(vif, &txreq, idx);
                        break;
                }
                gop = request_gop;

                __skb_queue_tail(&vif->tx_queue, skb);

                vif->tx.req_cons = idx;

                if ((gop-vif->tx_map_ops) >= ARRAY_SIZE(vif->tx_map_ops))
                        break;
        }

        return gop - vif->tx_map_ops;
}
/* Consolidate an skb with a frag_list into a brand new one with local pages
 * on frags. Returns 0, or -ENOMEM if new pages cannot be allocated.
 */
static int xenvif_handle_frag_list(struct xenvif *vif, struct sk_buff *skb)
{
        unsigned int offset = skb_headlen(skb);
        skb_frag_t frags[MAX_SKB_FRAGS];
        int i;
        struct ubuf_info *uarg;
        struct sk_buff *nskb = skb_shinfo(skb)->frag_list;

        vif->tx_zerocopy_sent += 2;
        vif->tx_frag_overflow++;

        xenvif_fill_frags(vif, nskb);
        /* Subtract frags size, we will correct it later */
        skb->truesize -= skb->data_len;
        skb->len += nskb->len;
        skb->data_len += nskb->len;

        /* create a brand new frags array and coalesce there */
        for (i = 0; offset < skb->len; i++) {
                struct page *page;
                unsigned int len;

                BUG_ON(i >= MAX_SKB_FRAGS);
                page = alloc_page(GFP_ATOMIC|__GFP_COLD);
                if (!page) {
                        int j;
                        skb->truesize += skb->data_len;
                        for (j = 0; j < i; j++)
                                put_page(frags[j].page.p);
                        return -ENOMEM;
                }

                if (offset + PAGE_SIZE < skb->len)
                        len = PAGE_SIZE;
                else
                        len = skb->len - offset;
                if (skb_copy_bits(skb, offset, page_address(page), len))
                        BUG();

                offset += len;
                frags[i].page.p = page;
                frags[i].page_offset = 0;
                skb_frag_size_set(&frags[i], len);
        }
        /* swap out with old one */
        memcpy(skb_shinfo(skb)->frags,
               frags,
               i * sizeof(skb_frag_t));
        skb_shinfo(skb)->nr_frags = i;
        skb->truesize += i * PAGE_SIZE;

        /* remove traces of mapped pages and frag_list */
        skb_frag_list_init(skb);
        uarg = skb_shinfo(skb)->destructor_arg;
        uarg->callback(uarg, true);
        skb_shinfo(skb)->destructor_arg = NULL;

        skb_shinfo(nskb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
        kfree_skb(nskb);

        return 0;
}
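/* Second half of the tx path: for each skb whose grants have been mapped,
 * check the map results, fill in the frags, fix up checksum and GSO state,
 * and hand the packet to the network stack.
 */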
static int xenvif_tx_submit(struct xenvif *vif)
{
        struct gnttab_map_grant_ref *gop = vif->tx_map_ops;
        struct sk_buff *skb;
        int work_done = 0;

        while ((skb = __skb_dequeue(&vif->tx_queue)) != NULL) {
                struct xen_netif_tx_request *txp;
                u16 pending_idx;
                unsigned data_len;

                pending_idx = XENVIF_TX_CB(skb)->pending_idx;
                txp = &vif->pending_tx_info[pending_idx].req;

                /* Check the remap error code. */
                if (unlikely(xenvif_tx_check_gop(vif, skb, &gop))) {
                        netdev_dbg(vif->dev, "netback grant failed.\n");
                        skb_shinfo(skb)->nr_frags = 0;
                        kfree_skb(skb);
                        continue;
                }

                data_len = skb->len;
                memcpy(skb->data,
                       (void *)(idx_to_kaddr(vif, pending_idx)|txp->offset),
                       data_len);
                vif->pending_tx_info[pending_idx].callback_struct.ctx = NULL;
                if (data_len < txp->size) {
                        /* Append the packet payload as a fragment. */
                        txp->offset += data_len;
                        txp->size -= data_len;
                        skb_shinfo(skb)->destructor_arg =
                                &vif->pending_tx_info[pending_idx].callback_struct;
                } else {
                        /* Schedule a response immediately. */
                        xenvif_idx_unmap(vif, pending_idx);
                }

                if (txp->flags & XEN_NETTXF_csum_blank)
                        skb->ip_summed = CHECKSUM_PARTIAL;
                else if (txp->flags & XEN_NETTXF_data_validated)
                        skb->ip_summed = CHECKSUM_UNNECESSARY;

                xenvif_fill_frags(vif, skb);

                if (unlikely(skb_has_frag_list(skb))) {
                        if (xenvif_handle_frag_list(vif, skb)) {
                                if (net_ratelimit())
                                        netdev_err(vif->dev,
                                                   "Not enough memory to consolidate frag_list!\n");
                                skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
                                kfree_skb(skb);
                                continue;
                        }
                }

                if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) {
                        int target = min_t(int, skb->len, PKT_PROT_LEN);
                        __pskb_pull_tail(skb, target - skb_headlen(skb));
                }

                skb->dev      = vif->dev;
                skb->protocol = eth_type_trans(skb, skb->dev);
                skb_reset_network_header(skb);

                if (checksum_setup(vif, skb)) {
                        netdev_dbg(vif->dev,
                                   "Can't setup checksum in net_tx_action\n");
                        /* We have to set this flag to trigger the callback */
                        if (skb_shinfo(skb)->destructor_arg)
                                skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
                        kfree_skb(skb);
                        continue;
                }

                skb_probe_transport_header(skb, 0);

                /* If the packet is GSO then we will have just set up the
                 * transport header offset in checksum_setup so it's now
                 * straightforward to calculate gso_segs.
                 */
                if (skb_is_gso(skb)) {
                        int mss = skb_shinfo(skb)->gso_size;
                        int hdrlen = skb_transport_header(skb) -
                                skb_mac_header(skb) +
                                tcp_hdrlen(skb);

                        skb_shinfo(skb)->gso_segs =
                                DIV_ROUND_UP(skb->len - hdrlen, mss);
                }

                vif->dev->stats.rx_bytes += skb->len;
                vif->dev->stats.rx_packets++;

                work_done++;

                /* Set this flag right before netif_receive_skb, otherwise
                 * someone might think this packet already left netback, and
                 * do a skb_copy_ubufs while we are still in control of the
                 * skb. E.g. the __pskb_pull_tail earlier can do such thing.
                 */
                if (skb_shinfo(skb)->destructor_arg) {
                        skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
                        vif->tx_zerocopy_sent++;
                }

                netif_receive_skb(skb);
        }

        return work_done;
}
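/* Called by the network stack when it is finished with a zerocopy skb:
 * queue the skb's pending slots on the dealloc ring and wake the dealloc
 * thread so the grants can be unmapped and the slots returned to the guest.
 */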
void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success)
{
        unsigned long flags;
        pending_ring_idx_t index;
        struct xenvif *vif = ubuf_to_vif(ubuf);

        /* This is the only place where we grab this lock, to protect callbacks
         * from each other.
         */
        spin_lock_irqsave(&vif->callback_lock, flags);
        do {
                u16 pending_idx = ubuf->desc;
                ubuf = (struct ubuf_info *) ubuf->ctx;
                BUG_ON(vif->dealloc_prod - vif->dealloc_cons >=
                        MAX_PENDING_REQS);
                index = pending_index(vif->dealloc_prod);
                vif->dealloc_ring[index] = pending_idx;
                /* Sync with xenvif_tx_dealloc_action:
                 * insert idx then incr producer.
                 */
                smp_wmb();
                vif->dealloc_prod++;
        } while (ubuf);
        wake_up(&vif->dealloc_wq);
        spin_unlock_irqrestore(&vif->callback_lock, flags);

        if (RING_HAS_UNCONSUMED_REQUESTS(&vif->tx) &&
            xenvif_tx_pending_slots_available(vif)) {
                local_bh_disable();
                napi_schedule(&vif->napi);
                local_bh_enable();
        }

        if (likely(zerocopy_success))
                vif->tx_zerocopy_success++;
        else
                vif->tx_zerocopy_fail++;
}
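/* Unmap the grants whose slots the zerocopy callback queued on the dealloc
 * ring, then release those slots so tx responses reach the frontend.
 */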
static inline void xenvif_tx_dealloc_action(struct xenvif *vif)
{
        struct gnttab_unmap_grant_ref *gop;
        pending_ring_idx_t dc, dp;
        u16 pending_idx, pending_idx_release[MAX_PENDING_REQS];
        unsigned int i = 0;

        dc = vif->dealloc_cons;
        gop = vif->tx_unmap_ops;

        /* Free up any grants we have finished using */
        do {
                dp = vif->dealloc_prod;

                /* Ensure we see all indices enqueued by all
                 * xenvif_zerocopy_callback().
                 */
                smp_rmb();

                while (dc != dp) {
                        BUG_ON(gop - vif->tx_unmap_ops > MAX_PENDING_REQS);
                        pending_idx =
                                vif->dealloc_ring[pending_index(dc++)];

                        pending_idx_release[gop-vif->tx_unmap_ops] =
                                pending_idx;
                        vif->pages_to_unmap[gop-vif->tx_unmap_ops] =
                                vif->mmap_pages[pending_idx];
                        gnttab_set_unmap_op(gop,
                                            idx_to_kaddr(vif, pending_idx),
                                            GNTMAP_host_map,
                                            vif->grant_tx_handle[pending_idx]);
                        /* Btw. already unmapped? */
                        xenvif_grant_handle_reset(vif, pending_idx);
                        ++gop;
                }
        } while (dp != vif->dealloc_prod);

        vif->dealloc_cons = dc;

        if (gop - vif->tx_unmap_ops > 0) {
                int ret;
                ret = gnttab_unmap_refs(vif->tx_unmap_ops,
                                        NULL,
                                        vif->pages_to_unmap,
                                        gop - vif->tx_unmap_ops);
                if (ret) {
                        netdev_err(vif->dev, "Unmap fail: nr_ops %tx ret %d\n",
                                   gop - vif->tx_unmap_ops, ret);
                        for (i = 0; i < gop - vif->tx_unmap_ops; ++i) {
                                if (gop[i].status != GNTST_okay)
                                        netdev_err(vif->dev,
                                                   " host_addr: %llx handle: %x status: %d\n",
                                                   gop[i].host_addr,
                                                   gop[i].handle,
                                                   gop[i].status);
                        }
                        BUG();
                }
        }

        for (i = 0; i < gop - vif->tx_unmap_ops; ++i)
                xenvif_idx_release(vif, pending_idx_release[i],
                                   XEN_NETIF_RSP_OKAY);
}
/* Called after netfront has transmitted */
int xenvif_tx_action(struct xenvif *vif, int budget)
{
        unsigned nr_gops;
        int work_done, ret;

        if (unlikely(!tx_work_todo(vif)))
                return 0;

        nr_gops = xenvif_tx_build_gops(vif, budget);

        if (nr_gops == 0)
                return 0;

        ret = gnttab_map_refs(vif->tx_map_ops,
                              NULL,
                              vif->pages_to_map,
                              nr_gops);
        BUG_ON(ret);

        work_done = xenvif_tx_submit(vif);

        return work_done;
}
static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
                               u8 status)
{
        struct pending_tx_info *pending_tx_info;
        pending_ring_idx_t index;
        unsigned long flags;

        pending_tx_info = &vif->pending_tx_info[pending_idx];
        spin_lock_irqsave(&vif->response_lock, flags);
        make_tx_response(vif, &pending_tx_info->req, status);
        index = pending_index(vif->pending_prod);
        vif->pending_ring[index] = pending_idx;
        /* TX shouldn't use the index before we give it back here */
        mb();
        vif->pending_prod++;
        spin_unlock_irqrestore(&vif->response_lock, flags);
}
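/* Write a tx response (and the extra-info NULL slot if one was consumed)
 * into the shared ring and notify the frontend when required.
 */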
static void make_tx_response(struct xenvif *vif,
                             struct xen_netif_tx_request *txp,
                             s8 st)
{
        RING_IDX i = vif->tx.rsp_prod_pvt;
        struct xen_netif_tx_response *resp;
        int notify;

        resp = RING_GET_RESPONSE(&vif->tx, i);
        resp->id     = txp->id;
        resp->status = st;

        if (txp->flags & XEN_NETTXF_extra_info)
                RING_GET_RESPONSE(&vif->tx, ++i)->status = XEN_NETIF_RSP_NULL;

        vif->tx.rsp_prod_pvt = ++i;
        RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify);
        if (notify)
                notify_remote_via_irq(vif->tx_irq);
}
static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
                                                      u16 id,
                                                      s8 st,
                                                      u16 offset,
                                                      u16 size,
                                                      u16 flags)
{
        RING_IDX i = vif->rx.rsp_prod_pvt;
        struct xen_netif_rx_response *resp;

        resp = RING_GET_RESPONSE(&vif->rx, i);
        resp->offset = offset;
        resp->flags  = flags;
        resp->id     = id;
        resp->status = (s16)size;
        if (st < 0)
                resp->status = (s16)st;

        vif->rx.rsp_prod_pvt = ++i;

        return resp;
}
void xenvif_idx_unmap(struct xenvif *vif, u16 pending_idx)
{
        int ret;
        struct gnttab_unmap_grant_ref tx_unmap_op;

        gnttab_set_unmap_op(&tx_unmap_op,
                            idx_to_kaddr(vif, pending_idx),
                            GNTMAP_host_map,
                            vif->grant_tx_handle[pending_idx]);
        /* Btw. already unmapped? */
        xenvif_grant_handle_reset(vif, pending_idx);

        ret = gnttab_unmap_refs(&tx_unmap_op, NULL,
                                &vif->mmap_pages[pending_idx], 1);
        BUG_ON(ret);

        xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
}
static inline int rx_work_todo(struct xenvif *vif)
{
        return (!skb_queue_empty(&vif->rx_queue) &&
               xenvif_rx_ring_slots_available(vif, vif->rx_last_skb_slots)) ||
               vif->rx_queue_purge;
}
static inline int tx_work_todo(struct xenvif *vif)
{
        if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->tx)) &&
            xenvif_tx_pending_slots_available(vif))
                return 1;

        return 0;
}
static inline bool tx_dealloc_work_todo(struct xenvif *vif)
{
        return vif->dealloc_cons != vif->dealloc_prod;
}
void xenvif_unmap_frontend_rings(struct xenvif *vif)
{
        if (vif->tx.sring)
                xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
                                        vif->tx.sring);
        if (vif->rx.sring)
                xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
                                        vif->rx.sring);
}
int xenvif_map_frontend_rings(struct xenvif *vif,
                              grant_ref_t tx_ring_ref,
                              grant_ref_t rx_ring_ref)
{
        void *addr;
        struct xen_netif_tx_sring *txs;
        struct xen_netif_rx_sring *rxs;

        int err;

        err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
                                     tx_ring_ref, &addr);
        if (err)
                goto err;

        txs = (struct xen_netif_tx_sring *)addr;
        BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);

        err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
                                     rx_ring_ref, &addr);
        if (err)
                goto err;

        rxs = (struct xen_netif_rx_sring *)addr;
        BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE);

        return 0;

err:
        xenvif_unmap_frontend_rings(vif);
        return err;
}
void xenvif_stop_queue(struct xenvif *vif)
{
        if (!vif->can_queue)
                return;

        netif_stop_queue(vif->dev);
}
static void xenvif_start_queue(struct xenvif *vif)
{
        if (xenvif_schedulable(vif))
                netif_wake_queue(vif->dev);
}
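/* Per-vif kernel thread for guest rx: drains rx_queue into the frontend's
 * rx ring, purges the queue when asked to, and restarts the transmit queue
 * once there is room again.
 */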
int xenvif_kthread_guest_rx(void *data)
{
        struct xenvif *vif = data;
        struct sk_buff *skb;

        while (!kthread_should_stop()) {
                wait_event_interruptible(vif->wq,
                                         rx_work_todo(vif) ||
                                         kthread_should_stop());
                if (kthread_should_stop())
                        break;

                if (vif->rx_queue_purge) {
                        skb_queue_purge(&vif->rx_queue);
                        vif->rx_queue_purge = false;
                }

                if (!skb_queue_empty(&vif->rx_queue))
                        xenvif_rx_action(vif);

                if (skb_queue_empty(&vif->rx_queue) &&
                    netif_queue_stopped(vif->dev)) {
                        del_timer_sync(&vif->wake_queue);
                        xenvif_start_queue(vif);
                }

                cond_resched();
        }

        /* Bin any remaining skbs */
        while ((skb = skb_dequeue(&vif->rx_queue)) != NULL)
                dev_kfree_skb(skb);

        return 0;
}
int xenvif_dealloc_kthread(void *data)
{
        struct xenvif *vif = data;

        while (!kthread_should_stop()) {
                wait_event_interruptible(vif->dealloc_wq,
                                         tx_dealloc_work_todo(vif) ||
                                         kthread_should_stop());
                if (kthread_should_stop())
                        break;

                xenvif_tx_dealloc_action(vif);
                cond_resched();
        }

        /* Unmap anything remaining */
        if (tx_dealloc_work_todo(vif))
                xenvif_tx_dealloc_action(vif);

        return 0;
}
static int __init netback_init(void)
{
        int rc = 0;

        if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
                pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
                        fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX);
                fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;
        }

        rc = xenvif_xenbus_init();
        if (rc)
                return rc;

        rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs);

        return 0;
}

module_init(netback_init);
static void __exit netback_fini(void)
{
        xenvif_xenbus_fini();
}
module_exit(netback_fini);
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("xen-backend:vif");