2 * Back-end of the driver for virtual network devices. This portion of the
3 * driver exports a 'unified' network-device interface that can be accessed
4 * by any operating system that implements a compatible front end. A
5 * reference front-end implementation can be found in:
6 * drivers/net/xen-netfront.c
8 * Copyright (c) 2002-2005, K A Fraser
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License version 2
12 * as published by the Free Software Foundation; or, when distributed
13 * separately from the Linux kernel or incorporated into other
14 * software packages, subject to the following license:
16 * Permission is hereby granted, free of charge, to any person obtaining a copy
17 * of this source file (the "Software"), to deal in the Software without
18 * restriction, including without limitation the rights to use, copy, modify,
19 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
20 * and to permit persons to whom the Software is furnished to do so, subject to
21 * the following conditions:
23 * The above copyright notice and this permission notice shall be included in
24 * all copies or substantial portions of the Software.
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
37 #include <linux/kthread.h>
38 #include <linux/if_vlan.h>
39 #include <linux/udp.h>
40 #include <linux/highmem.h>
45 #include <xen/events.h>
46 #include <xen/interface/memory.h>
48 #include <asm/xen/hypercall.h>
49 #include <asm/xen/page.h>
51 /* Provide an option to disable split event channels at load time as
52 * event channels are a limited resource. Split event channels are
55 bool separate_tx_rx_irq
= 1;
56 module_param(separate_tx_rx_irq
, bool, 0644);
58 /* When guest ring is filled up, qdisc queues the packets for us, but we have
59 * to timeout them, otherwise other guests' packets can get stuck there
61 unsigned int rx_drain_timeout_msecs
= 10000;
62 module_param(rx_drain_timeout_msecs
, uint
, 0444);
63 unsigned int rx_drain_timeout_jiffies
;
66 * This is the maximum slots a skb can have. If a guest sends a skb
67 * which exceeds this limit it is considered malicious.
69 #define FATAL_SKB_SLOTS_DEFAULT 20
70 static unsigned int fatal_skb_slots
= FATAL_SKB_SLOTS_DEFAULT
;
71 module_param(fatal_skb_slots
, uint
, 0444);
73 static void xenvif_idx_release(struct xenvif
*vif
, u16 pending_idx
,
76 static void make_tx_response(struct xenvif
*vif
,
77 struct xen_netif_tx_request
*txp
,
80 static inline int tx_work_todo(struct xenvif
*vif
);
81 static inline int rx_work_todo(struct xenvif
*vif
);
83 static struct xen_netif_rx_response
*make_rx_response(struct xenvif
*vif
,
90 static inline unsigned long idx_to_pfn(struct xenvif
*vif
,
93 return page_to_pfn(vif
->mmap_pages
[idx
]);
96 static inline unsigned long idx_to_kaddr(struct xenvif
*vif
,
99 return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif
, idx
));
/* Shorthand for the callback_struct member stored in slot pending_idx of
 * vif's pending_tx_info array. The vif argument is parenthesised so that
 * pointer expressions (e.g. `base + 1`) expand with the intended precedence.
 */
#define callback_param(vif, pending_idx) \
	((vif)->pending_tx_info[pending_idx].callback_struct)
105 /* Find the containing VIF's structure from a pointer in pending_tx_info array
107 static inline struct xenvif
*ubuf_to_vif(const struct ubuf_info
*ubuf
)
109 u16 pending_idx
= ubuf
->desc
;
110 struct pending_tx_info
*temp
=
111 container_of(ubuf
, struct pending_tx_info
, callback_struct
);
112 return container_of(temp
- pending_idx
,
117 /* This is a minimum size for the linear area to avoid lots of
118 * calls to __pskb_pull_tail() as we set up checksum offsets. The
119 * value 128 was chosen as it covers all IPv4 and most likely
122 #define PKT_PROT_LEN 128
124 static u16
frag_get_pending_idx(skb_frag_t
*frag
)
126 return (u16
)frag
->page_offset
;
129 static void frag_set_pending_idx(skb_frag_t
*frag
, u16 pending_idx
)
131 frag
->page_offset
= pending_idx
;
134 static inline pending_ring_idx_t
pending_index(unsigned i
)
136 return i
& (MAX_PENDING_REQS
-1);
139 bool xenvif_rx_ring_slots_available(struct xenvif
*vif
, int needed
)
144 prod
= vif
->rx
.sring
->req_prod
;
145 cons
= vif
->rx
.req_cons
;
147 if (prod
- cons
>= needed
)
150 vif
->rx
.sring
->req_event
= prod
+ 1;
152 /* Make sure event is visible before we check prod
156 } while (vif
->rx
.sring
->req_prod
!= prod
);
162 * Returns true if we should start a new receive buffer instead of
163 * adding 'size' bytes to a buffer which currently contains 'offset'
166 static bool start_new_rx_buffer(int offset
, unsigned long size
, int head
)
168 /* simple case: we have completely filled the current buffer. */
169 if (offset
== MAX_BUFFER_OFFSET
)
173 * complex case: start a fresh buffer if the current frag
174 * would overflow the current buffer but only if:
175 * (i) this frag would fit completely in the next buffer
176 * and (ii) there is already some data in the current buffer
177 * and (iii) this is not the head buffer.
180 * - (i) stops us splitting a frag into two copies
181 * unless the frag is too large for a single buffer.
182 * - (ii) stops us from leaving a buffer pointlessly empty.
183 * - (iii) stops us leaving the first buffer
184 * empty. Strictly speaking this is already covered
185 * by (ii) but is explicitly checked because
186 * netfront relies on the first buffer being
187 * non-empty and can crash otherwise.
189 * This means we will effectively linearise small
190 * frags but do not needlessly split large buffers
191 * into multiple copies; large frags tend to get their
192 * own buffers as before.
194 BUG_ON(size
> MAX_BUFFER_OFFSET
);
195 if ((offset
+ size
> MAX_BUFFER_OFFSET
) && offset
&& !head
)
201 struct netrx_pending_operations
{
202 unsigned copy_prod
, copy_cons
;
203 unsigned meta_prod
, meta_cons
;
204 struct gnttab_copy
*copy
;
205 struct xenvif_rx_meta
*meta
;
207 grant_ref_t copy_gref
;
210 static struct xenvif_rx_meta
*get_next_rx_buffer(struct xenvif
*vif
,
211 struct netrx_pending_operations
*npo
)
213 struct xenvif_rx_meta
*meta
;
214 struct xen_netif_rx_request
*req
;
216 req
= RING_GET_REQUEST(&vif
->rx
, vif
->rx
.req_cons
++);
218 meta
= npo
->meta
+ npo
->meta_prod
++;
219 meta
->gso_type
= XEN_NETIF_GSO_TYPE_NONE
;
225 npo
->copy_gref
= req
->gref
;
231 * Set up the grant operations for this fragment. If it's a flipping
232 * interface, we also set up the unmap request from here.
234 static void xenvif_gop_frag_copy(struct xenvif
*vif
, struct sk_buff
*skb
,
235 struct netrx_pending_operations
*npo
,
236 struct page
*page
, unsigned long size
,
237 unsigned long offset
, int *head
,
238 struct xenvif
*foreign_vif
,
239 grant_ref_t foreign_gref
)
241 struct gnttab_copy
*copy_gop
;
242 struct xenvif_rx_meta
*meta
;
244 int gso_type
= XEN_NETIF_GSO_TYPE_NONE
;
246 /* Data must not cross a page boundary. */
247 BUG_ON(size
+ offset
> PAGE_SIZE
<<compound_order(page
));
249 meta
= npo
->meta
+ npo
->meta_prod
- 1;
251 /* Skip unused frames from start of page */
252 page
+= offset
>> PAGE_SHIFT
;
253 offset
&= ~PAGE_MASK
;
256 BUG_ON(offset
>= PAGE_SIZE
);
257 BUG_ON(npo
->copy_off
> MAX_BUFFER_OFFSET
);
259 bytes
= PAGE_SIZE
- offset
;
264 if (start_new_rx_buffer(npo
->copy_off
, bytes
, *head
)) {
266 * Netfront requires there to be some data in the head
271 meta
= get_next_rx_buffer(vif
, npo
);
274 if (npo
->copy_off
+ bytes
> MAX_BUFFER_OFFSET
)
275 bytes
= MAX_BUFFER_OFFSET
- npo
->copy_off
;
277 copy_gop
= npo
->copy
+ npo
->copy_prod
++;
278 copy_gop
->flags
= GNTCOPY_dest_gref
;
279 copy_gop
->len
= bytes
;
282 copy_gop
->source
.domid
= foreign_vif
->domid
;
283 copy_gop
->source
.u
.ref
= foreign_gref
;
284 copy_gop
->flags
|= GNTCOPY_source_gref
;
286 copy_gop
->source
.domid
= DOMID_SELF
;
287 copy_gop
->source
.u
.gmfn
=
288 virt_to_mfn(page_address(page
));
290 copy_gop
->source
.offset
= offset
;
292 copy_gop
->dest
.domid
= vif
->domid
;
293 copy_gop
->dest
.offset
= npo
->copy_off
;
294 copy_gop
->dest
.u
.ref
= npo
->copy_gref
;
296 npo
->copy_off
+= bytes
;
303 if (offset
== PAGE_SIZE
&& size
) {
304 BUG_ON(!PageCompound(page
));
309 /* Leave a gap for the GSO descriptor. */
310 if (skb_is_gso(skb
)) {
311 if (skb_shinfo(skb
)->gso_type
& SKB_GSO_TCPV4
)
312 gso_type
= XEN_NETIF_GSO_TYPE_TCPV4
;
313 else if (skb_shinfo(skb
)->gso_type
& SKB_GSO_TCPV6
)
314 gso_type
= XEN_NETIF_GSO_TYPE_TCPV6
;
317 if (*head
&& ((1 << gso_type
) & vif
->gso_mask
))
320 *head
= 0; /* There must be something in this buffer now. */
326 * Find the grant ref for a given frag in a chain of struct ubuf_info's
327 * skb: the skb itself
328 * i: the frag's number
329 * ubuf: a pointer to an element in the chain. It should not be NULL
331 * Returns a pointer to the element in the chain where the page was found. If
332 * not found, returns NULL.
333 * See the definition of callback_struct in common.h for more details about
336 static const struct ubuf_info
*xenvif_find_gref(const struct sk_buff
*const skb
,
338 const struct ubuf_info
*ubuf
)
340 struct xenvif
*foreign_vif
= ubuf_to_vif(ubuf
);
343 u16 pending_idx
= ubuf
->desc
;
345 if (skb_shinfo(skb
)->frags
[i
].page
.p
==
346 foreign_vif
->mmap_pages
[pending_idx
])
348 ubuf
= (struct ubuf_info
*) ubuf
->ctx
;
355 * Prepare an SKB to be transmitted to the frontend.
357 * This function is responsible for allocating grant operations, meta
360 * It returns the number of meta structures consumed. The number of
361 * ring slots used is always equal to the number of meta slots used
362 * plus the number of GSO descriptors used. Currently, we use either
363 * zero GSO descriptors (for non-GSO packets) or one descriptor (for
364 * frontend-side LRO).
366 static int xenvif_gop_skb(struct sk_buff
*skb
,
367 struct netrx_pending_operations
*npo
)
369 struct xenvif
*vif
= netdev_priv(skb
->dev
);
370 int nr_frags
= skb_shinfo(skb
)->nr_frags
;
372 struct xen_netif_rx_request
*req
;
373 struct xenvif_rx_meta
*meta
;
378 const struct ubuf_info
*ubuf
= skb_shinfo(skb
)->destructor_arg
;
379 const struct ubuf_info
*const head_ubuf
= ubuf
;
381 old_meta_prod
= npo
->meta_prod
;
383 gso_type
= XEN_NETIF_GSO_TYPE_NONE
;
384 if (skb_is_gso(skb
)) {
385 if (skb_shinfo(skb
)->gso_type
& SKB_GSO_TCPV4
)
386 gso_type
= XEN_NETIF_GSO_TYPE_TCPV4
;
387 else if (skb_shinfo(skb
)->gso_type
& SKB_GSO_TCPV6
)
388 gso_type
= XEN_NETIF_GSO_TYPE_TCPV6
;
391 /* Set up a GSO prefix descriptor, if necessary */
392 if ((1 << gso_type
) & vif
->gso_prefix_mask
) {
393 req
= RING_GET_REQUEST(&vif
->rx
, vif
->rx
.req_cons
++);
394 meta
= npo
->meta
+ npo
->meta_prod
++;
395 meta
->gso_type
= gso_type
;
396 meta
->gso_size
= skb_shinfo(skb
)->gso_size
;
401 req
= RING_GET_REQUEST(&vif
->rx
, vif
->rx
.req_cons
++);
402 meta
= npo
->meta
+ npo
->meta_prod
++;
404 if ((1 << gso_type
) & vif
->gso_mask
) {
405 meta
->gso_type
= gso_type
;
406 meta
->gso_size
= skb_shinfo(skb
)->gso_size
;
408 meta
->gso_type
= XEN_NETIF_GSO_TYPE_NONE
;
415 npo
->copy_gref
= req
->gref
;
418 while (data
< skb_tail_pointer(skb
)) {
419 unsigned int offset
= offset_in_page(data
);
420 unsigned int len
= PAGE_SIZE
- offset
;
422 if (data
+ len
> skb_tail_pointer(skb
))
423 len
= skb_tail_pointer(skb
) - data
;
425 xenvif_gop_frag_copy(vif
, skb
, npo
,
426 virt_to_page(data
), len
, offset
, &head
,
432 for (i
= 0; i
< nr_frags
; i
++) {
433 /* This variable also signals whether foreign_gref has a real
436 struct xenvif
*foreign_vif
= NULL
;
437 grant_ref_t foreign_gref
;
439 if ((skb_shinfo(skb
)->tx_flags
& SKBTX_DEV_ZEROCOPY
) &&
440 (ubuf
->callback
== &xenvif_zerocopy_callback
)) {
441 const struct ubuf_info
*const startpoint
= ubuf
;
443 /* Ideally ubuf points to the chain element which
444 * belongs to this frag. Or if frags were removed from
445 * the beginning, then shortly before it.
447 ubuf
= xenvif_find_gref(skb
, i
, ubuf
);
449 /* Try again from the beginning of the list, if we
450 * haven't tried from there. This only makes sense in
451 * the unlikely event of reordering the original frags.
452 * For injected local pages it's an unnecessary second
455 if (unlikely(!ubuf
) && startpoint
!= head_ubuf
)
456 ubuf
= xenvif_find_gref(skb
, i
, head_ubuf
);
459 u16 pending_idx
= ubuf
->desc
;
461 foreign_vif
= ubuf_to_vif(ubuf
);
462 foreign_gref
= foreign_vif
->pending_tx_info
[pending_idx
].req
.gref
;
463 /* Just a safety measure. If this was the last
464 * element on the list, the for loop will
465 * iterate again if a local page were added to
466 * the end. Using head_ubuf here prevents the
467 * second search on the chain. Or the original
468 * frags changed order, but that's less likely.
469 * In any way, ubuf shouldn't be NULL.
472 (struct ubuf_info
*) ubuf
->ctx
:
475 /* This frag was a local page, added to the
476 * array after the skb left netback.
480 xenvif_gop_frag_copy(vif
, skb
, npo
,
481 skb_frag_page(&skb_shinfo(skb
)->frags
[i
]),
482 skb_frag_size(&skb_shinfo(skb
)->frags
[i
]),
483 skb_shinfo(skb
)->frags
[i
].page_offset
,
486 foreign_vif
? foreign_gref
: UINT_MAX
);
489 return npo
->meta_prod
- old_meta_prod
;
493 * This is a twin to xenvif_gop_skb. Assume that xenvif_gop_skb was
494 * used to set up the operations on the top of
495 * netrx_pending_operations, which have since been done. Check that
496 * they didn't give any errors and advance over them.
498 static int xenvif_check_gop(struct xenvif
*vif
, int nr_meta_slots
,
499 struct netrx_pending_operations
*npo
)
501 struct gnttab_copy
*copy_op
;
502 int status
= XEN_NETIF_RSP_OKAY
;
505 for (i
= 0; i
< nr_meta_slots
; i
++) {
506 copy_op
= npo
->copy
+ npo
->copy_cons
++;
507 if (copy_op
->status
!= GNTST_okay
) {
509 "Bad status %d from copy to DOM%d.\n",
510 copy_op
->status
, vif
->domid
);
511 status
= XEN_NETIF_RSP_ERROR
;
518 static void xenvif_add_frag_responses(struct xenvif
*vif
, int status
,
519 struct xenvif_rx_meta
*meta
,
523 unsigned long offset
;
525 /* No fragments used */
526 if (nr_meta_slots
<= 1)
531 for (i
= 0; i
< nr_meta_slots
; i
++) {
533 if (i
== nr_meta_slots
- 1)
536 flags
= XEN_NETRXF_more_data
;
539 make_rx_response(vif
, meta
[i
].id
, status
, offset
,
540 meta
[i
].size
, flags
);
544 struct xenvif_rx_cb
{
548 #define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb)
550 void xenvif_kick_thread(struct xenvif
*vif
)
555 static void xenvif_rx_action(struct xenvif
*vif
)
559 struct xen_netif_rx_response
*resp
;
560 struct sk_buff_head rxq
;
564 unsigned long offset
;
565 bool need_to_notify
= false;
567 struct netrx_pending_operations npo
= {
568 .copy
= vif
->grant_copy_op
,
572 skb_queue_head_init(&rxq
);
574 while ((skb
= skb_dequeue(&vif
->rx_queue
)) != NULL
) {
575 RING_IDX max_slots_needed
;
576 RING_IDX old_req_cons
;
577 RING_IDX ring_slots_used
;
580 /* We need a cheap worst-case estimate for the number of
584 max_slots_needed
= DIV_ROUND_UP(offset_in_page(skb
->data
) +
587 for (i
= 0; i
< skb_shinfo(skb
)->nr_frags
; i
++) {
591 size
= skb_frag_size(&skb_shinfo(skb
)->frags
[i
]);
592 offset
= skb_shinfo(skb
)->frags
[i
].page_offset
;
594 /* For a worst-case estimate we need to factor in
595 * the fragment page offset as this will affect the
596 * number of times xenvif_gop_frag_copy() will
597 * call start_new_rx_buffer().
599 max_slots_needed
+= DIV_ROUND_UP(offset
+ size
,
603 /* To avoid the estimate becoming too pessimal for some
604 * frontends that limit posted rx requests, cap the estimate
607 if (max_slots_needed
> MAX_SKB_FRAGS
)
608 max_slots_needed
= MAX_SKB_FRAGS
;
610 /* We may need one more slot for GSO metadata */
611 if (skb_is_gso(skb
) &&
612 (skb_shinfo(skb
)->gso_type
& SKB_GSO_TCPV4
||
613 skb_shinfo(skb
)->gso_type
& SKB_GSO_TCPV6
))
616 /* If the skb may not fit then bail out now */
617 if (!xenvif_rx_ring_slots_available(vif
, max_slots_needed
)) {
618 skb_queue_head(&vif
->rx_queue
, skb
);
619 need_to_notify
= true;
620 vif
->rx_last_skb_slots
= max_slots_needed
;
623 vif
->rx_last_skb_slots
= 0;
625 old_req_cons
= vif
->rx
.req_cons
;
626 XENVIF_RX_CB(skb
)->meta_slots_used
= xenvif_gop_skb(skb
, &npo
);
627 ring_slots_used
= vif
->rx
.req_cons
- old_req_cons
;
629 BUG_ON(ring_slots_used
> max_slots_needed
);
631 __skb_queue_tail(&rxq
, skb
);
634 BUG_ON(npo
.meta_prod
> ARRAY_SIZE(vif
->meta
));
639 BUG_ON(npo
.copy_prod
> MAX_GRANT_COPY_OPS
);
640 gnttab_batch_copy(vif
->grant_copy_op
, npo
.copy_prod
);
642 while ((skb
= __skb_dequeue(&rxq
)) != NULL
) {
644 if ((1 << vif
->meta
[npo
.meta_cons
].gso_type
) &
645 vif
->gso_prefix_mask
) {
646 resp
= RING_GET_RESPONSE(&vif
->rx
,
647 vif
->rx
.rsp_prod_pvt
++);
649 resp
->flags
= XEN_NETRXF_gso_prefix
| XEN_NETRXF_more_data
;
651 resp
->offset
= vif
->meta
[npo
.meta_cons
].gso_size
;
652 resp
->id
= vif
->meta
[npo
.meta_cons
].id
;
653 resp
->status
= XENVIF_RX_CB(skb
)->meta_slots_used
;
656 XENVIF_RX_CB(skb
)->meta_slots_used
--;
660 vif
->dev
->stats
.tx_bytes
+= skb
->len
;
661 vif
->dev
->stats
.tx_packets
++;
663 status
= xenvif_check_gop(vif
,
664 XENVIF_RX_CB(skb
)->meta_slots_used
,
667 if (XENVIF_RX_CB(skb
)->meta_slots_used
== 1)
670 flags
= XEN_NETRXF_more_data
;
672 if (skb
->ip_summed
== CHECKSUM_PARTIAL
) /* local packet? */
673 flags
|= XEN_NETRXF_csum_blank
| XEN_NETRXF_data_validated
;
674 else if (skb
->ip_summed
== CHECKSUM_UNNECESSARY
)
675 /* remote but checksummed. */
676 flags
|= XEN_NETRXF_data_validated
;
679 resp
= make_rx_response(vif
, vif
->meta
[npo
.meta_cons
].id
,
681 vif
->meta
[npo
.meta_cons
].size
,
684 if ((1 << vif
->meta
[npo
.meta_cons
].gso_type
) &
686 struct xen_netif_extra_info
*gso
=
687 (struct xen_netif_extra_info
*)
688 RING_GET_RESPONSE(&vif
->rx
,
689 vif
->rx
.rsp_prod_pvt
++);
691 resp
->flags
|= XEN_NETRXF_extra_info
;
693 gso
->u
.gso
.type
= vif
->meta
[npo
.meta_cons
].gso_type
;
694 gso
->u
.gso
.size
= vif
->meta
[npo
.meta_cons
].gso_size
;
696 gso
->u
.gso
.features
= 0;
698 gso
->type
= XEN_NETIF_EXTRA_TYPE_GSO
;
702 xenvif_add_frag_responses(vif
, status
,
703 vif
->meta
+ npo
.meta_cons
+ 1,
704 XENVIF_RX_CB(skb
)->meta_slots_used
);
706 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif
->rx
, ret
);
708 need_to_notify
|= !!ret
;
710 npo
.meta_cons
+= XENVIF_RX_CB(skb
)->meta_slots_used
;
716 notify_remote_via_irq(vif
->rx_irq
);
719 void xenvif_napi_schedule_or_enable_events(struct xenvif
*vif
)
723 RING_FINAL_CHECK_FOR_REQUESTS(&vif
->tx
, more_to_do
);
726 napi_schedule(&vif
->napi
);
729 static void tx_add_credit(struct xenvif
*vif
)
731 unsigned long max_burst
, max_credit
;
734 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
735 * Otherwise the interface can seize up due to insufficient credit.
737 max_burst
= RING_GET_REQUEST(&vif
->tx
, vif
->tx
.req_cons
)->size
;
738 max_burst
= min(max_burst
, 131072UL);
739 max_burst
= max(max_burst
, vif
->credit_bytes
);
741 /* Take care that adding a new chunk of credit doesn't wrap to zero. */
742 max_credit
= vif
->remaining_credit
+ vif
->credit_bytes
;
743 if (max_credit
< vif
->remaining_credit
)
744 max_credit
= ULONG_MAX
; /* wrapped: clamp to ULONG_MAX */
746 vif
->remaining_credit
= min(max_credit
, max_burst
);
749 static void tx_credit_callback(unsigned long data
)
751 struct xenvif
*vif
= (struct xenvif
*)data
;
753 xenvif_napi_schedule_or_enable_events(vif
);
756 static void xenvif_tx_err(struct xenvif
*vif
,
757 struct xen_netif_tx_request
*txp
, RING_IDX end
)
759 RING_IDX cons
= vif
->tx
.req_cons
;
763 spin_lock_irqsave(&vif
->response_lock
, flags
);
764 make_tx_response(vif
, txp
, XEN_NETIF_RSP_ERROR
);
765 spin_unlock_irqrestore(&vif
->response_lock
, flags
);
768 txp
= RING_GET_REQUEST(&vif
->tx
, cons
++);
770 vif
->tx
.req_cons
= cons
;
773 static void xenvif_fatal_tx_err(struct xenvif
*vif
)
775 netdev_err(vif
->dev
, "fatal error; disabling device\n");
776 vif
->disabled
= true;
777 xenvif_kick_thread(vif
);
780 static int xenvif_count_requests(struct xenvif
*vif
,
781 struct xen_netif_tx_request
*first
,
782 struct xen_netif_tx_request
*txp
,
785 RING_IDX cons
= vif
->tx
.req_cons
;
790 if (!(first
->flags
& XEN_NETTXF_more_data
))
794 struct xen_netif_tx_request dropped_tx
= { 0 };
796 if (slots
>= work_to_do
) {
798 "Asked for %d slots but exceeds this limit\n",
800 xenvif_fatal_tx_err(vif
);
804 /* This guest is really using too many slots and is
805 * considered malicious.
807 if (unlikely(slots
>= fatal_skb_slots
)) {
809 "Malicious frontend using %d slots, threshold %u\n",
810 slots
, fatal_skb_slots
);
811 xenvif_fatal_tx_err(vif
);
815 /* Xen network protocol had implicit dependency on
816 * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to
817 * the historical MAX_SKB_FRAGS value 18 to honor the
818 * same behavior as before. Any packet using more than
819 * 18 slots but less than fatal_skb_slots slots is
822 if (!drop_err
&& slots
>= XEN_NETBK_LEGACY_SLOTS_MAX
) {
825 "Too many slots (%d) exceeding limit (%d), dropping packet\n",
826 slots
, XEN_NETBK_LEGACY_SLOTS_MAX
);
833 memcpy(txp
, RING_GET_REQUEST(&vif
->tx
, cons
+ slots
),
836 /* If the guest submitted a frame >= 64 KiB then
837 * first->size overflowed and following slots will
838 * appear to be larger than the frame.
840 * This cannot be fatal error as there are buggy
841 * frontends that do this.
843 * Consume all slots and drop the packet.
845 if (!drop_err
&& txp
->size
> first
->size
) {
848 "Invalid tx request, slot size %u > remaining size %u\n",
849 txp
->size
, first
->size
);
853 first
->size
-= txp
->size
;
856 if (unlikely((txp
->offset
+ txp
->size
) > PAGE_SIZE
)) {
857 netdev_err(vif
->dev
, "Cross page boundary, txp->offset: %x, size: %u\n",
858 txp
->offset
, txp
->size
);
859 xenvif_fatal_tx_err(vif
);
863 more_data
= txp
->flags
& XEN_NETTXF_more_data
;
871 xenvif_tx_err(vif
, first
, cons
+ slots
);
879 struct xenvif_tx_cb
{
883 #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
885 static inline void xenvif_tx_create_map_op(struct xenvif
*vif
,
887 struct xen_netif_tx_request
*txp
,
888 struct gnttab_map_grant_ref
*mop
)
890 vif
->pages_to_map
[mop
-vif
->tx_map_ops
] = vif
->mmap_pages
[pending_idx
];
891 gnttab_set_map_op(mop
, idx_to_kaddr(vif
, pending_idx
),
892 GNTMAP_host_map
| GNTMAP_readonly
,
893 txp
->gref
, vif
->domid
);
895 memcpy(&vif
->pending_tx_info
[pending_idx
].req
, txp
,
899 static inline struct sk_buff
*xenvif_alloc_skb(unsigned int size
)
901 struct sk_buff
*skb
=
902 alloc_skb(size
+ NET_SKB_PAD
+ NET_IP_ALIGN
,
903 GFP_ATOMIC
| __GFP_NOWARN
);
904 if (unlikely(skb
== NULL
))
907 /* Packets passed to netif_rx() must have some headroom. */
908 skb_reserve(skb
, NET_SKB_PAD
+ NET_IP_ALIGN
);
910 /* Initialize it here to avoid later surprises */
911 skb_shinfo(skb
)->destructor_arg
= NULL
;
916 static struct gnttab_map_grant_ref
*xenvif_get_requests(struct xenvif
*vif
,
918 struct xen_netif_tx_request
*txp
,
919 struct gnttab_map_grant_ref
*gop
)
921 struct skb_shared_info
*shinfo
= skb_shinfo(skb
);
922 skb_frag_t
*frags
= shinfo
->frags
;
923 u16 pending_idx
= XENVIF_TX_CB(skb
)->pending_idx
;
925 pending_ring_idx_t index
;
926 unsigned int nr_slots
, frag_overflow
= 0;
928 /* At this point shinfo->nr_frags is in fact the number of
929 * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
931 if (shinfo
->nr_frags
> MAX_SKB_FRAGS
) {
932 frag_overflow
= shinfo
->nr_frags
- MAX_SKB_FRAGS
;
933 BUG_ON(frag_overflow
> MAX_SKB_FRAGS
);
934 shinfo
->nr_frags
= MAX_SKB_FRAGS
;
936 nr_slots
= shinfo
->nr_frags
;
938 /* Skip first skb fragment if it is on same page as header fragment. */
939 start
= (frag_get_pending_idx(&shinfo
->frags
[0]) == pending_idx
);
941 for (shinfo
->nr_frags
= start
; shinfo
->nr_frags
< nr_slots
;
942 shinfo
->nr_frags
++, txp
++, gop
++) {
943 index
= pending_index(vif
->pending_cons
++);
944 pending_idx
= vif
->pending_ring
[index
];
945 xenvif_tx_create_map_op(vif
, pending_idx
, txp
, gop
);
946 frag_set_pending_idx(&frags
[shinfo
->nr_frags
], pending_idx
);
950 struct sk_buff
*nskb
= xenvif_alloc_skb(0);
951 if (unlikely(nskb
== NULL
)) {
954 "Can't allocate the frag_list skb.\n");
958 shinfo
= skb_shinfo(nskb
);
959 frags
= shinfo
->frags
;
961 for (shinfo
->nr_frags
= 0; shinfo
->nr_frags
< frag_overflow
;
962 shinfo
->nr_frags
++, txp
++, gop
++) {
963 index
= pending_index(vif
->pending_cons
++);
964 pending_idx
= vif
->pending_ring
[index
];
965 xenvif_tx_create_map_op(vif
, pending_idx
, txp
, gop
);
966 frag_set_pending_idx(&frags
[shinfo
->nr_frags
],
970 skb_shinfo(skb
)->frag_list
= nskb
;
976 static inline void xenvif_grant_handle_set(struct xenvif
*vif
,
978 grant_handle_t handle
)
980 if (unlikely(vif
->grant_tx_handle
[pending_idx
] !=
981 NETBACK_INVALID_HANDLE
)) {
983 "Trying to overwrite active handle! pending_idx: %x\n",
987 vif
->grant_tx_handle
[pending_idx
] = handle
;
990 static inline void xenvif_grant_handle_reset(struct xenvif
*vif
,
993 if (unlikely(vif
->grant_tx_handle
[pending_idx
] ==
994 NETBACK_INVALID_HANDLE
)) {
996 "Trying to unmap invalid handle! pending_idx: %x\n",
1000 vif
->grant_tx_handle
[pending_idx
] = NETBACK_INVALID_HANDLE
;
1003 static int xenvif_tx_check_gop(struct xenvif
*vif
,
1004 struct sk_buff
*skb
,
1005 struct gnttab_map_grant_ref
**gopp_map
,
1006 struct gnttab_copy
**gopp_copy
)
1008 struct gnttab_map_grant_ref
*gop_map
= *gopp_map
;
1009 u16 pending_idx
= XENVIF_TX_CB(skb
)->pending_idx
;
1010 struct skb_shared_info
*shinfo
= skb_shinfo(skb
);
1011 int nr_frags
= shinfo
->nr_frags
;
1013 struct sk_buff
*first_skb
= NULL
;
1015 /* Check status of header. */
1016 err
= (*gopp_copy
)->status
;
1018 if (unlikely(err
)) {
1019 if (net_ratelimit())
1020 netdev_dbg(vif
->dev
,
1021 "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
1022 (*gopp_copy
)->status
,
1024 (*gopp_copy
)->source
.u
.ref
);
1025 xenvif_idx_release(vif
, pending_idx
, XEN_NETIF_RSP_ERROR
);
1029 for (i
= 0; i
< nr_frags
; i
++, gop_map
++) {
1032 pending_idx
= frag_get_pending_idx(&shinfo
->frags
[i
]);
1034 /* Check error status: if okay then remember grant handle. */
1035 newerr
= gop_map
->status
;
1037 if (likely(!newerr
)) {
1038 xenvif_grant_handle_set(vif
,
1041 /* Had a previous error? Invalidate this fragment. */
1043 xenvif_idx_unmap(vif
, pending_idx
);
1047 /* Error on this fragment: respond to client with an error. */
1048 if (net_ratelimit())
1049 netdev_dbg(vif
->dev
,
1050 "Grant map of %d. frag failed! status: %d pending_idx: %u ref: %u\n",
1055 xenvif_idx_release(vif
, pending_idx
, XEN_NETIF_RSP_ERROR
);
1057 /* Not the first error? Preceding frags already invalidated. */
1060 /* First error: invalidate preceding fragments. */
1061 for (j
= 0; j
< i
; j
++) {
1062 pending_idx
= frag_get_pending_idx(&shinfo
->frags
[j
]);
1063 xenvif_idx_unmap(vif
, pending_idx
);
1066 /* Remember the error: invalidate all subsequent fragments. */
1070 if (skb_has_frag_list(skb
)) {
1072 skb
= shinfo
->frag_list
;
1073 shinfo
= skb_shinfo(skb
);
1074 nr_frags
= shinfo
->nr_frags
;
1079 /* There was a mapping error in the frag_list skb. We have to unmap
1080 * the first skb's frags
1082 if (first_skb
&& err
) {
1084 shinfo
= skb_shinfo(first_skb
);
1085 for (j
= 0; j
< shinfo
->nr_frags
; j
++) {
1086 pending_idx
= frag_get_pending_idx(&shinfo
->frags
[j
]);
1087 xenvif_idx_unmap(vif
, pending_idx
);
1091 *gopp_map
= gop_map
;
1095 static void xenvif_fill_frags(struct xenvif
*vif
, struct sk_buff
*skb
)
1097 struct skb_shared_info
*shinfo
= skb_shinfo(skb
);
1098 int nr_frags
= shinfo
->nr_frags
;
1100 u16 prev_pending_idx
= INVALID_PENDING_IDX
;
1102 for (i
= 0; i
< nr_frags
; i
++) {
1103 skb_frag_t
*frag
= shinfo
->frags
+ i
;
1104 struct xen_netif_tx_request
*txp
;
1108 pending_idx
= frag_get_pending_idx(frag
);
1110 /* If this is not the first frag, chain it to the previous*/
1111 if (prev_pending_idx
== INVALID_PENDING_IDX
)
1112 skb_shinfo(skb
)->destructor_arg
=
1113 &callback_param(vif
, pending_idx
);
1115 callback_param(vif
, prev_pending_idx
).ctx
=
1116 &callback_param(vif
, pending_idx
);
1118 callback_param(vif
, pending_idx
).ctx
= NULL
;
1119 prev_pending_idx
= pending_idx
;
1121 txp
= &vif
->pending_tx_info
[pending_idx
].req
;
1122 page
= virt_to_page(idx_to_kaddr(vif
, pending_idx
));
1123 __skb_fill_page_desc(skb
, i
, page
, txp
->offset
, txp
->size
);
1124 skb
->len
+= txp
->size
;
1125 skb
->data_len
+= txp
->size
;
1126 skb
->truesize
+= txp
->size
;
1128 /* Take an extra reference to offset network stack's put_page */
1129 get_page(vif
->mmap_pages
[pending_idx
]);
1131 /* FIXME: __skb_fill_page_desc set this to true because page->pfmemalloc
1132 * overlaps with "index", and "mapping" is not set. I think mapping
1133 * should be set. If delivered to local stack, it would drop this
1134 * skb in sk_filter unless the socket has the right to use it.
1136 skb
->pfmemalloc
= false;
1139 static int xenvif_get_extras(struct xenvif
*vif
,
1140 struct xen_netif_extra_info
*extras
,
1143 struct xen_netif_extra_info extra
;
1144 RING_IDX cons
= vif
->tx
.req_cons
;
1147 if (unlikely(work_to_do
-- <= 0)) {
1148 netdev_err(vif
->dev
, "Missing extra info\n");
1149 xenvif_fatal_tx_err(vif
);
1153 memcpy(&extra
, RING_GET_REQUEST(&vif
->tx
, cons
),
1155 if (unlikely(!extra
.type
||
1156 extra
.type
>= XEN_NETIF_EXTRA_TYPE_MAX
)) {
1157 vif
->tx
.req_cons
= ++cons
;
1158 netdev_err(vif
->dev
,
1159 "Invalid extra type: %d\n", extra
.type
);
1160 xenvif_fatal_tx_err(vif
);
1164 memcpy(&extras
[extra
.type
- 1], &extra
, sizeof(extra
));
1165 vif
->tx
.req_cons
= ++cons
;
1166 } while (extra
.flags
& XEN_NETIF_EXTRA_FLAG_MORE
);
1171 static int xenvif_set_skb_gso(struct xenvif
*vif
,
1172 struct sk_buff
*skb
,
1173 struct xen_netif_extra_info
*gso
)
1175 if (!gso
->u
.gso
.size
) {
1176 netdev_err(vif
->dev
, "GSO size must not be zero.\n");
1177 xenvif_fatal_tx_err(vif
);
1181 switch (gso
->u
.gso
.type
) {
1182 case XEN_NETIF_GSO_TYPE_TCPV4
:
1183 skb_shinfo(skb
)->gso_type
= SKB_GSO_TCPV4
;
1185 case XEN_NETIF_GSO_TYPE_TCPV6
:
1186 skb_shinfo(skb
)->gso_type
= SKB_GSO_TCPV6
;
1189 netdev_err(vif
->dev
, "Bad GSO type %d.\n", gso
->u
.gso
.type
);
1190 xenvif_fatal_tx_err(vif
);
1194 skb_shinfo(skb
)->gso_size
= gso
->u
.gso
.size
;
1195 /* gso_segs will be calculated later */
1200 static int checksum_setup(struct xenvif
*vif
, struct sk_buff
*skb
)
1202 bool recalculate_partial_csum
= false;
1204 /* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
1205 * peers can fail to set NETRXF_csum_blank when sending a GSO
1206 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
1207 * recalculate the partial checksum.
1209 if (skb
->ip_summed
!= CHECKSUM_PARTIAL
&& skb_is_gso(skb
)) {
1210 vif
->rx_gso_checksum_fixup
++;
1211 skb
->ip_summed
= CHECKSUM_PARTIAL
;
1212 recalculate_partial_csum
= true;
1215 /* A non-CHECKSUM_PARTIAL SKB does not require setup. */
1216 if (skb
->ip_summed
!= CHECKSUM_PARTIAL
)
1219 return skb_checksum_setup(skb
, recalculate_partial_csum
);
1222 static bool tx_credit_exceeded(struct xenvif
*vif
, unsigned size
)
1224 u64 now
= get_jiffies_64();
1225 u64 next_credit
= vif
->credit_window_start
+
1226 msecs_to_jiffies(vif
->credit_usec
/ 1000);
1228 /* Timer could already be pending in rare cases. */
1229 if (timer_pending(&vif
->credit_timeout
))
1232 /* Passed the point where we can replenish credit? */
1233 if (time_after_eq64(now
, next_credit
)) {
1234 vif
->credit_window_start
= now
;
1238 /* Still too big to send right now? Set a callback. */
1239 if (size
> vif
->remaining_credit
) {
1240 vif
->credit_timeout
.data
=
1242 vif
->credit_timeout
.function
=
1244 mod_timer(&vif
->credit_timeout
,
1246 vif
->credit_window_start
= next_credit
;
1254 static void xenvif_tx_build_gops(struct xenvif
*vif
,
1259 struct gnttab_map_grant_ref
*gop
= vif
->tx_map_ops
, *request_gop
;
1260 struct sk_buff
*skb
;
1263 while (skb_queue_len(&vif
->tx_queue
) < budget
) {
1264 struct xen_netif_tx_request txreq
;
1265 struct xen_netif_tx_request txfrags
[XEN_NETBK_LEGACY_SLOTS_MAX
];
1266 struct xen_netif_extra_info extras
[XEN_NETIF_EXTRA_TYPE_MAX
-1];
1270 unsigned int data_len
;
1271 pending_ring_idx_t index
;
1273 if (vif
->tx
.sring
->req_prod
- vif
->tx
.req_cons
>
1274 XEN_NETIF_TX_RING_SIZE
) {
1275 netdev_err(vif
->dev
,
1276 "Impossible number of requests. "
1277 "req_prod %d, req_cons %d, size %ld\n",
1278 vif
->tx
.sring
->req_prod
, vif
->tx
.req_cons
,
1279 XEN_NETIF_TX_RING_SIZE
);
1280 xenvif_fatal_tx_err(vif
);
1284 work_to_do
= RING_HAS_UNCONSUMED_REQUESTS(&vif
->tx
);
1288 idx
= vif
->tx
.req_cons
;
1289 rmb(); /* Ensure that we see the request before we copy it. */
1290 memcpy(&txreq
, RING_GET_REQUEST(&vif
->tx
, idx
), sizeof(txreq
));
1292 /* Credit-based scheduling. */
1293 if (txreq
.size
> vif
->remaining_credit
&&
1294 tx_credit_exceeded(vif
, txreq
.size
))
1297 vif
->remaining_credit
-= txreq
.size
;
1300 vif
->tx
.req_cons
= ++idx
;
1302 memset(extras
, 0, sizeof(extras
));
1303 if (txreq
.flags
& XEN_NETTXF_extra_info
) {
1304 work_to_do
= xenvif_get_extras(vif
, extras
,
1306 idx
= vif
->tx
.req_cons
;
1307 if (unlikely(work_to_do
< 0))
1311 ret
= xenvif_count_requests(vif
, &txreq
, txfrags
, work_to_do
);
1312 if (unlikely(ret
< 0))
1317 if (unlikely(txreq
.size
< ETH_HLEN
)) {
1318 netdev_dbg(vif
->dev
,
1319 "Bad packet size: %d\n", txreq
.size
);
1320 xenvif_tx_err(vif
, &txreq
, idx
);
1324 /* No crossing a page as the payload mustn't fragment. */
1325 if (unlikely((txreq
.offset
+ txreq
.size
) > PAGE_SIZE
)) {
1326 netdev_err(vif
->dev
,
1327 "txreq.offset: %x, size: %u, end: %lu\n",
1328 txreq
.offset
, txreq
.size
,
1329 (txreq
.offset
&~PAGE_MASK
) + txreq
.size
);
1330 xenvif_fatal_tx_err(vif
);
1334 index
= pending_index(vif
->pending_cons
);
1335 pending_idx
= vif
->pending_ring
[index
];
1337 data_len
= (txreq
.size
> PKT_PROT_LEN
&&
1338 ret
< XEN_NETBK_LEGACY_SLOTS_MAX
) ?
1339 PKT_PROT_LEN
: txreq
.size
;
1341 skb
= xenvif_alloc_skb(data_len
);
1342 if (unlikely(skb
== NULL
)) {
1343 netdev_dbg(vif
->dev
,
1344 "Can't allocate a skb in start_xmit.\n");
1345 xenvif_tx_err(vif
, &txreq
, idx
);
1349 if (extras
[XEN_NETIF_EXTRA_TYPE_GSO
- 1].type
) {
1350 struct xen_netif_extra_info
*gso
;
1351 gso
= &extras
[XEN_NETIF_EXTRA_TYPE_GSO
- 1];
1353 if (xenvif_set_skb_gso(vif
, skb
, gso
)) {
1354 /* Failure in xenvif_set_skb_gso is fatal. */
1360 XENVIF_TX_CB(skb
)->pending_idx
= pending_idx
;
1362 __skb_put(skb
, data_len
);
1363 vif
->tx_copy_ops
[*copy_ops
].source
.u
.ref
= txreq
.gref
;
1364 vif
->tx_copy_ops
[*copy_ops
].source
.domid
= vif
->domid
;
1365 vif
->tx_copy_ops
[*copy_ops
].source
.offset
= txreq
.offset
;
1367 vif
->tx_copy_ops
[*copy_ops
].dest
.u
.gmfn
=
1368 virt_to_mfn(skb
->data
);
1369 vif
->tx_copy_ops
[*copy_ops
].dest
.domid
= DOMID_SELF
;
1370 vif
->tx_copy_ops
[*copy_ops
].dest
.offset
=
1371 offset_in_page(skb
->data
);
1373 vif
->tx_copy_ops
[*copy_ops
].len
= data_len
;
1374 vif
->tx_copy_ops
[*copy_ops
].flags
= GNTCOPY_source_gref
;
1378 skb_shinfo(skb
)->nr_frags
= ret
;
1379 if (data_len
< txreq
.size
) {
1380 skb_shinfo(skb
)->nr_frags
++;
1381 frag_set_pending_idx(&skb_shinfo(skb
)->frags
[0],
1383 xenvif_tx_create_map_op(vif
, pending_idx
, &txreq
, gop
);
1386 frag_set_pending_idx(&skb_shinfo(skb
)->frags
[0],
1387 INVALID_PENDING_IDX
);
1388 memcpy(&vif
->pending_tx_info
[pending_idx
].req
, &txreq
,
1392 vif
->pending_cons
++;
1394 request_gop
= xenvif_get_requests(vif
, skb
, txfrags
, gop
);
1395 if (request_gop
== NULL
) {
1397 xenvif_tx_err(vif
, &txreq
, idx
);
1402 __skb_queue_tail(&vif
->tx_queue
, skb
);
1404 vif
->tx
.req_cons
= idx
;
1406 if (((gop
-vif
->tx_map_ops
) >= ARRAY_SIZE(vif
->tx_map_ops
)) ||
1407 (*copy_ops
>= ARRAY_SIZE(vif
->tx_copy_ops
)))
1411 (*map_ops
) = gop
- vif
->tx_map_ops
;
/* Consolidate an skb that has a frag_list into a brand new one with local
 * pages on frags. Returns 0 on success, or -ENOMEM if new pages cannot be
 * allocated.
 */
1418 static int xenvif_handle_frag_list(struct xenvif
*vif
, struct sk_buff
*skb
)
1420 unsigned int offset
= skb_headlen(skb
);
1421 skb_frag_t frags
[MAX_SKB_FRAGS
];
1423 struct ubuf_info
*uarg
;
1424 struct sk_buff
*nskb
= skb_shinfo(skb
)->frag_list
;
1426 vif
->tx_zerocopy_sent
+= 2;
1427 vif
->tx_frag_overflow
++;
1429 xenvif_fill_frags(vif
, nskb
);
1430 /* Subtract frags size, we will correct it later */
1431 skb
->truesize
-= skb
->data_len
;
1432 skb
->len
+= nskb
->len
;
1433 skb
->data_len
+= nskb
->len
;
1435 /* create a brand new frags array and coalesce there */
1436 for (i
= 0; offset
< skb
->len
; i
++) {
1440 BUG_ON(i
>= MAX_SKB_FRAGS
);
1441 page
= alloc_page(GFP_ATOMIC
|__GFP_COLD
);
1444 skb
->truesize
+= skb
->data_len
;
1445 for (j
= 0; j
< i
; j
++)
1446 put_page(frags
[j
].page
.p
);
1450 if (offset
+ PAGE_SIZE
< skb
->len
)
1453 len
= skb
->len
- offset
;
1454 if (skb_copy_bits(skb
, offset
, page_address(page
), len
))
1458 frags
[i
].page
.p
= page
;
1459 frags
[i
].page_offset
= 0;
1460 skb_frag_size_set(&frags
[i
], len
);
1462 /* swap out with old one */
1463 memcpy(skb_shinfo(skb
)->frags
,
1465 i
* sizeof(skb_frag_t
));
1466 skb_shinfo(skb
)->nr_frags
= i
;
1467 skb
->truesize
+= i
* PAGE_SIZE
;
1469 /* remove traces of mapped pages and frag_list */
1470 skb_frag_list_init(skb
);
1471 uarg
= skb_shinfo(skb
)->destructor_arg
;
1472 uarg
->callback(uarg
, true);
1473 skb_shinfo(skb
)->destructor_arg
= NULL
;
1475 skb_shinfo(nskb
)->tx_flags
|= SKBTX_DEV_ZEROCOPY
;
1481 static int xenvif_tx_submit(struct xenvif
*vif
)
1483 struct gnttab_map_grant_ref
*gop_map
= vif
->tx_map_ops
;
1484 struct gnttab_copy
*gop_copy
= vif
->tx_copy_ops
;
1485 struct sk_buff
*skb
;
1488 while ((skb
= __skb_dequeue(&vif
->tx_queue
)) != NULL
) {
1489 struct xen_netif_tx_request
*txp
;
1493 pending_idx
= XENVIF_TX_CB(skb
)->pending_idx
;
1494 txp
= &vif
->pending_tx_info
[pending_idx
].req
;
1496 /* Check the remap error code. */
1497 if (unlikely(xenvif_tx_check_gop(vif
, skb
, &gop_map
, &gop_copy
))) {
1498 skb_shinfo(skb
)->nr_frags
= 0;
1503 data_len
= skb
->len
;
1504 callback_param(vif
, pending_idx
).ctx
= NULL
;
1505 if (data_len
< txp
->size
) {
1506 /* Append the packet payload as a fragment. */
1507 txp
->offset
+= data_len
;
1508 txp
->size
-= data_len
;
1510 /* Schedule a response immediately. */
1511 xenvif_idx_release(vif
, pending_idx
,
1512 XEN_NETIF_RSP_OKAY
);
1515 if (txp
->flags
& XEN_NETTXF_csum_blank
)
1516 skb
->ip_summed
= CHECKSUM_PARTIAL
;
1517 else if (txp
->flags
& XEN_NETTXF_data_validated
)
1518 skb
->ip_summed
= CHECKSUM_UNNECESSARY
;
1520 xenvif_fill_frags(vif
, skb
);
1522 if (unlikely(skb_has_frag_list(skb
))) {
1523 if (xenvif_handle_frag_list(vif
, skb
)) {
1524 if (net_ratelimit())
1525 netdev_err(vif
->dev
,
1526 "Not enough memory to consolidate frag_list!\n");
1527 skb_shinfo(skb
)->tx_flags
|= SKBTX_DEV_ZEROCOPY
;
1533 if (skb_is_nonlinear(skb
) && skb_headlen(skb
) < PKT_PROT_LEN
) {
1534 int target
= min_t(int, skb
->len
, PKT_PROT_LEN
);
1535 __pskb_pull_tail(skb
, target
- skb_headlen(skb
));
1538 skb
->dev
= vif
->dev
;
1539 skb
->protocol
= eth_type_trans(skb
, skb
->dev
);
1540 skb_reset_network_header(skb
);
1542 if (checksum_setup(vif
, skb
)) {
1543 netdev_dbg(vif
->dev
,
1544 "Can't setup checksum in net_tx_action\n");
1545 /* We have to set this flag to trigger the callback */
1546 if (skb_shinfo(skb
)->destructor_arg
)
1547 skb_shinfo(skb
)->tx_flags
|= SKBTX_DEV_ZEROCOPY
;
1552 skb_probe_transport_header(skb
, 0);
1554 /* If the packet is GSO then we will have just set up the
1555 * transport header offset in checksum_setup so it's now
1556 * straightforward to calculate gso_segs.
1558 if (skb_is_gso(skb
)) {
1559 int mss
= skb_shinfo(skb
)->gso_size
;
1560 int hdrlen
= skb_transport_header(skb
) -
1561 skb_mac_header(skb
) +
1564 skb_shinfo(skb
)->gso_segs
=
1565 DIV_ROUND_UP(skb
->len
- hdrlen
, mss
);
1568 vif
->dev
->stats
.rx_bytes
+= skb
->len
;
1569 vif
->dev
->stats
.rx_packets
++;
1573 /* Set this flag right before netif_receive_skb, otherwise
1574 * someone might think this packet already left netback, and
1575 * do a skb_copy_ubufs while we are still in control of the
1576 * skb. E.g. the __pskb_pull_tail earlier can do such thing.
1578 if (skb_shinfo(skb
)->destructor_arg
) {
1579 skb_shinfo(skb
)->tx_flags
|= SKBTX_DEV_ZEROCOPY
;
1580 vif
->tx_zerocopy_sent
++;
1583 netif_receive_skb(skb
);
1589 void xenvif_zerocopy_callback(struct ubuf_info
*ubuf
, bool zerocopy_success
)
1591 unsigned long flags
;
1592 pending_ring_idx_t index
;
1593 struct xenvif
*vif
= ubuf_to_vif(ubuf
);
1595 /* This is the only place where we grab this lock, to protect callbacks
1598 spin_lock_irqsave(&vif
->callback_lock
, flags
);
1600 u16 pending_idx
= ubuf
->desc
;
1601 ubuf
= (struct ubuf_info
*) ubuf
->ctx
;
1602 BUG_ON(vif
->dealloc_prod
- vif
->dealloc_cons
>=
1604 index
= pending_index(vif
->dealloc_prod
);
1605 vif
->dealloc_ring
[index
] = pending_idx
;
1606 /* Sync with xenvif_tx_dealloc_action:
1607 * insert idx then incr producer.
1610 vif
->dealloc_prod
++;
1612 wake_up(&vif
->dealloc_wq
);
1613 spin_unlock_irqrestore(&vif
->callback_lock
, flags
);
1615 if (likely(zerocopy_success
))
1616 vif
->tx_zerocopy_success
++;
1618 vif
->tx_zerocopy_fail
++;
1621 static inline void xenvif_tx_dealloc_action(struct xenvif
*vif
)
1623 struct gnttab_unmap_grant_ref
*gop
;
1624 pending_ring_idx_t dc
, dp
;
1625 u16 pending_idx
, pending_idx_release
[MAX_PENDING_REQS
];
1628 dc
= vif
->dealloc_cons
;
1629 gop
= vif
->tx_unmap_ops
;
1631 /* Free up any grants we have finished using */
1633 dp
= vif
->dealloc_prod
;
1635 /* Ensure we see all indices enqueued by all
1636 * xenvif_zerocopy_callback().
1641 BUG_ON(gop
- vif
->tx_unmap_ops
> MAX_PENDING_REQS
);
1643 vif
->dealloc_ring
[pending_index(dc
++)];
1645 pending_idx_release
[gop
-vif
->tx_unmap_ops
] =
1647 vif
->pages_to_unmap
[gop
-vif
->tx_unmap_ops
] =
1648 vif
->mmap_pages
[pending_idx
];
1649 gnttab_set_unmap_op(gop
,
1650 idx_to_kaddr(vif
, pending_idx
),
1652 vif
->grant_tx_handle
[pending_idx
]);
1653 xenvif_grant_handle_reset(vif
, pending_idx
);
1657 } while (dp
!= vif
->dealloc_prod
);
1659 vif
->dealloc_cons
= dc
;
1661 if (gop
- vif
->tx_unmap_ops
> 0) {
1663 ret
= gnttab_unmap_refs(vif
->tx_unmap_ops
,
1665 vif
->pages_to_unmap
,
1666 gop
- vif
->tx_unmap_ops
);
1668 netdev_err(vif
->dev
, "Unmap fail: nr_ops %tx ret %d\n",
1669 gop
- vif
->tx_unmap_ops
, ret
);
1670 for (i
= 0; i
< gop
- vif
->tx_unmap_ops
; ++i
) {
1671 if (gop
[i
].status
!= GNTST_okay
)
1672 netdev_err(vif
->dev
,
1673 " host_addr: %llx handle: %x status: %d\n",
1682 for (i
= 0; i
< gop
- vif
->tx_unmap_ops
; ++i
)
1683 xenvif_idx_release(vif
, pending_idx_release
[i
],
1684 XEN_NETIF_RSP_OKAY
);
1688 /* Called after netfront has transmitted */
1689 int xenvif_tx_action(struct xenvif
*vif
, int budget
)
1691 unsigned nr_mops
, nr_cops
= 0;
1694 if (unlikely(!tx_work_todo(vif
)))
1697 xenvif_tx_build_gops(vif
, budget
, &nr_cops
, &nr_mops
);
1702 gnttab_batch_copy(vif
->tx_copy_ops
, nr_cops
);
1704 ret
= gnttab_map_refs(vif
->tx_map_ops
,
1711 work_done
= xenvif_tx_submit(vif
);
1716 static void xenvif_idx_release(struct xenvif
*vif
, u16 pending_idx
,
1719 struct pending_tx_info
*pending_tx_info
;
1720 pending_ring_idx_t index
;
1721 unsigned long flags
;
1723 pending_tx_info
= &vif
->pending_tx_info
[pending_idx
];
1724 spin_lock_irqsave(&vif
->response_lock
, flags
);
1725 make_tx_response(vif
, &pending_tx_info
->req
, status
);
1726 index
= pending_index(vif
->pending_prod
);
1727 vif
->pending_ring
[index
] = pending_idx
;
1728 /* TX shouldn't use the index before we give it back here */
1730 vif
->pending_prod
++;
1731 spin_unlock_irqrestore(&vif
->response_lock
, flags
);
1735 static void make_tx_response(struct xenvif
*vif
,
1736 struct xen_netif_tx_request
*txp
,
1739 RING_IDX i
= vif
->tx
.rsp_prod_pvt
;
1740 struct xen_netif_tx_response
*resp
;
1743 resp
= RING_GET_RESPONSE(&vif
->tx
, i
);
1747 if (txp
->flags
& XEN_NETTXF_extra_info
)
1748 RING_GET_RESPONSE(&vif
->tx
, ++i
)->status
= XEN_NETIF_RSP_NULL
;
1750 vif
->tx
.rsp_prod_pvt
= ++i
;
1751 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif
->tx
, notify
);
1753 notify_remote_via_irq(vif
->tx_irq
);
1756 static struct xen_netif_rx_response
*make_rx_response(struct xenvif
*vif
,
1763 RING_IDX i
= vif
->rx
.rsp_prod_pvt
;
1764 struct xen_netif_rx_response
*resp
;
1766 resp
= RING_GET_RESPONSE(&vif
->rx
, i
);
1767 resp
->offset
= offset
;
1768 resp
->flags
= flags
;
1770 resp
->status
= (s16
)size
;
1772 resp
->status
= (s16
)st
;
1774 vif
->rx
.rsp_prod_pvt
= ++i
;
1779 void xenvif_idx_unmap(struct xenvif
*vif
, u16 pending_idx
)
1782 struct gnttab_unmap_grant_ref tx_unmap_op
;
1784 gnttab_set_unmap_op(&tx_unmap_op
,
1785 idx_to_kaddr(vif
, pending_idx
),
1787 vif
->grant_tx_handle
[pending_idx
]);
1788 xenvif_grant_handle_reset(vif
, pending_idx
);
1790 ret
= gnttab_unmap_refs(&tx_unmap_op
, NULL
,
1791 &vif
->mmap_pages
[pending_idx
], 1);
1793 netdev_err(vif
->dev
,
1794 "Unmap fail: ret: %d pending_idx: %d host_addr: %llx handle: %x status: %d\n",
1797 tx_unmap_op
.host_addr
,
1799 tx_unmap_op
.status
);
1803 xenvif_idx_release(vif
, pending_idx
, XEN_NETIF_RSP_OKAY
);
1806 static inline int rx_work_todo(struct xenvif
*vif
)
1808 return (!skb_queue_empty(&vif
->rx_queue
) &&
1809 xenvif_rx_ring_slots_available(vif
, vif
->rx_last_skb_slots
)) ||
1810 vif
->rx_queue_purge
;
1813 static inline int tx_work_todo(struct xenvif
*vif
)
1816 if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif
->tx
)))
1822 static inline bool tx_dealloc_work_todo(struct xenvif
*vif
)
1824 return vif
->dealloc_cons
!= vif
->dealloc_prod
;
1827 void xenvif_unmap_frontend_rings(struct xenvif
*vif
)
1830 xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif
),
1833 xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif
),
1837 int xenvif_map_frontend_rings(struct xenvif
*vif
,
1838 grant_ref_t tx_ring_ref
,
1839 grant_ref_t rx_ring_ref
)
1842 struct xen_netif_tx_sring
*txs
;
1843 struct xen_netif_rx_sring
*rxs
;
1847 err
= xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif
),
1848 tx_ring_ref
, &addr
);
1852 txs
= (struct xen_netif_tx_sring
*)addr
;
1853 BACK_RING_INIT(&vif
->tx
, txs
, PAGE_SIZE
);
1855 err
= xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif
),
1856 rx_ring_ref
, &addr
);
1860 rxs
= (struct xen_netif_rx_sring
*)addr
;
1861 BACK_RING_INIT(&vif
->rx
, rxs
, PAGE_SIZE
);
1866 xenvif_unmap_frontend_rings(vif
);
1870 void xenvif_stop_queue(struct xenvif
*vif
)
1872 if (!vif
->can_queue
)
1875 netif_stop_queue(vif
->dev
);
1878 static void xenvif_start_queue(struct xenvif
*vif
)
1880 if (xenvif_schedulable(vif
))
1881 netif_wake_queue(vif
->dev
);
1884 int xenvif_kthread_guest_rx(void *data
)
1886 struct xenvif
*vif
= data
;
1887 struct sk_buff
*skb
;
1889 while (!kthread_should_stop()) {
1890 wait_event_interruptible(vif
->wq
,
1891 rx_work_todo(vif
) ||
1893 kthread_should_stop());
1895 /* This frontend is found to be rogue, disable it in
1896 * kthread context. Currently this is only set when
1897 * netback finds out frontend sends malformed packet,
1898 * but we cannot disable the interface in softirq
1899 * context so we defer it here.
1901 if (unlikely(vif
->disabled
&& netif_carrier_ok(vif
->dev
)))
1902 xenvif_carrier_off(vif
);
1904 if (kthread_should_stop())
1907 if (vif
->rx_queue_purge
) {
1908 skb_queue_purge(&vif
->rx_queue
);
1909 vif
->rx_queue_purge
= false;
1912 if (!skb_queue_empty(&vif
->rx_queue
))
1913 xenvif_rx_action(vif
);
1915 if (skb_queue_empty(&vif
->rx_queue
) &&
1916 netif_queue_stopped(vif
->dev
)) {
1917 del_timer_sync(&vif
->wake_queue
);
1918 xenvif_start_queue(vif
);
1924 /* Bin any remaining skbs */
1925 while ((skb
= skb_dequeue(&vif
->rx_queue
)) != NULL
)
1931 int xenvif_dealloc_kthread(void *data
)
1933 struct xenvif
*vif
= data
;
1935 while (!kthread_should_stop()) {
1936 wait_event_interruptible(vif
->dealloc_wq
,
1937 tx_dealloc_work_todo(vif
) ||
1938 kthread_should_stop());
1939 if (kthread_should_stop())
1942 xenvif_tx_dealloc_action(vif
);
1946 /* Unmap anything remaining*/
1947 if (tx_dealloc_work_todo(vif
))
1948 xenvif_tx_dealloc_action(vif
);
1953 static int __init
netback_init(void)
1960 if (fatal_skb_slots
< XEN_NETBK_LEGACY_SLOTS_MAX
) {
1961 pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
1962 fatal_skb_slots
, XEN_NETBK_LEGACY_SLOTS_MAX
);
1963 fatal_skb_slots
= XEN_NETBK_LEGACY_SLOTS_MAX
;
1966 rc
= xenvif_xenbus_init();
1970 rx_drain_timeout_jiffies
= msecs_to_jiffies(rx_drain_timeout_msecs
);
1978 module_init(netback_init
);
1980 static void __exit
netback_fini(void)
1982 xenvif_xenbus_fini();
1984 module_exit(netback_fini
);
1986 MODULE_LICENSE("Dual BSD/GPL");
1987 MODULE_ALIAS("xen-backend:vif");