/*
 * Back-end of the driver for virtual network devices. This portion of the
 * driver exports a 'unified' network-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  drivers/net/xen-netfront.c
 *
 * Copyright (c) 2002-2005, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "common.h"

#include <linux/kthread.h>
#include <linux/if_vlan.h>
#include <linux/udp.h>
#include <linux/highmem.h>

#include <net/tcp.h>

#include <xen/xen.h>
#include <xen/events.h>
#include <xen/interface/memory.h>

#include <asm/xen/hypercall.h>
#include <asm/xen/page.h>
/* Provide an option to disable split event channels at load time as
 * event channels are limited resource. Split event channels are
 * enabled by default.
 */
bool separate_tx_rx_irq = 1;
module_param(separate_tx_rx_irq, bool, 0644);
/* When guest ring is filled up, qdisc queues the packets for us, but we have
 * to timeout them, otherwise other guests' packets can get stuck in the
 * internal queue for an unbounded time.
 */
unsigned int rx_drain_timeout_msecs = 10000;
module_param(rx_drain_timeout_msecs, uint, 0444);
unsigned int rx_drain_timeout_jiffies;
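/* Illustrative usage (not part of this file): both parameters can be
 * overridden when the module is loaded, e.g. from a dom0 shell:
 *
 *   modprobe xen-netback separate_tx_rx_irq=0 rx_drain_timeout_msecs=5000
 *
 * With mode 0644, separate_tx_rx_irq is also writable at runtime under
 * /sys/module/xen_netback/parameters/, while the 0444 parameters are
 * read-only once loaded.
 */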
/*
 * This is the maximum slots a skb can have. If a guest sends a skb
 * which exceeds this limit it is considered malicious.
 */
#define FATAL_SKB_SLOTS_DEFAULT 20
static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT;
module_param(fatal_skb_slots, uint, 0444);
static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
                               u8 status);

static void make_tx_response(struct xenvif *vif,
                             struct xen_netif_tx_request *txp,
                             s8 st);

static inline int tx_work_todo(struct xenvif *vif);
static inline int rx_work_todo(struct xenvif *vif);

static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
                                                      u16 id,
                                                      s8 st,
                                                      u16 offset,
                                                      u16 size,
                                                      u16 flags);
static inline unsigned long idx_to_pfn(struct xenvif *vif,
                                       u16 idx)
{
        return page_to_pfn(vif->mmap_pages[idx]);
}

static inline unsigned long idx_to_kaddr(struct xenvif *vif,
                                         u16 idx)
{
        return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx));
}
#define callback_param(vif, pending_idx) \
        (vif->pending_tx_info[pending_idx].callback_struct)
/* Find the containing VIF's structure from a pointer in pending_tx_info array
 */
static inline struct xenvif *ubuf_to_vif(struct ubuf_info *ubuf)
{
        u16 pending_idx = ubuf->desc;
        struct pending_tx_info *temp =
                container_of(ubuf, struct pending_tx_info, callback_struct);
        return container_of(temp - pending_idx,
                            struct xenvif,
                            pending_tx_info[0]);
}
/* This is a minimum size for the linear area to avoid lots of
 * calls to __pskb_pull_tail() as we set up checksum offsets. The
 * value 128 was chosen as it covers all IPv4 and most likely
 * IPv6 headers.
 */
#define PKT_PROT_LEN 128
static u16 frag_get_pending_idx(skb_frag_t *frag)
{
        return (u16)frag->page_offset;
}

static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
{
        frag->page_offset = pending_idx;
}
static inline pending_ring_idx_t pending_index(unsigned i)
{
        return i & (MAX_PENDING_REQS-1);
}
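/* Note: the mask-based fold above relies on MAX_PENDING_REQS being a power
 * of two; i & (MAX_PENDING_REQS-1) is then equivalent to
 * i % MAX_PENDING_REQS without the cost of a division.
 */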
bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed)
{
        RING_IDX prod, cons;

        do {
                prod = vif->rx.sring->req_prod;
                cons = vif->rx.req_cons;

                if (prod - cons >= needed)
                        return true;

                vif->rx.sring->req_event = prod + 1;

                /* Make sure event is visible before we check prod
                 * again.
                 */
                mb();
        } while (vif->rx.sring->req_prod != prod);

        return false;
}
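/* The loop above is the usual lock-free "final check" against a Xen shared
 * ring: after finding too few requests it writes req_event so the frontend
 * will notify when new requests are posted, then re-reads req_prod to close
 * the race where requests arrived between the check and the event enable.
 */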
/*
 * Returns true if we should start a new receive buffer instead of
 * adding 'size' bytes to a buffer which currently contains 'offset'
 * bytes.
 */
static bool start_new_rx_buffer(int offset, unsigned long size, int head)
{
        /* simple case: we have completely filled the current buffer. */
        if (offset == MAX_BUFFER_OFFSET)
                return true;

        /*
         * complex case: start a fresh buffer if the current frag
         * would overflow the current buffer but only if:
         *     (i) this frag would fit completely in the next buffer
         * and (ii) there is already some data in the current buffer
         * and (iii) this is not the head buffer.
         *
         * Where:
         * - (i) stops us splitting a frag into two copies
         *   unless the frag is too large for a single buffer.
         * - (ii) stops us from leaving a buffer pointlessly empty.
         * - (iii) stops us leaving the first buffer
         *   empty. Strictly speaking this is already covered
         *   by (ii) but is explicitly checked because
         *   netfront relies on the first buffer being
         *   non-empty and can crash otherwise.
         *
         * This means we will effectively linearise small
         * frags but do not needlessly split large buffers
         * into multiple copies, tending to give large frags
         * their own buffers as before.
         */
        BUG_ON(size > MAX_BUFFER_OFFSET);
        if ((offset + size > MAX_BUFFER_OFFSET) && offset && !head)
                return true;

        return false;
}
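/* A worked example, assuming MAX_BUFFER_OFFSET == PAGE_SIZE == 4096: with
 * offset == 3000, size == 2000 and head == 0 the frag would overflow the
 * current buffer (3000 + 2000 > 4096), the buffer already holds data and is
 * not the head, so a fresh buffer is started and the frag is not split.
 * With offset == 0 the same frag would simply be placed in the current
 * buffer.
 */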
struct netrx_pending_operations {
        unsigned copy_prod, copy_cons;
        unsigned meta_prod, meta_cons;
        struct gnttab_copy *copy;
        struct xenvif_rx_meta *meta;
        int copy_off;
        grant_ref_t copy_gref;
};
static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif,
                                                 struct netrx_pending_operations *npo)
{
        struct xenvif_rx_meta *meta;
        struct xen_netif_rx_request *req;

        req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);

        meta = npo->meta + npo->meta_prod++;
        meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
        meta->gso_size = 0;
        meta->size = 0;
        meta->id = req->id;

        npo->copy_off = 0;
        npo->copy_gref = req->gref;

        return meta;
}
/*
 * Set up the grant operations for this fragment. If it's a flipping
 * interface, we also set up the unmap request from here.
 */
static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
                                 struct netrx_pending_operations *npo,
                                 struct page *page, unsigned long size,
                                 unsigned long offset, int *head,
                                 struct xenvif *foreign_vif,
                                 grant_ref_t foreign_gref)
{
        struct gnttab_copy *copy_gop;
        struct xenvif_rx_meta *meta;
        unsigned long bytes;
        int gso_type = XEN_NETIF_GSO_TYPE_NONE;

        /* Data must not cross a page boundary. */
        BUG_ON(size + offset > PAGE_SIZE<<compound_order(page));

        meta = npo->meta + npo->meta_prod - 1;

        /* Skip unused frames from start of page */
        page += offset >> PAGE_SHIFT;
        offset &= ~PAGE_MASK;

        while (size > 0) {
                BUG_ON(offset >= PAGE_SIZE);
                BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);

                bytes = PAGE_SIZE - offset;

                if (bytes > size)
                        bytes = size;

                if (start_new_rx_buffer(npo->copy_off, bytes, *head)) {
                        /*
                         * Netfront requires there to be some data in the head
                         * buffer.
                         */
                        BUG_ON(*head);

                        meta = get_next_rx_buffer(vif, npo);
                }

                if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
                        bytes = MAX_BUFFER_OFFSET - npo->copy_off;

                copy_gop = npo->copy + npo->copy_prod++;
                copy_gop->flags = GNTCOPY_dest_gref;
                copy_gop->len = bytes;

                if (foreign_vif) {
                        copy_gop->source.domid = foreign_vif->domid;
                        copy_gop->source.u.ref = foreign_gref;
                        copy_gop->flags |= GNTCOPY_source_gref;
                } else {
                        copy_gop->source.domid = DOMID_SELF;
                        copy_gop->source.u.gmfn =
                                virt_to_mfn(page_address(page));
                }
                copy_gop->source.offset = offset;

                copy_gop->dest.domid = vif->domid;
                copy_gop->dest.offset = npo->copy_off;
                copy_gop->dest.u.ref = npo->copy_gref;

                npo->copy_off += bytes;
                meta->size += bytes;

                offset += bytes;
                size -= bytes;

                /* Next frame */
                if (offset == PAGE_SIZE && size) {
                        BUG_ON(!PageCompound(page));
                        page++;
                        offset = 0;
                }

                /* Leave a gap for the GSO descriptor. */
                if (skb_is_gso(skb)) {
                        if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
                                gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
                        else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
                                gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
                }

                if (*head && ((1 << gso_type) & vif->gso_mask))
                        vif->rx.req_cons++;

                *head = 0; /* There must be something in this buffer now. */
        }
}
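/* Sketch of the loop above for a frag that crosses pages, assuming
 * PAGE_SIZE == 4096: a 6000-byte frag at in-page offset 3000 is emitted as
 * grant copies of 1096, then 4096, then 808 bytes, with page++ and the
 * offset reset to 0 at each page boundary; each chunk may additionally be
 * clipped to the space left in the current receive buffer.
 */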
/*
 * Prepare an SKB to be transmitted to the frontend.
 *
 * This function is responsible for allocating grant operations, meta
 * structures, etc.
 *
 * It returns the number of meta structures consumed. The number of
 * ring slots used is always equal to the number of meta slots used
 * plus the number of GSO descriptors used. Currently, we use either
 * zero GSO descriptors (for non-GSO packets) or one descriptor (for
 * frontend-side LRO).
 */
static int xenvif_gop_skb(struct sk_buff *skb,
                          struct netrx_pending_operations *npo)
{
        struct xenvif *vif = netdev_priv(skb->dev);
        int nr_frags = skb_shinfo(skb)->nr_frags;
        int i;
        struct xen_netif_rx_request *req;
        struct xenvif_rx_meta *meta;
        unsigned char *data;
        int head = 1;
        int old_meta_prod;
        int gso_type;
        struct ubuf_info *ubuf = skb_shinfo(skb)->destructor_arg;
        grant_ref_t foreign_grefs[MAX_SKB_FRAGS];
        struct xenvif *foreign_vif = NULL;

        old_meta_prod = npo->meta_prod;

        gso_type = XEN_NETIF_GSO_TYPE_NONE;
        if (skb_is_gso(skb)) {
                if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
                        gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
                else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
                        gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
        }

        /* Set up a GSO prefix descriptor, if necessary */
        if ((1 << gso_type) & vif->gso_prefix_mask) {
                req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
                meta = npo->meta + npo->meta_prod++;
                meta->gso_type = gso_type;
                meta->gso_size = skb_shinfo(skb)->gso_size;
                meta->size = 0;
                meta->id = req->id;
        }

        req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
        meta = npo->meta + npo->meta_prod++;

        if ((1 << gso_type) & vif->gso_mask) {
                meta->gso_type = gso_type;
                meta->gso_size = skb_shinfo(skb)->gso_size;
        } else {
                meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
                meta->gso_size = 0;
        }

        meta->size = 0;
        meta->id = req->id;
        npo->copy_off = 0;
        npo->copy_gref = req->gref;

        if ((skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) &&
            (ubuf->callback == &xenvif_zerocopy_callback)) {
                int i = 0;
                foreign_vif = ubuf_to_vif(ubuf);

                do {
                        u16 pending_idx = ubuf->desc;
                        foreign_grefs[i++] =
                                foreign_vif->pending_tx_info[pending_idx].req.gref;
                        ubuf = (struct ubuf_info *) ubuf->ctx;
                } while (ubuf);
        }

        data = skb->data;
        while (data < skb_tail_pointer(skb)) {
                unsigned int offset = offset_in_page(data);
                unsigned int len = PAGE_SIZE - offset;

                if (data + len > skb_tail_pointer(skb))
                        len = skb_tail_pointer(skb) - data;

                xenvif_gop_frag_copy(vif, skb, npo,
                                     virt_to_page(data), len, offset, &head,
                                     NULL,
                                     0);
                data += len;
        }

        for (i = 0; i < nr_frags; i++) {
                xenvif_gop_frag_copy(vif, skb, npo,
                                     skb_frag_page(&skb_shinfo(skb)->frags[i]),
                                     skb_frag_size(&skb_shinfo(skb)->frags[i]),
                                     skb_shinfo(skb)->frags[i].page_offset,
                                     &head,
                                     foreign_vif,
                                     foreign_grefs[i]);
        }

        return npo->meta_prod - old_meta_prod;
}
/*
 * This is a twin to xenvif_gop_skb. Assume that xenvif_gop_skb was
 * used to set up the operations on the top of
 * netrx_pending_operations, which have since been done. Check that
 * they didn't give any errors and advance over them.
 */
static int xenvif_check_gop(struct xenvif *vif, int nr_meta_slots,
                            struct netrx_pending_operations *npo)
{
        struct gnttab_copy *copy_op;
        int status = XEN_NETIF_RSP_OKAY;
        int i;

        for (i = 0; i < nr_meta_slots; i++) {
                copy_op = npo->copy + npo->copy_cons++;
                if (copy_op->status != GNTST_okay) {
                        netdev_dbg(vif->dev,
                                   "Bad status %d from copy to DOM%d.\n",
                                   copy_op->status, vif->domid);
                        status = XEN_NETIF_RSP_ERROR;
                }
        }

        return status;
}
static void xenvif_add_frag_responses(struct xenvif *vif, int status,
                                      struct xenvif_rx_meta *meta,
                                      int nr_meta_slots)
{
        int i;
        unsigned long offset;

        /* No fragments used */
        if (nr_meta_slots <= 1)
                return;

        nr_meta_slots--;

        for (i = 0; i < nr_meta_slots; i++) {
                int flags;
                if (i == nr_meta_slots - 1)
                        flags = 0;
                else
                        flags = XEN_NETRXF_more_data;

                offset = 0;
                make_rx_response(vif, meta[i].id, status, offset,
                                 meta[i].size, flags);
        }
}
struct xenvif_rx_cb {
        int meta_slots_used;
};

#define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb)
void xenvif_kick_thread(struct xenvif *vif)
{
        wake_up(&vif->wq);
}
static void xenvif_rx_action(struct xenvif *vif)
{
        s8 status;
        u16 flags;
        struct xen_netif_rx_response *resp;
        struct sk_buff_head rxq;
        struct sk_buff *skb;
        int ret;
        unsigned long offset;
        bool need_to_notify = false;

        struct netrx_pending_operations npo = {
                .copy  = vif->grant_copy_op,
                .meta  = vif->meta,
        };

        skb_queue_head_init(&rxq);

        while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) {
                RING_IDX max_slots_needed;
                RING_IDX old_req_cons;
                RING_IDX ring_slots_used;
                int i;

                /* We need a cheap worse case estimate for the number of
                 * slots we'll use.
                 */
                max_slots_needed = DIV_ROUND_UP(offset_in_page(skb->data) +
                                                skb_headlen(skb),
                                                PAGE_SIZE);
                for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                        unsigned int size;
                        unsigned int offset;

                        size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
                        offset = skb_shinfo(skb)->frags[i].page_offset;

                        /* For a worse-case estimate we need to factor in
                         * the fragment page offset as this will affect the
                         * number of times xenvif_gop_frag_copy() will
                         * call start_new_rx_buffer().
                         */
                        max_slots_needed += DIV_ROUND_UP(offset + size,
                                                         PAGE_SIZE);
                }

                /* To avoid the estimate becoming too pessimal for some
                 * frontends that limit posted rx requests, cap the estimate
                 * at MAX_SKB_FRAGS.
                 */
                if (max_slots_needed > MAX_SKB_FRAGS)
                        max_slots_needed = MAX_SKB_FRAGS;

                /* We may need one more slot for GSO metadata */
                if (skb_is_gso(skb) &&
                   (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 ||
                    skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6))
                        max_slots_needed++;

                /* If the skb may not fit then bail out now */
                if (!xenvif_rx_ring_slots_available(vif, max_slots_needed)) {
                        skb_queue_head(&vif->rx_queue, skb);
                        need_to_notify = true;
                        vif->rx_last_skb_slots = max_slots_needed;
                        break;
                } else
                        vif->rx_last_skb_slots = 0;

                old_req_cons = vif->rx.req_cons;
                XENVIF_RX_CB(skb)->meta_slots_used = xenvif_gop_skb(skb, &npo);
                ring_slots_used = vif->rx.req_cons - old_req_cons;

                BUG_ON(ring_slots_used > max_slots_needed);

                __skb_queue_tail(&rxq, skb);
        }

        BUG_ON(npo.meta_prod > ARRAY_SIZE(vif->meta));

        if (!npo.copy_prod)
                goto done;

        BUG_ON(npo.copy_prod > MAX_GRANT_COPY_OPS);
        gnttab_batch_copy(vif->grant_copy_op, npo.copy_prod);

        while ((skb = __skb_dequeue(&rxq)) != NULL) {

                if ((1 << vif->meta[npo.meta_cons].gso_type) &
                    vif->gso_prefix_mask) {
                        resp = RING_GET_RESPONSE(&vif->rx,
                                                 vif->rx.rsp_prod_pvt++);

                        resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;

                        resp->offset = vif->meta[npo.meta_cons].gso_size;
                        resp->id = vif->meta[npo.meta_cons].id;
                        resp->status = XENVIF_RX_CB(skb)->meta_slots_used;

                        npo.meta_cons++;
                        XENVIF_RX_CB(skb)->meta_slots_used--;
                }

                vif->dev->stats.tx_bytes += skb->len;
                vif->dev->stats.tx_packets++;

                status = xenvif_check_gop(vif,
                                          XENVIF_RX_CB(skb)->meta_slots_used,
                                          &npo);

                if (XENVIF_RX_CB(skb)->meta_slots_used == 1)
                        flags = 0;
                else
                        flags = XEN_NETRXF_more_data;

                if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
                        flags |= XEN_NETRXF_csum_blank | XEN_NETRXF_data_validated;
                else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
                        /* remote but checksummed. */
                        flags |= XEN_NETRXF_data_validated;

                offset = 0;
                resp = make_rx_response(vif, vif->meta[npo.meta_cons].id,
                                        status, offset,
                                        vif->meta[npo.meta_cons].size,
                                        flags);

                if ((1 << vif->meta[npo.meta_cons].gso_type) &
                    vif->gso_mask) {
                        struct xen_netif_extra_info *gso =
                                (struct xen_netif_extra_info *)
                                RING_GET_RESPONSE(&vif->rx,
                                                  vif->rx.rsp_prod_pvt++);

                        resp->flags |= XEN_NETRXF_extra_info;

                        gso->u.gso.type = vif->meta[npo.meta_cons].gso_type;
                        gso->u.gso.size = vif->meta[npo.meta_cons].gso_size;
                        gso->u.gso.pad = 0;
                        gso->u.gso.features = 0;

                        gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
                        gso->flags = 0;
                }

                xenvif_add_frag_responses(vif, status,
                                          vif->meta + npo.meta_cons + 1,
                                          XENVIF_RX_CB(skb)->meta_slots_used);

                RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);

                need_to_notify |= !!ret;

                npo.meta_cons += XENVIF_RX_CB(skb)->meta_slots_used;
                dev_kfree_skb(skb);
        }

done:
        if (need_to_notify)
                notify_remote_via_irq(vif->rx_irq);
}
void xenvif_check_rx_xenvif(struct xenvif *vif)
{
        int more_to_do;

        RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);

        if (more_to_do)
                napi_schedule(&vif->napi);
}
static void tx_add_credit(struct xenvif *vif)
{
        unsigned long max_burst, max_credit;

        /*
         * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
         * Otherwise the interface can seize up due to insufficient credit.
         */
        max_burst = RING_GET_REQUEST(&vif->tx, vif->tx.req_cons)->size;
        max_burst = min(max_burst, 131072UL);
        max_burst = max(max_burst, vif->credit_bytes);

        /* Take care that adding a new chunk of credit doesn't wrap to zero. */
        max_credit = vif->remaining_credit + vif->credit_bytes;
        if (max_credit < vif->remaining_credit)
                max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */

        vif->remaining_credit = min(max_credit, max_burst);
}
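/* Worked example, assuming credit_bytes == 1000000 and a 65536-byte request
 * at the ring head: max_burst = max(min(65536, 131072UL), 1000000)
 * = 1000000, so remaining_credit is topped up by credit_bytes but never
 * beyond one full replenish window, and the wrap check keeps the sum from
 * overflowing to a tiny value.
 */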
static void tx_credit_callback(unsigned long data)
{
        struct xenvif *vif = (struct xenvif *)data;
        tx_add_credit(vif);
        xenvif_check_rx_xenvif(vif);
}
static void xenvif_tx_err(struct xenvif *vif,
                          struct xen_netif_tx_request *txp, RING_IDX end)
{
        RING_IDX cons = vif->tx.req_cons;
        unsigned long flags;

        do {
                spin_lock_irqsave(&vif->response_lock, flags);
                make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
                spin_unlock_irqrestore(&vif->response_lock, flags);
                if (cons == end)
                        break;
                txp = RING_GET_REQUEST(&vif->tx, cons++);
        } while (1);
        vif->tx.req_cons = cons;
}
static void xenvif_fatal_tx_err(struct xenvif *vif)
{
        netdev_err(vif->dev, "fatal error; disabling device\n");
        vif->disabled = true;
        xenvif_kick_thread(vif);
}
static int xenvif_count_requests(struct xenvif *vif,
                                 struct xen_netif_tx_request *first,
                                 struct xen_netif_tx_request *txp,
                                 int work_to_do)
{
        RING_IDX cons = vif->tx.req_cons;
        int slots = 0;
        int drop_err = 0;
        int more_data;

        if (!(first->flags & XEN_NETTXF_more_data))
                return 0;

        do {
                struct xen_netif_tx_request dropped_tx = { 0 };

                if (slots >= work_to_do) {
                        netdev_err(vif->dev,
                                   "Asked for %d slots but exceeds this limit\n",
                                   work_to_do);
                        xenvif_fatal_tx_err(vif);
                        return -ENODATA;
                }

                /* This guest is really using too many slots and
                 * considered malicious.
                 */
                if (unlikely(slots >= fatal_skb_slots)) {
                        netdev_err(vif->dev,
                                   "Malicious frontend using %d slots, threshold %u\n",
                                   slots, fatal_skb_slots);
                        xenvif_fatal_tx_err(vif);
                        return -E2BIG;
                }

                /* Xen network protocol had implicit dependency on
                 * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to
                 * the historical MAX_SKB_FRAGS value 18 to honor the
                 * same behavior as before. Any packet using more than
                 * 18 slots but less than fatal_skb_slots slots is
                 * dropped.
                 */
                if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) {
                        if (net_ratelimit())
                                netdev_dbg(vif->dev,
                                           "Too many slots (%d) exceeding limit (%d), dropping packet\n",
                                           slots, XEN_NETBK_LEGACY_SLOTS_MAX);
                        drop_err = -E2BIG;
                }

                if (drop_err)
                        txp = &dropped_tx;

                memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots),
                       sizeof(*txp));

                /* If the guest submitted a frame >= 64 KiB then
                 * first->size overflowed and following slots will
                 * appear to be larger than the frame.
                 *
                 * This cannot be fatal error as there are buggy
                 * frontends that do this.
                 *
                 * Consume all slots and drop the packet.
                 */
                if (!drop_err && txp->size > first->size) {
                        if (net_ratelimit())
                                netdev_dbg(vif->dev,
                                           "Invalid tx request, slot size %u > remaining size %u\n",
                                           txp->size, first->size);
                        drop_err = -EIO;
                }

                first->size -= txp->size;
                slots++;

                if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
                        netdev_err(vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n",
                                   txp->offset, txp->size);
                        xenvif_fatal_tx_err(vif);
                        return -EINVAL;
                }

                more_data = txp->flags & XEN_NETTXF_more_data;

                if (!drop_err)
                        txp++;

        } while (more_data);

        if (drop_err) {
                xenvif_tx_err(vif, first, cons + slots);
                return drop_err;
        }

        return slots;
}
struct xenvif_tx_cb {
        u16 pending_idx;
};

#define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
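/* Both netback CBs live in skb->cb, the 48-byte scratch area private to the
 * current owner of the skb; storing only the head slot's pending_idx here
 * is enough because the remaining slots are recoverable per-frag via
 * frag_get_pending_idx().
 */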
static inline void xenvif_tx_create_map_op(struct xenvif *vif,
                                           u16 pending_idx,
                                           struct xen_netif_tx_request *txp,
                                           struct gnttab_map_grant_ref *mop)
{
        vif->pages_to_map[mop-vif->tx_map_ops] = vif->mmap_pages[pending_idx];
        gnttab_set_map_op(mop, idx_to_kaddr(vif, pending_idx),
                          GNTMAP_host_map | GNTMAP_readonly,
                          txp->gref, vif->domid);

        memcpy(&vif->pending_tx_info[pending_idx].req, txp,
               sizeof(*txp));
}
static inline struct sk_buff *xenvif_alloc_skb(unsigned int size)
{
        struct sk_buff *skb =
                alloc_skb(size + NET_SKB_PAD + NET_IP_ALIGN,
                          GFP_ATOMIC | __GFP_NOWARN);
        if (unlikely(skb == NULL))
                return NULL;

        /* Packets passed to netif_rx() must have some headroom. */
        skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);

        /* Initialize it here to avoid later surprises */
        skb_shinfo(skb)->destructor_arg = NULL;

        return skb;
}
static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif *vif,
                                                        struct sk_buff *skb,
                                                        struct xen_netif_tx_request *txp,
                                                        struct gnttab_map_grant_ref *gop)
{
        struct skb_shared_info *shinfo = skb_shinfo(skb);
        skb_frag_t *frags = shinfo->frags;
        u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
        int start;
        pending_ring_idx_t index;
        unsigned int nr_slots, frag_overflow = 0;

        /* At this point shinfo->nr_frags is in fact the number of
         * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
         */
        if (shinfo->nr_frags > MAX_SKB_FRAGS) {
                frag_overflow = shinfo->nr_frags - MAX_SKB_FRAGS;
                BUG_ON(frag_overflow > MAX_SKB_FRAGS);
                shinfo->nr_frags = MAX_SKB_FRAGS;
        }
        nr_slots = shinfo->nr_frags;

        /* Skip first skb fragment if it is on same page as header fragment. */
        start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);

        for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots;
             shinfo->nr_frags++, txp++, gop++) {
                index = pending_index(vif->pending_cons++);
                pending_idx = vif->pending_ring[index];
                xenvif_tx_create_map_op(vif, pending_idx, txp, gop);
                frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
        }

        if (frag_overflow) {
                struct sk_buff *nskb = xenvif_alloc_skb(0);
                if (unlikely(nskb == NULL)) {
                        if (net_ratelimit())
                                netdev_err(vif->dev,
                                           "Can't allocate the frag_list skb.\n");
                        return NULL;
                }

                shinfo = skb_shinfo(nskb);
                frags = shinfo->frags;

                for (shinfo->nr_frags = 0; shinfo->nr_frags < frag_overflow;
                     shinfo->nr_frags++, txp++, gop++) {
                        index = pending_index(vif->pending_cons++);
                        pending_idx = vif->pending_ring[index];
                        xenvif_tx_create_map_op(vif, pending_idx, txp, gop);
                        frag_set_pending_idx(&frags[shinfo->nr_frags],
                                             pending_idx);
                }

                skb_shinfo(skb)->frag_list = nskb;
        }

        return gop;
}
static inline void xenvif_grant_handle_set(struct xenvif *vif,
                                           u16 pending_idx,
                                           grant_handle_t handle)
{
        if (unlikely(vif->grant_tx_handle[pending_idx] !=
                     NETBACK_INVALID_HANDLE)) {
                netdev_err(vif->dev,
                           "Trying to overwrite active handle! pending_idx: %x\n",
                           pending_idx);
                BUG();
        }
        vif->grant_tx_handle[pending_idx] = handle;
}

static inline void xenvif_grant_handle_reset(struct xenvif *vif,
                                             u16 pending_idx)
{
        if (unlikely(vif->grant_tx_handle[pending_idx] ==
                     NETBACK_INVALID_HANDLE)) {
                netdev_err(vif->dev,
                           "Trying to unmap invalid handle! pending_idx: %x\n",
                           pending_idx);
                BUG();
        }
        vif->grant_tx_handle[pending_idx] = NETBACK_INVALID_HANDLE;
}
static int xenvif_tx_check_gop(struct xenvif *vif,
                               struct sk_buff *skb,
                               struct gnttab_map_grant_ref **gopp_map,
                               struct gnttab_copy **gopp_copy)
{
        struct gnttab_map_grant_ref *gop_map = *gopp_map;
        u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
        struct skb_shared_info *shinfo = skb_shinfo(skb);
        int nr_frags = shinfo->nr_frags;
        int i, err;
        struct sk_buff *first_skb = NULL;

        /* Check status of header. */
        err = (*gopp_copy)->status;
        if (unlikely(err)) {
                if (net_ratelimit())
                        netdev_dbg(vif->dev,
                                   "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
                                   (*gopp_copy)->status,
                                   pending_idx,
                                   (*gopp_copy)->source.u.ref);
                xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
        }
        (*gopp_copy)++;

check_frags:
        for (i = 0; i < nr_frags; i++, gop_map++) {
                int j, newerr;

                pending_idx = frag_get_pending_idx(&shinfo->frags[i]);

                /* Check error status: if okay then remember grant handle. */
                newerr = gop_map->status;

                if (likely(!newerr)) {
                        xenvif_grant_handle_set(vif,
                                                pending_idx,
                                                gop_map->handle);
                        /* Had a previous error? Invalidate this fragment. */
                        if (unlikely(err))
                                xenvif_idx_unmap(vif, pending_idx);
                        continue;
                }

                /* Error on this fragment: respond to client with an error. */
                if (net_ratelimit())
                        netdev_dbg(vif->dev,
                                   "Grant map of %d. frag failed! status: %d pending_idx: %u ref: %u\n",
                                   i,
                                   gop_map->status,
                                   pending_idx,
                                   gop_map->ref);
                xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);

                /* Not the first error? Preceding frags already invalidated. */
                if (err)
                        continue;
                /* First error: invalidate preceding fragments. */
                for (j = 0; j < i; j++) {
                        pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
                        xenvif_idx_unmap(vif, pending_idx);
                }

                /* Remember the error: invalidate all subsequent fragments. */
                err = newerr;
        }

        if (skb_has_frag_list(skb)) {
                first_skb = skb;
                skb = shinfo->frag_list;
                shinfo = skb_shinfo(skb);
                nr_frags = shinfo->nr_frags;

                goto check_frags;
        }

        /* There was a mapping error in the frag_list skb. We have to unmap
         * the first skb's frags
         */
        if (first_skb && err) {
                int j;
                shinfo = skb_shinfo(first_skb);
                for (j = 0; j < shinfo->nr_frags; j++) {
                        pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
                        xenvif_idx_unmap(vif, pending_idx);
                }
        }

        *gopp_map = gop_map;
        return err;
}
static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
{
        struct skb_shared_info *shinfo = skb_shinfo(skb);
        int nr_frags = shinfo->nr_frags;
        int i;
        u16 prev_pending_idx = INVALID_PENDING_IDX;

        for (i = 0; i < nr_frags; i++) {
                skb_frag_t *frag = shinfo->frags + i;
                struct xen_netif_tx_request *txp;
                struct page *page;
                u16 pending_idx;

                pending_idx = frag_get_pending_idx(frag);

                /* If this is not the first frag, chain it to the previous*/
                if (prev_pending_idx == INVALID_PENDING_IDX)
                        skb_shinfo(skb)->destructor_arg =
                                &callback_param(vif, pending_idx);
                else
                        callback_param(vif, prev_pending_idx).ctx =
                                &callback_param(vif, pending_idx);

                callback_param(vif, pending_idx).ctx = NULL;
                prev_pending_idx = pending_idx;

                txp = &vif->pending_tx_info[pending_idx].req;
                page = virt_to_page(idx_to_kaddr(vif, pending_idx));
                __skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
                skb->len += txp->size;
                skb->data_len += txp->size;
                skb->truesize += txp->size;

                /* Take an extra reference to offset network stack's put_page */
                get_page(vif->mmap_pages[pending_idx]);
        }
        /* FIXME: __skb_fill_page_desc set this to true because page->pfmemalloc
         * overlaps with "index", and "mapping" is not set. I think mapping
         * should be set. If delivered to local stack, it would drop this
         * skb in sk_filter unless the socket has the right to use it.
         */
        skb->pfmemalloc = false;
}
static int xenvif_get_extras(struct xenvif *vif,
                             struct xen_netif_extra_info *extras,
                             int work_to_do)
{
        struct xen_netif_extra_info extra;
        RING_IDX cons = vif->tx.req_cons;

        do {
                if (unlikely(work_to_do-- <= 0)) {
                        netdev_err(vif->dev, "Missing extra info\n");
                        xenvif_fatal_tx_err(vif);
                        return -EBADR;
                }

                memcpy(&extra, RING_GET_REQUEST(&vif->tx, cons),
                       sizeof(extra));
                if (unlikely(!extra.type ||
                             extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
                        vif->tx.req_cons = ++cons;
                        netdev_err(vif->dev,
                                   "Invalid extra type: %d\n", extra.type);
                        xenvif_fatal_tx_err(vif);
                        return -EINVAL;
                }

                memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
                vif->tx.req_cons = ++cons;
        } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);

        return work_to_do;
}
static int xenvif_set_skb_gso(struct xenvif *vif,
                              struct sk_buff *skb,
                              struct xen_netif_extra_info *gso)
{
        if (!gso->u.gso.size) {
                netdev_err(vif->dev, "GSO size must not be zero.\n");
                xenvif_fatal_tx_err(vif);
                return -EINVAL;
        }

        switch (gso->u.gso.type) {
        case XEN_NETIF_GSO_TYPE_TCPV4:
                skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
                break;
        case XEN_NETIF_GSO_TYPE_TCPV6:
                skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
                break;
        default:
                netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
                xenvif_fatal_tx_err(vif);
                return -EINVAL;
        }

        skb_shinfo(skb)->gso_size = gso->u.gso.size;
        /* gso_segs will be calculated later */

        return 0;
}
static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
{
        bool recalculate_partial_csum = false;

        /* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
         * peers can fail to set NETRXF_csum_blank when sending a GSO
         * frame. In this case force the SKB to CHECKSUM_PARTIAL and
         * recalculate the partial checksum.
         */
        if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
                vif->rx_gso_checksum_fixup++;
                skb->ip_summed = CHECKSUM_PARTIAL;
                recalculate_partial_csum = true;
        }

        /* A non-CHECKSUM_PARTIAL SKB does not require setup. */
        if (skb->ip_summed != CHECKSUM_PARTIAL)
                return 0;

        return skb_checksum_setup(skb, recalculate_partial_csum);
}
static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
{
        u64 now = get_jiffies_64();
        u64 next_credit = vif->credit_window_start +
                msecs_to_jiffies(vif->credit_usec / 1000);

        /* Timer could already be pending in rare cases. */
        if (timer_pending(&vif->credit_timeout))
                return true;

        /* Passed the point where we can replenish credit? */
        if (time_after_eq64(now, next_credit)) {
                vif->credit_window_start = now;
                tx_add_credit(vif);
        }

        /* Still too big to send right now? Set a callback. */
        if (size > vif->remaining_credit) {
                vif->credit_timeout.data     =
                        (unsigned long)vif;
                vif->credit_timeout.function =
                        tx_credit_callback;
                mod_timer(&vif->credit_timeout,
                          next_credit);
                vif->credit_window_start = next_credit;

                return true;
        }

        return false;
}
static void xenvif_tx_build_gops(struct xenvif *vif,
                                 int budget,
                                 unsigned *copy_ops,
                                 unsigned *map_ops)
{
        struct gnttab_map_grant_ref *gop = vif->tx_map_ops, *request_gop;
        struct sk_buff *skb;
        int ret;

        while (skb_queue_len(&vif->tx_queue) < budget) {
                struct xen_netif_tx_request txreq;
                struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
                struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
                u16 pending_idx;
                RING_IDX idx;
                int work_to_do;
                unsigned int data_len;
                pending_ring_idx_t index;

                if (vif->tx.sring->req_prod - vif->tx.req_cons >
                    XEN_NETIF_TX_RING_SIZE) {
                        netdev_err(vif->dev,
                                   "Impossible number of requests. "
                                   "req_prod %d, req_cons %d, size %ld\n",
                                   vif->tx.sring->req_prod, vif->tx.req_cons,
                                   XEN_NETIF_TX_RING_SIZE);
                        xenvif_fatal_tx_err(vif);
                        break;
                }

                work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&vif->tx);
                if (!work_to_do)
                        break;

                idx = vif->tx.req_cons;
                rmb(); /* Ensure that we see the request before we copy it. */
                memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq));

                /* Credit-based scheduling. */
                if (txreq.size > vif->remaining_credit &&
                    tx_credit_exceeded(vif, txreq.size))
                        break;

                vif->remaining_credit -= txreq.size;

                work_to_do--;
                vif->tx.req_cons = ++idx;

                memset(extras, 0, sizeof(extras));
                if (txreq.flags & XEN_NETTXF_extra_info) {
                        work_to_do = xenvif_get_extras(vif, extras,
                                                       work_to_do);
                        idx = vif->tx.req_cons;
                        if (unlikely(work_to_do < 0))
                                break;
                }

                ret = xenvif_count_requests(vif, &txreq, txfrags, work_to_do);
                if (unlikely(ret < 0))
                        break;

                idx += ret;

                if (unlikely(txreq.size < ETH_HLEN)) {
                        netdev_dbg(vif->dev,
                                   "Bad packet size: %d\n", txreq.size);
                        xenvif_tx_err(vif, &txreq, idx);
                        break;
                }

                /* No crossing a page as the payload mustn't fragment. */
                if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
                        netdev_err(vif->dev,
                                   "txreq.offset: %x, size: %u, end: %lu\n",
                                   txreq.offset, txreq.size,
                                   (txreq.offset&~PAGE_MASK) + txreq.size);
                        xenvif_fatal_tx_err(vif);
                        break;
                }

                index = pending_index(vif->pending_cons);
                pending_idx = vif->pending_ring[index];

                data_len = (txreq.size > PKT_PROT_LEN &&
                            ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
                        PKT_PROT_LEN : txreq.size;

                skb = xenvif_alloc_skb(data_len);
                if (unlikely(skb == NULL)) {
                        netdev_dbg(vif->dev,
                                   "Can't allocate a skb in start_xmit.\n");
                        xenvif_tx_err(vif, &txreq, idx);
                        break;
                }

                if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
                        struct xen_netif_extra_info *gso;
                        gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];

                        if (xenvif_set_skb_gso(vif, skb, gso)) {
                                /* Failure in xenvif_set_skb_gso is fatal. */
                                kfree_skb(skb);
                                break;
                        }
                }

                XENVIF_TX_CB(skb)->pending_idx = pending_idx;

                __skb_put(skb, data_len);
                vif->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref;
                vif->tx_copy_ops[*copy_ops].source.domid = vif->domid;
                vif->tx_copy_ops[*copy_ops].source.offset = txreq.offset;

                vif->tx_copy_ops[*copy_ops].dest.u.gmfn =
                        virt_to_mfn(skb->data);
                vif->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF;
                vif->tx_copy_ops[*copy_ops].dest.offset =
                        offset_in_page(skb->data);

                vif->tx_copy_ops[*copy_ops].len = data_len;
                vif->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref;

                (*copy_ops)++;

                skb_shinfo(skb)->nr_frags = ret;
                if (data_len < txreq.size) {
                        skb_shinfo(skb)->nr_frags++;
                        frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
                                             pending_idx);
                        xenvif_tx_create_map_op(vif, pending_idx, &txreq, gop);
                        gop++;
                } else {
                        frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
                                             INVALID_PENDING_IDX);
                        memcpy(&vif->pending_tx_info[pending_idx].req, &txreq,
                               sizeof(txreq));
                }

                vif->pending_cons++;

                request_gop = xenvif_get_requests(vif, skb, txfrags, gop);
                if (request_gop == NULL) {
                        kfree_skb(skb);
                        xenvif_tx_err(vif, &txreq, idx);
                        break;
                }
                gop = request_gop;

                __skb_queue_tail(&vif->tx_queue, skb);

                vif->tx.req_cons = idx;

                if (((gop-vif->tx_map_ops) >= ARRAY_SIZE(vif->tx_map_ops)) ||
                    (*copy_ops >= ARRAY_SIZE(vif->tx_copy_ops)))
                        break;
        }

        (*map_ops) = gop - vif->tx_map_ops;
}
/* Consolidate skb with a frag_list into a brand new one with local pages on
 * frags. Returns 0 or -ENOMEM if can't allocate new pages.
 */
static int xenvif_handle_frag_list(struct xenvif *vif, struct sk_buff *skb)
{
        unsigned int offset = skb_headlen(skb);
        skb_frag_t frags[MAX_SKB_FRAGS];
        int i;
        struct ubuf_info *uarg;
        struct sk_buff *nskb = skb_shinfo(skb)->frag_list;

        vif->tx_zerocopy_sent += 2;
        vif->tx_frag_overflow++;

        xenvif_fill_frags(vif, nskb);
        /* Subtract frags size, we will correct it later */
        skb->truesize -= skb->data_len;
        skb->len += nskb->len;
        skb->data_len += nskb->len;

        /* create a brand new frags array and coalesce there */
        for (i = 0; offset < skb->len; i++) {
                struct page *page;
                unsigned int len;

                BUG_ON(i >= MAX_SKB_FRAGS);
                page = alloc_page(GFP_ATOMIC|__GFP_COLD);
                if (!page) {
                        int j;
                        skb->truesize += skb->data_len;
                        for (j = 0; j < i; j++)
                                put_page(frags[j].page.p);
                        return -ENOMEM;
                }

                if (offset + PAGE_SIZE < skb->len)
                        len = PAGE_SIZE;
                else
                        len = skb->len - offset;
                if (skb_copy_bits(skb, offset, page_address(page), len))
                        BUG();

                offset += len;
                frags[i].page.p = page;
                frags[i].page_offset = 0;
                skb_frag_size_set(&frags[i], len);
        }
        /* swap out with old one */
        memcpy(skb_shinfo(skb)->frags,
               frags,
               i * sizeof(skb_frag_t));
        skb_shinfo(skb)->nr_frags = i;
        skb->truesize += i * PAGE_SIZE;

        /* remove traces of mapped pages and frag_list */
        skb_frag_list_init(skb);
        uarg = skb_shinfo(skb)->destructor_arg;
        uarg->callback(uarg, true);
        skb_shinfo(skb)->destructor_arg = NULL;

        skb_shinfo(nskb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
        kfree_skb(nskb);

        return 0;
}
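/* Design note: the coalescing above exists because the frag_list skb
 * carries foreign, grant-mapped pages that must not outlive netback's
 * control of the packet; copying into freshly allocated local pages lets
 * the zerocopy callback fire here and return the grants to the frontend
 * immediately.
 */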
static int xenvif_tx_submit(struct xenvif *vif)
{
        struct gnttab_map_grant_ref *gop_map = vif->tx_map_ops;
        struct gnttab_copy *gop_copy = vif->tx_copy_ops;
        struct sk_buff *skb;
        int work_done = 0;

        while ((skb = __skb_dequeue(&vif->tx_queue)) != NULL) {
                struct xen_netif_tx_request *txp;
                u16 pending_idx;
                unsigned data_len;

                pending_idx = XENVIF_TX_CB(skb)->pending_idx;
                txp = &vif->pending_tx_info[pending_idx].req;

                /* Check the remap error code. */
                if (unlikely(xenvif_tx_check_gop(vif, skb, &gop_map, &gop_copy))) {
                        skb_shinfo(skb)->nr_frags = 0;
                        kfree_skb(skb);
                        continue;
                }

                data_len = skb->len;
                callback_param(vif, pending_idx).ctx = NULL;
                if (data_len < txp->size) {
                        /* Append the packet payload as a fragment. */
                        txp->offset += data_len;
                        txp->size -= data_len;
                } else {
                        /* Schedule a response immediately. */
                        xenvif_idx_release(vif, pending_idx,
                                           XEN_NETIF_RSP_OKAY);
                }

                if (txp->flags & XEN_NETTXF_csum_blank)
                        skb->ip_summed = CHECKSUM_PARTIAL;
                else if (txp->flags & XEN_NETTXF_data_validated)
                        skb->ip_summed = CHECKSUM_UNNECESSARY;

                xenvif_fill_frags(vif, skb);

                if (unlikely(skb_has_frag_list(skb))) {
                        if (xenvif_handle_frag_list(vif, skb)) {
                                if (net_ratelimit())
                                        netdev_err(vif->dev,
                                                   "Not enough memory to consolidate frag_list!\n");
                                skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
                                kfree_skb(skb);
                                continue;
                        }
                }

                if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) {
                        int target = min_t(int, skb->len, PKT_PROT_LEN);
                        __pskb_pull_tail(skb, target - skb_headlen(skb));
                }

                skb->dev      = vif->dev;
                skb->protocol = eth_type_trans(skb, skb->dev);
                skb_reset_network_header(skb);

                if (checksum_setup(vif, skb)) {
                        netdev_dbg(vif->dev,
                                   "Can't setup checksum in net_tx_action\n");
                        /* We have to set this flag to trigger the callback */
                        if (skb_shinfo(skb)->destructor_arg)
                                skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
                        kfree_skb(skb);
                        continue;
                }

                skb_probe_transport_header(skb, 0);

                /* If the packet is GSO then we will have just set up the
                 * transport header offset in checksum_setup so it's now
                 * straightforward to calculate gso_segs.
                 */
                if (skb_is_gso(skb)) {
                        int mss = skb_shinfo(skb)->gso_size;
                        int hdrlen = skb_transport_header(skb) -
                                skb_mac_header(skb) +
                                tcp_hdrlen(skb);

                        skb_shinfo(skb)->gso_segs =
                                DIV_ROUND_UP(skb->len - hdrlen, mss);
                }

                vif->dev->stats.rx_bytes += skb->len;
                vif->dev->stats.rx_packets++;

                work_done++;

                /* Set this flag right before netif_receive_skb, otherwise
                 * someone might think this packet already left netback, and
                 * do a skb_copy_ubufs while we are still in control of the
                 * skb. E.g. the __pskb_pull_tail earlier can do such thing.
                 */
                if (skb_shinfo(skb)->destructor_arg) {
                        skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
                        vif->tx_zerocopy_sent++;
                }

                netif_receive_skb(skb);
        }

        return work_done;
}
void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success)
{
        unsigned long flags;
        pending_ring_idx_t index;
        struct xenvif *vif = ubuf_to_vif(ubuf);

        /* This is the only place where we grab this lock, to protect callbacks
         * from each other.
         */
        spin_lock_irqsave(&vif->callback_lock, flags);
        do {
                u16 pending_idx = ubuf->desc;
                ubuf = (struct ubuf_info *) ubuf->ctx;
                BUG_ON(vif->dealloc_prod - vif->dealloc_cons >=
                        MAX_PENDING_REQS);
                index = pending_index(vif->dealloc_prod);
                vif->dealloc_ring[index] = pending_idx;
                /* Sync with xenvif_tx_dealloc_action:
                 * insert idx then incr producer.
                 */
                smp_wmb();
                vif->dealloc_prod++;
        } while (ubuf);
        wake_up(&vif->dealloc_wq);
        spin_unlock_irqrestore(&vif->callback_lock, flags);

        if (likely(zerocopy_success))
                vif->tx_zerocopy_success++;
        else
                vif->tx_zerocopy_fail++;
}
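/* The smp_wmb() in the loop above pairs with the smp_rmb() in
 * xenvif_tx_dealloc_action(): the dealloc thread must observe the ring
 * entry before it observes the incremented dealloc_prod.
 */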
static inline void xenvif_tx_dealloc_action(struct xenvif *vif)
{
        struct gnttab_unmap_grant_ref *gop;
        pending_ring_idx_t dc, dp;
        u16 pending_idx, pending_idx_release[MAX_PENDING_REQS];
        unsigned int i = 0;

        dc = vif->dealloc_cons;
        gop = vif->tx_unmap_ops;

        /* Free up any grants we have finished using */
        do {
                dp = vif->dealloc_prod;

                /* Ensure we see all indices enqueued by all
                 * xenvif_zerocopy_callback().
                 */
                smp_rmb();

                while (dc != dp) {
                        BUG_ON(gop - vif->tx_unmap_ops > MAX_PENDING_REQS);
                        pending_idx =
                                vif->dealloc_ring[pending_index(dc++)];

                        pending_idx_release[gop-vif->tx_unmap_ops] =
                                pending_idx;
                        vif->pages_to_unmap[gop-vif->tx_unmap_ops] =
                                vif->mmap_pages[pending_idx];
                        gnttab_set_unmap_op(gop,
                                            idx_to_kaddr(vif, pending_idx),
                                            GNTMAP_host_map,
                                            vif->grant_tx_handle[pending_idx]);
                        xenvif_grant_handle_reset(vif, pending_idx);
                        ++gop;
                }

        } while (dp != vif->dealloc_prod);

        vif->dealloc_cons = dc;

        if (gop - vif->tx_unmap_ops > 0) {
                int ret;
                ret = gnttab_unmap_refs(vif->tx_unmap_ops,
                                        NULL,
                                        vif->pages_to_unmap,
                                        gop - vif->tx_unmap_ops);
                if (ret) {
                        netdev_err(vif->dev, "Unmap fail: nr_ops %tx ret %d\n",
                                   gop - vif->tx_unmap_ops, ret);
                        for (i = 0; i < gop - vif->tx_unmap_ops; ++i) {
                                if (gop[i].status != GNTST_okay)
                                        netdev_err(vif->dev,
                                                   " host_addr: %llx handle: %x status: %d\n",
                                                   gop[i].host_addr,
                                                   gop[i].handle,
                                                   gop[i].status);
                        }
                        BUG();
                }
        }

        for (i = 0; i < gop - vif->tx_unmap_ops; ++i)
                xenvif_idx_release(vif, pending_idx_release[i],
                                   XEN_NETIF_RSP_OKAY);
}
/* Called after netfront has transmitted */
int xenvif_tx_action(struct xenvif *vif, int budget)
{
        unsigned nr_mops, nr_cops = 0;
        int work_done, ret;

        if (unlikely(!tx_work_todo(vif)))
                return 0;

        xenvif_tx_build_gops(vif, budget, &nr_cops, &nr_mops);

        if (nr_cops == 0)
                return 0;

        gnttab_batch_copy(vif->tx_copy_ops, nr_cops);
        if (nr_mops != 0) {
                ret = gnttab_map_refs(vif->tx_map_ops,
                                      NULL,
                                      vif->pages_to_map,
                                      nr_mops);
                BUG_ON(ret);
        }

        work_done = xenvif_tx_submit(vif);

        return work_done;
}
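/* Summary of the TX path above: xenvif_tx_build_gops() turns ring requests
 * into grant copy ops (packet headers) and grant map ops (frags), the
 * batched hypercalls execute them, and xenvif_tx_submit() checks the
 * results and hands finished skbs to the stack. The budget is the caller's
 * NAPI budget (xenvif_poll() in interface.c).
 */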
static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
                               u8 status)
{
        struct pending_tx_info *pending_tx_info;
        pending_ring_idx_t index;
        unsigned long flags;

        pending_tx_info = &vif->pending_tx_info[pending_idx];
        spin_lock_irqsave(&vif->response_lock, flags);
        make_tx_response(vif, &pending_tx_info->req, status);
        index = pending_index(vif->pending_prod);
        vif->pending_ring[index] = pending_idx;
        /* TX shouldn't use the index before we give it back here */
        mb();
        vif->pending_prod++;
        spin_unlock_irqrestore(&vif->response_lock, flags);
}
static void make_tx_response(struct xenvif *vif,
                             struct xen_netif_tx_request *txp,
                             s8 st)
{
        RING_IDX i = vif->tx.rsp_prod_pvt;
        struct xen_netif_tx_response *resp;
        int notify;

        resp = RING_GET_RESPONSE(&vif->tx, i);
        resp->id     = txp->id;
        resp->status = st;

        if (txp->flags & XEN_NETTXF_extra_info)
                RING_GET_RESPONSE(&vif->tx, ++i)->status = XEN_NETIF_RSP_NULL;

        vif->tx.rsp_prod_pvt = ++i;
        RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify);
        if (notify)
                notify_remote_via_irq(vif->tx_irq);
}
static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
                                                      u16 id,
                                                      s8 st,
                                                      u16 offset,
                                                      u16 size,
                                                      u16 flags)
{
        RING_IDX i = vif->rx.rsp_prod_pvt;
        struct xen_netif_rx_response *resp;

        resp = RING_GET_RESPONSE(&vif->rx, i);
        resp->offset = offset;
        resp->flags  = flags;
        resp->id     = id;
        resp->status = (s16)size;
        if (st < 0)
                resp->status = (s16)st;

        vif->rx.rsp_prod_pvt = ++i;

        return resp;
}
void xenvif_idx_unmap(struct xenvif *vif, u16 pending_idx)
{
        int ret;
        struct gnttab_unmap_grant_ref tx_unmap_op;

        gnttab_set_unmap_op(&tx_unmap_op,
                            idx_to_kaddr(vif, pending_idx),
                            GNTMAP_host_map,
                            vif->grant_tx_handle[pending_idx]);
        xenvif_grant_handle_reset(vif, pending_idx);

        ret = gnttab_unmap_refs(&tx_unmap_op, NULL,
                                &vif->mmap_pages[pending_idx], 1);
        if (ret) {
                netdev_err(vif->dev,
                           "Unmap fail: ret: %d pending_idx: %d host_addr: %llx handle: %x status: %d\n",
                           ret,
                           pending_idx,
                           tx_unmap_op.host_addr,
                           tx_unmap_op.handle,
                           tx_unmap_op.status);
                BUG();
        }

        xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
}
static inline int rx_work_todo(struct xenvif *vif)
{
        return (!skb_queue_empty(&vif->rx_queue) &&
               xenvif_rx_ring_slots_available(vif, vif->rx_last_skb_slots)) ||
               vif->rx_queue_purge;
}

static inline int tx_work_todo(struct xenvif *vif)
{
        if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->tx)))
                return 1;

        return 0;
}

static inline bool tx_dealloc_work_todo(struct xenvif *vif)
{
        return vif->dealloc_cons != vif->dealloc_prod;
}
void xenvif_unmap_frontend_rings(struct xenvif *vif)
{
        if (vif->tx.sring)
                xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
                                        vif->tx.sring);
        if (vif->rx.sring)
                xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
                                        vif->rx.sring);
}

int xenvif_map_frontend_rings(struct xenvif *vif,
                              grant_ref_t tx_ring_ref,
                              grant_ref_t rx_ring_ref)
{
        void *addr;
        struct xen_netif_tx_sring *txs;
        struct xen_netif_rx_sring *rxs;

        int err = -ENOMEM;

        err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
                                     tx_ring_ref, &addr);
        if (err)
                goto err;

        txs = (struct xen_netif_tx_sring *)addr;
        BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);

        err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
                                     rx_ring_ref, &addr);
        if (err)
                goto err;

        rxs = (struct xen_netif_rx_sring *)addr;
        BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE);

        return 0;

err:
        xenvif_unmap_frontend_rings(vif);
        return err;
}
void xenvif_stop_queue(struct xenvif *vif)
{
        if (!vif->can_queue)
                return;

        netif_stop_queue(vif->dev);
}

static void xenvif_start_queue(struct xenvif *vif)
{
        if (xenvif_schedulable(vif))
                netif_wake_queue(vif->dev);
}
int xenvif_kthread_guest_rx(void *data)
{
        struct xenvif *vif = data;
        struct sk_buff *skb;

        while (!kthread_should_stop()) {
                wait_event_interruptible(vif->wq,
                                         rx_work_todo(vif) ||
                                         vif->disabled ||
                                         kthread_should_stop());

                /* This frontend is found to be rogue, disable it in
                 * kthread context. Currently this is only set when
                 * netback finds out frontend sends malformed packet,
                 * but we cannot disable the interface in softirq
                 * context so we defer it here.
                 */
                if (unlikely(vif->disabled && netif_carrier_ok(vif->dev)))
                        xenvif_carrier_off(vif);

                if (kthread_should_stop())
                        break;

                if (vif->rx_queue_purge) {
                        skb_queue_purge(&vif->rx_queue);
                        vif->rx_queue_purge = false;
                }

                if (!skb_queue_empty(&vif->rx_queue))
                        xenvif_rx_action(vif);

                if (skb_queue_empty(&vif->rx_queue) &&
                    netif_queue_stopped(vif->dev)) {
                        del_timer_sync(&vif->wake_queue);
                        xenvif_start_queue(vif);
                }

                cond_resched();
        }

        /* Bin any remaining skbs */
        while ((skb = skb_dequeue(&vif->rx_queue)) != NULL)
                dev_kfree_skb(skb);

        return 0;
}
int xenvif_dealloc_kthread(void *data)
{
        struct xenvif *vif = data;

        while (!kthread_should_stop()) {
                wait_event_interruptible(vif->dealloc_wq,
                                         tx_dealloc_work_todo(vif) ||
                                         kthread_should_stop());
                if (kthread_should_stop())
                        break;

                xenvif_tx_dealloc_action(vif);
                cond_resched();
        }

        /* Unmap anything remaining*/
        if (tx_dealloc_work_todo(vif))
                xenvif_tx_dealloc_action(vif);

        return 0;
}
static int __init netback_init(void)
{
        int rc = 0;

        if (!xen_domain())
                return -ENODEV;

        if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
                pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
                        fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX);
                fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;
        }

        rc = xenvif_xenbus_init();
        if (rc)
                goto failed_init;

        rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs);

        return 0;

failed_init:
        return rc;
}

module_init(netback_init);

static void __exit netback_fini(void)
{
        xenvif_xenbus_fini();
}
module_exit(netback_fini);

MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("xen-backend:vif");