/* Intel(R) Ethernet Switch Host Interface Driver
 * Copyright(c) 2013 - 2016 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 * Contact Information:
 * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
 * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
 */

#include <linux/types.h>
#include <linux/module.h>
#include <net/ipv6.h>
#include <net/ip.h>
#include <net/tcp.h>
#include <linux/if_macvlan.h>
#include <linux/prefetch.h>

#include "fm10k.h"

#define DRV_VERSION	"0.21.2-k"
#define DRV_SUMMARY	"Intel(R) Ethernet Switch Host Interface Driver"
const char fm10k_driver_version[] = DRV_VERSION;
char fm10k_driver_name[] = "fm10k";
static const char fm10k_driver_string[] = DRV_SUMMARY;
static const char fm10k_copyright[] =
	"Copyright (c) 2013 - 2016 Intel Corporation.";

MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
MODULE_DESCRIPTION(DRV_SUMMARY);
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);

/* single workqueue for entire fm10k driver */
struct workqueue_struct *fm10k_workqueue;

/**
 * fm10k_init_module - Driver Registration Routine
 *
 * fm10k_init_module is the first routine called when the driver is
 * loaded.  All it does is register with the PCI subsystem.
 **/
static int __init fm10k_init_module(void)
{
	pr_info("%s - version %s\n", fm10k_driver_string, fm10k_driver_version);
	pr_info("%s\n", fm10k_copyright);

	/* create driver workqueue */
	fm10k_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0,
					  fm10k_driver_name);

	fm10k_dbg_init();

	return fm10k_register_pci_driver();
}
module_init(fm10k_init_module);

/**
 * fm10k_exit_module - Driver Exit Cleanup Routine
 *
 * fm10k_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit fm10k_exit_module(void)
{
	fm10k_unregister_pci_driver();

	fm10k_dbg_exit();

	/* destroy driver workqueue */
	destroy_workqueue(fm10k_workqueue);
}
module_exit(fm10k_exit_module);

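/**
 * fm10k_alloc_mapped_page - Allocate and DMA-map a page for an Rx buffer
 * @rx_ring: ring the buffer belongs to
 * @bi: Rx buffer info structure to populate
 *
 * Returns true if the buffer already holds a page, or if a new page was
 * allocated and mapped successfully.  Returns false and increments the
 * alloc_failed statistic if allocation or DMA mapping fails.
 **/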
static bool fm10k_alloc_mapped_page(struct fm10k_ring *rx_ring,
				    struct fm10k_rx_buffer *bi)
{
	struct page *page = bi->page;
	dma_addr_t dma;

	/* Only page will be NULL if buffer was consumed */
	if (likely(page))
		return true;

	/* alloc new page for storage */
	page = dev_alloc_page();
	if (unlikely(!page)) {
		rx_ring->rx_stats.alloc_failed++;
		return false;
	}

	/* map page for use */
	dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);

	/* if mapping failed free memory back to system since
	 * there isn't much point in holding memory we can't use
	 */
	if (dma_mapping_error(rx_ring->dev, dma)) {
		__free_page(page);

		rx_ring->rx_stats.alloc_failed++;
		return false;
	}

	bi->dma = dma;
	bi->page = page;
	bi->page_offset = 0;

	return true;
}

/**
 * fm10k_alloc_rx_buffers - Replace used receive buffers
 * @rx_ring: ring to place buffers on
 * @cleaned_count: number of buffers to replace
 **/
void fm10k_alloc_rx_buffers(struct fm10k_ring *rx_ring, u16 cleaned_count)
{
	union fm10k_rx_desc *rx_desc;
	struct fm10k_rx_buffer *bi;
	u16 i = rx_ring->next_to_use;

	/* nothing to do */
	if (!cleaned_count)
		return;

	rx_desc = FM10K_RX_DESC(rx_ring, i);
	bi = &rx_ring->rx_buffer[i];
	i -= rx_ring->count;

	do {
		if (!fm10k_alloc_mapped_page(rx_ring, bi))
			break;

		/* Refresh the desc even if buffer_addrs didn't change
		 * because each write-back erases this info.
		 */
		rx_desc->q.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);

		rx_desc++;
		bi++;
		i++;
		if (unlikely(!i)) {
			rx_desc = FM10K_RX_DESC(rx_ring, 0);
			bi = rx_ring->rx_buffer;
			i -= rx_ring->count;
		}

		/* clear the status bits for the next_to_use descriptor */
		rx_desc->d.staterr = 0;

		cleaned_count--;
	} while (cleaned_count);

	i += rx_ring->count;

	if (rx_ring->next_to_use != i) {
		/* record the next descriptor to use */
		rx_ring->next_to_use = i;

		/* update next to alloc since we have filled the ring */
		rx_ring->next_to_alloc = i;

		/* Force memory writes to complete before letting h/w
		 * know there are new descriptors to fetch.  (Only
		 * applicable for weak-ordered memory model archs,
		 * such as IA-64).
		 */
		wmb();

		/* notify hardware of new descriptors */
		writel(i, rx_ring->tail);
	}
}

/**
 * fm10k_reuse_rx_page - page flip buffer and store it back on the ring
 * @rx_ring: rx descriptor ring to store buffers on
 * @old_buff: donor buffer to have page reused
 *
 * Synchronizes page for reuse by the interface
 **/
static void fm10k_reuse_rx_page(struct fm10k_ring *rx_ring,
				struct fm10k_rx_buffer *old_buff)
{
	struct fm10k_rx_buffer *new_buff;
	u16 nta = rx_ring->next_to_alloc;

	new_buff = &rx_ring->rx_buffer[nta];

	/* update, and store next to alloc */
	nta++;
	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;

	/* transfer page from old buffer to new buffer */
	*new_buff = *old_buff;

	/* sync the buffer for use by the device */
	dma_sync_single_range_for_device(rx_ring->dev, old_buff->dma,
					 old_buff->page_offset,
					 FM10K_RX_BUFSZ,
					 DMA_FROM_DEVICE);
}

static inline bool fm10k_page_is_reserved(struct page *page)
{
	return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
}

static bool fm10k_can_reuse_rx_page(struct fm10k_rx_buffer *rx_buffer,
				    struct page *page,
				    unsigned int __maybe_unused truesize)
{
	/* avoid re-using remote pages */
	if (unlikely(fm10k_page_is_reserved(page)))
		return false;

#if (PAGE_SIZE < 8192)
	/* if we are only owner of page we can reuse it */
	if (unlikely(page_count(page) != 1))
		return false;

	/* flip page offset to other buffer */
	rx_buffer->page_offset ^= FM10K_RX_BUFSZ;
#else
	/* move offset up to the next cache line */
	rx_buffer->page_offset += truesize;

	if (rx_buffer->page_offset > (PAGE_SIZE - FM10K_RX_BUFSZ))
		return false;
#endif

	/* Even if we own the page, we are not allowed to use atomic_set()
	 * This would break get_page_unless_zero() users.
	 */
	page_ref_inc(page);

	return true;
}

/**
 * fm10k_add_rx_frag - Add contents of Rx buffer to sk_buff
 * @rx_buffer: buffer containing page to add
 * @rx_desc: descriptor containing length of buffer written by hardware
 * @skb: sk_buff to place the data into
 *
 * This function will add the data contained in rx_buffer->page to the skb.
 * This is done either through a direct copy if the data in the buffer is
 * less than the skb header size, otherwise it will just attach the page as
 * a frag to the skb.
 *
 * The function will then update the page offset if necessary and return
 * true if the buffer can be reused by the interface.
 **/
static bool fm10k_add_rx_frag(struct fm10k_rx_buffer *rx_buffer,
			      union fm10k_rx_desc *rx_desc,
			      struct sk_buff *skb)
{
	struct page *page = rx_buffer->page;
	unsigned char *va = page_address(page) + rx_buffer->page_offset;
	unsigned int size = le16_to_cpu(rx_desc->w.length);
#if (PAGE_SIZE < 8192)
	unsigned int truesize = FM10K_RX_BUFSZ;
#else
	unsigned int truesize = ALIGN(size, 512);
#endif
	unsigned int pull_len;

	if (unlikely(skb_is_nonlinear(skb)))
		goto add_tail_frag;

	if (likely(size <= FM10K_RX_HDR_LEN)) {
		memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));

		/* page is not reserved, we can reuse buffer as-is */
		if (likely(!fm10k_page_is_reserved(page)))
			return true;

		/* this page cannot be reused so discard it */
		__free_page(page);
		return false;
	}

	/* we need the header to contain the greater of either ETH_HLEN or
	 * 60 bytes if the skb->len is less than 60 for skb_pad.
	 */
	pull_len = eth_get_headlen(va, FM10K_RX_HDR_LEN);

	/* align pull length to size of long to optimize memcpy performance */
	memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long)));

	/* update all of the pointers */
	va += pull_len;
	size -= pull_len;

add_tail_frag:
	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
			(unsigned long)va & ~PAGE_MASK, size, truesize);

	return fm10k_can_reuse_rx_page(rx_buffer, page, truesize);
}

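/**
 * fm10k_fetch_rx_buffer - Retrieve or build an skb from an Rx buffer
 * @rx_ring: ring the buffer was received on
 * @rx_desc: descriptor describing the received buffer
 * @skb: skb currently being assembled, or NULL to allocate a new one
 *
 * Syncs the buffer for CPU access, adds its contents to the skb via
 * fm10k_add_rx_frag(), and then either recycles the page back onto the
 * ring or unmaps it.  Returns the skb, or NULL if skb allocation failed.
 **/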
static struct sk_buff *fm10k_fetch_rx_buffer(struct fm10k_ring *rx_ring,
					     union fm10k_rx_desc *rx_desc,
					     struct sk_buff *skb)
{
	struct fm10k_rx_buffer *rx_buffer;
	struct page *page;

	rx_buffer = &rx_ring->rx_buffer[rx_ring->next_to_clean];
	page = rx_buffer->page;
	prefetchw(page);

	if (likely(!skb)) {
		void *page_addr = page_address(page) +
				  rx_buffer->page_offset;

		/* prefetch first cache line of first page */
		prefetch(page_addr);
#if L1_CACHE_BYTES < 128
		prefetch(page_addr + L1_CACHE_BYTES);
#endif

		/* allocate a skb to store the frags */
		skb = napi_alloc_skb(&rx_ring->q_vector->napi,
				     FM10K_RX_HDR_LEN);
		if (unlikely(!skb)) {
			rx_ring->rx_stats.alloc_failed++;
			return NULL;
		}

		/* we will be copying header into skb->data in
		 * pskb_may_pull so it is in our interest to prefetch
		 * it now to avoid a possible cache miss
		 */
		prefetchw(skb->data);
	}

	/* we are reusing so sync this buffer for CPU use */
	dma_sync_single_range_for_cpu(rx_ring->dev,
				      rx_buffer->dma,
				      rx_buffer->page_offset,
				      FM10K_RX_BUFSZ,
				      DMA_FROM_DEVICE);

	/* pull page into skb */
	if (fm10k_add_rx_frag(rx_buffer, rx_desc, skb)) {
		/* hand second half of page back to the ring */
		fm10k_reuse_rx_page(rx_ring, rx_buffer);
	} else {
		/* we are not reusing the buffer so unmap it */
		dma_unmap_page(rx_ring->dev, rx_buffer->dma,
			       PAGE_SIZE, DMA_FROM_DEVICE);
	}

	/* clear contents of rx_buffer */
	rx_buffer->page = NULL;

	return skb;
}

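/**
 * fm10k_rx_checksum - Indicate Rx checksum status in the skb
 * @ring: ring the packet was received on
 * @rx_desc: descriptor carrying the checksum status bits
 * @skb: skb being populated
 *
 * Leaves skb->ip_summed untouched (CHECKSUM_NONE) if Rx checksum offload is
 * disabled or the descriptor flags an L3/L4 checksum error; otherwise marks
 * the skb CHECKSUM_UNNECESSARY and notes whether the frame was encapsulated.
 **/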
static inline void fm10k_rx_checksum(struct fm10k_ring *ring,
				     union fm10k_rx_desc *rx_desc,
				     struct sk_buff *skb)
{
	skb_checksum_none_assert(skb);

	/* Rx checksum disabled via ethtool */
	if (!(ring->netdev->features & NETIF_F_RXCSUM))
		return;

	/* TCP/UDP checksum error bit is set */
	if (fm10k_test_staterr(rx_desc,
			       FM10K_RXD_STATUS_L4E |
			       FM10K_RXD_STATUS_L4E2 |
			       FM10K_RXD_STATUS_IPE |
			       FM10K_RXD_STATUS_IPE2)) {
		ring->rx_stats.csum_err++;
		return;
	}

	/* It must be a TCP or UDP packet with a valid checksum */
	if (fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_L4CS2))
		skb->encapsulation = true;
	else if (!fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_L4CS))
		return;

	skb->ip_summed = CHECKSUM_UNNECESSARY;

	ring->rx_stats.csum_good++;
}

#define FM10K_RSS_L4_TYPES_MASK \
	(BIT(FM10K_RSSTYPE_IPV4_TCP) | \
	 BIT(FM10K_RSSTYPE_IPV4_UDP) | \
	 BIT(FM10K_RSSTYPE_IPV6_TCP) | \
	 BIT(FM10K_RSSTYPE_IPV6_UDP))

static inline void fm10k_rx_hash(struct fm10k_ring *ring,
				 union fm10k_rx_desc *rx_desc,
				 struct sk_buff *skb)
{
	u16 rss_type;

	if (!(ring->netdev->features & NETIF_F_RXHASH))
		return;

	rss_type = le16_to_cpu(rx_desc->w.pkt_info) & FM10K_RXD_RSSTYPE_MASK;
	if (!rss_type)
		return;

	skb_set_hash(skb, le32_to_cpu(rx_desc->d.rss),
		     (BIT(rss_type) & FM10K_RSS_L4_TYPES_MASK) ?
		     PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
}

static void fm10k_type_trans(struct fm10k_ring *rx_ring,
			     union fm10k_rx_desc __maybe_unused *rx_desc,
			     struct sk_buff *skb)
{
	struct net_device *dev = rx_ring->netdev;
	struct fm10k_l2_accel *l2_accel = rcu_dereference_bh(rx_ring->l2_accel);

	/* check to see if DGLORT belongs to a MACVLAN */
	if (l2_accel) {
		u16 idx = le16_to_cpu(FM10K_CB(skb)->fi.w.dglort) - 1;

		idx -= l2_accel->dglort;
		if (idx < l2_accel->size && l2_accel->macvlan[idx])
			dev = l2_accel->macvlan[idx];
		else
			l2_accel = NULL;
	}

	skb->protocol = eth_type_trans(skb, dev);

	if (!l2_accel)
		return;

	/* update MACVLAN statistics */
	macvlan_count_rx(netdev_priv(dev), skb->len + ETH_HLEN, 1,
			 !!(rx_desc->w.hdr_info &
			    cpu_to_le16(FM10K_RXD_HDR_INFO_XC_MASK)));
}

/**
 * fm10k_process_skb_fields - Populate skb header fields from Rx descriptor
 * @rx_ring: rx descriptor ring packet is being transacted on
 * @rx_desc: pointer to the EOP Rx descriptor
 * @skb: pointer to current skb being populated
 *
 * This function checks the ring, descriptor, and packet information in
 * order to populate the hash, checksum, VLAN, timestamp, protocol, and
 * other fields within the skb.
 **/
static unsigned int fm10k_process_skb_fields(struct fm10k_ring *rx_ring,
					     union fm10k_rx_desc *rx_desc,
					     struct sk_buff *skb)
{
	unsigned int len = skb->len;

	fm10k_rx_hash(rx_ring, rx_desc, skb);

	fm10k_rx_checksum(rx_ring, rx_desc, skb);

	FM10K_CB(skb)->fi.w.vlan = rx_desc->w.vlan;

	skb_record_rx_queue(skb, rx_ring->queue_index);

	FM10K_CB(skb)->fi.d.glort = rx_desc->d.glort;

	if (rx_desc->w.vlan) {
		u16 vid = le16_to_cpu(rx_desc->w.vlan);

		if ((vid & VLAN_VID_MASK) != rx_ring->vid)
			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
		else if (vid & VLAN_PRIO_MASK)
			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
					       vid & VLAN_PRIO_MASK);
	}

	fm10k_type_trans(rx_ring, rx_desc, skb);

	return len;
}

/**
 * fm10k_is_non_eop - process handling of non-EOP buffers
 * @rx_ring: Rx ring being processed
 * @rx_desc: Rx descriptor for current buffer
 *
 * This function updates next to clean.  If the buffer is an EOP buffer
 * this function exits returning false, otherwise it will place the
 * sk_buff in the next buffer to be chained and return true indicating
 * that this is in fact a non-EOP buffer.
 **/
static bool fm10k_is_non_eop(struct fm10k_ring *rx_ring,
			     union fm10k_rx_desc *rx_desc)
{
	u32 ntc = rx_ring->next_to_clean + 1;

	/* fetch, update, and store next to clean */
	ntc = (ntc < rx_ring->count) ? ntc : 0;
	rx_ring->next_to_clean = ntc;

	prefetch(FM10K_RX_DESC(rx_ring, ntc));

	if (likely(fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_EOP)))
		return false;

	return true;
}

/**
 * fm10k_cleanup_headers - Correct corrupted or empty headers
 * @rx_ring: rx descriptor ring packet is being transacted on
 * @rx_desc: pointer to the EOP Rx descriptor
 * @skb: pointer to current skb being fixed
 *
 * Address the case where we are pulling data in on pages only
 * and as such no data is present in the skb header.
 *
 * In addition if skb is not at least 60 bytes we need to pad it so that
 * it is large enough to qualify as a valid Ethernet frame.
 *
 * Returns true if an error was encountered and skb was freed.
 **/
static bool fm10k_cleanup_headers(struct fm10k_ring *rx_ring,
				  union fm10k_rx_desc *rx_desc,
				  struct sk_buff *skb)
{
	if (unlikely((fm10k_test_staterr(rx_desc,
					 FM10K_RXD_STATUS_RXE)))) {
#define FM10K_TEST_RXD_BIT(rxd, bit) \
	((rxd)->w.csum_err & cpu_to_le16(bit))
		if (FM10K_TEST_RXD_BIT(rx_desc, FM10K_RXD_ERR_SWITCH_ERROR))
			rx_ring->rx_stats.switch_errors++;
		if (FM10K_TEST_RXD_BIT(rx_desc, FM10K_RXD_ERR_NO_DESCRIPTOR))
			rx_ring->rx_stats.drops++;
		if (FM10K_TEST_RXD_BIT(rx_desc, FM10K_RXD_ERR_PP_ERROR))
			rx_ring->rx_stats.pp_errors++;
		if (FM10K_TEST_RXD_BIT(rx_desc, FM10K_RXD_ERR_SWITCH_READY))
			rx_ring->rx_stats.link_errors++;
		if (FM10K_TEST_RXD_BIT(rx_desc, FM10K_RXD_ERR_TOO_BIG))
			rx_ring->rx_stats.length_errors++;
		dev_kfree_skb_any(skb);
		rx_ring->rx_stats.errors++;
		return true;
	}

	/* if eth_skb_pad returns an error the skb was freed */
	if (eth_skb_pad(skb))
		return true;

	return false;
}

/**
 * fm10k_receive_skb - helper function to handle rx indications
 * @q_vector: structure containing interrupt and ring information
 * @skb: packet to send up
 **/
static void fm10k_receive_skb(struct fm10k_q_vector *q_vector,
			      struct sk_buff *skb)
{
	napi_gro_receive(&q_vector->napi, skb);
}

static int fm10k_clean_rx_irq(struct fm10k_q_vector *q_vector,
			      struct fm10k_ring *rx_ring,
			      int budget)
{
	struct sk_buff *skb = rx_ring->skb;
	unsigned int total_bytes = 0, total_packets = 0;
	u16 cleaned_count = fm10k_desc_unused(rx_ring);

	while (likely(total_packets < budget)) {
		union fm10k_rx_desc *rx_desc;

		/* return some buffers to hardware, one at a time is too slow */
		if (cleaned_count >= FM10K_RX_BUFFER_WRITE) {
			fm10k_alloc_rx_buffers(rx_ring, cleaned_count);
			cleaned_count = 0;
		}

		rx_desc = FM10K_RX_DESC(rx_ring, rx_ring->next_to_clean);

		if (!rx_desc->d.staterr)
			break;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we know the
		 * descriptor has been written back
		 */
		dma_rmb();

		/* retrieve a buffer from the ring */
		skb = fm10k_fetch_rx_buffer(rx_ring, rx_desc, skb);

		/* exit if we failed to retrieve a buffer */
		if (!skb)
			break;

		cleaned_count++;

		/* fetch next buffer in frame if non-eop */
		if (fm10k_is_non_eop(rx_ring, rx_desc))
			continue;

		/* verify the packet layout is correct */
		if (fm10k_cleanup_headers(rx_ring, rx_desc, skb)) {
			skb = NULL;
			continue;
		}

		/* populate checksum, timestamp, VLAN, and protocol */
		total_bytes += fm10k_process_skb_fields(rx_ring, rx_desc, skb);

		fm10k_receive_skb(q_vector, skb);

		/* reset skb pointer */
		skb = NULL;

		/* update budget accounting */
		total_packets++;
	}

	/* place incomplete frames back on ring for completion */
	rx_ring->skb = skb;

	u64_stats_update_begin(&rx_ring->syncp);
	rx_ring->stats.packets += total_packets;
	rx_ring->stats.bytes += total_bytes;
	u64_stats_update_end(&rx_ring->syncp);
	q_vector->rx.total_packets += total_packets;
	q_vector->rx.total_bytes += total_bytes;

	return total_packets;
}

#define VXLAN_HLEN (sizeof(struct udphdr) + 8)
static struct ethhdr *fm10k_port_is_vxlan(struct sk_buff *skb)
{
	struct fm10k_intfc *interface = netdev_priv(skb->dev);
	struct fm10k_udp_port *vxlan_port;

	/* we can only offload a vxlan if we recognize it as such */
	vxlan_port = list_first_entry_or_null(&interface->vxlan_port,
					      struct fm10k_udp_port, list);

	if (!vxlan_port)
		return NULL;
	if (vxlan_port->port != udp_hdr(skb)->dest)
		return NULL;

	/* return offset of udp_hdr plus 8 bytes for VXLAN header */
	return (struct ethhdr *)(skb_transport_header(skb) + VXLAN_HLEN);
}

#define FM10K_NVGRE_RESERVED0_FLAGS htons(0x9FFF)
#define NVGRE_TNI htons(0x2000)
struct fm10k_nvgre_hdr {
	__be16 flags;
	__be16 proto;
	__be32 tni;
};

static struct ethhdr *fm10k_gre_is_nvgre(struct sk_buff *skb)
{
	struct fm10k_nvgre_hdr *nvgre_hdr;
	int hlen = ip_hdrlen(skb);

	/* currently only IPv4 is supported due to hlen above */
	if (vlan_get_protocol(skb) != htons(ETH_P_IP))
		return NULL;

	/* our transport header should be NVGRE */
	nvgre_hdr = (struct fm10k_nvgre_hdr *)(skb_network_header(skb) + hlen);

	/* verify all reserved flags are 0 */
	if (nvgre_hdr->flags & FM10K_NVGRE_RESERVED0_FLAGS)
		return NULL;

	/* report start of ethernet header */
	if (nvgre_hdr->flags & NVGRE_TNI)
		return (struct ethhdr *)(nvgre_hdr + 1);

	return (struct ethhdr *)(&nvgre_hdr->tni);
}

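/**
 * fm10k_tx_encap_offload - Check an encapsulated frame for Tx offload
 * @skb: frame being transmitted
 *
 * Walks the outer headers to verify the frame is a VXLAN or NVGRE tunnel
 * the hardware can offload and that the combined inner and outer headers
 * fit within FM10K_TUNNEL_HEADER_LENGTH.  Returns the inner EtherType on
 * success, or 0 if the frame cannot be offloaded.
 **/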
__be16 fm10k_tx_encap_offload(struct sk_buff *skb)
{
	u8 l4_hdr = 0, inner_l4_hdr = 0, inner_l4_hlen;
	struct ethhdr *eth_hdr;

	if (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
	    skb->inner_protocol != htons(ETH_P_TEB))
		return 0;

	switch (vlan_get_protocol(skb)) {
	case htons(ETH_P_IP):
		l4_hdr = ip_hdr(skb)->protocol;
		break;
	case htons(ETH_P_IPV6):
		l4_hdr = ipv6_hdr(skb)->nexthdr;
		break;
	default:
		return 0;
	}

	switch (l4_hdr) {
	case IPPROTO_UDP:
		eth_hdr = fm10k_port_is_vxlan(skb);
		break;
	case IPPROTO_GRE:
		eth_hdr = fm10k_gre_is_nvgre(skb);
		break;
	default:
		return 0;
	}

	if (!eth_hdr)
		return 0;

	switch (eth_hdr->h_proto) {
	case htons(ETH_P_IP):
		inner_l4_hdr = inner_ip_hdr(skb)->protocol;
		break;
	case htons(ETH_P_IPV6):
		inner_l4_hdr = inner_ipv6_hdr(skb)->nexthdr;
		break;
	default:
		return 0;
	}

	switch (inner_l4_hdr) {
	case IPPROTO_TCP:
		inner_l4_hlen = inner_tcp_hdrlen(skb);
		break;
	case IPPROTO_UDP:
		inner_l4_hlen = 8;
		break;
	default:
		return 0;
	}

	/* The hardware allows tunnel offloads only if the combined inner and
	 * outer header is 184 bytes or less
	 */
	if (skb_inner_transport_header(skb) + inner_l4_hlen -
	    skb_mac_header(skb) > FM10K_TUNNEL_HEADER_LENGTH)
		return 0;

	return eth_hdr->h_proto;
}

static int fm10k_tso(struct fm10k_ring *tx_ring,
		     struct fm10k_tx_buffer *first)
{
	struct sk_buff *skb = first->skb;
	struct fm10k_tx_desc *tx_desc;
	unsigned char *th;
	u8 hdrlen;

	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;

	if (!skb_is_gso(skb))
		return 0;

	/* compute header lengths */
	if (skb->encapsulation) {
		if (!fm10k_tx_encap_offload(skb))
			goto err_vxlan;
		th = skb_inner_transport_header(skb);
	} else {
		th = skb_transport_header(skb);
	}

	/* compute offset from SOF to transport header and add header len */
	hdrlen = (th - skb->data) + (((struct tcphdr *)th)->doff << 2);

	first->tx_flags |= FM10K_TX_FLAGS_CSUM;

	/* update gso size and bytecount with header size */
	first->gso_segs = skb_shinfo(skb)->gso_segs;
	first->bytecount += (first->gso_segs - 1) * hdrlen;

	/* populate Tx descriptor header size and mss */
	tx_desc = FM10K_TX_DESC(tx_ring, tx_ring->next_to_use);
	tx_desc->hdrlen = hdrlen;
	tx_desc->mss = cpu_to_le16(skb_shinfo(skb)->gso_size);

	return 1;
err_vxlan:
	tx_ring->netdev->features &= ~NETIF_F_GSO_UDP_TUNNEL;
	if (net_ratelimit())
		netdev_err(tx_ring->netdev,
			   "TSO requested for unsupported tunnel, disabling offload\n");
	return -1;
}

static void fm10k_tx_csum(struct fm10k_ring *tx_ring,
			  struct fm10k_tx_buffer *first)
{
	struct sk_buff *skb = first->skb;
	struct fm10k_tx_desc *tx_desc;
	union {
		struct iphdr *ipv4;
		struct ipv6hdr *ipv6;
		u8 *raw;
	} network_hdr;
	u8 *transport_hdr;
	__be16 frag_off;
	__be16 protocol;
	u8 l4_hdr = 0;

	if (skb->ip_summed != CHECKSUM_PARTIAL)
		goto no_csum;

	if (skb->encapsulation) {
		protocol = fm10k_tx_encap_offload(skb);
		if (!protocol) {
			if (skb_checksum_help(skb)) {
				dev_warn(tx_ring->dev,
					 "failed to offload encap csum!\n");
				tx_ring->tx_stats.csum_err++;
			}
			goto no_csum;
		}
		network_hdr.raw = skb_inner_network_header(skb);
		transport_hdr = skb_inner_transport_header(skb);
	} else {
		protocol = vlan_get_protocol(skb);
		network_hdr.raw = skb_network_header(skb);
		transport_hdr = skb_transport_header(skb);
	}

	switch (protocol) {
	case htons(ETH_P_IP):
		l4_hdr = network_hdr.ipv4->protocol;
		break;
	case htons(ETH_P_IPV6):
		l4_hdr = network_hdr.ipv6->nexthdr;
		if (likely((transport_hdr - network_hdr.raw) ==
			   sizeof(struct ipv6hdr)))
			break;
		ipv6_skip_exthdr(skb, network_hdr.raw - skb->data +
				      sizeof(struct ipv6hdr),
				 &l4_hdr, &frag_off);
		if (unlikely(frag_off))
			l4_hdr = NEXTHDR_FRAGMENT;
		break;
	default:
		break;
	}

	switch (l4_hdr) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		break;
	case IPPROTO_GRE:
		if (skb->encapsulation)
			break;
		/* fall through */
	default:
		if (unlikely(net_ratelimit())) {
			dev_warn(tx_ring->dev,
				 "partial checksum, version=%d l4 proto=%x\n",
				 protocol, l4_hdr);
		}
		skb_checksum_help(skb);
		tx_ring->tx_stats.csum_err++;
		goto no_csum;
	}

	/* update TX checksum flag */
	first->tx_flags |= FM10K_TX_FLAGS_CSUM;
	tx_ring->tx_stats.csum_good++;

no_csum:
	/* populate Tx descriptor header size and mss */
	tx_desc = FM10K_TX_DESC(tx_ring, tx_ring->next_to_use);
	tx_desc->hdrlen = 0;
	tx_desc->mss = 0;
}

#define FM10K_SET_FLAG(_input, _flag, _result) \
	((_flag <= _result) ? \
	 ((u32)(_input & _flag) * (_result / _flag)) : \
	 ((u32)(_input & _flag) / (_flag / _result)))

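/* FM10K_SET_FLAG translates a flag in _input into the corresponding
 * descriptor bit without branching: the masked bit is scaled up or down by
 * the ratio between the two (power-of-two) masks.  For example, when
 * tx_flags has FM10K_TX_FLAGS_CSUM set,
 * FM10K_SET_FLAG(tx_flags, FM10K_TX_FLAGS_CSUM, FM10K_TXD_FLAG_CSUM)
 * evaluates to FM10K_TXD_FLAG_CSUM, and to 0 otherwise.
 */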
static u8 fm10k_tx_desc_flags(struct sk_buff *skb, u32 tx_flags)
{
	/* set type for advanced descriptor with frame checksum insertion */
	u32 desc_flags = 0;

	/* set checksum offload bits */
	desc_flags |= FM10K_SET_FLAG(tx_flags, FM10K_TX_FLAGS_CSUM,
				     FM10K_TXD_FLAG_CSUM);

	return desc_flags;
}

static bool fm10k_tx_desc_push(struct fm10k_ring *tx_ring,
			       struct fm10k_tx_desc *tx_desc, u16 i,
			       dma_addr_t dma, unsigned int size, u8 desc_flags)
{
	/* set RS and INT for last frame in a cache line */
	if ((++i & (FM10K_TXD_WB_FIFO_SIZE - 1)) == 0)
		desc_flags |= FM10K_TXD_FLAG_RS | FM10K_TXD_FLAG_INT;

	/* record values to descriptor */
	tx_desc->buffer_addr = cpu_to_le64(dma);
	tx_desc->flags = desc_flags;
	tx_desc->buflen = cpu_to_le16(size);

	/* return true if we just wrapped the ring */
	return i == tx_ring->count;
}

static int __fm10k_maybe_stop_tx(struct fm10k_ring *tx_ring, u16 size)
{
	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);

	/* Memory barrier before checking head and tail */
	smp_mb();

	/* Check again in a case another CPU has just made room available */
	if (likely(fm10k_desc_unused(tx_ring) < size))
		return -EBUSY;

	/* A reprieve! - use start_queue because it doesn't call schedule */
	netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
	++tx_ring->tx_stats.restart_queue;
	return 0;
}

static inline int fm10k_maybe_stop_tx(struct fm10k_ring *tx_ring, u16 size)
{
	if (likely(fm10k_desc_unused(tx_ring) >= size))
		return 0;
	return __fm10k_maybe_stop_tx(tx_ring, size);
}

static void fm10k_tx_map(struct fm10k_ring *tx_ring,
			 struct fm10k_tx_buffer *first)
{
	struct sk_buff *skb = first->skb;
	struct fm10k_tx_buffer *tx_buffer;
	struct fm10k_tx_desc *tx_desc;
	struct skb_frag_struct *frag;
	unsigned char *data;
	dma_addr_t dma;
	unsigned int data_len, size;
	u32 tx_flags = first->tx_flags;
	u16 i = tx_ring->next_to_use;
	u8 flags = fm10k_tx_desc_flags(skb, tx_flags);

	tx_desc = FM10K_TX_DESC(tx_ring, i);

	/* add HW VLAN tag */
	if (skb_vlan_tag_present(skb))
		tx_desc->vlan = cpu_to_le16(skb_vlan_tag_get(skb));
	else
		tx_desc->vlan = 0;

	size = skb_headlen(skb);
	data = skb->data;

	dma = dma_map_single(tx_ring->dev, data, size, DMA_TO_DEVICE);

	data_len = skb->data_len;
	tx_buffer = first;

	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
		if (dma_mapping_error(tx_ring->dev, dma))
			goto dma_error;

		/* record length, and DMA address */
		dma_unmap_len_set(tx_buffer, len, size);
		dma_unmap_addr_set(tx_buffer, dma, dma);

		while (unlikely(size > FM10K_MAX_DATA_PER_TXD)) {
			if (fm10k_tx_desc_push(tx_ring, tx_desc++, i++, dma,
					       FM10K_MAX_DATA_PER_TXD, flags)) {
				tx_desc = FM10K_TX_DESC(tx_ring, 0);
				i = 0;
			}

			dma += FM10K_MAX_DATA_PER_TXD;
			size -= FM10K_MAX_DATA_PER_TXD;
		}

		if (likely(!data_len))
			break;

		if (fm10k_tx_desc_push(tx_ring, tx_desc++, i++,
				       dma, size, flags)) {
			tx_desc = FM10K_TX_DESC(tx_ring, 0);
			i = 0;
		}

		size = skb_frag_size(frag);
		data_len -= size;

		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
				       DMA_TO_DEVICE);

		tx_buffer = &tx_ring->tx_buffer[i];
	}

	/* write last descriptor with LAST bit set */
	flags |= FM10K_TXD_FLAG_LAST;

	if (fm10k_tx_desc_push(tx_ring, tx_desc, i++, dma, size, flags))
		i = 0;

	/* record bytecount for BQL */
	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);

	/* record SW timestamp if HW timestamp is not available */
	skb_tx_timestamp(first->skb);

	/* Force memory writes to complete before letting h/w know there
	 * are new descriptors to fetch.  (Only applicable for weak-ordered
	 * memory model archs, such as IA-64).
	 *
	 * We also need this memory barrier to make certain all of the
	 * status bits have been updated before next_to_watch is written.
	 */
	wmb();

	/* set next_to_watch value indicating a packet is present */
	first->next_to_watch = tx_desc;

	tx_ring->next_to_use = i;

	/* Make sure there is space in the ring for the next send. */
	fm10k_maybe_stop_tx(tx_ring, DESC_NEEDED);

	/* notify HW of packet */
	if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
		writel(i, tx_ring->tail);

		/* we need this if more than one processor can write to our tail
		 * at a time, it synchronizes IO on IA64/Altix systems
		 */
		mmiowb();
	}

	return;
dma_error:
	dev_err(tx_ring->dev, "TX DMA map failed\n");

	/* clear dma mappings for failed tx_buffer map */
	for (;;) {
		tx_buffer = &tx_ring->tx_buffer[i];
		fm10k_unmap_and_free_tx_resource(tx_ring, tx_buffer);
		if (tx_buffer == first)
			break;
		if (i == 0)
			i = tx_ring->count;
		i--;
	}

	tx_ring->next_to_use = i;
}

netdev_tx_t fm10k_xmit_frame_ring(struct sk_buff *skb,
				  struct fm10k_ring *tx_ring)
{
	u16 count = TXD_USE_COUNT(skb_headlen(skb));
	struct fm10k_tx_buffer *first;
	unsigned short f;
	u32 tx_flags = 0;
	int tso;

	/* need: 1 descriptor per page * PAGE_SIZE/FM10K_MAX_DATA_PER_TXD,
	 *       + 1 desc for skb_headlen/FM10K_MAX_DATA_PER_TXD,
	 *       + 2 desc gap to keep tail from touching head
	 * otherwise try next time
	 */
	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
		count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);

	if (fm10k_maybe_stop_tx(tx_ring, count + 3)) {
		tx_ring->tx_stats.tx_busy++;
		return NETDEV_TX_BUSY;
	}

	/* record the location of the first descriptor for this packet */
	first = &tx_ring->tx_buffer[tx_ring->next_to_use];
	first->skb = skb;
	first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN);
	first->gso_segs = 1;

	/* record initial flags and protocol */
	first->tx_flags = tx_flags;

	tso = fm10k_tso(tx_ring, first);
	if (tso < 0)
		goto out_drop;
	else if (!tso)
		fm10k_tx_csum(tx_ring, first);

	fm10k_tx_map(tx_ring, first);

	return NETDEV_TX_OK;

out_drop:
	dev_kfree_skb_any(first->skb);
	first->skb = NULL;

	return NETDEV_TX_OK;
}

static u64 fm10k_get_tx_completed(struct fm10k_ring *ring)
{
	return ring->stats.packets;
}

/**
 * fm10k_get_tx_pending - how many Tx descriptors not processed
 * @ring: the ring structure
 * @in_sw: is tx_pending being checked in SW or in HW?
 */
u64 fm10k_get_tx_pending(struct fm10k_ring *ring, bool in_sw)
{
	struct fm10k_intfc *interface = ring->q_vector->interface;
	struct fm10k_hw *hw = &interface->hw;
	u32 head, tail;

	if (likely(in_sw)) {
		head = ring->next_to_clean;
		tail = ring->next_to_use;
	} else {
		head = fm10k_read_reg(hw, FM10K_TDH(ring->reg_idx));
		tail = fm10k_read_reg(hw, FM10K_TDT(ring->reg_idx));
	}

	return ((head <= tail) ? tail : tail + ring->count) - head;
}

bool fm10k_check_tx_hang(struct fm10k_ring *tx_ring)
{
	u32 tx_done = fm10k_get_tx_completed(tx_ring);
	u32 tx_done_old = tx_ring->tx_stats.tx_done_old;
	u32 tx_pending = fm10k_get_tx_pending(tx_ring, true);

	clear_check_for_tx_hang(tx_ring);

	/* Check for a hung queue, but be thorough.  This verifies
	 * that a transmit has been completed since the previous
	 * check AND there is at least one packet pending.  By
	 * requiring this to fail twice we avoid races with
	 * clearing the ARMED bit and conditions where we
	 * run the check_tx_hang logic with a transmit completion
	 * pending but without time to complete it yet.
	 */
	if (!tx_pending || (tx_done_old != tx_done)) {
		/* update completed stats and continue */
		tx_ring->tx_stats.tx_done_old = tx_done;
		/* reset the countdown */
		clear_bit(__FM10K_HANG_CHECK_ARMED, &tx_ring->state);

		return false;
	}

	/* make sure it is true for two checks in a row */
	return test_and_set_bit(__FM10K_HANG_CHECK_ARMED, &tx_ring->state);
}

/**
 * fm10k_tx_timeout_reset - initiate reset due to Tx timeout
 * @interface: driver private struct
 **/
void fm10k_tx_timeout_reset(struct fm10k_intfc *interface)
{
	/* Do the reset outside of interrupt context */
	if (!test_bit(__FM10K_DOWN, &interface->state)) {
		interface->tx_timeout_count++;
		interface->flags |= FM10K_FLAG_RESET_REQUESTED;
		fm10k_service_event_schedule(interface);
	}
}

/**
 * fm10k_clean_tx_irq - Reclaim resources after transmit completes
 * @q_vector: structure containing interrupt and ring information
 * @tx_ring: tx ring to clean
 * @napi_budget: Used to determine if we are in netpoll
 **/
static bool fm10k_clean_tx_irq(struct fm10k_q_vector *q_vector,
			       struct fm10k_ring *tx_ring, int napi_budget)
{
	struct fm10k_intfc *interface = q_vector->interface;
	struct fm10k_tx_buffer *tx_buffer;
	struct fm10k_tx_desc *tx_desc;
	unsigned int total_bytes = 0, total_packets = 0;
	unsigned int budget = q_vector->tx.work_limit;
	unsigned int i = tx_ring->next_to_clean;

	if (test_bit(__FM10K_DOWN, &interface->state))
		return true;

	tx_buffer = &tx_ring->tx_buffer[i];
	tx_desc = FM10K_TX_DESC(tx_ring, i);
	i -= tx_ring->count;

	do {
		struct fm10k_tx_desc *eop_desc = tx_buffer->next_to_watch;

		/* if next_to_watch is not set then there is no work pending */
		if (!eop_desc)
			break;

		/* prevent any other reads prior to eop_desc */
		read_barrier_depends();

		/* if DD is not set pending work has not been completed */
		if (!(eop_desc->flags & FM10K_TXD_FLAG_DONE))
			break;

		/* clear next_to_watch to prevent false hangs */
		tx_buffer->next_to_watch = NULL;

		/* update the statistics for this packet */
		total_bytes += tx_buffer->bytecount;
		total_packets += tx_buffer->gso_segs;

		/* free the skb */
		napi_consume_skb(tx_buffer->skb, napi_budget);

		/* unmap skb header data */
		dma_unmap_single(tx_ring->dev,
				 dma_unmap_addr(tx_buffer, dma),
				 dma_unmap_len(tx_buffer, len),
				 DMA_TO_DEVICE);

		/* clear tx_buffer data */
		tx_buffer->skb = NULL;
		dma_unmap_len_set(tx_buffer, len, 0);

		/* unmap remaining buffers */
		while (tx_desc != eop_desc) {
			tx_buffer++;
			tx_desc++;
			i++;
			if (unlikely(!i)) {
				i -= tx_ring->count;
				tx_buffer = tx_ring->tx_buffer;
				tx_desc = FM10K_TX_DESC(tx_ring, 0);
			}

			/* unmap any remaining paged data */
			if (dma_unmap_len(tx_buffer, len)) {
				dma_unmap_page(tx_ring->dev,
					       dma_unmap_addr(tx_buffer, dma),
					       dma_unmap_len(tx_buffer, len),
					       DMA_TO_DEVICE);
				dma_unmap_len_set(tx_buffer, len, 0);
			}
		}

		/* move us one more past the eop_desc for start of next pkt */
		tx_buffer++;
		tx_desc++;
		i++;
		if (unlikely(!i)) {
			i -= tx_ring->count;
			tx_buffer = tx_ring->tx_buffer;
			tx_desc = FM10K_TX_DESC(tx_ring, 0);
		}

		/* issue prefetch for next Tx descriptor */
		prefetch(tx_desc);

		/* update budget accounting */
		budget--;
	} while (likely(budget));

	i += tx_ring->count;
	tx_ring->next_to_clean = i;
	u64_stats_update_begin(&tx_ring->syncp);
	tx_ring->stats.bytes += total_bytes;
	tx_ring->stats.packets += total_packets;
	u64_stats_update_end(&tx_ring->syncp);
	q_vector->tx.total_bytes += total_bytes;
	q_vector->tx.total_packets += total_packets;

	if (check_for_tx_hang(tx_ring) && fm10k_check_tx_hang(tx_ring)) {
		/* schedule immediate reset if we believe we hung */
		struct fm10k_hw *hw = &interface->hw;

		netif_err(interface, drv, tx_ring->netdev,
			  "Detected Tx Unit Hang\n"
			  "  Tx Queue             <%d>\n"
			  "  TDH, TDT             <%x>, <%x>\n"
			  "  next_to_use          <%x>\n"
			  "  next_to_clean        <%x>\n",
			  tx_ring->queue_index,
			  fm10k_read_reg(hw, FM10K_TDH(tx_ring->reg_idx)),
			  fm10k_read_reg(hw, FM10K_TDT(tx_ring->reg_idx)),
			  tx_ring->next_to_use, i);

		netif_stop_subqueue(tx_ring->netdev,
				    tx_ring->queue_index);

		netif_info(interface, probe, tx_ring->netdev,
			   "tx hang %d detected on queue %d, resetting interface\n",
			   interface->tx_timeout_count + 1,
			   tx_ring->queue_index);

		fm10k_tx_timeout_reset(interface);

		/* the netdev is about to reset, no point in enabling stuff */
		return true;
	}

	/* notify netdev of completed buffers */
	netdev_tx_completed_queue(txring_txq(tx_ring),
				  total_packets, total_bytes);

#define TX_WAKE_THRESHOLD min_t(u16, FM10K_MIN_TXD - 1, DESC_NEEDED * 2)
	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
		     (fm10k_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) {
		/* Make sure that anybody stopping the queue after this
		 * sees the new next_to_clean.
		 */
		smp_mb();
		if (__netif_subqueue_stopped(tx_ring->netdev,
					     tx_ring->queue_index) &&
		    !test_bit(__FM10K_DOWN, &interface->state)) {
			netif_wake_subqueue(tx_ring->netdev,
					    tx_ring->queue_index);
			++tx_ring->tx_stats.restart_queue;
		}
	}

	return !!budget;
}

/**
 * fm10k_update_itr - update the dynamic ITR value based on packet size
 *
 * Stores a new ITR value based strictly on packet size.  The
 * divisors and thresholds used by this function were determined based
 * on theoretical maximum wire speed and testing data, in order to
 * minimize response time while increasing bulk throughput.
 *
 * @ring_container: Container for rings to have ITR updated
 **/
static void fm10k_update_itr(struct fm10k_ring_container *ring_container)
{
	unsigned int avg_wire_size, packets, itr_round;

	/* Only update ITR if we are using adaptive setting */
	if (!ITR_IS_ADAPTIVE(ring_container->itr))
		goto clear_counts;

	packets = ring_container->total_packets;
	if (!packets)
		goto clear_counts;

	avg_wire_size = ring_container->total_bytes / packets;

	/* The following is a crude approximation of:
	 *  wmem_default / (size + overhead) = desired_pkts_per_int
	 *  rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
	 *  (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
	 *
	 * Assuming wmem_default is 212992 and overhead is 640 bytes per
	 * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
	 * formula down to
	 *
	 *  (34 * (size + 24)) / (size + 640) = ITR
	 *
	 * We first do some math on the packet size and then finally bitshift
	 * by 8 after rounding up.  We also have to account for PCIe link speed
	 * difference as ITR scales based on this.
	 */
	if (avg_wire_size <= 360) {
		/* Start at 250K ints/sec and gradually drop to 77K ints/sec */
		avg_wire_size *= 8;
		avg_wire_size += 376;
	} else if (avg_wire_size <= 1152) {
		/* 77K ints/sec to 45K ints/sec */
		avg_wire_size *= 3;
		avg_wire_size += 2176;
	} else if (avg_wire_size <= 1920) {
		/* 45K ints/sec to 38K ints/sec */
		avg_wire_size += 4480;
	} else {
		/* plateau at a limit of 38K ints/sec */
		avg_wire_size = 6656;
	}

	/* Perform final bitshift for division after rounding up to ensure
	 * that the calculation will never get below a 1.  The bit shift
	 * accounts for changes in the ITR due to PCIe link speed.
	 */
	itr_round = READ_ONCE(ring_container->itr_scale) + 8;
	avg_wire_size += BIT(itr_round) - 1;
	avg_wire_size >>= itr_round;

	/* write back value and retain adaptive flag */
	ring_container->itr = avg_wire_size | FM10K_ITR_ADAPTIVE;

clear_counts:
	ring_container->total_bytes = 0;
	ring_container->total_packets = 0;
}

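/**
 * fm10k_qv_enable - Re-arm the interrupt for a queue vector
 * @q_vector: q_vector whose ITR register is written
 *
 * Updates the adaptive ITR values for the Tx and Rx rings, packs them into
 * the two interrupt timer slots, and writes the result (with auto-mask
 * enabled) to the vector's ITR register to re-enable the interrupt.
 **/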
1423 | static void fm10k_qv_enable(struct fm10k_q_vector *q_vector) | |
1424 | { | |
1425 | /* Enable auto-mask and clear the current mask */ | |
1426 | u32 itr = FM10K_ITR_ENABLE; | |
1427 | ||
1428 | /* Update Tx ITR */ | |
1429 | fm10k_update_itr(&q_vector->tx); | |
1430 | ||
1431 | /* Update Rx ITR */ | |
1432 | fm10k_update_itr(&q_vector->rx); | |
1433 | ||
1434 | /* Store Tx itr in timer slot 0 */ | |
1435 | itr |= (q_vector->tx.itr & FM10K_ITR_MAX); | |
1436 | ||
1437 | /* Shift Rx itr to timer slot 1 */ | |
1438 | itr |= (q_vector->rx.itr & FM10K_ITR_MAX) << FM10K_ITR_INTERVAL1_SHIFT; | |
1439 | ||
1440 | /* Write the final value to the ITR register */ | |
1441 | writel(itr, q_vector->itr); | |
1442 | } | |
1443 | ||
1444 | static int fm10k_poll(struct napi_struct *napi, int budget) | |
1445 | { | |
1446 | struct fm10k_q_vector *q_vector = | |
1447 | container_of(napi, struct fm10k_q_vector, napi); | |
b101c962 | 1448 | struct fm10k_ring *ring; |
32b3e08f | 1449 | int per_ring_budget, work_done = 0; |
b101c962 AD |
1450 | bool clean_complete = true; |
1451 | ||
144d8305 AD |
1452 | fm10k_for_each_ring(ring, q_vector->tx) { |
1453 | if (!fm10k_clean_tx_irq(q_vector, ring, budget)) | |
1454 | clean_complete = false; | |
1455 | } | |
b101c962 | 1456 | |
9f872986 AD |
1457 | /* Handle case where we are called by netpoll with a budget of 0 */ |
1458 | if (budget <= 0) | |
1459 | return budget; | |
1460 | ||
b101c962 AD |
1461 | /* attempt to distribute budget to each queue fairly, but don't |
1462 | * allow the budget to go below 1 because we'll exit polling | |
1463 | */ | |
1464 | if (q_vector->rx.count > 1) | |
a4fcad65 | 1465 | per_ring_budget = max(budget / q_vector->rx.count, 1); |
b101c962 AD |
1466 | else |
1467 | per_ring_budget = budget; | |
1468 | ||
32b3e08f JB |
1469 | fm10k_for_each_ring(ring, q_vector->rx) { |
1470 | int work = fm10k_clean_rx_irq(q_vector, ring, per_ring_budget); | |
1471 | ||
1472 | work_done += work; | |
144d8305 AD |
1473 | if (work >= per_ring_budget) |
1474 | clean_complete = false; | |
32b3e08f | 1475 | } |
b101c962 AD |
1476 | |
1477 | /* If all work not completed, return budget and keep polling */ | |
1478 | if (!clean_complete) | |
1479 | return budget; | |
18283cad AD |
1480 | |
1481 | /* all work done, exit the polling mode */ | |
32b3e08f | 1482 | napi_complete_done(napi, work_done); |
18283cad AD |
1483 | |
1484 | /* re-enable the q_vector */ | |
1485 | fm10k_qv_enable(q_vector); | |
1486 | ||
e5fbfb78 | 1487 | return min(work_done, budget - 1); |
18283cad AD |
1488 | } |
1489 | ||
aa3ac822 AD |
1490 | /** |
1491 | * fm10k_set_qos_queues: Allocate queues for a QOS-enabled device | |
1492 | * @interface: board private structure to initialize | |
1493 | * | |
1494 | * When QoS (Quality of Service) is enabled, allocate queues for | |
1495 | * each traffic class. If multiqueue isn't available,then abort QoS | |
1496 | * initialization. | |
1497 | * | |
1498 | * This function handles all combinations of Qos and RSS. | |
1499 | * | |
1500 | **/ | |
1501 | static bool fm10k_set_qos_queues(struct fm10k_intfc *interface) | |
1502 | { | |
1503 | struct net_device *dev = interface->netdev; | |
1504 | struct fm10k_ring_feature *f; | |
1505 | int rss_i, i; | |
1506 | int pcs; | |
1507 | ||
1508 | /* Map queue offset and counts onto allocated tx queues */ | |
1509 | pcs = netdev_get_num_tc(dev); | |
1510 | ||
1511 | if (pcs <= 1) | |
1512 | return false; | |
1513 | ||
1514 | /* set QoS mask and indices */ | |
1515 | f = &interface->ring_feature[RING_F_QOS]; | |
1516 | f->indices = pcs; | |
fcdb0a99 | 1517 | f->mask = BIT(fls(pcs - 1)) - 1; |
aa3ac822 AD |
1518 | |
1519 | /* determine the upper limit for our current DCB mode */ | |
1520 | rss_i = interface->hw.mac.max_queues / pcs; | |
fcdb0a99 | 1521 | rss_i = BIT(fls(rss_i) - 1); |
aa3ac822 AD |
1522 | |
1523 | /* set RSS mask and indices */ | |
1524 | f = &interface->ring_feature[RING_F_RSS]; | |
1525 | rss_i = min_t(u16, rss_i, f->limit); | |
1526 | f->indices = rss_i; | |
fcdb0a99 | 1527 | f->mask = BIT(fls(rss_i - 1)) - 1; |
aa3ac822 AD |
1528 | |
1529 | /* configure pause class to queue mapping */ | |
1530 | for (i = 0; i < pcs; i++) | |
1531 | netdev_set_tc_queue(dev, i, rss_i, rss_i * i); | |
1532 | ||
1533 | interface->num_rx_queues = rss_i * pcs; | |
1534 | interface->num_tx_queues = rss_i * pcs; | |
1535 | ||
1536 | return true; | |
1537 | } | |
1538 | ||
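Both fm10k_set_qos_queues() above and fm10k_set_rss_queues() below rely on the BIT(fls(n - 1)) - 1 idiom to build a power-of-two mask covering n entries, and BIT(fls(n) - 1) to round a queue count down to a power of two. Below is a standalone sketch with a portable fls() equivalent; demo_fls() and the sample counts are illustrative assumptions, not driver code.

#include <stdio.h>

/* Portable stand-in for the kernel's fls(): 1-based index of the
 * highest set bit, 0 for an input of 0.
 */
static unsigned int demo_fls(unsigned int x)
{
	unsigned int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

int main(void)
{
	unsigned int pcs = 3;                    /* traffic classes (assumed) */
	unsigned int max_queues = 128;           /* assumed HW queue count */
	unsigned int rss_i = max_queues / pcs;   /* 42 queues per class */

	/* mask covering pcs entries, rounded up to a power of two */
	unsigned int qos_mask = (1u << demo_fls(pcs - 1)) - 1;      /* 3 */

	/* round rss_i down to a power of two (rss_i must be non-zero) */
	rss_i = 1u << (demo_fls(rss_i) - 1);                        /* 32 */

	printf("qos mask 0x%x, rss_i %u, rss mask 0x%x\n",
	       qos_mask, rss_i, (1u << demo_fls(rss_i - 1)) - 1);
	return 0;
}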
1539 | /** | |
1540 | * fm10k_set_rss_queues: Allocate queues for RSS | |
1541 | * @interface: board private structure to initialize | |
1542 | * | |
1543 | * This is our "base" multiqueue mode. RSS (Receive Side Scaling) will try | |
1544 | * to allocate one Rx queue per CPU, and if available, one Tx queue per CPU. | |
1545 | * | |
1546 | **/ | |
1547 | static bool fm10k_set_rss_queues(struct fm10k_intfc *interface) | |
1548 | { | |
1549 | struct fm10k_ring_feature *f; | |
1550 | u16 rss_i; | |
1551 | ||
1552 | f = &interface->ring_feature[RING_F_RSS]; | |
1553 | rss_i = min_t(u16, interface->hw.mac.max_queues, f->limit); | |
1554 | ||
1555 | /* record indices and power of 2 mask for RSS */ | |
1556 | f->indices = rss_i; | |
fcdb0a99 | 1557 | f->mask = BIT(fls(rss_i - 1)) - 1; |
aa3ac822 AD |
1558 | |
1559 | interface->num_rx_queues = rss_i; | |
1560 | interface->num_tx_queues = rss_i; | |
1561 | ||
1562 | return true; | |
1563 | } | |
1564 | ||
18283cad AD |
1565 | /** |
1566 | * fm10k_set_num_queues: Allocate queues for device, feature dependent | |
1567 | * @interface: board private structure to initialize | |
1568 | * | |
1569 | * This is the top level queue allocation routine. The order here is very | |
1570 | * important, starting with the largest set of features turned on at once, | |
1571 | * and ending with the smallest set of features. This way large combinations | |
1572 | * can be allocated if they're turned on, and smaller combinations are the | |
1573 | * fallthrough conditions. | |
1574 | * | |
1575 | **/ | |
1576 | static void fm10k_set_num_queues(struct fm10k_intfc *interface) | |
1577 | { | |
b3525696 | 1578 | /* Attempt to setup QoS and RSS first */ |
aa3ac822 AD |
1579 | if (fm10k_set_qos_queues(interface)) |
1580 | return; | |
1581 | ||
b3525696 | 1582 | /* If we don't have QoS, just fallback to only RSS. */ |
aa3ac822 | 1583 | fm10k_set_rss_queues(interface); |
18283cad AD |
1584 | } |
1585 | ||
4be37c42 JK |
1586 | /** |
1587 | * fm10k_reset_num_queues - Reset the number of queues to zero | |
1588 | * @interface: board private structure | |
1589 | * | |
1590 | * This function should be called whenever we need to reset the number of | |
1591 | * queues after an error condition. | |
1592 | */ | |
1593 | static void fm10k_reset_num_queues(struct fm10k_intfc *interface) | |
1594 | { | |
1595 | interface->num_tx_queues = 0; | |
1596 | interface->num_rx_queues = 0; | |
1597 | interface->num_q_vectors = 0; | |
1598 | } | |
1599 | ||
18283cad AD |
1600 | /** |
1601 | * fm10k_alloc_q_vector - Allocate memory for a single interrupt vector | |
1602 | * @interface: board private structure to initialize | |
1603 | * @v_count: q_vectors allocated on interface, used for ring interleaving | |
1604 | * @v_idx: index of vector in interface struct | |
1605 | * @txr_count: total number of Tx rings to allocate | |
1606 | * @txr_idx: index of first Tx ring to allocate | |
1607 | * @rxr_count: total number of Rx rings to allocate | |
1608 | * @rxr_idx: index of first Rx ring to allocate | |
1609 | * | |
1610 | * We allocate one q_vector. If allocation fails we return -ENOMEM. | |
1611 | **/ | |
1612 | static int fm10k_alloc_q_vector(struct fm10k_intfc *interface, | |
1613 | unsigned int v_count, unsigned int v_idx, | |
1614 | unsigned int txr_count, unsigned int txr_idx, | |
1615 | unsigned int rxr_count, unsigned int rxr_idx) | |
1616 | { | |
1617 | struct fm10k_q_vector *q_vector; | |
e27ef599 | 1618 | struct fm10k_ring *ring; |
18283cad AD |
1619 | int ring_count, size; |
1620 | ||
1621 | ring_count = txr_count + rxr_count; | |
e27ef599 AD |
1622 | size = sizeof(struct fm10k_q_vector) + |
1623 | (sizeof(struct fm10k_ring) * ring_count); | |
18283cad AD |
1624 | |
1625 | /* allocate q_vector and rings */ | |
1626 | q_vector = kzalloc(size, GFP_KERNEL); | |
1627 | if (!q_vector) | |
1628 | return -ENOMEM; | |
1629 | ||
1630 | /* initialize NAPI */ | |
1631 | netif_napi_add(interface->netdev, &q_vector->napi, | |
1632 | fm10k_poll, NAPI_POLL_WEIGHT); | |
1633 | ||
1634 | /* tie q_vector and interface together */ | |
1635 | interface->q_vector[v_idx] = q_vector; | |
1636 | q_vector->interface = interface; | |
1637 | q_vector->v_idx = v_idx; | |
1638 | ||
e27ef599 AD |
1639 | /* initialize pointer to rings */ |
1640 | ring = q_vector->ring; | |
1641 | ||
18283cad | 1642 | /* save Tx ring container info */ |
e27ef599 AD |
1643 | q_vector->tx.ring = ring; |
1644 | q_vector->tx.work_limit = FM10K_DEFAULT_TX_WORK; | |
18283cad | 1645 | q_vector->tx.itr = interface->tx_itr; |
242722dd | 1646 | q_vector->tx.itr_scale = interface->hw.mac.itr_scale; |
18283cad AD |
1647 | q_vector->tx.count = txr_count; |
1648 | ||
e27ef599 AD |
1649 | while (txr_count) { |
1650 | /* assign generic ring traits */ | |
1651 | ring->dev = &interface->pdev->dev; | |
1652 | ring->netdev = interface->netdev; | |
1653 | ||
1654 | /* configure backlink on ring */ | |
1655 | ring->q_vector = q_vector; | |
1656 | ||
1657 | /* apply Tx specific ring traits */ | |
1658 | ring->count = interface->tx_ring_count; | |
1659 | ring->queue_index = txr_idx; | |
1660 | ||
1661 | /* assign ring to interface */ | |
1662 | interface->tx_ring[txr_idx] = ring; | |
1663 | ||
1664 | /* update count and index */ | |
1665 | txr_count--; | |
1666 | txr_idx += v_count; | |
1667 | ||
1668 | /* push pointer to next ring */ | |
1669 | ring++; | |
1670 | } | |
1671 | ||
18283cad | 1672 | /* save Rx ring container info */ |
e27ef599 | 1673 | q_vector->rx.ring = ring; |
18283cad | 1674 | q_vector->rx.itr = interface->rx_itr; |
242722dd | 1675 | q_vector->rx.itr_scale = interface->hw.mac.itr_scale; |
18283cad AD |
1676 | q_vector->rx.count = rxr_count; |
1677 | ||
e27ef599 AD |
1678 | while (rxr_count) { |
1679 | /* assign generic ring traits */ | |
1680 | ring->dev = &interface->pdev->dev; | |
1681 | ring->netdev = interface->netdev; | |
5cd5e2e9 | 1682 | rcu_assign_pointer(ring->l2_accel, interface->l2_accel); |
e27ef599 AD |
1683 | |
1684 | /* configure backlink on ring */ | |
1685 | ring->q_vector = q_vector; | |
1686 | ||
1687 | /* apply Rx specific ring traits */ | |
1688 | ring->count = interface->rx_ring_count; | |
1689 | ring->queue_index = rxr_idx; | |
1690 | ||
1691 | /* assign ring to interface */ | |
1692 | interface->rx_ring[rxr_idx] = ring; | |
1693 | ||
1694 | /* update count and index */ | |
1695 | rxr_count--; | |
1696 | rxr_idx += v_count; | |
1697 | ||
1698 | /* push pointer to next ring */ | |
1699 | ring++; | |
1700 | } | |
1701 | ||
7461fd91 AD |
1702 | fm10k_dbg_q_vector_init(q_vector); |
1703 | ||
18283cad AD |
1704 | return 0; |
1705 | } | |
1706 | ||
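Note how both ring loops above advance the interface ring index by v_count rather than by 1: the rings owned by a given vector are interleaved across the interface's ring array instead of forming a contiguous block. The sketch below shows the resulting layout; the vector and per-vector ring counts are assumptions chosen for illustration.

#include <stdio.h>

int main(void)
{
	unsigned int v_count = 4;   /* q_vectors on the interface (assumed) */
	unsigned int per_vec = 2;   /* Tx rings handed to each vector (assumed) */
	unsigned int v_idx;

	for (v_idx = 0; v_idx < v_count; v_idx++) {
		unsigned int txr_idx = v_idx;  /* first ring for this vector */
		unsigned int n;

		printf("q_vector %u owns tx_ring[", v_idx);
		for (n = 0; n < per_vec; n++) {
			printf("%s%u", n ? ", " : "", txr_idx);
			txr_idx += v_count;    /* same stride as the driver loop */
		}
		printf("]\n");
	}
	return 0;
}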
1707 | /** | |
1708 | * fm10k_free_q_vector - Free memory allocated for specific interrupt vector | |
1709 | * @interface: board private structure to initialize | |
1710 | * @v_idx: Index of vector to be freed | |
1711 | * | |
1712 | * This function frees the memory allocated to the q_vector. In addition if | |
1713 | * NAPI is enabled it will delete any references to the NAPI struct prior | |
1714 | * to freeing the q_vector. | |
1715 | **/ | |
1716 | static void fm10k_free_q_vector(struct fm10k_intfc *interface, int v_idx) | |
1717 | { | |
1718 | struct fm10k_q_vector *q_vector = interface->q_vector[v_idx]; | |
e27ef599 AD |
1719 | struct fm10k_ring *ring; |
1720 | ||
7461fd91 AD |
1721 | fm10k_dbg_q_vector_exit(q_vector); |
1722 | ||
e27ef599 AD |
1723 | fm10k_for_each_ring(ring, q_vector->tx) |
1724 | interface->tx_ring[ring->queue_index] = NULL; | |
1725 | ||
1726 | fm10k_for_each_ring(ring, q_vector->rx) | |
1727 | interface->rx_ring[ring->queue_index] = NULL; | |
18283cad AD |
1728 | |
1729 | interface->q_vector[v_idx] = NULL; | |
1730 | netif_napi_del(&q_vector->napi); | |
1731 | kfree_rcu(q_vector, rcu); | |
1732 | } | |
1733 | ||
1734 | /** | |
1735 | * fm10k_alloc_q_vectors - Allocate memory for interrupt vectors | |
1736 | * @interface: board private structure to initialize | |
1737 | * | |
1738 | * We allocate one q_vector per queue interrupt. If allocation fails we | |
1739 | * return -ENOMEM. | |
1740 | **/ | |
1741 | static int fm10k_alloc_q_vectors(struct fm10k_intfc *interface) | |
1742 | { | |
1743 | unsigned int q_vectors = interface->num_q_vectors; | |
1744 | unsigned int rxr_remaining = interface->num_rx_queues; | |
1745 | unsigned int txr_remaining = interface->num_tx_queues; | |
1746 | unsigned int rxr_idx = 0, txr_idx = 0, v_idx = 0; | |
1747 | int err; | |
1748 | ||
1749 | if (q_vectors >= (rxr_remaining + txr_remaining)) { | |
1750 | for (; rxr_remaining; v_idx++) { | |
1751 | err = fm10k_alloc_q_vector(interface, q_vectors, v_idx, | |
1752 | 0, 0, 1, rxr_idx); | |
1753 | if (err) | |
1754 | goto err_out; | |
1755 | ||
1756 | /* update counts and index */ | |
1757 | rxr_remaining--; | |
1758 | rxr_idx++; | |
1759 | } | |
1760 | } | |
1761 | ||
1762 | for (; v_idx < q_vectors; v_idx++) { | |
1763 | int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx); | |
1764 | int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx); | |
1765 | ||
1766 | err = fm10k_alloc_q_vector(interface, q_vectors, v_idx, | |
1767 | tqpv, txr_idx, | |
1768 | rqpv, rxr_idx); | |
1769 | ||
1770 | if (err) | |
1771 | goto err_out; | |
1772 | ||
1773 | /* update counts and index */ | |
1774 | rxr_remaining -= rqpv; | |
1775 | txr_remaining -= tqpv; | |
1776 | rxr_idx++; | |
1777 | txr_idx++; | |
1778 | } | |
1779 | ||
1780 | return 0; | |
1781 | ||
1782 | err_out: | |
4be37c42 | 1783 | fm10k_reset_num_queues(interface); |
18283cad AD |
1784 | |
1785 | while (v_idx--) | |
1786 | fm10k_free_q_vector(interface, v_idx); | |
1787 | ||
1788 | return -ENOMEM; | |
1789 | } | |
1790 | ||
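fm10k_alloc_q_vectors() hands out the remaining Tx and Rx rings with DIV_ROUND_UP over the vectors still to be filled, so the counts stay as even as possible without leaving any ring unassigned. A standalone sketch of that distribution follows; the vector and queue counts are assumptions.

#include <stdio.h>

#define DEMO_DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int q_vectors = 3;          /* assumed vector count */
	unsigned int rxr_remaining = 8;      /* assumed Rx queue count */
	unsigned int txr_remaining = 8;      /* assumed Tx queue count */
	unsigned int v_idx;

	for (v_idx = 0; v_idx < q_vectors; v_idx++) {
		unsigned int rqpv = DEMO_DIV_ROUND_UP(rxr_remaining,
						      q_vectors - v_idx);
		unsigned int tqpv = DEMO_DIV_ROUND_UP(txr_remaining,
						      q_vectors - v_idx);

		printf("vector %u: %u Tx, %u Rx\n", v_idx, tqpv, rqpv);

		rxr_remaining -= rqpv;
		txr_remaining -= tqpv;
	}
	return 0;
}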
1791 | /** | |
1792 | * fm10k_free_q_vectors - Free memory allocated for interrupt vectors | |
1793 | * @interface: board private structure to initialize | |
1794 | * | |
1795 | * This function frees the memory allocated to the q_vectors. In addition if | |
1796 | * NAPI is enabled it will delete any references to the NAPI struct prior | |
1797 | * to freeing the q_vector. | |
1798 | **/ | |
1799 | static void fm10k_free_q_vectors(struct fm10k_intfc *interface) | |
1800 | { | |
1801 | int v_idx = interface->num_q_vectors; | |
1802 | ||
4be37c42 | 1803 | fm10k_reset_num_queues(interface); |
18283cad AD |
1804 | |
1805 | while (v_idx--) | |
1806 | fm10k_free_q_vector(interface, v_idx); | |
1807 | } | |
1808 | ||
1809 | /** | |
1810 | * fm10k_reset_msix_capability - reset MSI-X capability | |
1811 | * @interface: board private structure to initialize | |
1812 | * | |
1813 | * Reset the MSI-X capability back to its starting state | |
1814 | **/ | |
1815 | static void fm10k_reset_msix_capability(struct fm10k_intfc *interface) | |
1816 | { | |
1817 | pci_disable_msix(interface->pdev); | |
1818 | kfree(interface->msix_entries); | |
1819 | interface->msix_entries = NULL; | |
1820 | } | |
1821 | ||
1822 | /** | |
1823 | * fm10k_init_msix_capability - configure MSI-X capability | |
1824 | * @interface: board private structure to initialize | |
1825 | * | |
1826 | * Attempt to configure the interrupts using the best available | |
1827 | * capabilities of the hardware and the kernel. | |
1828 | **/ | |
1829 | static int fm10k_init_msix_capability(struct fm10k_intfc *interface) | |
1830 | { | |
1831 | struct fm10k_hw *hw = &interface->hw; | |
1832 | int v_budget, vector; | |
1833 | ||
1834 | /* It's easy to be greedy for MSI-X vectors, but it really | |
1835 | * doesn't do us much good if we have a lot more vectors | |
1836 | * than CPUs. So let's be conservative and only ask for | |
1837 | * (roughly) the same number of vectors as there are CPUs. | |
1838 | * The default is to use pairs of vectors. | |
1839 | */ | |
1840 | v_budget = max(interface->num_rx_queues, interface->num_tx_queues); | |
1841 | v_budget = min_t(u16, v_budget, num_online_cpus()); | |
1842 | ||
1843 | /* account for vectors not related to queues */ | |
1844 | v_budget += NON_Q_VECTORS(hw); | |
1845 | ||
1846 | /* At the same time, hardware can only support a maximum of | |
1847 | * hw.mac->max_msix_vectors vectors. With features | |
1848 | * such as RSS and VMDq, we can easily surpass the number of Rx and Tx | |
1849 | * descriptor queues supported by our device. Thus, we cap it off in | |
1850 | * those rare cases where the cpu count also exceeds our vector limit. | |
1851 | */ | |
1852 | v_budget = min_t(int, v_budget, hw->mac.max_msix_vectors); | |
1853 | ||
1854 | /* A failure in MSI-X entry allocation is fatal. */ | |
1855 | interface->msix_entries = kcalloc(v_budget, sizeof(struct msix_entry), | |
1856 | GFP_KERNEL); | |
1857 | if (!interface->msix_entries) | |
1858 | return -ENOMEM; | |
1859 | ||
1860 | /* populate entry values */ | |
1861 | for (vector = 0; vector < v_budget; vector++) | |
1862 | interface->msix_entries[vector].entry = vector; | |
1863 | ||
1864 | /* Attempt to enable MSI-X with requested value */ | |
1865 | v_budget = pci_enable_msix_range(interface->pdev, | |
1866 | interface->msix_entries, | |
1867 | MIN_MSIX_COUNT(hw), | |
1868 | v_budget); | |
1869 | if (v_budget < 0) { | |
1870 | kfree(interface->msix_entries); | |
1871 | interface->msix_entries = NULL; | |
30e23b71 | 1872 | return v_budget; |
18283cad AD |
1873 | } |
1874 | ||
1875 | /* record the number of queues available for q_vectors */ | |
1876 | interface->num_q_vectors = v_budget - NON_Q_VECTORS(hw); | |
1877 | ||
1878 | return 0; | |
1879 | } | |
1880 | ||
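The vector budget above works out to roughly one vector per queue pair, clamped by the online CPU count and the device's MSI-X ceiling, plus the non-queue vectors (mailbox, etc.). The sketch below repeats that arithmetic outside the driver; the non-queue vector count, CPU count, and hardware limit used here are assumptions.

#include <stdio.h>

static unsigned int demo_min(unsigned int a, unsigned int b) { return a < b ? a : b; }
static unsigned int demo_max(unsigned int a, unsigned int b) { return a > b ? a : b; }

int main(void)
{
	unsigned int num_rx_queues = 16, num_tx_queues = 16;  /* assumed */
	unsigned int online_cpus = 8;                         /* assumed */
	unsigned int non_q_vectors = 1;                       /* assumed (mailbox) */
	unsigned int max_msix_vectors = 64;                   /* assumed HW limit */
	unsigned int v_budget;

	v_budget = demo_max(num_rx_queues, num_tx_queues);   /* one per queue pair */
	v_budget = demo_min(v_budget, online_cpus);          /* no more than CPUs */
	v_budget += non_q_vectors;                           /* mailbox, etc. */
	v_budget = demo_min(v_budget, max_msix_vectors);     /* HW ceiling */

	printf("requesting %u MSI-X vectors (%u for queues)\n",
	       v_budget, v_budget - non_q_vectors);
	return 0;
}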
aa3ac822 AD |
1881 | /** |
1882 | * fm10k_cache_ring_qos - Descriptor ring to register mapping for QoS | |
1883 | * @interface: Interface structure containing rings and devices | |
1884 | * | |
1885 | * Cache the descriptor ring offsets for QoS | |
1886 | **/ | |
1887 | static bool fm10k_cache_ring_qos(struct fm10k_intfc *interface) | |
1888 | { | |
1889 | struct net_device *dev = interface->netdev; | |
1890 | int pc, offset, rss_i, i, q_idx; | |
1891 | u16 pc_stride = interface->ring_feature[RING_F_QOS].mask + 1; | |
1892 | u8 num_pcs = netdev_get_num_tc(dev); | |
1893 | ||
1894 | if (num_pcs <= 1) | |
1895 | return false; | |
1896 | ||
1897 | rss_i = interface->ring_feature[RING_F_RSS].indices; | |
1898 | ||
1899 | for (pc = 0, offset = 0; pc < num_pcs; pc++, offset += rss_i) { | |
1900 | q_idx = pc; | |
1901 | for (i = 0; i < rss_i; i++) { | |
1902 | interface->tx_ring[offset + i]->reg_idx = q_idx; | |
1903 | interface->tx_ring[offset + i]->qos_pc = pc; | |
1904 | interface->rx_ring[offset + i]->reg_idx = q_idx; | |
1905 | interface->rx_ring[offset + i]->qos_pc = pc; | |
1906 | q_idx += pc_stride; | |
1907 | } | |
1908 | } | |
1909 | ||
1910 | return true; | |
1911 | } | |
1912 | ||
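The nested loop above strides the register index by pc_stride (the QoS mask plus one, i.e. the traffic-class count rounded up to a power of two), so the i-th RSS ring of pause class pc lands on register pc + i * pc_stride. The sketch below prints the resulting mapping; the class and ring counts are assumptions.

#include <stdio.h>

int main(void)
{
	unsigned int num_pcs = 4;     /* assumed traffic classes */
	unsigned int rss_i = 4;       /* assumed RSS rings per class */
	unsigned int pc_stride = 4;   /* QoS mask + 1 for num_pcs = 4 */
	unsigned int pc, i;

	for (pc = 0; pc < num_pcs; pc++) {
		unsigned int q_idx = pc;

		printf("pc %u -> reg_idx", pc);
		for (i = 0; i < rss_i; i++) {
			printf(" %u", q_idx);
			q_idx += pc_stride;   /* same stride as the driver */
		}
		printf("\n");
	}
	return 0;
}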
1913 | /** | |
1914 | * fm10k_cache_ring_rss - Descriptor ring to register mapping for RSS | |
1915 | * @interface: Interface structure containing rings and devices | |
1916 | * | |
1917 | * Cache the descriptor ring offsets for RSS | |
1918 | **/ | |
1919 | static void fm10k_cache_ring_rss(struct fm10k_intfc *interface) | |
1920 | { | |
1921 | int i; | |
1922 | ||
1923 | for (i = 0; i < interface->num_rx_queues; i++) | |
1924 | interface->rx_ring[i]->reg_idx = i; | |
1925 | ||
1926 | for (i = 0; i < interface->num_tx_queues; i++) | |
1927 | interface->tx_ring[i]->reg_idx = i; | |
1928 | } | |
1929 | ||
1930 | /** | |
1931 | * fm10k_assign_rings - Map rings to network devices | |
1932 | * @interface: Interface structure containing rings and devices | |
1933 | * | |
1934 | * This function is meant to go through and configure both the network | |
1935 | * devices so that they contain rings, and configure the rings so that | |
1936 | * they function with their network devices. | |
1937 | **/ | |
1938 | static void fm10k_assign_rings(struct fm10k_intfc *interface) | |
1939 | { | |
1940 | if (fm10k_cache_ring_qos(interface)) | |
1941 | return; | |
1942 | ||
1943 | fm10k_cache_ring_rss(interface); | |
1944 | } | |
1945 | ||
18283cad AD |
1946 | static void fm10k_init_reta(struct fm10k_intfc *interface) |
1947 | { | |
1948 | u16 i, rss_i = interface->ring_feature[RING_F_RSS].indices; | |
540a5d85 | 1949 | u32 reta; |
18283cad | 1950 | |
1012014e KJ |
1951 | /* If the Rx flow indirection table has been configured manually, we |
1952 | * need to maintain it when possible. | |
1953 | */ | |
1954 | if (netif_is_rxfh_configured(interface->netdev)) { | |
18283cad AD |
1955 | for (i = FM10K_RETA_SIZE; i--;) { |
1956 | reta = interface->reta[i]; | |
1957 | if ((((reta << 24) >> 24) < rss_i) && | |
1958 | (((reta << 16) >> 24) < rss_i) && | |
1959 | (((reta << 8) >> 24) < rss_i) && | |
1960 | (((reta) >> 24) < rss_i)) | |
1961 | continue; | |
1012014e KJ |
1962 | |
1963 | /* this should never happen */ | |
1964 | dev_err(&interface->pdev->dev, | |
1965 | "RSS indirection table assigned flows out of queue bounds. Reconfiguring.\n"); | |
18283cad AD |
1966 | goto repopulate_reta; |
1967 | } | |
1968 | ||
1969 | /* do nothing if all of the elements are in bounds */ | |
1970 | return; | |
1971 | } | |
1972 | ||
1973 | repopulate_reta: | |
540a5d85 | 1974 | fm10k_write_reta(interface, NULL); |
18283cad AD |
1975 | } |
1976 | ||
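Each entry of interface->reta[] packs four 8-bit queue indices into one 32-bit word; the shift pairs above isolate each byte and compare it against the active RSS queue count. Below is a standalone sketch of the same check; demo_reta_entry_ok() and the sample entry are made up for illustration.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* True if all four queue indices packed in one RETA word are < rss_i. */
static bool demo_reta_entry_ok(uint32_t reta, uint32_t rss_i)
{
	unsigned int n;

	for (n = 0; n < 4; n++) {
		/* equivalent to the (reta << x) >> 24 byte extraction above */
		if (((reta >> (8 * n)) & 0xff) >= rss_i)
			return false;
	}
	return true;
}

int main(void)
{
	uint32_t entry = 0x03000102;   /* made-up entry: queues 2, 1, 0, 3 */

	printf("rss_i=4: %s\n", demo_reta_entry_ok(entry, 4) ? "ok" : "out of bounds");
	printf("rss_i=2: %s\n", demo_reta_entry_ok(entry, 2) ? "ok" : "out of bounds");
	return 0;
}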
1977 | /** | |
1978 | * fm10k_init_queueing_scheme - Determine proper queueing scheme | |
1979 | * @interface: board private structure to initialize | |
1980 | * | |
1981 | * We determine which queueing scheme to use based on... | |
1982 | * - Hardware queue count (num_*_queues) | |
1983 | * - defined by miscellaneous hardware support/features (RSS, etc.) | |
1984 | **/ | |
1985 | int fm10k_init_queueing_scheme(struct fm10k_intfc *interface) | |
1986 | { | |
1987 | int err; | |
1988 | ||
1989 | /* Number of supported queues */ | |
1990 | fm10k_set_num_queues(interface); | |
1991 | ||
1992 | /* Configure MSI-X capability */ | |
1993 | err = fm10k_init_msix_capability(interface); | |
1994 | if (err) { | |
1995 | dev_err(&interface->pdev->dev, | |
1996 | "Unable to initialize MSI-X capability\n"); | |
4be37c42 | 1997 | goto err_init_msix; |
18283cad AD |
1998 | } |
1999 | ||
2000 | /* Allocate memory for queues */ | |
2001 | err = fm10k_alloc_q_vectors(interface); | |
587731e6 | 2002 | if (err) { |
4be37c42 JK |
2003 | dev_err(&interface->pdev->dev, |
2004 | "Unable to allocate queue vectors\n"); | |
2005 | goto err_alloc_q_vectors; | |
587731e6 | 2006 | } |
18283cad | 2007 | |
aa3ac822 AD |
2008 | /* Map rings to devices, and map devices to physical queues */ |
2009 | fm10k_assign_rings(interface); | |
2010 | ||
18283cad AD |
2011 | /* Initialize RSS redirection table */ |
2012 | fm10k_init_reta(interface); | |
2013 | ||
2014 | return 0; | |
4be37c42 JK |
2015 | |
2016 | err_alloc_q_vectors: | |
2017 | fm10k_reset_msix_capability(interface); | |
2018 | err_init_msix: | |
2019 | fm10k_reset_num_queues(interface); | |
2020 | return err; | |
18283cad AD |
2021 | } |
2022 | ||
2023 | /** | |
2024 | * fm10k_clear_queueing_scheme - Clear the current queueing scheme settings | |
2025 | * @interface: board private structure to clear queueing scheme on | |
2026 | * | |
2027 | * We go through and clear queueing specific resources and reset the structure | |
2028 | * to pre-load conditions | |
2029 | **/ | |
2030 | void fm10k_clear_queueing_scheme(struct fm10k_intfc *interface) | |
2031 | { | |
2032 | fm10k_free_q_vectors(interface); | |
2033 | fm10k_reset_msix_capability(interface); | |
2034 | } |