/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		PACKET - implements raw packet sockets.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *		Alan Cox	:	verify_area() now used correctly
 *		Alan Cox	:	new skbuff lists, look ma no backlogs!
 *		Alan Cox	:	tidied skbuff lists.
 *		Alan Cox	:	Now uses generic datagram routines I
 *					added. Also fixed the peek/read crash
 *					from all old Linux datagram code.
 *		Alan Cox	:	Uses the improved datagram code.
 *		Alan Cox	:	Added NULL's for socket options.
 *		Alan Cox	:	Re-commented the code.
 *		Alan Cox	:	Use new kernel side addressing
 *		Rob Janssen	:	Correct MTU usage.
 *		Dave Platt	:	Counter leaks caused by incorrect
 *					interrupt locking and some slightly
 *					dubious gcc output. Can you read
 *					compiler: it said _VOLATILE_
 *	Richard Kooijman	:	Timestamp fixes.
 *		Alan Cox	:	New buffers. Use sk->mac.raw.
 *		Alan Cox	:	sendmsg/recvmsg support.
 *		Alan Cox	:	Protocol setting support
 *	Alexey Kuznetsov	:	Untied from IPv4 stack.
 *	Cyrus Durgin		:	Fixed kerneld for kmod.
 *	Michal Ostrowski	:	Module initialization cleanup.
 *	Ulises Alonso		:	Frame number limit removal and
 *					packet_set_ring memory leak.
 *	Eric Biederman		:	Allow for > 8 byte hardware addresses.
 *					The convention is that longer addresses
 *					will simply extend the hardware address
 *					byte arrays at the end of sockaddr_ll
 *					and packet_mreq.
 *	Johann Baudy		:	Added TX RING.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 */

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/if_vlan.h>
#include <linux/virtio_net.h>
#include <linux/errqueue.h>
#include <linux/net_tstamp.h>

#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif

/*
   Assumptions:
   - if device has no dev->hard_header routine, it adds and removes the ll
     header inside itself. In this case the ll header is invisible outside
     of the device, but higher levels still should reserve
     dev->hard_header_len.
     Some devices are clever enough to reallocate the skb when the header
     will not fit into the reserved space (tunnels); others are silly
     (PPP).
   - a packet socket receives packets with the ll header pulled,
     so that SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> data

Outgoing, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> ll header

Incoming, dev->hard_header==NULL
   mac_header -> UNKNOWN position. It is very likely that it points to the
		 ll header. PPP does this, which is wrong, because it
		 introduces asymmetry between the rx and tx paths.
   data       -> data

Outgoing, dev->hard_header==NULL
   mac_header -> data. ll header is still not built!
   data       -> data

Resume
  If dev->hard_header==NULL we are unlikely to restore a sensible ll header.


On transmit:
------------

dev->hard_header != NULL
   mac_header -> ll header
   data       -> ll header

dev->hard_header == NULL (ll header is added by the device, we cannot
control it)
   mac_header -> data
   data       -> data

   We should set nh.raw on output to the correct position,
   the packet classifier depends on it.
 */
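
/*
 * Illustrative userspace sketch of the invariants above (assumptions:
 * the buffer size is arbitrary and error handling is omitted). They are
 * what makes a SOCK_RAW packet socket deliver the link-layer header
 * while SOCK_DGRAM strips it:
 */
#if 0
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>

static void example_rx(void)
{
	char buf[2048];
	/* SOCK_RAW: buf starts with the Ethernet header */
	int raw = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	/* SOCK_DGRAM: the ll header is pulled, buf starts at the payload */
	int dgram = socket(AF_PACKET, SOCK_DGRAM, htons(ETH_P_ALL));

	recv(raw, buf, sizeof(buf), 0);
	recv(dgram, buf, sizeof(buf), 0);
}
#endif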

/* Private packet socket structures. */

struct packet_mclist {
	struct packet_mclist	*next;
	int			ifindex;
	int			count;
	unsigned short		type;
	unsigned short		alen;
	unsigned char		addr[MAX_ADDR_LEN];
};

/* identical to struct packet_mreq except it has
 * a longer address field.
 */
struct packet_mreq_max {
	int		mr_ifindex;
	unsigned short	mr_type;
	unsigned short	mr_alen;
	unsigned char	mr_address[MAX_ADDR_LEN];
};

static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
		int closing, int tx_ring);

#define PGV_FROM_VMALLOC 1
struct pgv {
	char *buffer;
};

struct packet_ring_buffer {
	struct pgv		*pg_vec;
	unsigned int		head;
	unsigned int		frames_per_block;
	unsigned int		frame_size;
	unsigned int		frame_max;

	unsigned int		pg_vec_order;
	unsigned int		pg_vec_pages;
	unsigned int		pg_vec_len;

	atomic_t		pending;
};

struct packet_sock;
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);

static void packet_flush_mclist(struct sock *sk);

struct packet_sock {
	/* struct sock has to be the first member of packet_sock */
	struct sock		sk;
	struct tpacket_stats	stats;
	struct packet_ring_buffer	rx_ring;
	struct packet_ring_buffer	tx_ring;
	int			copy_thresh;
	spinlock_t		bind_lock;
	struct mutex		pg_vec_lock;
	unsigned int		running:1,	/* prot_hook is attached */
				auxdata:1,
				origdev:1,
				has_vnet_hdr:1;
	int			ifindex;	/* bound device */
	__be16			num;
	struct packet_mclist	*mclist;
	atomic_t		mapped;
	enum tpacket_versions	tp_version;
	unsigned int		tp_hdrlen;
	unsigned int		tp_reserve;
	unsigned int		tp_loss:1;
	unsigned int		tp_tstamp;
	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
};

struct packet_skb_cb {
	unsigned int origlen;
	union {
		struct sockaddr_pkt pkt;
		struct sockaddr_ll ll;
	} sa;
};

#define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))

static inline struct page *pgv_to_page(void *addr)
{
	if (is_vmalloc_addr(addr))
		return vmalloc_to_page(addr);
	return virt_to_page(addr);
}

static void __packet_set_status(struct packet_sock *po, void *frame, int status)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
		break;
	case TPACKET_V2:
		h.h2->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
		break;
	default:
		pr_err("TPACKET version not supported\n");
		BUG();
	}

	smp_wmb();
}

static int __packet_get_status(struct packet_sock *po, void *frame)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	smp_rmb();

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
		return h.h1->tp_status;
	case TPACKET_V2:
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
		return h.h2->tp_status;
	default:
		pr_err("TPACKET version not supported\n");
		BUG();
		return 0;
	}
}
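
/*
 * tp_status is the per-frame ownership handshake with user space: RX
 * frames cycle TP_STATUS_KERNEL -> TP_STATUS_USER -> TP_STATUS_KERNEL,
 * while TX frames cycle TP_STATUS_SEND_REQUEST -> TP_STATUS_SENDING ->
 * TP_STATUS_AVAILABLE (see tpacket_snd() below). The smp_wmb()/smp_rmb()
 * pair above orders the status update against accesses to the frame
 * payload.
 */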

static void *packet_lookup_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		unsigned int position,
		int status)
{
	unsigned int pg_vec_pos, frame_offset;
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	pg_vec_pos = position / rb->frames_per_block;
	frame_offset = position % rb->frames_per_block;

	h.raw = rb->pg_vec[pg_vec_pos].buffer +
		(frame_offset * rb->frame_size);

	if (status != __packet_get_status(po, h.raw))
		return NULL;

	return h.raw;
}

static inline void *packet_current_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status)
{
	return packet_lookup_frame(po, rb, rb->head, status);
}

static inline void *packet_previous_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status)
{
	unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
	return packet_lookup_frame(po, rb, previous, status);
}

static inline void packet_increment_head(struct packet_ring_buffer *buff)
{
	buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
}
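
/*
 * Worked example of the lookup arithmetic above (illustrative numbers):
 * with tp_block_size = 4096 and tp_frame_size = 2048, frames_per_block
 * is 2, so frame number 5 lives in pg_vec[5 / 2 = 2] at byte offset
 * (5 % 2) * 2048 = 2048 within that block.
 */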

static inline struct packet_sock *pkt_sk(struct sock *sk)
{
	return (struct packet_sock *)sk;
}

static void packet_sock_destruct(struct sock *sk)
{
	skb_queue_purge(&sk->sk_error_queue);

	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
	WARN_ON(atomic_read(&sk->sk_wmem_alloc));

	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_err("Attempt to release alive packet socket: %p\n", sk);
		return;
	}

	sk_refcnt_debug_dec(sk);
}


static const struct proto_ops packet_ops;

static const struct proto_ops packet_ops_spkt;

static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
			   struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_pkt *spkt;

	/*
	 * When we registered the protocol we saved the socket in the data
	 * field for just this event.
	 */

	sk = pt->af_packet_priv;

	/*
	 * Yank back the headers [hope the device set this
	 * right or kerboom...]
	 *
	 * Incoming packets have the ll header pulled,
	 * push it back.
	 *
	 * For outgoing ones skb->data == skb_mac_header(skb)
	 * so that this procedure is a noop.
	 */

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto out;

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto out;

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (skb == NULL)
		goto oom;

	/* drop any routing info */
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset(skb);

	spkt = &PACKET_SKB_CB(skb)->sa.pkt;

	skb_push(skb, skb->data - skb_mac_header(skb));

	/*
	 * The SOCK_PACKET socket receives _all_ frames.
	 */

	spkt->spkt_family = dev->type;
	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
	spkt->spkt_protocol = skb->protocol;

	/*
	 * Charge the memory to the socket. This is done specifically
	 * to prevent sockets using all the memory up.
	 */

	if (sock_queue_rcv_skb(sk, skb) == 0)
		return 0;

out:
	kfree_skb(skb);
oom:
	return 0;
}


/*
 * Output a raw packet to a device layer. This bypasses all the other
 * protocol layers and you must therefore supply it with a complete frame
 */

static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
	struct sk_buff *skb = NULL;
	struct net_device *dev;
	__be16 proto = 0;
	int err;

	/*
	 *	Get and verify the address.
	 */

	if (saddr) {
		if (msg->msg_namelen < sizeof(struct sockaddr))
			return -EINVAL;
		if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
			proto = saddr->spkt_protocol;
	} else
		return -ENOTCONN;	/* SOCK_PACKET must be sent giving an address */

	/*
	 *	Find the device first to size-check it
	 */

	saddr->spkt_device[13] = 0;
retry:
	rcu_read_lock();
	dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
	err = -ENODEV;
	if (dev == NULL)
		goto out_unlock;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	/*
	 * You may not queue a frame bigger than the mtu. This is the lowest level
	 * raw protocol and you must do your own fragmentation at this level.
	 */

	err = -EMSGSIZE;
	if (len > dev->mtu + dev->hard_header_len)
		goto out_unlock;

	if (!skb) {
		size_t reserved = LL_RESERVED_SPACE(dev);
		unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;

		rcu_read_unlock();
		skb = sock_wmalloc(sk, len + reserved, 0, GFP_KERNEL);
		if (skb == NULL)
			return -ENOBUFS;
		/* FIXME: Save some space for broken drivers that write a hard
		 * header at transmission time by themselves. PPP is the notable
		 * one here. This should really be fixed at the driver level.
		 */
		skb_reserve(skb, reserved);
		skb_reset_network_header(skb);

		/* Try to align data part correctly */
		if (hhlen) {
			skb->data -= hhlen;
			skb->tail -= hhlen;
			if (len < hhlen)
				skb_reset_network_header(skb);
		}
		err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
		if (err)
			goto out_free;
		goto retry;
	}


	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;
	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
	if (err < 0)
		goto out_unlock;

	dev_queue_xmit(skb);
	rcu_read_unlock();
	return len;

out_unlock:
	rcu_read_unlock();
out_free:
	kfree_skb(skb);
	return err;
}
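
/*
 * Illustrative userspace counterpart (assumptions: fd, frame and
 * frame_len are placeholders, "eth0" is an example device). A complete
 * link-layer frame reaches packet_sendmsg_spkt() via sendto() on a
 * SOCK_PACKET socket:
 */
#if 0
struct sockaddr_pkt spkt = { .spkt_family = AF_PACKET };

strncpy((char *)spkt.spkt_device, "eth0", sizeof(spkt.spkt_device));
spkt.spkt_protocol = htons(ETH_P_IP);
sendto(fd, frame, frame_len, 0, (struct sockaddr *)&spkt, sizeof(spkt));
#endif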

static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
				      unsigned int res)
{
	struct sk_filter *filter;

	rcu_read_lock_bh();
	filter = rcu_dereference_bh(sk->sk_filter);
	if (filter != NULL)
		res = sk_run_filter(skb, filter->insns);
	rcu_read_unlock_bh();

	return res;
}
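
/*
 * Illustrative userspace sketch: the sk_filter consulted above is
 * installed with SO_ATTACH_FILTER. This one-instruction program accepts
 * every packet up to 256 KiB (an assumption for the example; real
 * programs are usually generated, e.g. by tcpdump -dd):
 */
#if 0
#include <linux/filter.h>
#include <sys/socket.h>

static int example_attach_filter(int fd)
{
	struct sock_filter code[] = {
		{ BPF_RET | BPF_K, 0, 0, 0x00040000 },	/* accept 256 KiB */
	};
	struct sock_fprog prog = {
		.len	= sizeof(code) / sizeof(code[0]),
		.filter	= code,
	};

	return setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
			  &prog, sizeof(prog));
}
#endif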

/*
   This function makes lazy skb cloning in hope that most of packets
   are discarded by BPF.

   Note tricky part: we DO mangle shared skb! skb->data, skb->len
   and skb->cb are mangled. It works because (and until) packets
   falling here are owned by the current CPU. Output packets are cloned
   by dev_queue_xmit_nit(), input packets are processed by net_bh
   sequentially, so that if we return the skb to its original state on
   exit, we will not harm anyone.
 */

static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
		      struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_ll *sll;
	struct packet_sock *po;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto drop;

	skb->dev = dev;

	if (dev->header_ops) {
		/* The device has an explicit notion of ll header,
		   exported to higher levels.

		   Otherwise, the device hides details of its frame
		   structure, so that the corresponding packet head is
		   never delivered to the user.
		 */
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned)sk->sk_rcvbuf)
		goto drop_n_acct;

	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
		if (nskb == NULL)
			goto drop_n_acct;

		if (skb_head != skb->data) {
			skb->data = skb_head;
			skb->len = skb_len;
		}
		kfree_skb(skb);
		skb = nskb;
	}

	BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
		     sizeof(skb->cb));

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);

	PACKET_SKB_CB(skb)->origlen = skb->len;

	if (pskb_trim(skb, snaplen))
		goto drop_n_acct;

	skb_set_owner_r(skb, sk);
	skb->dev = NULL;
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset(skb);

	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.tp_packets++;
	skb->dropcount = atomic_read(&sk->sk_drops);
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	spin_unlock(&sk->sk_receive_queue.lock);
	sk->sk_data_ready(sk, skb->len);
	return 0;

drop_n_acct:
	po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	consume_skb(skb);
	return 0;
}

static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
		       struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct packet_sock *po;
	struct sockaddr_ll *sll;
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;
	unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
	unsigned short macoff, netoff, hdrlen;
	struct sk_buff *copy_skb = NULL;
	struct timeval tv;
	struct timespec ts;
	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto drop;

	if (dev->header_ops) {
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	if (skb->ip_summed == CHECKSUM_PARTIAL)
		status |= TP_STATUS_CSUMNOTREADY;

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (sk->sk_type == SOCK_DGRAM) {
		macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
				  po->tp_reserve;
	} else {
		unsigned maclen = skb_network_offset(skb);
		netoff = TPACKET_ALIGN(po->tp_hdrlen +
				       (maclen < 16 ? 16 : maclen)) +
			 po->tp_reserve;
		macoff = netoff - maclen;
	}

	if (macoff + snaplen > po->rx_ring.frame_size) {
		if (po->copy_thresh &&
		    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
		    (unsigned)sk->sk_rcvbuf) {
			if (skb_shared(skb)) {
				copy_skb = skb_clone(skb, GFP_ATOMIC);
			} else {
				copy_skb = skb_get(skb);
				skb_head = skb->data;
			}
			if (copy_skb)
				skb_set_owner_r(copy_skb, sk);
		}
		snaplen = po->rx_ring.frame_size - macoff;
		if ((int)snaplen < 0)
			snaplen = 0;
	}

	spin_lock(&sk->sk_receive_queue.lock);
	h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL);
	if (!h.raw)
		goto ring_is_full;
	packet_increment_head(&po->rx_ring);
	po->stats.tp_packets++;
	if (copy_skb) {
		status |= TP_STATUS_COPY;
		__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
	}
	if (!po->stats.tp_drops)
		status &= ~TP_STATUS_LOSING;
	spin_unlock(&sk->sk_receive_queue.lock);

	skb_copy_bits(skb, 0, h.raw + macoff, snaplen);

	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_len = skb->len;
		h.h1->tp_snaplen = snaplen;
		h.h1->tp_mac = macoff;
		h.h1->tp_net = netoff;
		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
				&& shhwtstamps->syststamp.tv64)
			tv = ktime_to_timeval(shhwtstamps->syststamp);
		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
				&& shhwtstamps->hwtstamp.tv64)
			tv = ktime_to_timeval(shhwtstamps->hwtstamp);
		else if (skb->tstamp.tv64)
			tv = ktime_to_timeval(skb->tstamp);
		else
			do_gettimeofday(&tv);
		h.h1->tp_sec = tv.tv_sec;
		h.h1->tp_usec = tv.tv_usec;
		hdrlen = sizeof(*h.h1);
		break;
	case TPACKET_V2:
		h.h2->tp_len = skb->len;
		h.h2->tp_snaplen = snaplen;
		h.h2->tp_mac = macoff;
		h.h2->tp_net = netoff;
		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
				&& shhwtstamps->syststamp.tv64)
			ts = ktime_to_timespec(shhwtstamps->syststamp);
		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
				&& shhwtstamps->hwtstamp.tv64)
			ts = ktime_to_timespec(shhwtstamps->hwtstamp);
		else if (skb->tstamp.tv64)
			ts = ktime_to_timespec(skb->tstamp);
		else
			getnstimeofday(&ts);
		h.h2->tp_sec = ts.tv_sec;
		h.h2->tp_nsec = ts.tv_nsec;
		h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
		hdrlen = sizeof(*h.h2);
		break;
	default:
		BUG();
	}

	sll = h.raw + TPACKET_ALIGN(hdrlen);
	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	__packet_set_status(po, h.raw, status);
	smp_mb();
	{
		u8 *start, *end;

		end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen);
		for (start = h.raw; start < end; start += PAGE_SIZE)
			flush_dcache_page(pgv_to_page(start));
	}

	sk->sk_data_ready(sk, 0);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	kfree_skb(skb);
	return 0;

ring_is_full:
	po->stats.tp_drops++;
	spin_unlock(&sk->sk_receive_queue.lock);

	sk->sk_data_ready(sk, 0);
	kfree_skb(copy_skb);
	goto drop_n_restore;
}

static void tpacket_destruct_skb(struct sk_buff *skb)
{
	struct packet_sock *po = pkt_sk(skb->sk);
	void *ph;

	BUG_ON(skb == NULL);

	if (likely(po->tx_ring.pg_vec)) {
		ph = skb_shinfo(skb)->destructor_arg;
		BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
		BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
		atomic_dec(&po->tx_ring.pending);
		__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
	}

	sock_wfree(skb);
}

static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
		void *frame, struct net_device *dev, int size_max,
		__be16 proto, unsigned char *addr)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} ph;
	int to_write, offset, len, tp_len, nr_frags, len_max;
	struct socket *sock = po->sk.sk_socket;
	struct page *page;
	void *data;
	int err;

	ph.raw = frame;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = po->sk.sk_priority;
	skb->mark = po->sk.sk_mark;
	skb_shinfo(skb)->destructor_arg = ph.raw;

	switch (po->tp_version) {
	case TPACKET_V2:
		tp_len = ph.h2->tp_len;
		break;
	default:
		tp_len = ph.h1->tp_len;
		break;
	}
	if (unlikely(tp_len > size_max)) {
		pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
		return -EMSGSIZE;
	}

	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	skb_reset_network_header(skb);

	data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
	to_write = tp_len;

	if (sock->type == SOCK_DGRAM) {
		err = dev_hard_header(skb, dev, ntohs(proto), addr,
				      NULL, tp_len);
		if (unlikely(err < 0))
			return -EINVAL;
	} else if (dev->hard_header_len) {
		/* net device doesn't like empty head */
		if (unlikely(tp_len <= dev->hard_header_len)) {
			pr_err("packet size is too short (%d < %d)\n",
			       tp_len, dev->hard_header_len);
			return -EINVAL;
		}

		skb_push(skb, dev->hard_header_len);
		err = skb_store_bits(skb, 0, data,
				     dev->hard_header_len);
		if (unlikely(err))
			return err;

		data += dev->hard_header_len;
		to_write -= dev->hard_header_len;
	}

	err = -EFAULT;
	offset = offset_in_page(data);
	len_max = PAGE_SIZE - offset;
	len = ((to_write > len_max) ? len_max : to_write);

	skb->data_len = to_write;
	skb->len += to_write;
	skb->truesize += to_write;
	atomic_add(to_write, &po->sk.sk_wmem_alloc);

	while (likely(to_write)) {
		nr_frags = skb_shinfo(skb)->nr_frags;

		if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
			pr_err("Packet exceeds the number of skb frags (%lu)\n",
			       MAX_SKB_FRAGS);
			return -EFAULT;
		}

		page = pgv_to_page(data);
		data += len;
		flush_dcache_page(page);
		get_page(page);
		skb_fill_page_desc(skb, nr_frags, page, offset, len);
		to_write -= len;
		offset = 0;
		len_max = PAGE_SIZE;
		len = ((to_write > len_max) ? len_max : to_write);
	}

	return tp_len;
}

static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
{
	struct socket *sock;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	int ifindex, err, reserve = 0;
	void *ph;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	int tp_len, size_max;
	unsigned char *addr;
	int len_sum = 0;
	int status = 0;

	sock = po->sk.sk_socket;

	mutex_lock(&po->pg_vec_lock);

	err = -EBUSY;
	if (saddr == NULL) {
		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen
					+ offsetof(struct sockaddr_ll,
						sll_addr)))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}

	dev = dev_get_by_index(sock_net(&po->sk), ifindex);
	err = -ENXIO;
	if (unlikely(dev == NULL))
		goto out;

	reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (unlikely(!(dev->flags & IFF_UP)))
		goto out_put;

	size_max = po->tx_ring.frame_size
		   - (po->tp_hdrlen - sizeof(struct sockaddr_ll));

	if (size_max > dev->mtu + reserve)
		size_max = dev->mtu + reserve;

	do {
		ph = packet_current_frame(po, &po->tx_ring,
					  TP_STATUS_SEND_REQUEST);

		if (unlikely(ph == NULL)) {
			schedule();
			continue;
		}

		status = TP_STATUS_SEND_REQUEST;
		skb = sock_alloc_send_skb(&po->sk,
				LL_ALLOCATED_SPACE(dev)
				+ sizeof(struct sockaddr_ll),
				0, &err);

		if (unlikely(skb == NULL))
			goto out_status;

		tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
					  addr);

		if (unlikely(tp_len < 0)) {
			if (po->tp_loss) {
				__packet_set_status(po, ph,
						TP_STATUS_AVAILABLE);
				packet_increment_head(&po->tx_ring);
				kfree_skb(skb);
				continue;
			} else {
				status = TP_STATUS_WRONG_FORMAT;
				err = tp_len;
				goto out_status;
			}
		}

		skb->destructor = tpacket_destruct_skb;
		__packet_set_status(po, ph, TP_STATUS_SENDING);
		atomic_inc(&po->tx_ring.pending);

		status = TP_STATUS_SEND_REQUEST;
		err = dev_queue_xmit(skb);
		if (unlikely(err > 0)) {
			err = net_xmit_errno(err);
			if (err && __packet_get_status(po, ph) ==
				   TP_STATUS_AVAILABLE) {
				/* skb was destructed already */
				skb = NULL;
				goto out_status;
			}
			/*
			 * skb was dropped but not destructed yet;
			 * let's treat it like congestion or err < 0
			 */
			err = 0;
		}
		packet_increment_head(&po->tx_ring);
		len_sum += tp_len;
	} while (likely((ph != NULL) ||
			((!(msg->msg_flags & MSG_DONTWAIT)) &&
			 (atomic_read(&po->tx_ring.pending))))
		);

	err = len_sum;
	goto out_put;

out_status:
	__packet_set_status(po, ph, status);
	kfree_skb(skb);
out_put:
	dev_put(dev);
out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}
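
/*
 * Illustrative userspace counterpart (a sketch; fd, hdr, frame and len
 * are placeholders and TPACKET_V1 framing is assumed). A TX ring
 * producer fills a frame, marks it TP_STATUS_SEND_REQUEST and kicks the
 * kernel with send(); tpacket_snd() above then walks such frames:
 */
#if 0
static void example_tx_ring_kick(int fd, struct tpacket_hdr *hdr,
				 const void *frame, unsigned int len)
{
	/* payload starts right after the aligned tpacket header */
	memcpy((char *)hdr + TPACKET_HDRLEN - sizeof(struct sockaddr_ll),
	       frame, len);
	hdr->tp_len = len;
	hdr->tp_status = TP_STATUS_SEND_REQUEST;
	send(fd, NULL, 0, 0);	/* flush all pending frames */
}
#endif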

static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
					       size_t reserve, size_t len,
					       size_t linear, int noblock,
					       int *err)
{
	struct sk_buff *skb;

	/* Under a page? Don't bother with paged skb. */
	if (prepad + len < PAGE_SIZE || !linear)
		linear = len;

	skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
				   err);
	if (!skb)
		return NULL;

	skb_reserve(skb, reserve);
	skb_put(skb, linear);
	skb->data_len = len - linear;
	skb->len += len - linear;

	return skb;
}

static int packet_snd(struct socket *sock,
		      struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	unsigned char *addr;
	int ifindex, err, reserve = 0;
	struct virtio_net_hdr vnet_hdr = { 0 };
	int offset = 0;
	int vnet_hdr_len;
	struct packet_sock *po = pkt_sk(sk);
	unsigned short gso_type = 0;

	/*
	 *	Get and verify the address.
	 */

	if (saddr == NULL) {
		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}


	dev = dev_get_by_index(sock_net(sk), ifindex);
	err = -ENXIO;
	if (dev == NULL)
		goto out_unlock;
	if (sock->type == SOCK_RAW)
		reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	if (po->has_vnet_hdr) {
		vnet_hdr_len = sizeof(vnet_hdr);

		err = -EINVAL;
		if (len < vnet_hdr_len)
			goto out_unlock;

		len -= vnet_hdr_len;

		err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov,
				       vnet_hdr_len);
		if (err < 0)
			goto out_unlock;

		if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
		    (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
		     vnet_hdr.hdr_len))
			vnet_hdr.hdr_len = vnet_hdr.csum_start +
						 vnet_hdr.csum_offset + 2;

		err = -EINVAL;
		if (vnet_hdr.hdr_len > len)
			goto out_unlock;

		if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
			switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
			case VIRTIO_NET_HDR_GSO_TCPV4:
				gso_type = SKB_GSO_TCPV4;
				break;
			case VIRTIO_NET_HDR_GSO_TCPV6:
				gso_type = SKB_GSO_TCPV6;
				break;
			case VIRTIO_NET_HDR_GSO_UDP:
				gso_type = SKB_GSO_UDP;
				break;
			default:
				goto out_unlock;
			}

			if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
				gso_type |= SKB_GSO_TCP_ECN;

			if (vnet_hdr.gso_size == 0)
				goto out_unlock;

		}
	}

	err = -EMSGSIZE;
	if (!gso_type && (len > dev->mtu+reserve))
		goto out_unlock;

	err = -ENOBUFS;
	skb = packet_alloc_skb(sk, LL_ALLOCATED_SPACE(dev),
			       LL_RESERVED_SPACE(dev), len, vnet_hdr.hdr_len,
			       msg->msg_flags & MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out_unlock;

	skb_set_network_header(skb, reserve);

	err = -EINVAL;
	if (sock->type == SOCK_DGRAM &&
	    (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0)
		goto out_free;

	/* Returns -EFAULT on error */
	err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
	if (err)
		goto out_free;
	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
	if (err < 0)
		goto out_free;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	if (po->has_vnet_hdr) {
		if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
			if (!skb_partial_csum_set(skb, vnet_hdr.csum_start,
						  vnet_hdr.csum_offset)) {
				err = -EINVAL;
				goto out_free;
			}
		}

		skb_shinfo(skb)->gso_size = vnet_hdr.gso_size;
		skb_shinfo(skb)->gso_type = gso_type;

		/* Header must be checked, and gso_segs computed. */
		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
		skb_shinfo(skb)->gso_segs = 0;

		len += vnet_hdr_len;
	}

	/*
	 *	Now send it
	 */

	err = dev_queue_xmit(skb);
	if (err > 0 && (err = net_xmit_errno(err)) != 0)
		goto out_unlock;

	dev_put(dev);

	return len;

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
out:
	return err;
}

static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	if (po->tx_ring.pg_vec)
		return tpacket_snd(po, msg);
	else
		return packet_snd(sock, msg, len);
}

/*
 *	Close a PACKET socket. This is fairly simple. We immediately go
 *	to 'closed' state and remove our protocol entry in the device list.
 */

static int packet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po;
	struct net *net;
	struct tpacket_req req;

	if (!sk)
		return 0;

	net = sock_net(sk);
	po = pkt_sk(sk);

	spin_lock_bh(&net->packet.sklist_lock);
	sk_del_node_init_rcu(sk);
	sock_prot_inuse_add(net, sk->sk_prot, -1);
	spin_unlock_bh(&net->packet.sklist_lock);

	spin_lock(&po->bind_lock);
	if (po->running) {
		/*
		 * Remove from protocol table
		 */
		po->running = 0;
		po->num = 0;
		__dev_remove_pack(&po->prot_hook);
		__sock_put(sk);
	}
	spin_unlock(&po->bind_lock);

	packet_flush_mclist(sk);

	memset(&req, 0, sizeof(req));

	if (po->rx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 0);

	if (po->tx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 1);

	synchronize_net();
	/*
	 * Now the socket is dead. No more input will appear.
	 */
	sock_orphan(sk);
	sock->sk = NULL;

	/* Purge queues */

	skb_queue_purge(&sk->sk_receive_queue);
	sk_refcnt_debug_release(sk);

	sock_put(sk);
	return 0;
}

/*
 *	Attach a packet hook.
 */

static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
{
	struct packet_sock *po = pkt_sk(sk);
	/*
	 *	Detach an existing hook if present.
	 */

	lock_sock(sk);

	spin_lock(&po->bind_lock);
	if (po->running) {
		__sock_put(sk);
		po->running = 0;
		po->num = 0;
		spin_unlock(&po->bind_lock);
		dev_remove_pack(&po->prot_hook);
		spin_lock(&po->bind_lock);
	}

	po->num = protocol;
	po->prot_hook.type = protocol;
	po->prot_hook.dev = dev;

	po->ifindex = dev ? dev->ifindex : 0;

	if (protocol == 0)
		goto out_unlock;

	if (!dev || (dev->flags & IFF_UP)) {
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	} else {
		sk->sk_err = ENETDOWN;
		if (!sock_flag(sk, SOCK_DEAD))
			sk->sk_error_report(sk);
	}

out_unlock:
	spin_unlock(&po->bind_lock);
	release_sock(sk);
	return 0;
}

/*
 *	Bind a packet socket to a device
 */

static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
			    int addr_len)
{
	struct sock *sk = sock->sk;
	char name[15];
	struct net_device *dev;
	int err = -ENODEV;

	/*
	 *	Check legality
	 */

	if (addr_len != sizeof(struct sockaddr))
		return -EINVAL;
	strlcpy(name, uaddr->sa_data, sizeof(name));

	dev = dev_get_by_name(sock_net(sk), name);
	if (dev) {
		err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
		dev_put(dev);
	}
	return err;
}

static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
	struct sock *sk = sock->sk;
	struct net_device *dev = NULL;
	int err;


	/*
	 *	Check legality
	 */

	if (addr_len < sizeof(struct sockaddr_ll))
		return -EINVAL;
	if (sll->sll_family != AF_PACKET)
		return -EINVAL;

	if (sll->sll_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
		if (dev == NULL)
			goto out;
	}
	err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
	if (dev)
		dev_put(dev);

out:
	return err;
}
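
/*
 * Illustrative userspace counterpart (assumptions: fd is an AF_PACKET
 * socket and "eth0" is a placeholder interface name):
 */
#if 0
struct sockaddr_ll sll = {
	.sll_family	= AF_PACKET,
	.sll_protocol	= htons(ETH_P_ALL),
	.sll_ifindex	= if_nametoindex("eth0"),
};

bind(fd, (struct sockaddr *)&sll, sizeof(sll));
#endif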

static struct proto packet_proto = {
	.name	  = "PACKET",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct packet_sock),
};

/*
 *	Create a packet of type SOCK_PACKET.
 */

static int packet_create(struct net *net, struct socket *sock, int protocol,
			 int kern)
{
	struct sock *sk;
	struct packet_sock *po;
	__be16 proto = (__force __be16)protocol; /* weird, but documented */
	int err;

	if (!capable(CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
	    sock->type != SOCK_PACKET)
		return -ESOCKTNOSUPPORT;

	sock->state = SS_UNCONNECTED;

	err = -ENOBUFS;
	sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
	if (sk == NULL)
		goto out;

	sock->ops = &packet_ops;
	if (sock->type == SOCK_PACKET)
		sock->ops = &packet_ops_spkt;

	sock_init_data(sock, sk);

	po = pkt_sk(sk);
	sk->sk_family = PF_PACKET;
	po->num = proto;

	sk->sk_destruct = packet_sock_destruct;
	sk_refcnt_debug_inc(sk);

	/*
	 *	Attach a protocol block
	 */

	spin_lock_init(&po->bind_lock);
	mutex_init(&po->pg_vec_lock);
	po->prot_hook.func = packet_rcv;

	if (sock->type == SOCK_PACKET)
		po->prot_hook.func = packet_rcv_spkt;

	po->prot_hook.af_packet_priv = sk;

	if (proto) {
		po->prot_hook.type = proto;
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	}

	spin_lock_bh(&net->packet.sklist_lock);
	sk_add_node_rcu(sk, &net->packet.sklist);
	sock_prot_inuse_add(net, &packet_proto, 1);
	spin_unlock_bh(&net->packet.sklist_lock);

	return 0;
out:
	return err;
}

static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len)
{
	struct sock_exterr_skb *serr;
	struct sk_buff *skb, *skb2;
	int copied, err;

	err = -EAGAIN;
	skb = skb_dequeue(&sk->sk_error_queue);
	if (skb == NULL)
		goto out;

	copied = skb->len;
	if (copied > len) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}
	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free_skb;

	sock_recv_timestamp(msg, sk, skb);

	serr = SKB_EXT_ERR(skb);
	put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP,
		 sizeof(serr->ee), &serr->ee);

	msg->msg_flags |= MSG_ERRQUEUE;
	err = copied;

	/* Reset and regenerate socket error */
	spin_lock_bh(&sk->sk_error_queue.lock);
	sk->sk_err = 0;
	if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
		sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
		spin_unlock_bh(&sk->sk_error_queue.lock);
		sk->sk_error_report(sk);
	} else
		spin_unlock_bh(&sk->sk_error_queue.lock);

out_free_skb:
	kfree_skb(skb);
out:
	return err;
}

/*
 *	Pull a packet from our receive queue and hand it to the user.
 *	If necessary we block.
 */

static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len, int flags)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int copied, err;
	struct sockaddr_ll *sll;
	int vnet_hdr_len = 0;

	err = -EINVAL;
	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
		goto out;

#if 0
	/* What error should we return now? EUNATTACH? */
	if (pkt_sk(sk)->ifindex < 0)
		return -ENODEV;
#endif

	if (flags & MSG_ERRQUEUE) {
		err = packet_recv_error(sk, msg, len);
		goto out;
	}

	/*
	 *	Call the generic datagram receiver. This handles all sorts
	 *	of horrible races and re-entrancy so we can forget about it
	 *	in the protocol layers.
	 *
	 *	Now it will return ENETDOWN, if the device has just gone down,
	 *	but then it will block.
	 */

	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);

	/*
	 *	An error occurred so return it. Because skb_recv_datagram()
	 *	handles the blocking we don't see and worry about blocking
	 *	retries.
	 */

	if (skb == NULL)
		goto out;

	if (pkt_sk(sk)->has_vnet_hdr) {
		struct virtio_net_hdr vnet_hdr = { 0 };

		err = -EINVAL;
		vnet_hdr_len = sizeof(vnet_hdr);
		if (len < vnet_hdr_len)
			goto out_free;

		len -= vnet_hdr_len;

		if (skb_is_gso(skb)) {
			struct skb_shared_info *sinfo = skb_shinfo(skb);

			/* This is a hint as to how much should be linear. */
			vnet_hdr.hdr_len = skb_headlen(skb);
			vnet_hdr.gso_size = sinfo->gso_size;
			if (sinfo->gso_type & SKB_GSO_TCPV4)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
			else if (sinfo->gso_type & SKB_GSO_TCPV6)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
			else if (sinfo->gso_type & SKB_GSO_UDP)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
			else if (sinfo->gso_type & SKB_GSO_FCOE)
				goto out_free;
			else
				BUG();
			if (sinfo->gso_type & SKB_GSO_TCP_ECN)
				vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
		} else
			vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;

		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			vnet_hdr.csum_start = skb->csum_start -
					      skb_headroom(skb);
			vnet_hdr.csum_offset = skb->csum_offset;
		} /* else everything is zero */

		err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr,
				     vnet_hdr_len);
		if (err < 0)
			goto out_free;
	}

	/*
	 *	If the address length field is there to be filled in, we fill
	 *	it in now.
	 */

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	if (sock->type == SOCK_PACKET)
		msg->msg_namelen = sizeof(struct sockaddr_pkt);
	else
		msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);

	/*
	 *	You lose any data beyond the buffer you gave. If it worries
	 *	a user program they can ask the device for its MTU anyway.
	 */

	copied = skb->len;
	if (copied > len) {
		copied = len;
		msg->msg_flags |= MSG_TRUNC;
	}

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free;

	sock_recv_ts_and_drops(msg, sk, skb);

	if (msg->msg_name)
		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
		       msg->msg_namelen);

	if (pkt_sk(sk)->auxdata) {
		struct tpacket_auxdata aux;

		aux.tp_status = TP_STATUS_USER;
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			aux.tp_status |= TP_STATUS_CSUMNOTREADY;
		aux.tp_len = PACKET_SKB_CB(skb)->origlen;
		aux.tp_snaplen = skb->len;
		aux.tp_mac = 0;
		aux.tp_net = skb_network_offset(skb);
		aux.tp_vlan_tci = vlan_tx_tag_get(skb);

		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
	}

	/*
	 *	Free or return the buffer as appropriate. Again this
	 *	hides all the races and re-entrancy issues from us.
	 */
	err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);

out_free:
	skb_free_datagram(sk, skb);
out:
	return err;
}
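
/*
 * Illustrative userspace counterpart (a sketch; fd is assumed to have
 * PACKET_AUXDATA enabled via setsockopt()). The tpacket_auxdata filled
 * in above arrives as a control message:
 */
#if 0
static void example_read_auxdata(int fd)
{
	char buf[2048];
	char ctrl[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
	struct msghdr msg = {
		.msg_iov	= &iov,
		.msg_iovlen	= 1,
		.msg_control	= ctrl,
		.msg_controllen	= sizeof(ctrl),
	};
	struct cmsghdr *cmsg;

	if (recvmsg(fd, &msg, 0) < 0)
		return;
	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
		if (cmsg->cmsg_level == SOL_PACKET &&
		    cmsg->cmsg_type == PACKET_AUXDATA) {
			struct tpacket_auxdata *aux = (void *)CMSG_DATA(cmsg);
			/* aux->tp_snaplen, aux->tp_vlan_tci, ... */
		}
	}
}
#endif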

static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
			       int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;

	if (peer)
		return -EOPNOTSUPP;

	uaddr->sa_family = AF_PACKET;
	rcu_read_lock();
	dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
	if (dev)
		strncpy(uaddr->sa_data, dev->name, 14);
	else
		memset(uaddr->sa_data, 0, 14);
	rcu_read_unlock();
	*uaddr_len = sizeof(*uaddr);

	return 0;
}

static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
			  int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);

	if (peer)
		return -EOPNOTSUPP;

	sll->sll_family = AF_PACKET;
	sll->sll_ifindex = po->ifindex;
	sll->sll_protocol = po->num;
	sll->sll_pkttype = 0;
	rcu_read_lock();
	dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
	if (dev) {
		sll->sll_hatype = dev->type;
		sll->sll_halen = dev->addr_len;
		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
	} else {
		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
		sll->sll_halen = 0;
	}
	rcu_read_unlock();
	*uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;

	return 0;
}

static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
			 int what)
{
	switch (i->type) {
	case PACKET_MR_MULTICAST:
		if (i->alen != dev->addr_len)
			return -EINVAL;
		if (what > 0)
			return dev_mc_add(dev, i->addr);
		else
			return dev_mc_del(dev, i->addr);
		break;
	case PACKET_MR_PROMISC:
		return dev_set_promiscuity(dev, what);
		break;
	case PACKET_MR_ALLMULTI:
		return dev_set_allmulti(dev, what);
		break;
	case PACKET_MR_UNICAST:
		if (i->alen != dev->addr_len)
			return -EINVAL;
		if (what > 0)
			return dev_uc_add(dev, i->addr);
		else
			return dev_uc_del(dev, i->addr);
		break;
	default:
		break;
	}
	return 0;
}

static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
{
	for ( ; i; i = i->next) {
		if (i->ifindex == dev->ifindex)
			packet_dev_mc(dev, i, what);
	}
}

static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml, *i;
	struct net_device *dev;
	int err;

	rtnl_lock();

	err = -ENODEV;
	dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
	if (!dev)
		goto done;

	err = -EINVAL;
	if (mreq->mr_alen > dev->addr_len)
		goto done;

	err = -ENOBUFS;
	i = kmalloc(sizeof(*i), GFP_KERNEL);
	if (i == NULL)
		goto done;

	err = 0;
	for (ml = po->mclist; ml; ml = ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			ml->count++;
			/* Free the new element ... */
			kfree(i);
			goto done;
		}
	}

	i->type = mreq->mr_type;
	i->ifindex = mreq->mr_ifindex;
	i->alen = mreq->mr_alen;
	memcpy(i->addr, mreq->mr_address, i->alen);
	i->count = 1;
	i->next = po->mclist;
	po->mclist = i;
	err = packet_dev_mc(dev, i, 1);
	if (err) {
		po->mclist = i->next;
		kfree(i);
	}

done:
	rtnl_unlock();
	return err;
}

static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_mclist *ml, **mlp;

	rtnl_lock();

	for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			if (--ml->count == 0) {
				struct net_device *dev;
				*mlp = ml->next;
				dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
				if (dev)
					packet_dev_mc(dev, ml, -1);
				kfree(ml);
			}
			rtnl_unlock();
			return 0;
		}
	}
	rtnl_unlock();
	return -EADDRNOTAVAIL;
}
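
/*
 * Illustrative userspace counterpart (assumption: fd and ifindex are
 * placeholders). PACKET_ADD_MEMBERSHIP with PACKET_MR_PROMISC reaches
 * packet_mc_add() above and, via packet_dev_mc(), bumps the device's
 * promiscuity count:
 */
#if 0
struct packet_mreq mreq = {
	.mr_ifindex	= ifindex,
	.mr_type	= PACKET_MR_PROMISC,
};

setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq, sizeof(mreq));
#endif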

static void packet_flush_mclist(struct sock *sk)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml;

	if (!po->mclist)
		return;

	rtnl_lock();
	while ((ml = po->mclist) != NULL) {
		struct net_device *dev;

		po->mclist = ml->next;
		dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
		if (dev != NULL)
			packet_dev_mc(dev, ml, -1);
		kfree(ml);
	}
	rtnl_unlock();
}

static int
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	int ret;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	switch (optname) {
	case PACKET_ADD_MEMBERSHIP:
	case PACKET_DROP_MEMBERSHIP:
	{
		struct packet_mreq_max mreq;
		int len = optlen;
		memset(&mreq, 0, sizeof(mreq));
		if (len < sizeof(struct packet_mreq))
			return -EINVAL;
		if (len > sizeof(mreq))
			len = sizeof(mreq);
		if (copy_from_user(&mreq, optval, len))
			return -EFAULT;
		if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
			return -EINVAL;
		if (optname == PACKET_ADD_MEMBERSHIP)
			ret = packet_mc_add(sk, &mreq);
		else
			ret = packet_mc_drop(sk, &mreq);
		return ret;
	}

	case PACKET_RX_RING:
	case PACKET_TX_RING:
	{
		struct tpacket_req req;

		if (optlen < sizeof(req))
			return -EINVAL;
		if (pkt_sk(sk)->has_vnet_hdr)
			return -EINVAL;
		if (copy_from_user(&req, optval, sizeof(req)))
			return -EFAULT;
		return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);
	}
	case PACKET_COPY_THRESH:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		pkt_sk(sk)->copy_thresh = val;
		return 0;
	}
	case PACKET_VERSION:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		switch (val) {
		case TPACKET_V1:
		case TPACKET_V2:
			po->tp_version = val;
			return 0;
		default:
			return -EINVAL;
		}
	}
	case PACKET_RESERVE:
	{
		unsigned int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_reserve = val;
		return 0;
	}
	case PACKET_LOSS:
	{
		unsigned int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_loss = !!val;
		return 0;
	}
	case PACKET_AUXDATA:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->auxdata = !!val;
		return 0;
	}
	case PACKET_ORIGDEV:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->origdev = !!val;
		return 0;
	}
	case PACKET_VNET_HDR:
	{
		int val;

		if (sock->type != SOCK_RAW)
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->has_vnet_hdr = !!val;
		return 0;
	}
	case PACKET_TIMESTAMP:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->tp_tstamp = val;
		return 0;
	}
	default:
		return -ENOPROTOOPT;
	}
}
2073
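/*
 * Editor's illustration (not kernel source): the ordering that the
 * EBUSY checks above impose on userspace -- PACKET_VERSION and
 * PACKET_RESERVE must be set while no ring exists, i.e. before
 * PACKET_RX_RING/PACKET_TX_RING. The sizes below are assumptions that
 * satisfy the sanity checks in packet_set_ring().
 */
#include <sys/socket.h>
#include <linux/if_packet.h>

static int setup_v2_rx_ring(int fd)
{
	int version = TPACKET_V2;
	unsigned int reserve = 16;		/* assumed extra headroom */
	struct tpacket_req req = {
		.tp_block_size = 4096,		/* multiple of PAGE_SIZE */
		.tp_frame_size = 2048,		/* TPACKET_ALIGNMENT aligned */
		.tp_block_nr   = 64,
		.tp_frame_nr   = 128,		/* frames per block * blocks */
	};

	if (setsockopt(fd, SOL_PACKET, PACKET_VERSION,
		       &version, sizeof(version)) < 0)
		return -1;
	if (setsockopt(fd, SOL_PACKET, PACKET_RESERVE,
		       &reserve, sizeof(reserve)) < 0)
		return -1;
	/* from here on, PACKET_VERSION and PACKET_RESERVE return EBUSY */
	return setsockopt(fd, SOL_PACKET, PACKET_RX_RING,
			  &req, sizeof(req));
}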
2074static int packet_getsockopt(struct socket *sock, int level, int optname,
2075 char __user *optval, int __user *optlen)
2076{
2077 int len;
8dc41944 2078 int val;
1da177e4
LT
2079 struct sock *sk = sock->sk;
2080 struct packet_sock *po = pkt_sk(sk);
8dc41944
HX
2081 void *data;
2082 struct tpacket_stats st;
1da177e4
LT
2083
2084 if (level != SOL_PACKET)
2085 return -ENOPROTOOPT;
2086
8ae55f04
KK
2087 if (get_user(len, optlen))
2088 return -EFAULT;
1da177e4
LT
2089
2090 if (len < 0)
2091 return -EINVAL;
1ce4f28b 2092
69e3c75f 2093 switch (optname) {
1da177e4 2094 case PACKET_STATISTICS:
1da177e4
LT
2095 if (len > sizeof(struct tpacket_stats))
2096 len = sizeof(struct tpacket_stats);
2097 spin_lock_bh(&sk->sk_receive_queue.lock);
2098 st = po->stats;
2099 memset(&po->stats, 0, sizeof(st));
2100 spin_unlock_bh(&sk->sk_receive_queue.lock);
2101 st.tp_packets += st.tp_drops;
2102
8dc41944
HX
2103 data = &st;
2104 break;
2105 case PACKET_AUXDATA:
2106 if (len > sizeof(int))
2107 len = sizeof(int);
2108 val = po->auxdata;
2109
80feaacb
PWJ
2110 data = &val;
2111 break;
2112 case PACKET_ORIGDEV:
2113 if (len > sizeof(int))
2114 len = sizeof(int);
2115 val = po->origdev;
2116
bfd5f4a3
SS
2117 data = &val;
2118 break;
2119 case PACKET_VNET_HDR:
2120 if (len > sizeof(int))
2121 len = sizeof(int);
2122 val = po->has_vnet_hdr;
2123
8dc41944 2124 data = &val;
1da177e4 2125 break;
bbd6ef87
PM
2126 case PACKET_VERSION:
2127 if (len > sizeof(int))
2128 len = sizeof(int);
2129 val = po->tp_version;
2130 data = &val;
2131 break;
2132 case PACKET_HDRLEN:
2133 if (len > sizeof(int))
2134 len = sizeof(int);
2135 if (copy_from_user(&val, optval, len))
2136 return -EFAULT;
2137 switch (val) {
2138 case TPACKET_V1:
2139 val = sizeof(struct tpacket_hdr);
2140 break;
2141 case TPACKET_V2:
2142 val = sizeof(struct tpacket2_hdr);
2143 break;
2144 default:
2145 return -EINVAL;
2146 }
2147 data = &val;
2148 break;
8913336a
PM
2149 case PACKET_RESERVE:
2150 if (len > sizeof(unsigned int))
2151 len = sizeof(unsigned int);
2152 val = po->tp_reserve;
2153 data = &val;
2154 break;
69e3c75f
JB
2155 case PACKET_LOSS:
2156 if (len > sizeof(unsigned int))
2157 len = sizeof(unsigned int);
2158 val = po->tp_loss;
2159 data = &val;
2160 break;
614f60fa
SM
2161 case PACKET_TIMESTAMP:
2162 if (len > sizeof(int))
2163 len = sizeof(int);
2164 val = po->tp_tstamp;
2165 data = &val;
2166 break;
1da177e4
LT
2167 default:
2168 return -ENOPROTOOPT;
2169 }
2170
8ae55f04
KK
2171 if (put_user(len, optlen))
2172 return -EFAULT;
8dc41944
HX
2173 if (copy_to_user(optval, data, len))
2174 return -EFAULT;
8ae55f04 2175 return 0;
1da177e4
LT
2176}
2177
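/*
 * Editor's illustration (not kernel source): reading the counters that
 * the PACKET_STATISTICS branch above copies out. The kernel zeroes
 * po->stats under the receive-queue lock on every read, so each call
 * returns the delta since the previous one, and tp_packets already
 * includes tp_drops.
 */
#include <stdio.h>
#include <sys/socket.h>
#include <linux/if_packet.h>

static void print_packet_stats(int fd)
{
	struct tpacket_stats st;
	socklen_t len = sizeof(st);

	if (getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &len) == 0)
		printf("seen=%u dropped=%u\n", st.tp_packets, st.tp_drops);
}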
2178
2179static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
2180{
2181 struct sock *sk;
2182 struct hlist_node *node;
ad930650 2183 struct net_device *dev = data;
c346dca1 2184 struct net *net = dev_net(dev);
1da177e4 2185
808f5114 2186 rcu_read_lock();
2187 sk_for_each_rcu(sk, node, &net->packet.sklist) {
1da177e4
LT
2188 struct packet_sock *po = pkt_sk(sk);
2189
2190 switch (msg) {
2191 case NETDEV_UNREGISTER:
1da177e4
LT
2192 if (po->mclist)
2193 packet_dev_mclist(dev, po->mclist, -1);
a2efcfa0
DM
2194 /* fallthrough */
2195
1da177e4
LT
2196 case NETDEV_DOWN:
2197 if (dev->ifindex == po->ifindex) {
2198 spin_lock(&po->bind_lock);
2199 if (po->running) {
2200 __dev_remove_pack(&po->prot_hook);
2201 __sock_put(sk);
2202 po->running = 0;
2203 sk->sk_err = ENETDOWN;
2204 if (!sock_flag(sk, SOCK_DEAD))
2205 sk->sk_error_report(sk);
2206 }
2207 if (msg == NETDEV_UNREGISTER) {
2208 po->ifindex = -1;
2209 po->prot_hook.dev = NULL;
2210 }
2211 spin_unlock(&po->bind_lock);
2212 }
2213 break;
2214 case NETDEV_UP:
808f5114 2215 if (dev->ifindex == po->ifindex) {
2216 spin_lock(&po->bind_lock);
2217 if (po->num && !po->running) {
2218 dev_add_pack(&po->prot_hook);
2219 sock_hold(sk);
2220 po->running = 1;
2221 }
2222 spin_unlock(&po->bind_lock);
1da177e4 2223 }
1da177e4
LT
2224 break;
2225 }
2226 }
808f5114 2227 rcu_read_unlock();
1da177e4
LT
2228 return NOTIFY_DONE;
2229}
2230
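/*
 * Editor's illustration (not kernel source): how the NETDEV_DOWN branch
 * above is observed from userspace -- the pending error set via
 * sk->sk_err surfaces once as ENETDOWN on the next receive, after which
 * the socket can keep waiting for NETDEV_UP to re-add the hook.
 */
#include <errno.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/socket.h>

static void rx_until_fatal(int fd)
{
	char buf[2048];

	for (;;) {
		ssize_t n = recv(fd, buf, sizeof(buf), 0);

		if (n < 0 && errno == ENETDOWN) {
			fprintf(stderr, "interface went down\n");
			continue;	/* the socket itself stays usable */
		}
		if (n < 0)
			break;
		/* process n bytes of frame data */
	}
}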
2231
2232static int packet_ioctl(struct socket *sock, unsigned int cmd,
2233 unsigned long arg)
2234{
2235 struct sock *sk = sock->sk;
2236
69e3c75f 2237 switch (cmd) {
40d4e3df
ED
2238 case SIOCOUTQ:
2239 {
2240 int amount = sk_wmem_alloc_get(sk);
31e6d363 2241
40d4e3df
ED
2242 return put_user(amount, (int __user *)arg);
2243 }
2244 case SIOCINQ:
2245 {
2246 struct sk_buff *skb;
2247 int amount = 0;
2248
2249 spin_lock_bh(&sk->sk_receive_queue.lock);
2250 skb = skb_peek(&sk->sk_receive_queue);
2251 if (skb)
2252 amount = skb->len;
2253 spin_unlock_bh(&sk->sk_receive_queue.lock);
2254 return put_user(amount, (int __user *)arg);
2255 }
2256 case SIOCGSTAMP:
2257 return sock_get_timestamp(sk, (struct timeval __user *)arg);
2258 case SIOCGSTAMPNS:
2259 return sock_get_timestampns(sk, (struct timespec __user *)arg);
1ce4f28b 2260
1da177e4 2261#ifdef CONFIG_INET
40d4e3df
ED
2262 case SIOCADDRT:
2263 case SIOCDELRT:
2264 case SIOCDARP:
2265 case SIOCGARP:
2266 case SIOCSARP:
2267 case SIOCGIFADDR:
2268 case SIOCSIFADDR:
2269 case SIOCGIFBRDADDR:
2270 case SIOCSIFBRDADDR:
2271 case SIOCGIFNETMASK:
2272 case SIOCSIFNETMASK:
2273 case SIOCGIFDSTADDR:
2274 case SIOCSIFDSTADDR:
2275 case SIOCSIFFLAGS:
40d4e3df 2276 return inet_dgram_ops.ioctl(sock, cmd, arg);
1da177e4
LT
2277#endif
2278
40d4e3df
ED
2279 default:
2280 return -ENOIOCTLCMD;
1da177e4
LT
2281 }
2282 return 0;
2283}
2284
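/*
 * Editor's illustration (not kernel source): SIOCINQ as implemented
 * above reports the length of the frame at the head of the receive
 * queue, not the total number of queued bytes.
 */
#include <sys/ioctl.h>
#include <linux/sockios.h>

static int next_frame_len(int fd)
{
	int len = 0;

	if (ioctl(fd, SIOCINQ, &len) < 0)
		return -1;
	return len;		/* 0 when the queue is empty */
}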
40d4e3df 2285static unsigned int packet_poll(struct file *file, struct socket *sock,
1da177e4
LT
2286 poll_table *wait)
2287{
2288 struct sock *sk = sock->sk;
2289 struct packet_sock *po = pkt_sk(sk);
2290 unsigned int mask = datagram_poll(file, sock, wait);
2291
2292 spin_lock_bh(&sk->sk_receive_queue.lock);
69e3c75f
JB
2293 if (po->rx_ring.pg_vec) {
2294 if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL))
1da177e4
LT
2295 mask |= POLLIN | POLLRDNORM;
2296 }
2297 spin_unlock_bh(&sk->sk_receive_queue.lock);
69e3c75f
JB
2298 spin_lock_bh(&sk->sk_write_queue.lock);
2299 if (po->tx_ring.pg_vec) {
2300 if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
2301 mask |= POLLOUT | POLLWRNORM;
2302 }
2303 spin_unlock_bh(&sk->sk_write_queue.lock);
1da177e4
LT
2304 return mask;
2305}
2306
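/*
 * Editor's illustration (not kernel source): the userspace half of
 * packet_poll() for a TPACKET_V2 RX ring. It assumes tp_block_size is
 * a multiple of tp_frame_size, so frames tile the mapping contiguously.
 * poll() reports POLLIN exactly when the previous frame is no longer
 * TP_STATUS_KERNEL, matching packet_previous_frame() above.
 */
#include <stddef.h>
#include <poll.h>
#include <linux/if_packet.h>

static void rx_ring_loop(int fd, char *ring,
			 unsigned int frame_nr, unsigned int frame_size)
{
	unsigned int idx = 0;

	for (;;) {
		struct tpacket2_hdr *hdr =
			(struct tpacket2_hdr *)(ring + (size_t)idx * frame_size);

		if (!(hdr->tp_status & TP_STATUS_USER)) {
			struct pollfd pfd = { .fd = fd, .events = POLLIN };

			poll(&pfd, 1, -1);	/* block until a frame flips */
			continue;
		}
		/* frame data begins at (char *)hdr + hdr->tp_mac */
		hdr->tp_status = TP_STATUS_KERNEL;	/* hand the slot back */
		idx = (idx + 1) % frame_nr;
	}
}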
2307
2308/* Dirty? Well, I still have not learned a better way to account
2309 * for user mmaps.
2310 */
2311
2312static void packet_mm_open(struct vm_area_struct *vma)
2313{
2314 struct file *file = vma->vm_file;
40d4e3df 2315 struct socket *sock = file->private_data;
1da177e4 2316 struct sock *sk = sock->sk;
1ce4f28b 2317
1da177e4
LT
2318 if (sk)
2319 atomic_inc(&pkt_sk(sk)->mapped);
2320}
2321
2322static void packet_mm_close(struct vm_area_struct *vma)
2323{
2324 struct file *file = vma->vm_file;
40d4e3df 2325 struct socket *sock = file->private_data;
1da177e4 2326 struct sock *sk = sock->sk;
1ce4f28b 2327
1da177e4
LT
2328 if (sk)
2329 atomic_dec(&pkt_sk(sk)->mapped);
2330}
2331
f0f37e2f 2332static const struct vm_operations_struct packet_mmap_ops = {
40d4e3df
ED
2333 .open = packet_mm_open,
2334 .close = packet_mm_close,
1da177e4
LT
2335};
2336
0e3125c7
NH
2337static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
2338 unsigned int len)
1da177e4
LT
2339{
2340 int i;
2341
4ebf0ae2 2342 for (i = 0; i < len; i++) {
0e3125c7 2343 if (likely(pg_vec[i].buffer)) {
c56b4d90 2344 if (is_vmalloc_addr(pg_vec[i].buffer))
0e3125c7
NH
2345 vfree(pg_vec[i].buffer);
2346 else
2347 free_pages((unsigned long)pg_vec[i].buffer,
2348 order);
2349 pg_vec[i].buffer = NULL;
2350 }
1da177e4
LT
2351 }
2352 kfree(pg_vec);
2353}
2354
c56b4d90 2355static inline char *alloc_one_pg_vec_page(unsigned long order)
4ebf0ae2 2356{
0e3125c7
NH
2357 char *buffer = NULL;
2358 gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
2359 __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
2360
2361 buffer = (char *) __get_free_pages(gfp_flags, order);
2362
2363 if (buffer)
2364 return buffer;
2365
2366 /*
2367 * __get_free_pages failed, fall back to vmalloc
2368 */
bbce5a59 2369 buffer = vzalloc((1 << order) * PAGE_SIZE);
719bfeaa 2370
0e3125c7
NH
2371 if (buffer)
2372 return buffer;
2373
2374 /*
2375 * vmalloc failed, let's dig into swap here
2376 */
0e3125c7
NH
2377 gfp_flags &= ~__GFP_NORETRY;
2378 buffer = (char *)__get_free_pages(gfp_flags, order);
2379 if (buffer)
2380 return buffer;
2381
2382 /*
2383 * complete and utter failure
2384 */
2385 return NULL;
4ebf0ae2
DM
2386}
2387
0e3125c7 2388static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
4ebf0ae2
DM
2389{
2390 unsigned int block_nr = req->tp_block_nr;
0e3125c7 2391 struct pgv *pg_vec;
4ebf0ae2
DM
2392 int i;
2393
0e3125c7 2394 pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
4ebf0ae2
DM
2395 if (unlikely(!pg_vec))
2396 goto out;
2397
2398 for (i = 0; i < block_nr; i++) {
c56b4d90 2399 pg_vec[i].buffer = alloc_one_pg_vec_page(order);
0e3125c7 2400 if (unlikely(!pg_vec[i].buffer))
4ebf0ae2
DM
2401 goto out_free_pgvec;
2402 }
2403
2404out:
2405 return pg_vec;
2406
2407out_free_pgvec:
2408 free_pg_vec(pg_vec, order, block_nr);
4ebf0ae2
DM
2410 pg_vec = NULL;
2411 goto out;
2412}
1da177e4 2413
69e3c75f
JB
2414static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
2415 int closing, int tx_ring)
1da177e4 2416{
0e3125c7 2417 struct pgv *pg_vec = NULL;
1da177e4 2418 struct packet_sock *po = pkt_sk(sk);
0e11c91e 2419 int was_running, order = 0;
69e3c75f
JB
2420 struct packet_ring_buffer *rb;
2421 struct sk_buff_head *rb_queue;
0e11c91e 2422 __be16 num;
69e3c75f 2423 int err;
1ce4f28b 2424
69e3c75f
JB
2425 rb = tx_ring ? &po->tx_ring : &po->rx_ring;
2426 rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
1da177e4 2427
69e3c75f
JB
2428 err = -EBUSY;
2429 if (!closing) {
2430 if (atomic_read(&po->mapped))
2431 goto out;
2432 if (atomic_read(&rb->pending))
2433 goto out;
2434 }
1da177e4 2435
69e3c75f
JB
2436 if (req->tp_block_nr) {
2437 /* Sanity tests and some calculations */
2438 err = -EBUSY;
2439 if (unlikely(rb->pg_vec))
2440 goto out;
1da177e4 2441
bbd6ef87
PM
2442 switch (po->tp_version) {
2443 case TPACKET_V1:
2444 po->tp_hdrlen = TPACKET_HDRLEN;
2445 break;
2446 case TPACKET_V2:
2447 po->tp_hdrlen = TPACKET2_HDRLEN;
2448 break;
2449 }
2450
69e3c75f 2451 err = -EINVAL;
4ebf0ae2 2452 if (unlikely((int)req->tp_block_size <= 0))
69e3c75f 2453 goto out;
4ebf0ae2 2454 if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
69e3c75f 2455 goto out;
8913336a 2456 if (unlikely(req->tp_frame_size < po->tp_hdrlen +
69e3c75f
JB
2457 po->tp_reserve))
2458 goto out;
4ebf0ae2 2459 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
69e3c75f 2460 goto out;
1da177e4 2461
69e3c75f
JB
2462 rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
2463 if (unlikely(rb->frames_per_block <= 0))
2464 goto out;
2465 if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
2466 req->tp_frame_nr))
2467 goto out;
1da177e4
LT
2468
2469 err = -ENOMEM;
4ebf0ae2
DM
2470 order = get_order(req->tp_block_size);
2471 pg_vec = alloc_pg_vec(req, order);
2472 if (unlikely(!pg_vec))
1da177e4 2473 goto out;
69e3c75f
JB
2474 }
2475 /* Done */
2476 else {
2477 err = -EINVAL;
4ebf0ae2 2478 if (unlikely(req->tp_frame_nr))
69e3c75f 2479 goto out;
1da177e4
LT
2480 }
2481
2482 lock_sock(sk);
2483
2484 /* Detach socket from network */
2485 spin_lock(&po->bind_lock);
2486 was_running = po->running;
2487 num = po->num;
2488 if (was_running) {
2489 __dev_remove_pack(&po->prot_hook);
2490 po->num = 0;
2491 po->running = 0;
2492 __sock_put(sk);
2493 }
2494 spin_unlock(&po->bind_lock);
1ce4f28b 2495
1da177e4
LT
2496 synchronize_net();
2497
2498 err = -EBUSY;
905db440 2499 mutex_lock(&po->pg_vec_lock);
1da177e4
LT
2500 if (closing || atomic_read(&po->mapped) == 0) {
2501 err = 0;
2502#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
69e3c75f
JB
2503 spin_lock_bh(&rb_queue->lock);
2504 pg_vec = XC(rb->pg_vec, pg_vec);
2505 rb->frame_max = (req->tp_frame_nr - 1);
2506 rb->head = 0;
2507 rb->frame_size = req->tp_frame_size;
2508 spin_unlock_bh(&rb_queue->lock);
2509
2510 order = XC(rb->pg_vec_order, order);
2511 req->tp_block_nr = XC(rb->pg_vec_len, req->tp_block_nr);
2512
2513 rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
2514 po->prot_hook.func = (po->rx_ring.pg_vec) ?
2515 tpacket_rcv : packet_rcv;
2516 skb_queue_purge(rb_queue);
1da177e4
LT
2517#undef XC
2518 if (atomic_read(&po->mapped))
40d4e3df
ED
2519 pr_err("packet_mmap: vma is busy: %d\n",
2520 atomic_read(&po->mapped));
1da177e4 2521 }
905db440 2522 mutex_unlock(&po->pg_vec_lock);
1da177e4
LT
2523
2524 spin_lock(&po->bind_lock);
2525 if (was_running && !po->running) {
2526 sock_hold(sk);
2527 po->running = 1;
2528 po->num = num;
2529 dev_add_pack(&po->prot_hook);
2530 }
2531 spin_unlock(&po->bind_lock);
2532
2533 release_sock(sk);
2534
1da177e4
LT
2535 if (pg_vec)
2536 free_pg_vec(pg_vec, order, req->tp_block_nr);
2537out:
2538 return err;
2539}
2540
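/*
 * Editor's illustration (not kernel source): the req->tp_block_nr == 0
 * path above doubles as the userspace teardown -- an all-zero request
 * frees the pg_vec, provided the ring is no longer mmap()ed
 * (po->mapped must have dropped to zero).
 */
#include <string.h>
#include <sys/socket.h>
#include <linux/if_packet.h>

static int teardown_rx_ring(int fd)
{
	struct tpacket_req req;

	memset(&req, 0, sizeof(req));	/* zero counts: release the ring */
	return setsockopt(fd, SOL_PACKET, PACKET_RX_RING,
			  &req, sizeof(req));
}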
69e3c75f
JB
2541static int packet_mmap(struct file *file, struct socket *sock,
2542 struct vm_area_struct *vma)
1da177e4
LT
2543{
2544 struct sock *sk = sock->sk;
2545 struct packet_sock *po = pkt_sk(sk);
69e3c75f
JB
2546 unsigned long size, expected_size;
2547 struct packet_ring_buffer *rb;
1da177e4
LT
2548 unsigned long start;
2549 int err = -EINVAL;
2550 int i;
2551
2552 if (vma->vm_pgoff)
2553 return -EINVAL;
2554
905db440 2555 mutex_lock(&po->pg_vec_lock);
69e3c75f
JB
2556
2557 expected_size = 0;
2558 for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
2559 if (rb->pg_vec) {
2560 expected_size += rb->pg_vec_len
2561 * rb->pg_vec_pages
2562 * PAGE_SIZE;
2563 }
2564 }
2565
2566 if (expected_size == 0)
1da177e4 2567 goto out;
69e3c75f
JB
2568
2569 size = vma->vm_end - vma->vm_start;
2570 if (size != expected_size)
1da177e4
LT
2571 goto out;
2572
1da177e4 2573 start = vma->vm_start;
69e3c75f
JB
2574 for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
2575 if (rb->pg_vec == NULL)
2576 continue;
2577
2578 for (i = 0; i < rb->pg_vec_len; i++) {
0e3125c7
NH
2579 struct page *page;
2580 void *kaddr = rb->pg_vec[i].buffer;
69e3c75f
JB
2581 int pg_num;
2582
c56b4d90
CG
2583 for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
2584 page = pgv_to_page(kaddr);
69e3c75f
JB
2585 err = vm_insert_page(vma, start, page);
2586 if (unlikely(err))
2587 goto out;
2588 start += PAGE_SIZE;
0e3125c7 2589 kaddr += PAGE_SIZE;
69e3c75f 2590 }
4ebf0ae2 2591 }
1da177e4 2592 }
69e3c75f 2593
4ebf0ae2 2594 atomic_inc(&po->mapped);
1da177e4
LT
2595 vma->vm_ops = &packet_mmap_ops;
2596 err = 0;
2597
2598out:
905db440 2599 mutex_unlock(&po->pg_vec_lock);
1da177e4
LT
2600 return err;
2601}
1da177e4 2602
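/*
 * Editor's illustration (not kernel source): the single mmap() that
 * packet_mmap() above expects -- page offset 0 and a length equal to
 * the combined RX and TX ring sizes, RX first. rx_bytes/tx_bytes are
 * assumed to equal tp_block_size * tp_block_nr of each ring.
 */
#include <stddef.h>
#include <sys/mman.h>

static char *map_rings(int fd, size_t rx_bytes, size_t tx_bytes)
{
	char *ring = mmap(NULL, rx_bytes + tx_bytes,
			  PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

	if (ring == MAP_FAILED)
		return NULL;
	return ring;		/* TX frames start at ring + rx_bytes */
}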
90ddc4f0 2603static const struct proto_ops packet_ops_spkt = {
1da177e4
LT
2604 .family = PF_PACKET,
2605 .owner = THIS_MODULE,
2606 .release = packet_release,
2607 .bind = packet_bind_spkt,
2608 .connect = sock_no_connect,
2609 .socketpair = sock_no_socketpair,
2610 .accept = sock_no_accept,
2611 .getname = packet_getname_spkt,
2612 .poll = datagram_poll,
2613 .ioctl = packet_ioctl,
2614 .listen = sock_no_listen,
2615 .shutdown = sock_no_shutdown,
2616 .setsockopt = sock_no_setsockopt,
2617 .getsockopt = sock_no_getsockopt,
2618 .sendmsg = packet_sendmsg_spkt,
2619 .recvmsg = packet_recvmsg,
2620 .mmap = sock_no_mmap,
2621 .sendpage = sock_no_sendpage,
2622};
1da177e4 2623
90ddc4f0 2624static const struct proto_ops packet_ops = {
1da177e4
LT
2625 .family = PF_PACKET,
2626 .owner = THIS_MODULE,
2627 .release = packet_release,
2628 .bind = packet_bind,
2629 .connect = sock_no_connect,
2630 .socketpair = sock_no_socketpair,
2631 .accept = sock_no_accept,
1ce4f28b 2632 .getname = packet_getname,
1da177e4
LT
2633 .poll = packet_poll,
2634 .ioctl = packet_ioctl,
2635 .listen = sock_no_listen,
2636 .shutdown = sock_no_shutdown,
2637 .setsockopt = packet_setsockopt,
2638 .getsockopt = packet_getsockopt,
2639 .sendmsg = packet_sendmsg,
2640 .recvmsg = packet_recvmsg,
2641 .mmap = packet_mmap,
2642 .sendpage = sock_no_sendpage,
2643};
2644
ec1b4cf7 2645static const struct net_proto_family packet_family_ops = {
1da177e4
LT
2646 .family = PF_PACKET,
2647 .create = packet_create,
2648 .owner = THIS_MODULE,
2649};
2650
2651static struct notifier_block packet_netdev_notifier = {
40d4e3df 2652 .notifier_call = packet_notifier,
1da177e4
LT
2653};
2654
2655#ifdef CONFIG_PROC_FS
1da177e4
LT
2656
2657static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
808f5114 2658 __acquires(RCU)
1da177e4 2659{
e372c414 2660 struct net *net = seq_file_net(seq);
808f5114 2661
2662 rcu_read_lock();
2663 return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
1da177e4
LT
2664}
2665
2666static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2667{
1bf40954 2668 struct net *net = seq_file_net(seq);
808f5114 2669 return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
1da177e4
LT
2670}
2671
2672static void packet_seq_stop(struct seq_file *seq, void *v)
808f5114 2673 __releases(RCU)
1da177e4 2674{
808f5114 2675 rcu_read_unlock();
1da177e4
LT
2676}
2677
1ce4f28b 2678static int packet_seq_show(struct seq_file *seq, void *v)
1da177e4
LT
2679{
2680 if (v == SEQ_START_TOKEN)
2681 seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
2682 else {
b7ceabd9 2683 struct sock *s = sk_entry(v);
1da177e4
LT
2684 const struct packet_sock *po = pkt_sk(s);
2685
2686 seq_printf(seq,
2687 "%p %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
2688 s,
2689 atomic_read(&s->sk_refcnt),
2690 s->sk_type,
2691 ntohs(po->num),
2692 po->ifindex,
2693 po->running,
2694 atomic_read(&s->sk_rmem_alloc),
2695 sock_i_uid(s),
40d4e3df 2696 sock_i_ino(s));
1da177e4
LT
2697 }
2698
2699 return 0;
2700}
2701
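/*
 * Editor's illustration (not kernel source): the seq_file above backs
 * /proc/net/packet; each data line follows the header emitted by
 * packet_seq_show().
 */
#include <stdio.h>

static void dump_packet_sockets(void)
{
	char line[256];
	FILE *f = fopen("/proc/net/packet", "r");

	if (!f)
		return;
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* sk RefCnt Type Proto Iface ... */
	fclose(f);
}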
56b3d975 2702static const struct seq_operations packet_seq_ops = {
1da177e4
LT
2703 .start = packet_seq_start,
2704 .next = packet_seq_next,
2705 .stop = packet_seq_stop,
2706 .show = packet_seq_show,
2707};
2708
2709static int packet_seq_open(struct inode *inode, struct file *file)
2710{
e372c414
DL
2711 return seq_open_net(inode, file, &packet_seq_ops,
2712 sizeof(struct seq_net_private));
1da177e4
LT
2713}
2714
da7071d7 2715static const struct file_operations packet_seq_fops = {
1da177e4
LT
2716 .owner = THIS_MODULE,
2717 .open = packet_seq_open,
2718 .read = seq_read,
2719 .llseek = seq_lseek,
e372c414 2720 .release = seq_release_net,
1da177e4
LT
2721};
2722
2723#endif
2724
2c8c1e72 2725static int __net_init packet_net_init(struct net *net)
d12d01d6 2726{
808f5114 2727 spin_lock_init(&net->packet.sklist_lock);
2aaef4e4 2728 INIT_HLIST_HEAD(&net->packet.sklist);
d12d01d6
DL
2729
2730 if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
2731 return -ENOMEM;
2732
2733 return 0;
2734}
2735
2c8c1e72 2736static void __net_exit packet_net_exit(struct net *net)
d12d01d6
DL
2737{
2738 proc_net_remove(net, "packet");
2739}
2740
2741static struct pernet_operations packet_net_ops = {
2742 .init = packet_net_init,
2743 .exit = packet_net_exit,
2744};
2745
2746
1da177e4
LT
2747static void __exit packet_exit(void)
2748{
1da177e4 2749 unregister_netdevice_notifier(&packet_netdev_notifier);
d12d01d6 2750 unregister_pernet_subsys(&packet_net_ops);
1da177e4
LT
2751 sock_unregister(PF_PACKET);
2752 proto_unregister(&packet_proto);
2753}
2754
2755static int __init packet_init(void)
2756{
2757 int rc = proto_register(&packet_proto, 0);
2758
2759 if (rc != 0)
2760 goto out;
2761
2762 sock_register(&packet_family_ops);
d12d01d6 2763 register_pernet_subsys(&packet_net_ops);
1da177e4 2764 register_netdevice_notifier(&packet_netdev_notifier);
1da177e4
LT
2765out:
2766 return rc;
2767}
2768
2769module_init(packet_init);
2770module_exit(packet_exit);
2771MODULE_LICENSE("GPL");
2772MODULE_ALIAS_NETPROTO(PF_PACKET);