net/netlink/af_netlink.c
1 /*
2 * NETLINK Kernel-user communication protocol.
3 *
4 * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>
5 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
6 * Patrick McHardy <kaber@trash.net>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 *
13 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
14 * added netlink_proto_exit
15 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
16 * use nlk_sk, as sk->protinfo is on a diet 8)
17 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org>
18 * - inc module use count of module that owns
19 * the kernel socket in case userspace opens
20 * socket of same protocol
21 * - remove all module support, since netlink is
22 * mandatory if CONFIG_NET=y these days
23 */
24
25 #include <linux/module.h>
26
27 #include <linux/capability.h>
28 #include <linux/kernel.h>
29 #include <linux/init.h>
30 #include <linux/signal.h>
31 #include <linux/sched.h>
32 #include <linux/errno.h>
33 #include <linux/string.h>
34 #include <linux/stat.h>
35 #include <linux/socket.h>
36 #include <linux/un.h>
37 #include <linux/fcntl.h>
38 #include <linux/termios.h>
39 #include <linux/sockios.h>
40 #include <linux/net.h>
41 #include <linux/fs.h>
42 #include <linux/slab.h>
43 #include <asm/uaccess.h>
44 #include <linux/skbuff.h>
45 #include <linux/netdevice.h>
46 #include <linux/rtnetlink.h>
47 #include <linux/proc_fs.h>
48 #include <linux/seq_file.h>
49 #include <linux/notifier.h>
50 #include <linux/security.h>
51 #include <linux/jhash.h>
52 #include <linux/jiffies.h>
53 #include <linux/random.h>
54 #include <linux/bitops.h>
55 #include <linux/mm.h>
56 #include <linux/types.h>
57 #include <linux/audit.h>
58 #include <linux/mutex.h>
59 #include <linux/vmalloc.h>
60 #include <linux/if_arp.h>
61 #include <asm/cacheflush.h>
62
63 #include <net/net_namespace.h>
64 #include <net/sock.h>
65 #include <net/scm.h>
66 #include <net/netlink.h>
67
68 #include "af_netlink.h"
69
70 struct listeners {
71 struct rcu_head rcu;
72 unsigned long masks[0];
73 };
74
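/* Socket state vs. flags: NETLINK_CONGESTED below is a bit number used with
 * test_bit()/set_bit() on nlk->state, while the flag constants are bit masks
 * stored in nlk->flags.
 */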
75 /* state bits */
76 #define NETLINK_CONGESTED 0x0
77
78 /* flags */
79 #define NETLINK_KERNEL_SOCKET 0x1
80 #define NETLINK_RECV_PKTINFO 0x2
81 #define NETLINK_BROADCAST_SEND_ERROR 0x4
82 #define NETLINK_RECV_NO_ENOBUFS 0x8
83
84 static inline int netlink_is_kernel(struct sock *sk)
85 {
86 return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
87 }
88
89 struct netlink_table *nl_table;
90 EXPORT_SYMBOL_GPL(nl_table);
91
92 static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
93
94 static int netlink_dump(struct sock *sk);
95 static void netlink_skb_destructor(struct sk_buff *skb);
96
97 DEFINE_RWLOCK(nl_table_lock);
98 EXPORT_SYMBOL_GPL(nl_table_lock);
99 static atomic_t nl_table_users = ATOMIC_INIT(0);
100
101 #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock))
102
103 static ATOMIC_NOTIFIER_HEAD(netlink_chain);
104
105 static DEFINE_SPINLOCK(netlink_tap_lock);
106 static struct list_head netlink_tap_all __read_mostly;
107
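/* Multicast groups are 1-based on the wire; group N maps to bit N-1 in the
 * listener bitmaps, and group 0 (no group) maps to an empty mask. For
 * example, netlink_group_mask(1) == 0x1 and netlink_group_mask(0) == 0.
 */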
108 static inline u32 netlink_group_mask(u32 group)
109 {
110 return group ? 1 << (group - 1) : 0;
111 }
112
113 static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u32 portid)
114 {
115 return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask];
116 }
117
118 int netlink_add_tap(struct netlink_tap *nt)
119 {
120 if (unlikely(nt->dev->type != ARPHRD_NETLINK))
121 return -EINVAL;
122
123 spin_lock(&netlink_tap_lock);
124 list_add_rcu(&nt->list, &netlink_tap_all);
125 spin_unlock(&netlink_tap_lock);
126
127 if (nt->module)
128 __module_get(nt->module);
129
130 return 0;
131 }
132 EXPORT_SYMBOL_GPL(netlink_add_tap);
133
134 static int __netlink_remove_tap(struct netlink_tap *nt)
135 {
136 bool found = false;
137 struct netlink_tap *tmp;
138
139 spin_lock(&netlink_tap_lock);
140
141 list_for_each_entry(tmp, &netlink_tap_all, list) {
142 if (nt == tmp) {
143 list_del_rcu(&nt->list);
144 found = true;
145 goto out;
146 }
147 }
148
149 pr_warn("__netlink_remove_tap: %p not found\n", nt);
150 out:
151 spin_unlock(&netlink_tap_lock);
152
153 if (found && nt->module)
154 module_put(nt->module);
155
156 return found ? 0 : -ENODEV;
157 }
158
159 int netlink_remove_tap(struct netlink_tap *nt)
160 {
161 int ret;
162
163 ret = __netlink_remove_tap(nt);
164 synchronize_net();
165
166 return ret;
167 }
168 EXPORT_SYMBOL_GPL(netlink_remove_tap);
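/* Typical usage (sketch, names illustrative): a tap driver such as nlmon
 * creates a net_device of type ARPHRD_NETLINK and registers it here so that
 * netlink traffic can be mirrored to it for packet capture:
 *
 *	static struct netlink_tap tap;
 *
 *	tap.dev    = dev;			// the ARPHRD_NETLINK device
 *	tap.module = THIS_MODULE;
 *	err = netlink_add_tap(&tap);		// on device open
 *	...
 *	netlink_remove_tap(&tap);		// on device close
 */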
169
170 static bool netlink_filter_tap(const struct sk_buff *skb)
171 {
172 struct sock *sk = skb->sk;
173 bool pass = false;
174
175 /* We take the more conservative approach and
176 * whitelist socket protocols that may pass.
177 */
178 switch (sk->sk_protocol) {
179 case NETLINK_ROUTE:
180 case NETLINK_USERSOCK:
181 case NETLINK_SOCK_DIAG:
182 case NETLINK_NFLOG:
183 case NETLINK_XFRM:
184 case NETLINK_FIB_LOOKUP:
185 case NETLINK_NETFILTER:
186 case NETLINK_GENERIC:
187 pass = true;
188 break;
189 }
190
191 return pass;
192 }
193
194 static int __netlink_deliver_tap_skb(struct sk_buff *skb,
195 struct net_device *dev)
196 {
197 struct sk_buff *nskb;
198 struct sock *sk = skb->sk;
199 int ret = -ENOMEM;
200
201 dev_hold(dev);
202 nskb = skb_clone(skb, GFP_ATOMIC);
203 if (nskb) {
204 nskb->dev = dev;
205 nskb->protocol = htons((u16) sk->sk_protocol);
206 nskb->pkt_type = netlink_is_kernel(sk) ?
207 PACKET_KERNEL : PACKET_USER;
208
209 ret = dev_queue_xmit(nskb);
210 if (unlikely(ret > 0))
211 ret = net_xmit_errno(ret);
212 }
213
214 dev_put(dev);
215 return ret;
216 }
217
218 static void __netlink_deliver_tap(struct sk_buff *skb)
219 {
220 int ret;
221 struct netlink_tap *tmp;
222
223 if (!netlink_filter_tap(skb))
224 return;
225
226 list_for_each_entry_rcu(tmp, &netlink_tap_all, list) {
227 ret = __netlink_deliver_tap_skb(skb, tmp->dev);
228 if (unlikely(ret))
229 break;
230 }
231 }
232
233 static void netlink_deliver_tap(struct sk_buff *skb)
234 {
235 rcu_read_lock();
236
237 if (unlikely(!list_empty(&netlink_tap_all)))
238 __netlink_deliver_tap(skb);
239
240 rcu_read_unlock();
241 }
242
243 static void netlink_deliver_tap_kernel(struct sock *dst, struct sock *src,
244 struct sk_buff *skb)
245 {
246 if (!(netlink_is_kernel(dst) && netlink_is_kernel(src)))
247 netlink_deliver_tap(skb);
248 }
249
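/* Receive queue overrun: unless the socket opted out via NETLINK_NO_ENOBUFS,
 * mark it congested and report ENOBUFS once; the drop counter is bumped
 * unconditionally.
 */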
250 static void netlink_overrun(struct sock *sk)
251 {
252 struct netlink_sock *nlk = nlk_sk(sk);
253
254 if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
255 if (!test_and_set_bit(NETLINK_CONGESTED, &nlk_sk(sk)->state)) {
256 sk->sk_err = ENOBUFS;
257 sk->sk_error_report(sk);
258 }
259 }
260 atomic_inc(&sk->sk_drops);
261 }
262
263 static void netlink_rcv_wake(struct sock *sk)
264 {
265 struct netlink_sock *nlk = nlk_sk(sk);
266
267 if (skb_queue_empty(&sk->sk_receive_queue))
268 clear_bit(NETLINK_CONGESTED, &nlk->state);
269 if (!test_bit(NETLINK_CONGESTED, &nlk->state))
270 wake_up_interruptible(&nlk->wait);
271 }
272
273 #ifdef CONFIG_NETLINK_MMAP
274 static bool netlink_skb_is_mmaped(const struct sk_buff *skb)
275 {
276 return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
277 }
278
279 static bool netlink_rx_is_mmaped(struct sock *sk)
280 {
281 return nlk_sk(sk)->rx_ring.pg_vec != NULL;
282 }
283
284 static bool netlink_tx_is_mmaped(struct sock *sk)
285 {
286 return nlk_sk(sk)->tx_ring.pg_vec != NULL;
287 }
288
289 static __pure struct page *pgvec_to_page(const void *addr)
290 {
291 if (is_vmalloc_addr(addr))
292 return vmalloc_to_page(addr);
293 else
294 return virt_to_page(addr);
295 }
296
297 static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len)
298 {
299 unsigned int i;
300
301 for (i = 0; i < len; i++) {
302 if (pg_vec[i] != NULL) {
303 if (is_vmalloc_addr(pg_vec[i]))
304 vfree(pg_vec[i]);
305 else
306 free_pages((unsigned long)pg_vec[i], order);
307 }
308 }
309 kfree(pg_vec);
310 }
311
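/* Ring block allocation strategy: try physically contiguous pages first
 * (without warnings or retries), fall back to vmalloc'ed memory, and as a
 * last resort retry the page allocator with __GFP_NORETRY cleared.
 */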
312 static void *alloc_one_pg_vec_page(unsigned long order)
313 {
314 void *buffer;
315 gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO |
316 __GFP_NOWARN | __GFP_NORETRY;
317
318 buffer = (void *)__get_free_pages(gfp_flags, order);
319 if (buffer != NULL)
320 return buffer;
321
322 buffer = vzalloc((1 << order) * PAGE_SIZE);
323 if (buffer != NULL)
324 return buffer;
325
326 gfp_flags &= ~__GFP_NORETRY;
327 return (void *)__get_free_pages(gfp_flags, order);
328 }
329
330 static void **alloc_pg_vec(struct netlink_sock *nlk,
331 struct nl_mmap_req *req, unsigned int order)
332 {
333 unsigned int block_nr = req->nm_block_nr;
334 unsigned int i;
335 void **pg_vec;
336
337 pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL);
338 if (pg_vec == NULL)
339 return NULL;
340
341 for (i = 0; i < block_nr; i++) {
342 pg_vec[i] = alloc_one_pg_vec_page(order);
343 if (pg_vec[i] == NULL)
344 goto err1;
345 }
346
347 return pg_vec;
348 err1:
349 free_pg_vec(pg_vec, order, block_nr);
350 return NULL;
351 }
352
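/* Configure (or, when closing or nm_block_nr == 0, tear down) the rx or tx
 * mmap ring: validate the block/frame geometry, allocate the page vector and
 * swap it in under the queue lock, purging any skbs still queued. Fails with
 * -EBUSY while the old ring is mapped into userspace.
 */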
353 static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
354 bool closing, bool tx_ring)
355 {
356 struct netlink_sock *nlk = nlk_sk(sk);
357 struct netlink_ring *ring;
358 struct sk_buff_head *queue;
359 void **pg_vec = NULL;
360 unsigned int order = 0;
361 int err;
362
363 ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
364 queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
365
366 if (!closing) {
367 if (atomic_read(&nlk->mapped))
368 return -EBUSY;
369 if (atomic_read(&ring->pending))
370 return -EBUSY;
371 }
372
373 if (req->nm_block_nr) {
374 if (ring->pg_vec != NULL)
375 return -EBUSY;
376
377 if ((int)req->nm_block_size <= 0)
378 return -EINVAL;
379 if (!IS_ALIGNED(req->nm_block_size, PAGE_SIZE))
380 return -EINVAL;
381 if (req->nm_frame_size < NL_MMAP_HDRLEN)
382 return -EINVAL;
383 if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT))
384 return -EINVAL;
385
386 ring->frames_per_block = req->nm_block_size /
387 req->nm_frame_size;
388 if (ring->frames_per_block == 0)
389 return -EINVAL;
390 if (ring->frames_per_block * req->nm_block_nr !=
391 req->nm_frame_nr)
392 return -EINVAL;
393
394 order = get_order(req->nm_block_size);
395 pg_vec = alloc_pg_vec(nlk, req, order);
396 if (pg_vec == NULL)
397 return -ENOMEM;
398 } else {
399 if (req->nm_frame_nr)
400 return -EINVAL;
401 }
402
403 err = -EBUSY;
404 mutex_lock(&nlk->pg_vec_lock);
405 if (closing || atomic_read(&nlk->mapped) == 0) {
406 err = 0;
407 spin_lock_bh(&queue->lock);
408
409 ring->frame_max = req->nm_frame_nr - 1;
410 ring->head = 0;
411 ring->frame_size = req->nm_frame_size;
412 ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE;
413
414 swap(ring->pg_vec_len, req->nm_block_nr);
415 swap(ring->pg_vec_order, order);
416 swap(ring->pg_vec, pg_vec);
417
418 __skb_queue_purge(queue);
419 spin_unlock_bh(&queue->lock);
420
421 WARN_ON(atomic_read(&nlk->mapped));
422 }
423 mutex_unlock(&nlk->pg_vec_lock);
424
425 if (pg_vec)
426 free_pg_vec(pg_vec, order, req->nm_block_nr);
427 return err;
428 }
429
430 static void netlink_mm_open(struct vm_area_struct *vma)
431 {
432 struct file *file = vma->vm_file;
433 struct socket *sock = file->private_data;
434 struct sock *sk = sock->sk;
435
436 if (sk)
437 atomic_inc(&nlk_sk(sk)->mapped);
438 }
439
440 static void netlink_mm_close(struct vm_area_struct *vma)
441 {
442 struct file *file = vma->vm_file;
443 struct socket *sock = file->private_data;
444 struct sock *sk = sock->sk;
445
446 if (sk)
447 atomic_dec(&nlk_sk(sk)->mapped);
448 }
449
450 static const struct vm_operations_struct netlink_mmap_ops = {
451 .open = netlink_mm_open,
452 .close = netlink_mm_close,
453 };
454
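/* Map the configured rings into userspace: the rx ring is laid out first,
 * immediately followed by the tx ring, and the VMA size must match their
 * combined size exactly. nlk->mapped is bumped so the ring geometry cannot
 * change while a mapping exists.
 */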
455 static int netlink_mmap(struct file *file, struct socket *sock,
456 struct vm_area_struct *vma)
457 {
458 struct sock *sk = sock->sk;
459 struct netlink_sock *nlk = nlk_sk(sk);
460 struct netlink_ring *ring;
461 unsigned long start, size, expected;
462 unsigned int i;
463 int err = -EINVAL;
464
465 if (vma->vm_pgoff)
466 return -EINVAL;
467
468 mutex_lock(&nlk->pg_vec_lock);
469
470 expected = 0;
471 for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
472 if (ring->pg_vec == NULL)
473 continue;
474 expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE;
475 }
476
477 if (expected == 0)
478 goto out;
479
480 size = vma->vm_end - vma->vm_start;
481 if (size != expected)
482 goto out;
483
484 start = vma->vm_start;
485 for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
486 if (ring->pg_vec == NULL)
487 continue;
488
489 for (i = 0; i < ring->pg_vec_len; i++) {
490 struct page *page;
491 void *kaddr = ring->pg_vec[i];
492 unsigned int pg_num;
493
494 for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) {
495 page = pgvec_to_page(kaddr);
496 err = vm_insert_page(vma, start, page);
497 if (err < 0)
498 goto out;
499 start += PAGE_SIZE;
500 kaddr += PAGE_SIZE;
501 }
502 }
503 }
504
505 atomic_inc(&nlk->mapped);
506 vma->vm_ops = &netlink_mmap_ops;
507 err = 0;
508 out:
509 mutex_unlock(&nlk->pg_vec_lock);
510 return err;
511 }
512
513 static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr)
514 {
515 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
516 struct page *p_start, *p_end;
517
518 /* First page is flushed through netlink_{get,set}_status */
519 p_start = pgvec_to_page((void *)hdr + PAGE_SIZE);
520 p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + hdr->nm_len - 1);
521 while (p_start <= p_end) {
522 flush_dcache_page(p_start);
523 p_start++;
524 }
525 #endif
526 }
527
528 static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr)
529 {
530 smp_rmb();
531 flush_dcache_page(pgvec_to_page(hdr));
532 return hdr->nm_status;
533 }
534
535 static void netlink_set_status(struct nl_mmap_hdr *hdr,
536 enum nl_mmap_status status)
537 {
538 hdr->nm_status = status;
539 flush_dcache_page(pgvec_to_page(hdr));
540 smp_wmb();
541 }
542
543 static struct nl_mmap_hdr *
544 __netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos)
545 {
546 unsigned int pg_vec_pos, frame_off;
547
548 pg_vec_pos = pos / ring->frames_per_block;
549 frame_off = pos % ring->frames_per_block;
550
551 return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size);
552 }
553
554 static struct nl_mmap_hdr *
555 netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos,
556 enum nl_mmap_status status)
557 {
558 struct nl_mmap_hdr *hdr;
559
560 hdr = __netlink_lookup_frame(ring, pos);
561 if (netlink_get_status(hdr) != status)
562 return NULL;
563
564 return hdr;
565 }
566
567 static struct nl_mmap_hdr *
568 netlink_current_frame(const struct netlink_ring *ring,
569 enum nl_mmap_status status)
570 {
571 return netlink_lookup_frame(ring, ring->head, status);
572 }
573
574 static struct nl_mmap_hdr *
575 netlink_previous_frame(const struct netlink_ring *ring,
576 enum nl_mmap_status status)
577 {
578 unsigned int prev;
579
580 prev = ring->head ? ring->head - 1 : ring->frame_max;
581 return netlink_lookup_frame(ring, prev, status);
582 }
583
584 static void netlink_increment_head(struct netlink_ring *ring)
585 {
586 ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0;
587 }
588
589 static void netlink_forward_ring(struct netlink_ring *ring)
590 {
591 unsigned int head = ring->head, pos = head;
592 const struct nl_mmap_hdr *hdr;
593
594 do {
595 hdr = __netlink_lookup_frame(ring, pos);
596 if (hdr->nm_status == NL_MMAP_STATUS_UNUSED)
597 break;
598 if (hdr->nm_status != NL_MMAP_STATUS_SKIP)
599 break;
600 netlink_increment_head(ring);
601 } while (ring->head != head);
602 }
603
604 static bool netlink_dump_space(struct netlink_sock *nlk)
605 {
606 struct netlink_ring *ring = &nlk->rx_ring;
607 struct nl_mmap_hdr *hdr;
608 unsigned int n;
609
610 hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
611 if (hdr == NULL)
612 return false;
613
614 n = ring->head + ring->frame_max / 2;
615 if (n > ring->frame_max)
616 n -= ring->frame_max;
617
618 hdr = __netlink_lookup_frame(ring, n);
619
620 return hdr->nm_status == NL_MMAP_STATUS_UNUSED;
621 }
622
623 static unsigned int netlink_poll(struct file *file, struct socket *sock,
624 poll_table *wait)
625 {
626 struct sock *sk = sock->sk;
627 struct netlink_sock *nlk = nlk_sk(sk);
628 unsigned int mask;
629 int err;
630
631 if (nlk->rx_ring.pg_vec != NULL) {
632 /* Memory mapped sockets don't call recvmsg(), so flow control
633 * for dumps is performed here. A dump is allowed to continue
634 * if at least half the ring is unused.
635 */
636 while (nlk->cb_running && netlink_dump_space(nlk)) {
637 err = netlink_dump(sk);
638 if (err < 0) {
639 sk->sk_err = err;
640 sk->sk_error_report(sk);
641 break;
642 }
643 }
644 netlink_rcv_wake(sk);
645 }
646
647 mask = datagram_poll(file, sock, wait);
648
649 spin_lock_bh(&sk->sk_receive_queue.lock);
650 if (nlk->rx_ring.pg_vec) {
651 netlink_forward_ring(&nlk->rx_ring);
652 if (!netlink_previous_frame(&nlk->rx_ring, NL_MMAP_STATUS_UNUSED))
653 mask |= POLLIN | POLLRDNORM;
654 }
655 spin_unlock_bh(&sk->sk_receive_queue.lock);
656
657 spin_lock_bh(&sk->sk_write_queue.lock);
658 if (nlk->tx_ring.pg_vec) {
659 if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED))
660 mask |= POLLOUT | POLLWRNORM;
661 }
662 spin_unlock_bh(&sk->sk_write_queue.lock);
663
664 return mask;
665 }
666
667 static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb)
668 {
669 return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN);
670 }
671
672 static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk,
673 struct netlink_ring *ring,
674 struct nl_mmap_hdr *hdr)
675 {
676 unsigned int size;
677 void *data;
678
679 size = ring->frame_size - NL_MMAP_HDRLEN;
680 data = (void *)hdr + NL_MMAP_HDRLEN;
681
682 skb->head = data;
683 skb->data = data;
684 skb_reset_tail_pointer(skb);
685 skb->end = skb->tail + size;
686 skb->len = 0;
687
688 skb->destructor = netlink_skb_destructor;
689 NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED;
690 NETLINK_CB(skb).sk = sk;
691 }
692
693 static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
694 u32 dst_portid, u32 dst_group,
695 struct sock_iocb *siocb)
696 {
697 struct netlink_sock *nlk = nlk_sk(sk);
698 struct netlink_ring *ring;
699 struct nl_mmap_hdr *hdr;
700 struct sk_buff *skb;
701 unsigned int maxlen;
702 bool excl = true;
703 int err = 0, len = 0;
704
705 /* Netlink messages are validated by the receiver before processing.
706 * In order to avoid userspace changing the contents of the message
707 * after validation, the socket and the ring may only be used by a
708 * single process, otherwise we fall back to copying.
709 */
710 if (atomic_long_read(&sk->sk_socket->file->f_count) > 2 ||
711 atomic_read(&nlk->mapped) > 1)
712 excl = false;
713
714 mutex_lock(&nlk->pg_vec_lock);
715
716 ring = &nlk->tx_ring;
717 maxlen = ring->frame_size - NL_MMAP_HDRLEN;
718
719 do {
720 hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID);
721 if (hdr == NULL) {
722 if (!(msg->msg_flags & MSG_DONTWAIT) &&
723 atomic_read(&nlk->tx_ring.pending))
724 schedule();
725 continue;
726 }
727 if (hdr->nm_len > maxlen) {
728 err = -EINVAL;
729 goto out;
730 }
731
732 netlink_frame_flush_dcache(hdr);
733
734 if (likely(dst_portid == 0 && dst_group == 0 && excl)) {
735 skb = alloc_skb_head(GFP_KERNEL);
736 if (skb == NULL) {
737 err = -ENOBUFS;
738 goto out;
739 }
740 sock_hold(sk);
741 netlink_ring_setup_skb(skb, sk, ring, hdr);
742 NETLINK_CB(skb).flags |= NETLINK_SKB_TX;
743 __skb_put(skb, hdr->nm_len);
744 netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
745 atomic_inc(&ring->pending);
746 } else {
747 skb = alloc_skb(hdr->nm_len, GFP_KERNEL);
748 if (skb == NULL) {
749 err = -ENOBUFS;
750 goto out;
751 }
752 __skb_put(skb, hdr->nm_len);
753 memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len);
754 netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
755 }
756
757 netlink_increment_head(ring);
758
759 NETLINK_CB(skb).portid = nlk->portid;
760 NETLINK_CB(skb).dst_group = dst_group;
761 NETLINK_CB(skb).creds = siocb->scm->creds;
762
763 err = security_netlink_send(sk, skb);
764 if (err) {
765 kfree_skb(skb);
766 goto out;
767 }
768
769 if (unlikely(dst_group)) {
770 atomic_inc(&skb->users);
771 netlink_broadcast(sk, skb, dst_portid, dst_group,
772 GFP_KERNEL);
773 }
774 err = netlink_unicast(sk, skb, dst_portid,
775 msg->msg_flags & MSG_DONTWAIT);
776 if (err < 0)
777 goto out;
778 len += err;
779
780 } while (hdr != NULL ||
781 (!(msg->msg_flags & MSG_DONTWAIT) &&
782 atomic_read(&nlk->tx_ring.pending)));
783
784 if (len > 0)
785 err = len;
786 out:
787 mutex_unlock(&nlk->pg_vec_lock);
788 return err;
789 }
790
791 static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb)
792 {
793 struct nl_mmap_hdr *hdr;
794
795 hdr = netlink_mmap_hdr(skb);
796 hdr->nm_len = skb->len;
797 hdr->nm_group = NETLINK_CB(skb).dst_group;
798 hdr->nm_pid = NETLINK_CB(skb).creds.pid;
799 hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
800 hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
801 netlink_frame_flush_dcache(hdr);
802 netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
803
804 NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED;
805 kfree_skb(skb);
806 }
807
808 static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb)
809 {
810 struct netlink_sock *nlk = nlk_sk(sk);
811 struct netlink_ring *ring = &nlk->rx_ring;
812 struct nl_mmap_hdr *hdr;
813
814 spin_lock_bh(&sk->sk_receive_queue.lock);
815 hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
816 if (hdr == NULL) {
817 spin_unlock_bh(&sk->sk_receive_queue.lock);
818 kfree_skb(skb);
819 netlink_overrun(sk);
820 return;
821 }
822 netlink_increment_head(ring);
823 __skb_queue_tail(&sk->sk_receive_queue, skb);
824 spin_unlock_bh(&sk->sk_receive_queue.lock);
825
826 hdr->nm_len = skb->len;
827 hdr->nm_group = NETLINK_CB(skb).dst_group;
828 hdr->nm_pid = NETLINK_CB(skb).creds.pid;
829 hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
830 hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
831 netlink_set_status(hdr, NL_MMAP_STATUS_COPY);
832 }
833
834 #else /* CONFIG_NETLINK_MMAP */
835 #define netlink_skb_is_mmaped(skb) false
836 #define netlink_rx_is_mmaped(sk) false
837 #define netlink_tx_is_mmaped(sk) false
838 #define netlink_mmap sock_no_mmap
839 #define netlink_poll datagram_poll
840 #define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb) 0
841 #endif /* CONFIG_NETLINK_MMAP */
842
843 static void netlink_skb_destructor(struct sk_buff *skb)
844 {
845 #ifdef CONFIG_NETLINK_MMAP
846 struct nl_mmap_hdr *hdr;
847 struct netlink_ring *ring;
848 struct sock *sk;
849
850 /* If a packet from the kernel to userspace was freed because of an
851 * error without being delivered to userspace, the kernel must reset
852 * the status. In the direction userspace to kernel, the status is
853 * always reset here after the packet was processed and freed.
854 */
855 if (netlink_skb_is_mmaped(skb)) {
856 hdr = netlink_mmap_hdr(skb);
857 sk = NETLINK_CB(skb).sk;
858
859 if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) {
860 netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
861 ring = &nlk_sk(sk)->tx_ring;
862 } else {
863 if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) {
864 hdr->nm_len = 0;
865 netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
866 }
867 ring = &nlk_sk(sk)->rx_ring;
868 }
869
870 WARN_ON(atomic_read(&ring->pending) == 0);
871 atomic_dec(&ring->pending);
872 sock_put(sk);
873
874 skb->head = NULL;
875 }
876 #endif
877 if (is_vmalloc_addr(skb->head)) {
878 if (!skb->cloned ||
879 !atomic_dec_return(&(skb_shinfo(skb)->dataref)))
880 vfree(skb->head);
881
882 skb->head = NULL;
883 }
884 if (skb->sk != NULL)
885 sock_rfree(skb);
886 }
887
888 static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
889 {
890 WARN_ON(skb->sk != NULL);
891 skb->sk = sk;
892 skb->destructor = netlink_skb_destructor;
893 atomic_add(skb->truesize, &sk->sk_rmem_alloc);
894 sk_mem_charge(sk, skb->truesize);
895 }
896
897 static void netlink_sock_destruct(struct sock *sk)
898 {
899 struct netlink_sock *nlk = nlk_sk(sk);
900
901 if (nlk->cb_running) {
902 if (nlk->cb.done)
903 nlk->cb.done(&nlk->cb);
904
905 module_put(nlk->cb.module);
906 kfree_skb(nlk->cb.skb);
907 }
908
909 skb_queue_purge(&sk->sk_receive_queue);
910 #ifdef CONFIG_NETLINK_MMAP
911 if (1) {
912 struct nl_mmap_req req;
913
914 memset(&req, 0, sizeof(req));
915 if (nlk->rx_ring.pg_vec)
916 netlink_set_ring(sk, &req, true, false);
917 memset(&req, 0, sizeof(req));
918 if (nlk->tx_ring.pg_vec)
919 netlink_set_ring(sk, &req, true, true);
920 }
921 #endif /* CONFIG_NETLINK_MMAP */
922
923 if (!sock_flag(sk, SOCK_DEAD)) {
924 printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
925 return;
926 }
927
928 WARN_ON(atomic_read(&sk->sk_rmem_alloc));
929 WARN_ON(atomic_read(&sk->sk_wmem_alloc));
930 WARN_ON(nlk_sk(sk)->groups);
931 }
932
933 /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
934 * SMP. Look, when several writers sleep and reader wakes them up, all but one
935 * immediately hit write lock and grab all the cpus. Exclusive sleep solves
936 * this, _but_ remember, it adds useless work on UP machines.
937 */
938
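/* Writers (bind, insert, remove, listener updates) use netlink_table_grab()/
 * netlink_table_ungrab() and may sleep until all readers are gone; lookup
 * paths use netlink_lock_table()/netlink_unlock_table(), which only bump
 * nl_table_users under the read lock.
 */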
939 void netlink_table_grab(void)
940 __acquires(nl_table_lock)
941 {
942 might_sleep();
943
944 write_lock_irq(&nl_table_lock);
945
946 if (atomic_read(&nl_table_users)) {
947 DECLARE_WAITQUEUE(wait, current);
948
949 add_wait_queue_exclusive(&nl_table_wait, &wait);
950 for (;;) {
951 set_current_state(TASK_UNINTERRUPTIBLE);
952 if (atomic_read(&nl_table_users) == 0)
953 break;
954 write_unlock_irq(&nl_table_lock);
955 schedule();
956 write_lock_irq(&nl_table_lock);
957 }
958
959 __set_current_state(TASK_RUNNING);
960 remove_wait_queue(&nl_table_wait, &wait);
961 }
962 }
963
964 void netlink_table_ungrab(void)
965 __releases(nl_table_lock)
966 {
967 write_unlock_irq(&nl_table_lock);
968 wake_up(&nl_table_wait);
969 }
970
971 static inline void
972 netlink_lock_table(void)
973 {
974 /* read_lock() synchronizes us to netlink_table_grab */
975
976 read_lock(&nl_table_lock);
977 atomic_inc(&nl_table_users);
978 read_unlock(&nl_table_lock);
979 }
980
981 static inline void
982 netlink_unlock_table(void)
983 {
984 if (atomic_dec_and_test(&nl_table_users))
985 wake_up(&nl_table_wait);
986 }
987
988 static bool netlink_compare(struct net *net, struct sock *sk)
989 {
990 return net_eq(sock_net(sk), net);
991 }
992
993 static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
994 {
995 struct netlink_table *table = &nl_table[protocol];
996 struct nl_portid_hash *hash = &table->hash;
997 struct hlist_head *head;
998 struct sock *sk;
999
1000 read_lock(&nl_table_lock);
1001 head = nl_portid_hashfn(hash, portid);
1002 sk_for_each(sk, head) {
1003 if (table->compare(net, sk) &&
1004 (nlk_sk(sk)->portid == portid)) {
1005 sock_hold(sk);
1006 goto found;
1007 }
1008 }
1009 sk = NULL;
1010 found:
1011 read_unlock(&nl_table_lock);
1012 return sk;
1013 }
1014
1015 static struct hlist_head *nl_portid_hash_zalloc(size_t size)
1016 {
1017 if (size <= PAGE_SIZE)
1018 return kzalloc(size, GFP_ATOMIC);
1019 else
1020 return (struct hlist_head *)
1021 __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
1022 get_order(size));
1023 }
1024
1025 static void nl_portid_hash_free(struct hlist_head *table, size_t size)
1026 {
1027 if (size <= PAGE_SIZE)
1028 kfree(table);
1029 else
1030 free_pages((unsigned long)table, get_order(size));
1031 }
1032
1033 static int nl_portid_hash_rehash(struct nl_portid_hash *hash, int grow)
1034 {
1035 unsigned int omask, mask, shift;
1036 size_t osize, size;
1037 struct hlist_head *otable, *table;
1038 int i;
1039
1040 omask = mask = hash->mask;
1041 osize = size = (mask + 1) * sizeof(*table);
1042 shift = hash->shift;
1043
1044 if (grow) {
1045 if (++shift > hash->max_shift)
1046 return 0;
1047 mask = mask * 2 + 1;
1048 size *= 2;
1049 }
1050
1051 table = nl_portid_hash_zalloc(size);
1052 if (!table)
1053 return 0;
1054
1055 otable = hash->table;
1056 hash->table = table;
1057 hash->mask = mask;
1058 hash->shift = shift;
1059 get_random_bytes(&hash->rnd, sizeof(hash->rnd));
1060
1061 for (i = 0; i <= omask; i++) {
1062 struct sock *sk;
1063 struct hlist_node *tmp;
1064
1065 sk_for_each_safe(sk, tmp, &otable[i])
1066 __sk_add_node(sk, nl_portid_hashfn(hash, nlk_sk(sk)->portid));
1067 }
1068
1069 nl_portid_hash_free(otable, osize);
1070 hash->rehash_time = jiffies + 10 * 60 * HZ;
1071 return 1;
1072 }
1073
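/* Grow the hash table when the average chain length exceeds one (up to
 * max_shift); otherwise, if the probed chain is longer than average, re-seed
 * and rehash in place at most once every ten minutes.
 */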
1074 static inline int nl_portid_hash_dilute(struct nl_portid_hash *hash, int len)
1075 {
1076 int avg = hash->entries >> hash->shift;
1077
1078 if (unlikely(avg > 1) && nl_portid_hash_rehash(hash, 1))
1079 return 1;
1080
1081 if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) {
1082 nl_portid_hash_rehash(hash, 0);
1083 return 1;
1084 }
1085
1086 return 0;
1087 }
1088
1089 static const struct proto_ops netlink_ops;
1090
1091 static void
1092 netlink_update_listeners(struct sock *sk)
1093 {
1094 struct netlink_table *tbl = &nl_table[sk->sk_protocol];
1095 unsigned long mask;
1096 unsigned int i;
1097 struct listeners *listeners;
1098
1099 listeners = nl_deref_protected(tbl->listeners);
1100 if (!listeners)
1101 return;
1102
1103 for (i = 0; i < NLGRPLONGS(tbl->groups); i++) {
1104 mask = 0;
1105 sk_for_each_bound(sk, &tbl->mc_list) {
1106 if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
1107 mask |= nlk_sk(sk)->groups[i];
1108 }
1109 listeners->masks[i] = mask;
1110 }
1111 /* this function is only called with the netlink table "grabbed", which
1112 * makes sure updates are visible before bind or setsockopt return. */
1113 }
1114
1115 static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
1116 {
1117 struct netlink_table *table = &nl_table[sk->sk_protocol];
1118 struct nl_portid_hash *hash = &table->hash;
1119 struct hlist_head *head;
1120 int err = -EADDRINUSE;
1121 struct sock *osk;
1122 int len;
1123
1124 netlink_table_grab();
1125 head = nl_portid_hashfn(hash, portid);
1126 len = 0;
1127 sk_for_each(osk, head) {
1128 if (table->compare(net, osk) &&
1129 (nlk_sk(osk)->portid == portid))
1130 break;
1131 len++;
1132 }
1133 if (osk)
1134 goto err;
1135
1136 err = -EBUSY;
1137 if (nlk_sk(sk)->portid)
1138 goto err;
1139
1140 err = -ENOMEM;
1141 if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX))
1142 goto err;
1143
1144 if (len && nl_portid_hash_dilute(hash, len))
1145 head = nl_portid_hashfn(hash, portid);
1146 hash->entries++;
1147 nlk_sk(sk)->portid = portid;
1148 sk_add_node(sk, head);
1149 err = 0;
1150
1151 err:
1152 netlink_table_ungrab();
1153 return err;
1154 }
1155
1156 static void netlink_remove(struct sock *sk)
1157 {
1158 netlink_table_grab();
1159 if (sk_del_node_init(sk))
1160 nl_table[sk->sk_protocol].hash.entries--;
1161 if (nlk_sk(sk)->subscriptions)
1162 __sk_del_bind_node(sk);
1163 netlink_table_ungrab();
1164 }
1165
1166 static struct proto netlink_proto = {
1167 .name = "NETLINK",
1168 .owner = THIS_MODULE,
1169 .obj_size = sizeof(struct netlink_sock),
1170 };
1171
1172 static int __netlink_create(struct net *net, struct socket *sock,
1173 struct mutex *cb_mutex, int protocol)
1174 {
1175 struct sock *sk;
1176 struct netlink_sock *nlk;
1177
1178 sock->ops = &netlink_ops;
1179
1180 sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto);
1181 if (!sk)
1182 return -ENOMEM;
1183
1184 sock_init_data(sock, sk);
1185
1186 nlk = nlk_sk(sk);
1187 if (cb_mutex) {
1188 nlk->cb_mutex = cb_mutex;
1189 } else {
1190 nlk->cb_mutex = &nlk->cb_def_mutex;
1191 mutex_init(nlk->cb_mutex);
1192 }
1193 init_waitqueue_head(&nlk->wait);
1194 #ifdef CONFIG_NETLINK_MMAP
1195 mutex_init(&nlk->pg_vec_lock);
1196 #endif
1197
1198 sk->sk_destruct = netlink_sock_destruct;
1199 sk->sk_protocol = protocol;
1200 return 0;
1201 }
1202
1203 static int netlink_create(struct net *net, struct socket *sock, int protocol,
1204 int kern)
1205 {
1206 struct module *module = NULL;
1207 struct mutex *cb_mutex;
1208 struct netlink_sock *nlk;
1209 int (*bind)(int group);
1210 void (*unbind)(int group);
1211 int err = 0;
1212
1213 sock->state = SS_UNCONNECTED;
1214
1215 if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
1216 return -ESOCKTNOSUPPORT;
1217
1218 if (protocol < 0 || protocol >= MAX_LINKS)
1219 return -EPROTONOSUPPORT;
1220
1221 netlink_lock_table();
1222 #ifdef CONFIG_MODULES
1223 if (!nl_table[protocol].registered) {
1224 netlink_unlock_table();
1225 request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
1226 netlink_lock_table();
1227 }
1228 #endif
1229 if (nl_table[protocol].registered &&
1230 try_module_get(nl_table[protocol].module))
1231 module = nl_table[protocol].module;
1232 else
1233 err = -EPROTONOSUPPORT;
1234 cb_mutex = nl_table[protocol].cb_mutex;
1235 bind = nl_table[protocol].bind;
1236 unbind = nl_table[protocol].unbind;
1237 netlink_unlock_table();
1238
1239 if (err < 0)
1240 goto out;
1241
1242 err = __netlink_create(net, sock, cb_mutex, protocol);
1243 if (err < 0)
1244 goto out_module;
1245
1246 local_bh_disable();
1247 sock_prot_inuse_add(net, &netlink_proto, 1);
1248 local_bh_enable();
1249
1250 nlk = nlk_sk(sock->sk);
1251 nlk->module = module;
1252 nlk->netlink_bind = bind;
1253 nlk->netlink_unbind = unbind;
1254 out:
1255 return err;
1256
1257 out_module:
1258 module_put(module);
1259 goto out;
1260 }
1261
1262 static int netlink_release(struct socket *sock)
1263 {
1264 struct sock *sk = sock->sk;
1265 struct netlink_sock *nlk;
1266
1267 if (!sk)
1268 return 0;
1269
1270 netlink_remove(sk);
1271 sock_orphan(sk);
1272 nlk = nlk_sk(sk);
1273
1274 /*
1275 * OK. Socket is unlinked, any packets that arrive now
1276 * will be purged.
1277 */
1278
1279 sock->sk = NULL;
1280 wake_up_interruptible_all(&nlk->wait);
1281
1282 skb_queue_purge(&sk->sk_write_queue);
1283
1284 if (nlk->portid) {
1285 struct netlink_notify n = {
1286 .net = sock_net(sk),
1287 .protocol = sk->sk_protocol,
1288 .portid = nlk->portid,
1289 };
1290 atomic_notifier_call_chain(&netlink_chain,
1291 NETLINK_URELEASE, &n);
1292 }
1293
1294 module_put(nlk->module);
1295
1296 netlink_table_grab();
1297 if (netlink_is_kernel(sk)) {
1298 BUG_ON(nl_table[sk->sk_protocol].registered == 0);
1299 if (--nl_table[sk->sk_protocol].registered == 0) {
1300 struct listeners *old;
1301
1302 old = nl_deref_protected(nl_table[sk->sk_protocol].listeners);
1303 RCU_INIT_POINTER(nl_table[sk->sk_protocol].listeners, NULL);
1304 kfree_rcu(old, rcu);
1305 nl_table[sk->sk_protocol].module = NULL;
1306 nl_table[sk->sk_protocol].bind = NULL;
1307 nl_table[sk->sk_protocol].unbind = NULL;
1308 nl_table[sk->sk_protocol].flags = 0;
1309 nl_table[sk->sk_protocol].registered = 0;
1310 }
1311 } else if (nlk->subscriptions) {
1312 netlink_update_listeners(sk);
1313 }
1314 netlink_table_ungrab();
1315
1316 kfree(nlk->groups);
1317 nlk->groups = NULL;
1318
1319 local_bh_disable();
1320 sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
1321 local_bh_enable();
1322 sock_put(sk);
1323 return 0;
1324 }
1325
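/* Pick a portid automatically for sockets that bind or connect without an
 * explicit nl_pid: start with the thread group id and, on collision, walk
 * downwards through negative values starting at -4097.
 */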
1326 static int netlink_autobind(struct socket *sock)
1327 {
1328 struct sock *sk = sock->sk;
1329 struct net *net = sock_net(sk);
1330 struct netlink_table *table = &nl_table[sk->sk_protocol];
1331 struct nl_portid_hash *hash = &table->hash;
1332 struct hlist_head *head;
1333 struct sock *osk;
1334 s32 portid = task_tgid_vnr(current);
1335 int err;
1336 static s32 rover = -4097;
1337
1338 retry:
1339 cond_resched();
1340 netlink_table_grab();
1341 head = nl_portid_hashfn(hash, portid);
1342 sk_for_each(osk, head) {
1343 if (!table->compare(net, osk))
1344 continue;
1345 if (nlk_sk(osk)->portid == portid) {
1346 /* Bind collision, search negative portid values. */
1347 portid = rover--;
1348 if (rover > -4097)
1349 rover = -4097;
1350 netlink_table_ungrab();
1351 goto retry;
1352 }
1353 }
1354 netlink_table_ungrab();
1355
1356 err = netlink_insert(sk, net, portid);
1357 if (err == -EADDRINUSE)
1358 goto retry;
1359
1360 /* If 2 threads race to autobind, that is fine. */
1361 if (err == -EBUSY)
1362 err = 0;
1363
1364 return err;
1365 }
1366
1367 /**
1368 * __netlink_ns_capable - General netlink message capability test
1369 * @nsp: NETLINK_CB of the socket buffer holding a netlink command from userspace.
1370 * @user_ns: The user namespace of the capability to use
1371 * @cap: The capability to use
1372 *
1373 * Test to see if the opener of the socket we received the message
1374 * from had, when the netlink socket was created, and the sender of the
1375 * message has, the capability @cap in the user namespace @user_ns.
1376 */
1377 bool __netlink_ns_capable(const struct netlink_skb_parms *nsp,
1378 struct user_namespace *user_ns, int cap)
1379 {
1380 return sk_ns_capable(nsp->sk, user_ns, cap);
1381 }
1382 EXPORT_SYMBOL(__netlink_ns_capable);
1383
1384 /**
1385 * netlink_ns_capable - General netlink message capability test
1386 * @skb: socket buffer holding a netlink command from userspace
1387 * @user_ns: The user namespace of the capability to use
1388 * @cap: The capability to use
1389 *
1390 * Test to see if the opener of the socket we received the message
1391 * from had, when the netlink socket was created, and the sender of the
1392 * message has, the capability @cap in the user namespace @user_ns.
1393 */
1394 bool netlink_ns_capable(const struct sk_buff *skb,
1395 struct user_namespace *user_ns, int cap)
1396 {
1397 return __netlink_ns_capable(&NETLINK_CB(skb), user_ns, cap);
1398 }
1399 EXPORT_SYMBOL(netlink_ns_capable);
1400
1401 /**
1402 * netlink_capable - Netlink global message capability test
1403 * @skb: socket buffer holding a netlink command from userspace
1404 * @cap: The capability to use
1405 *
1406 * Test to see if the opener of the socket we received the message
1407 * from had, when the netlink socket was created, and the sender of the
1408 * message has, the capability @cap in all user namespaces.
1409 */
1410 bool netlink_capable(const struct sk_buff *skb, int cap)
1411 {
1412 return netlink_ns_capable(skb, &init_user_ns, cap);
1413 }
1414 EXPORT_SYMBOL(netlink_capable);
1415
1416 /**
1417 * netlink_net_capable - Netlink network namespace message capability test
1418 * @skb: socket buffer holding a netlink command from userspace
1419 * @cap: The capability to use
1420 *
1421 * Test to see if the opener of the socket we received the message
1422 * from had, when the netlink socket was created, and the sender of the
1423 * message has, the capability @cap over the network namespace of
1424 * the socket we received the message from.
1425 */
1426 bool netlink_net_capable(const struct sk_buff *skb, int cap)
1427 {
1428 return netlink_ns_capable(skb, sock_net(skb->sk)->user_ns, cap);
1429 }
1430 EXPORT_SYMBOL(netlink_net_capable);
1431
1432 static inline int netlink_allowed(const struct socket *sock, unsigned int flag)
1433 {
1434 return (nl_table[sock->sk->sk_protocol].flags & flag) ||
1435 ns_capable(sock_net(sock->sk)->user_ns, CAP_NET_ADMIN);
1436 }
1437
1438 static void
1439 netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions)
1440 {
1441 struct netlink_sock *nlk = nlk_sk(sk);
1442
1443 if (nlk->subscriptions && !subscriptions)
1444 __sk_del_bind_node(sk);
1445 else if (!nlk->subscriptions && subscriptions)
1446 sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
1447 nlk->subscriptions = subscriptions;
1448 }
1449
1450 static int netlink_realloc_groups(struct sock *sk)
1451 {
1452 struct netlink_sock *nlk = nlk_sk(sk);
1453 unsigned int groups;
1454 unsigned long *new_groups;
1455 int err = 0;
1456
1457 netlink_table_grab();
1458
1459 groups = nl_table[sk->sk_protocol].groups;
1460 if (!nl_table[sk->sk_protocol].registered) {
1461 err = -ENOENT;
1462 goto out_unlock;
1463 }
1464
1465 if (nlk->ngroups >= groups)
1466 goto out_unlock;
1467
1468 new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC);
1469 if (new_groups == NULL) {
1470 err = -ENOMEM;
1471 goto out_unlock;
1472 }
1473 memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,
1474 NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));
1475
1476 nlk->groups = new_groups;
1477 nlk->ngroups = groups;
1478 out_unlock:
1479 netlink_table_ungrab();
1480 return err;
1481 }
1482
1483 static void netlink_unbind(int group, long unsigned int groups,
1484 struct netlink_sock *nlk)
1485 {
1486 int undo;
1487
1488 if (!nlk->netlink_unbind)
1489 return;
1490
1491 for (undo = 0; undo < group; undo++)
1492 if (test_bit(group, &groups))
1493 nlk->netlink_unbind(undo);
1494 }
1495
1496 static int netlink_bind(struct socket *sock, struct sockaddr *addr,
1497 int addr_len)
1498 {
1499 struct sock *sk = sock->sk;
1500 struct net *net = sock_net(sk);
1501 struct netlink_sock *nlk = nlk_sk(sk);
1502 struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
1503 int err;
1504 long unsigned int groups = nladdr->nl_groups;
1505
1506 if (addr_len < sizeof(struct sockaddr_nl))
1507 return -EINVAL;
1508
1509 if (nladdr->nl_family != AF_NETLINK)
1510 return -EINVAL;
1511
1512 /* Only the superuser is allowed to listen to multicasts */
1513 if (groups) {
1514 if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV))
1515 return -EPERM;
1516 err = netlink_realloc_groups(sk);
1517 if (err)
1518 return err;
1519 }
1520
1521 if (nlk->portid)
1522 if (nladdr->nl_pid != nlk->portid)
1523 return -EINVAL;
1524
1525 if (nlk->netlink_bind && groups) {
1526 int group;
1527
1528 for (group = 0; group < nlk->ngroups; group++) {
1529 if (!test_bit(group, &groups))
1530 continue;
1531 err = nlk->netlink_bind(group);
1532 if (!err)
1533 continue;
1534 netlink_unbind(group, groups, nlk);
1535 return err;
1536 }
1537 }
1538
1539 if (!nlk->portid) {
1540 err = nladdr->nl_pid ?
1541 netlink_insert(sk, net, nladdr->nl_pid) :
1542 netlink_autobind(sock);
1543 if (err) {
1544 netlink_unbind(nlk->ngroups - 1, groups, nlk);
1545 return err;
1546 }
1547 }
1548
1549 if (!groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
1550 return 0;
1551
1552 netlink_table_grab();
1553 netlink_update_subscriptions(sk, nlk->subscriptions +
1554 hweight32(groups) -
1555 hweight32(nlk->groups[0]));
1556 nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | groups;
1557 netlink_update_listeners(sk);
1558 netlink_table_ungrab();
1559
1560 return 0;
1561 }
1562
1563 static int netlink_connect(struct socket *sock, struct sockaddr *addr,
1564 int alen, int flags)
1565 {
1566 int err = 0;
1567 struct sock *sk = sock->sk;
1568 struct netlink_sock *nlk = nlk_sk(sk);
1569 struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
1570
1571 if (alen < sizeof(struct sockaddr_nl))
1572 return -EINVAL;
1573
1574 if (addr->sa_family == AF_UNSPEC) {
1575 sk->sk_state = NETLINK_UNCONNECTED;
1576 nlk->dst_portid = 0;
1577 nlk->dst_group = 0;
1578 return 0;
1579 }
1580 if (addr->sa_family != AF_NETLINK)
1581 return -EINVAL;
1582
1583 if ((nladdr->nl_groups || nladdr->nl_pid) &&
1584 !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
1585 return -EPERM;
1586
1587 if (!nlk->portid)
1588 err = netlink_autobind(sock);
1589
1590 if (err == 0) {
1591 sk->sk_state = NETLINK_CONNECTED;
1592 nlk->dst_portid = nladdr->nl_pid;
1593 nlk->dst_group = ffs(nladdr->nl_groups);
1594 }
1595
1596 return err;
1597 }
1598
1599 static int netlink_getname(struct socket *sock, struct sockaddr *addr,
1600 int *addr_len, int peer)
1601 {
1602 struct sock *sk = sock->sk;
1603 struct netlink_sock *nlk = nlk_sk(sk);
1604 DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr);
1605
1606 nladdr->nl_family = AF_NETLINK;
1607 nladdr->nl_pad = 0;
1608 *addr_len = sizeof(*nladdr);
1609
1610 if (peer) {
1611 nladdr->nl_pid = nlk->dst_portid;
1612 nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
1613 } else {
1614 nladdr->nl_pid = nlk->portid;
1615 nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
1616 }
1617 return 0;
1618 }
1619
1620 static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
1621 {
1622 struct sock *sock;
1623 struct netlink_sock *nlk;
1624
1625 sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, portid);
1626 if (!sock)
1627 return ERR_PTR(-ECONNREFUSED);
1628
1629 /* Refuse delivery if the destination socket is connected to a different peer */
1630 nlk = nlk_sk(sock);
1631 if (sock->sk_state == NETLINK_CONNECTED &&
1632 nlk->dst_portid != nlk_sk(ssk)->portid) {
1633 sock_put(sock);
1634 return ERR_PTR(-ECONNREFUSED);
1635 }
1636 return sock;
1637 }
1638
1639 struct sock *netlink_getsockbyfilp(struct file *filp)
1640 {
1641 struct inode *inode = file_inode(filp);
1642 struct sock *sock;
1643
1644 if (!S_ISSOCK(inode->i_mode))
1645 return ERR_PTR(-ENOTSOCK);
1646
1647 sock = SOCKET_I(inode)->sk;
1648 if (sock->sk_family != AF_NETLINK)
1649 return ERR_PTR(-EINVAL);
1650
1651 sock_hold(sock);
1652 return sock;
1653 }
1654
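/* For unicast messages larger than NLMSG_GOODSIZE, back the skb with
 * vmalloc'ed memory via build_skb() to avoid high-order page allocations;
 * broadcast messages and smaller unicasts use a plain alloc_skb().
 */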
1655 static struct sk_buff *netlink_alloc_large_skb(unsigned int size,
1656 int broadcast)
1657 {
1658 struct sk_buff *skb;
1659 void *data;
1660
1661 if (size <= NLMSG_GOODSIZE || broadcast)
1662 return alloc_skb(size, GFP_KERNEL);
1663
1664 size = SKB_DATA_ALIGN(size) +
1665 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1666
1667 data = vmalloc(size);
1668 if (data == NULL)
1669 return NULL;
1670
1671 skb = build_skb(data, size);
1672 if (skb == NULL)
1673 vfree(data);
1674 else {
1675 skb->head_frag = 0;
1676 skb->destructor = netlink_skb_destructor;
1677 }
1678
1679 return skb;
1680 }
1681
1682 /*
1683 * Attach a skb to a netlink socket.
1684 * The caller must hold a reference to the destination socket. On error, the
1685 * reference is dropped. The skb is not sent to the destination, just
1686 * all error checks are performed and memory in the queue is reserved.
1687 * Return values:
1688 * < 0: error. skb freed, reference to sock dropped.
1689 * 0: continue
1690 * 1: repeat lookup - reference dropped while waiting for socket memory.
1691 */
1692 int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
1693 long *timeo, struct sock *ssk)
1694 {
1695 struct netlink_sock *nlk;
1696
1697 nlk = nlk_sk(sk);
1698
1699 if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
1700 test_bit(NETLINK_CONGESTED, &nlk->state)) &&
1701 !netlink_skb_is_mmaped(skb)) {
1702 DECLARE_WAITQUEUE(wait, current);
1703 if (!*timeo) {
1704 if (!ssk || netlink_is_kernel(ssk))
1705 netlink_overrun(sk);
1706 sock_put(sk);
1707 kfree_skb(skb);
1708 return -EAGAIN;
1709 }
1710
1711 __set_current_state(TASK_INTERRUPTIBLE);
1712 add_wait_queue(&nlk->wait, &wait);
1713
1714 if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
1715 test_bit(NETLINK_CONGESTED, &nlk->state)) &&
1716 !sock_flag(sk, SOCK_DEAD))
1717 *timeo = schedule_timeout(*timeo);
1718
1719 __set_current_state(TASK_RUNNING);
1720 remove_wait_queue(&nlk->wait, &wait);
1721 sock_put(sk);
1722
1723 if (signal_pending(current)) {
1724 kfree_skb(skb);
1725 return sock_intr_errno(*timeo);
1726 }
1727 return 1;
1728 }
1729 netlink_skb_set_owner_r(skb, sk);
1730 return 0;
1731 }
1732
1733 static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
1734 {
1735 int len = skb->len;
1736
1737 netlink_deliver_tap(skb);
1738
1739 #ifdef CONFIG_NETLINK_MMAP
1740 if (netlink_skb_is_mmaped(skb))
1741 netlink_queue_mmaped_skb(sk, skb);
1742 else if (netlink_rx_is_mmaped(sk))
1743 netlink_ring_set_copied(sk, skb);
1744 else
1745 #endif /* CONFIG_NETLINK_MMAP */
1746 skb_queue_tail(&sk->sk_receive_queue, skb);
1747 sk->sk_data_ready(sk);
1748 return len;
1749 }
1750
1751 int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
1752 {
1753 int len = __netlink_sendskb(sk, skb);
1754
1755 sock_put(sk);
1756 return len;
1757 }
1758
1759 void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
1760 {
1761 kfree_skb(skb);
1762 sock_put(sk);
1763 }
1764
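/* Shrink an skb before queueing when at least half of its truesize is unused
 * tail room: clone it first if shared, then trim the excess with
 * pskb_expand_head(). Mmaped and vmalloc-backed skbs are left as-is.
 */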
1765 static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
1766 {
1767 int delta;
1768
1769 WARN_ON(skb->sk != NULL);
1770 if (netlink_skb_is_mmaped(skb))
1771 return skb;
1772
1773 delta = skb->end - skb->tail;
1774 if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize)
1775 return skb;
1776
1777 if (skb_shared(skb)) {
1778 struct sk_buff *nskb = skb_clone(skb, allocation);
1779 if (!nskb)
1780 return skb;
1781 consume_skb(skb);
1782 skb = nskb;
1783 }
1784
1785 if (!pskb_expand_head(skb, 0, -delta, allocation))
1786 skb->truesize -= delta;
1787
1788 return skb;
1789 }
1790
1791 static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
1792 struct sock *ssk)
1793 {
1794 int ret;
1795 struct netlink_sock *nlk = nlk_sk(sk);
1796
1797 ret = -ECONNREFUSED;
1798 if (nlk->netlink_rcv != NULL) {
1799 ret = skb->len;
1800 netlink_skb_set_owner_r(skb, sk);
1801 NETLINK_CB(skb).sk = ssk;
1802 netlink_deliver_tap_kernel(sk, ssk, skb);
1803 nlk->netlink_rcv(skb);
1804 consume_skb(skb);
1805 } else {
1806 kfree_skb(skb);
1807 }
1808 sock_put(sk);
1809 return ret;
1810 }
1811
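/* Typical kernel-side caller (sketch, names illustrative): build a message
 * with nlmsg_new()/nlmsg_put(), fill in the payload, then hand it off:
 *
 *	skb = nlmsg_new(payload_len, GFP_KERNEL);
 *	nlh = nlmsg_put(skb, 0, seq, msg_type, payload_len, 0);
 *	memcpy(nlmsg_data(nlh), payload, payload_len);
 *	netlink_unicast(kernel_sk, skb, dst_portid, MSG_DONTWAIT);
 *
 * netlink_unicast() always consumes the skb and returns the number of bytes
 * delivered or a negative errno.
 */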
1812 int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
1813 u32 portid, int nonblock)
1814 {
1815 struct sock *sk;
1816 int err;
1817 long timeo;
1818
1819 skb = netlink_trim(skb, gfp_any());
1820
1821 timeo = sock_sndtimeo(ssk, nonblock);
1822 retry:
1823 sk = netlink_getsockbyportid(ssk, portid);
1824 if (IS_ERR(sk)) {
1825 kfree_skb(skb);
1826 return PTR_ERR(sk);
1827 }
1828 if (netlink_is_kernel(sk))
1829 return netlink_unicast_kernel(sk, skb, ssk);
1830
1831 if (sk_filter(sk, skb)) {
1832 err = skb->len;
1833 kfree_skb(skb);
1834 sock_put(sk);
1835 return err;
1836 }
1837
1838 err = netlink_attachskb(sk, skb, &timeo, ssk);
1839 if (err == 1)
1840 goto retry;
1841 if (err)
1842 return err;
1843
1844 return netlink_sendskb(sk, skb);
1845 }
1846 EXPORT_SYMBOL(netlink_unicast);
1847
1848 struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
1849 u32 dst_portid, gfp_t gfp_mask)
1850 {
1851 #ifdef CONFIG_NETLINK_MMAP
1852 struct sock *sk = NULL;
1853 struct sk_buff *skb;
1854 struct netlink_ring *ring;
1855 struct nl_mmap_hdr *hdr;
1856 unsigned int maxlen;
1857
1858 sk = netlink_getsockbyportid(ssk, dst_portid);
1859 if (IS_ERR(sk))
1860 goto out;
1861
1862 ring = &nlk_sk(sk)->rx_ring;
1863 /* fast-path without atomic ops for common case: non-mmaped receiver */
1864 if (ring->pg_vec == NULL)
1865 goto out_put;
1866
1867 if (ring->frame_size - NL_MMAP_HDRLEN < size)
1868 goto out_put;
1869
1870 skb = alloc_skb_head(gfp_mask);
1871 if (skb == NULL)
1872 goto err1;
1873
1874 spin_lock_bh(&sk->sk_receive_queue.lock);
1875 /* check again under lock */
1876 if (ring->pg_vec == NULL)
1877 goto out_free;
1878
1879 /* check again under lock */
1880 maxlen = ring->frame_size - NL_MMAP_HDRLEN;
1881 if (maxlen < size)
1882 goto out_free;
1883
1884 netlink_forward_ring(ring);
1885 hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
1886 if (hdr == NULL)
1887 goto err2;
1888 netlink_ring_setup_skb(skb, sk, ring, hdr);
1889 netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
1890 atomic_inc(&ring->pending);
1891 netlink_increment_head(ring);
1892
1893 spin_unlock_bh(&sk->sk_receive_queue.lock);
1894 return skb;
1895
1896 err2:
1897 kfree_skb(skb);
1898 spin_unlock_bh(&sk->sk_receive_queue.lock);
1899 netlink_overrun(sk);
1900 err1:
1901 sock_put(sk);
1902 return NULL;
1903
1904 out_free:
1905 kfree_skb(skb);
1906 spin_unlock_bh(&sk->sk_receive_queue.lock);
1907 out_put:
1908 sock_put(sk);
1909 out:
1910 #endif
1911 return alloc_skb(size, gfp_mask);
1912 }
1913 EXPORT_SYMBOL_GPL(netlink_alloc_skb);
1914
1915 int netlink_has_listeners(struct sock *sk, unsigned int group)
1916 {
1917 int res = 0;
1918 struct listeners *listeners;
1919
1920 BUG_ON(!netlink_is_kernel(sk));
1921
1922 rcu_read_lock();
1923 listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);
1924
1925 if (listeners && group - 1 < nl_table[sk->sk_protocol].groups)
1926 res = test_bit(group - 1, listeners->masks);
1927
1928 rcu_read_unlock();
1929
1930 return res;
1931 }
1932 EXPORT_SYMBOL_GPL(netlink_has_listeners);
1933
1934 static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
1935 {
1936 struct netlink_sock *nlk = nlk_sk(sk);
1937
1938 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
1939 !test_bit(NETLINK_CONGESTED, &nlk->state)) {
1940 netlink_skb_set_owner_r(skb, sk);
1941 __netlink_sendskb(sk, skb);
1942 return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
1943 }
1944 return -1;
1945 }
1946
1947 struct netlink_broadcast_data {
1948 struct sock *exclude_sk;
1949 struct net *net;
1950 u32 portid;
1951 u32 group;
1952 int failure;
1953 int delivery_failure;
1954 int congested;
1955 int delivered;
1956 gfp_t allocation;
1957 struct sk_buff *skb, *skb2;
1958 int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
1959 void *tx_data;
1960 };
1961
1962 static int do_one_broadcast(struct sock *sk,
1963 struct netlink_broadcast_data *p)
1964 {
1965 struct netlink_sock *nlk = nlk_sk(sk);
1966 int val;
1967
1968 if (p->exclude_sk == sk)
1969 goto out;
1970
1971 if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
1972 !test_bit(p->group - 1, nlk->groups))
1973 goto out;
1974
1975 if (!net_eq(sock_net(sk), p->net))
1976 goto out;
1977
1978 if (p->failure) {
1979 netlink_overrun(sk);
1980 goto out;
1981 }
1982
1983 sock_hold(sk);
1984 if (p->skb2 == NULL) {
1985 if (skb_shared(p->skb)) {
1986 p->skb2 = skb_clone(p->skb, p->allocation);
1987 } else {
1988 p->skb2 = skb_get(p->skb);
1989 /*
1990 * skb ownership may have been set when
1991 * delivered to a previous socket.
1992 */
1993 skb_orphan(p->skb2);
1994 }
1995 }
1996 if (p->skb2 == NULL) {
1997 netlink_overrun(sk);
1998 /* Clone failed. Notify ALL listeners. */
1999 p->failure = 1;
2000 if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
2001 p->delivery_failure = 1;
2002 } else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
2003 kfree_skb(p->skb2);
2004 p->skb2 = NULL;
2005 } else if (sk_filter(sk, p->skb2)) {
2006 kfree_skb(p->skb2);
2007 p->skb2 = NULL;
2008 } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
2009 netlink_overrun(sk);
2010 if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
2011 p->delivery_failure = 1;
2012 } else {
2013 p->congested |= val;
2014 p->delivered = 1;
2015 p->skb2 = NULL;
2016 }
2017 sock_put(sk);
2018
2019 out:
2020 return 0;
2021 }
2022
2023 int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid,
2024 u32 group, gfp_t allocation,
2025 int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
2026 void *filter_data)
2027 {
2028 struct net *net = sock_net(ssk);
2029 struct netlink_broadcast_data info;
2030 struct sock *sk;
2031
2032 skb = netlink_trim(skb, allocation);
2033
2034 info.exclude_sk = ssk;
2035 info.net = net;
2036 info.portid = portid;
2037 info.group = group;
2038 info.failure = 0;
2039 info.delivery_failure = 0;
2040 info.congested = 0;
2041 info.delivered = 0;
2042 info.allocation = allocation;
2043 info.skb = skb;
2044 info.skb2 = NULL;
2045 info.tx_filter = filter;
2046 info.tx_data = filter_data;
2047
2048 /* While we sleep in clone, do not allow the socket list to change */
2049
2050 netlink_lock_table();
2051
2052 sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
2053 do_one_broadcast(sk, &info);
2054
2055 consume_skb(skb);
2056
2057 netlink_unlock_table();
2058
2059 if (info.delivery_failure) {
2060 kfree_skb(info.skb2);
2061 return -ENOBUFS;
2062 }
2063 consume_skb(info.skb2);
2064
2065 if (info.delivered) {
2066 if (info.congested && (allocation & __GFP_WAIT))
2067 yield();
2068 return 0;
2069 }
2070 return -ESRCH;
2071 }
2072 EXPORT_SYMBOL(netlink_broadcast_filtered);
2073
2074 int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
2075 u32 group, gfp_t allocation)
2076 {
2077 return netlink_broadcast_filtered(ssk, skb, portid, group, allocation,
2078 NULL, NULL);
2079 }
2080 EXPORT_SYMBOL(netlink_broadcast);
2081
2082 struct netlink_set_err_data {
2083 struct sock *exclude_sk;
2084 u32 portid;
2085 u32 group;
2086 int code;
2087 };
2088
2089 static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
2090 {
2091 struct netlink_sock *nlk = nlk_sk(sk);
2092 int ret = 0;
2093
2094 if (sk == p->exclude_sk)
2095 goto out;
2096
2097 if (!net_eq(sock_net(sk), sock_net(p->exclude_sk)))
2098 goto out;
2099
2100 if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
2101 !test_bit(p->group - 1, nlk->groups))
2102 goto out;
2103
2104 if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) {
2105 ret = 1;
2106 goto out;
2107 }
2108
2109 sk->sk_err = p->code;
2110 sk->sk_error_report(sk);
2111 out:
2112 return ret;
2113 }
2114
2115 /**
2116 * netlink_set_err - report error to broadcast listeners
2117 * @ssk: the kernel netlink socket, as returned by netlink_kernel_create()
2118 * @portid: the PORTID of a process that we want to skip (if any)
2119 * @group: the broadcast group that will notice the error
2120 * @code: error code, must be negative (as usual in kernelspace)
2121 *
2122 * This function returns the number of broadcast listeners that have set the
2123 * NETLINK_RECV_NO_ENOBUFS socket option.
2124 */
2125 int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code)
2126 {
2127 struct netlink_set_err_data info;
2128 struct sock *sk;
2129 int ret = 0;
2130
2131 info.exclude_sk = ssk;
2132 info.portid = portid;
2133 info.group = group;
2134 /* sk->sk_err wants a positive error value */
2135 info.code = -code;
2136
2137 read_lock(&nl_table_lock);
2138
2139 sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
2140 ret += do_one_set_err(sk, &info);
2141
2142 read_unlock(&nl_table_lock);
2143 return ret;
2144 }
2145 EXPORT_SYMBOL(netlink_set_err);
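
/*
 * Illustrative sketch, not part of the original file: a subsystem that fails
 * to allocate an event notification can use netlink_set_err() to tell its
 * listeners that something was lost instead of dropping it silently.
 * MY_GROUP and "nls" are hypothetical placeholders.
 */
static void example_report_lost_event(struct sock *nls)
{
	/* the error code must be negative, as the kerneldoc above notes */
	netlink_set_err(nls, 0, MY_GROUP, -ENOBUFS);
}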
2146
2147 /* must be called with netlink table grabbed */
2148 static void netlink_update_socket_mc(struct netlink_sock *nlk,
2149 unsigned int group,
2150 int is_new)
2151 {
2152 int old, new = !!is_new, subscriptions;
2153
2154 old = test_bit(group - 1, nlk->groups);
2155 subscriptions = nlk->subscriptions - old + new;
2156 if (new)
2157 __set_bit(group - 1, nlk->groups);
2158 else
2159 __clear_bit(group - 1, nlk->groups);
2160 netlink_update_subscriptions(&nlk->sk, subscriptions);
2161 netlink_update_listeners(&nlk->sk);
2162 }
2163
2164 static int netlink_setsockopt(struct socket *sock, int level, int optname,
2165 char __user *optval, unsigned int optlen)
2166 {
2167 struct sock *sk = sock->sk;
2168 struct netlink_sock *nlk = nlk_sk(sk);
2169 unsigned int val = 0;
2170 int err;
2171
2172 if (level != SOL_NETLINK)
2173 return -ENOPROTOOPT;
2174
2175 if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING &&
2176 optlen >= sizeof(int) &&
2177 get_user(val, (unsigned int __user *)optval))
2178 return -EFAULT;
2179
2180 switch (optname) {
2181 case NETLINK_PKTINFO:
2182 if (val)
2183 nlk->flags |= NETLINK_RECV_PKTINFO;
2184 else
2185 nlk->flags &= ~NETLINK_RECV_PKTINFO;
2186 err = 0;
2187 break;
2188 case NETLINK_ADD_MEMBERSHIP:
2189 case NETLINK_DROP_MEMBERSHIP: {
2190 if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV))
2191 return -EPERM;
2192 err = netlink_realloc_groups(sk);
2193 if (err)
2194 return err;
2195 if (!val || val - 1 >= nlk->ngroups)
2196 return -EINVAL;
2197 if (optname == NETLINK_ADD_MEMBERSHIP && nlk->netlink_bind) {
2198 err = nlk->netlink_bind(val);
2199 if (err)
2200 return err;
2201 }
2202 netlink_table_grab();
2203 netlink_update_socket_mc(nlk, val,
2204 optname == NETLINK_ADD_MEMBERSHIP);
2205 netlink_table_ungrab();
2206 if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind)
2207 nlk->netlink_unbind(val);
2208
2209 err = 0;
2210 break;
2211 }
2212 case NETLINK_BROADCAST_ERROR:
2213 if (val)
2214 nlk->flags |= NETLINK_BROADCAST_SEND_ERROR;
2215 else
2216 nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR;
2217 err = 0;
2218 break;
2219 case NETLINK_NO_ENOBUFS:
2220 if (val) {
2221 nlk->flags |= NETLINK_RECV_NO_ENOBUFS;
2222 clear_bit(NETLINK_CONGESTED, &nlk->state);
2223 wake_up_interruptible(&nlk->wait);
2224 } else {
2225 nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS;
2226 }
2227 err = 0;
2228 break;
2229 #ifdef CONFIG_NETLINK_MMAP
2230 case NETLINK_RX_RING:
2231 case NETLINK_TX_RING: {
2232 struct nl_mmap_req req;
2233
2234 /* Rings might consume more memory than the queue limits allow, so
2235 * require CAP_NET_ADMIN.
2236 */
2237 if (!capable(CAP_NET_ADMIN))
2238 return -EPERM;
2239 if (optlen < sizeof(req))
2240 return -EINVAL;
2241 if (copy_from_user(&req, optval, sizeof(req)))
2242 return -EFAULT;
2243 err = netlink_set_ring(sk, &req, false,
2244 optname == NETLINK_TX_RING);
2245 break;
2246 }
2247 #endif /* CONFIG_NETLINK_MMAP */
2248 default:
2249 err = -ENOPROTOOPT;
2250 }
2251 return err;
2252 }
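
/*
 * Illustrative userspace sketch, not part of the original file: joining an
 * rtnetlink multicast group goes through the NETLINK_ADD_MEMBERSHIP case
 * handled above.  Note that the option takes a group number, not a bitmask.
 */
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

#ifndef SOL_NETLINK
#define SOL_NETLINK 270		/* older libcs may not define it */
#endif

int example_join_link_group(void)
{
	unsigned int group = RTNLGRP_LINK;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	if (fd < 0)
		return -1;
	if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
		       &group, sizeof(group)) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}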
2253
2254 static int netlink_getsockopt(struct socket *sock, int level, int optname,
2255 char __user *optval, int __user *optlen)
2256 {
2257 struct sock *sk = sock->sk;
2258 struct netlink_sock *nlk = nlk_sk(sk);
2259 int len, val, err;
2260
2261 if (level != SOL_NETLINK)
2262 return -ENOPROTOOPT;
2263
2264 if (get_user(len, optlen))
2265 return -EFAULT;
2266 if (len < 0)
2267 return -EINVAL;
2268
2269 switch (optname) {
2270 case NETLINK_PKTINFO:
2271 if (len < sizeof(int))
2272 return -EINVAL;
2273 len = sizeof(int);
2274 val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0;
2275 if (put_user(len, optlen) ||
2276 put_user(val, optval))
2277 return -EFAULT;
2278 err = 0;
2279 break;
2280 case NETLINK_BROADCAST_ERROR:
2281 if (len < sizeof(int))
2282 return -EINVAL;
2283 len = sizeof(int);
2284 val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0;
2285 if (put_user(len, optlen) ||
2286 put_user(val, optval))
2287 return -EFAULT;
2288 err = 0;
2289 break;
2290 case NETLINK_NO_ENOBUFS:
2291 if (len < sizeof(int))
2292 return -EINVAL;
2293 len = sizeof(int);
2294 val = nlk->flags & NETLINK_RECV_NO_ENOBUFS ? 1 : 0;
2295 if (put_user(len, optlen) ||
2296 put_user(val, optval))
2297 return -EFAULT;
2298 err = 0;
2299 break;
2300 default:
2301 err = -ENOPROTOOPT;
2302 }
2303 return err;
2304 }
2305
2306 static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
2307 {
2308 struct nl_pktinfo info;
2309
2310 info.group = NETLINK_CB(skb).dst_group;
2311 put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
2312 }
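
/*
 * Illustrative userspace sketch, not part of the original file: once the
 * NETLINK_PKTINFO option is enabled, the control message emitted above can
 * be read back from recvmsg() ancillary data like this.
 */
#include <string.h>
#include <sys/socket.h>
#include <linux/netlink.h>

#ifndef SOL_NETLINK
#define SOL_NETLINK 270
#endif

static unsigned int example_dst_group(struct msghdr *msg)
{
	struct cmsghdr *cmsg;

	for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
		if (cmsg->cmsg_level == SOL_NETLINK &&
		    cmsg->cmsg_type == NETLINK_PKTINFO) {
			struct nl_pktinfo info;

			memcpy(&info, CMSG_DATA(cmsg), sizeof(info));
			return info.group;	/* destination multicast group */
		}
	}
	return 0;	/* unicast, or no pktinfo attached */
}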
2313
2314 static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
2315 struct msghdr *msg, size_t len)
2316 {
2317 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
2318 struct sock *sk = sock->sk;
2319 struct netlink_sock *nlk = nlk_sk(sk);
2320 DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name);
2321 u32 dst_portid;
2322 u32 dst_group;
2323 struct sk_buff *skb;
2324 int err;
2325 struct scm_cookie scm;
2326
2327 if (msg->msg_flags&MSG_OOB)
2328 return -EOPNOTSUPP;
2329
2330 if (NULL == siocb->scm)
2331 siocb->scm = &scm;
2332
2333 err = scm_send(sock, msg, siocb->scm, true);
2334 if (err < 0)
2335 return err;
2336
2337 if (msg->msg_namelen) {
2338 err = -EINVAL;
2339 if (addr->nl_family != AF_NETLINK)
2340 goto out;
2341 dst_portid = addr->nl_pid;
2342 dst_group = ffs(addr->nl_groups);
2343 err = -EPERM;
2344 if ((dst_group || dst_portid) &&
2345 !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
2346 goto out;
2347 } else {
2348 dst_portid = nlk->dst_portid;
2349 dst_group = nlk->dst_group;
2350 }
2351
2352 if (!nlk->portid) {
2353 err = netlink_autobind(sock);
2354 if (err)
2355 goto out;
2356 }
2357
2358 if (netlink_tx_is_mmaped(sk) &&
2359 msg->msg_iov->iov_base == NULL) {
2360 err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group,
2361 siocb);
2362 goto out;
2363 }
2364
2365 err = -EMSGSIZE;
2366 if (len > sk->sk_sndbuf - 32)
2367 goto out;
2368 err = -ENOBUFS;
2369 skb = netlink_alloc_large_skb(len, dst_group);
2370 if (skb == NULL)
2371 goto out;
2372
2373 NETLINK_CB(skb).portid = nlk->portid;
2374 NETLINK_CB(skb).dst_group = dst_group;
2375 NETLINK_CB(skb).creds = siocb->scm->creds;
2376
2377 err = -EFAULT;
2378 if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
2379 kfree_skb(skb);
2380 goto out;
2381 }
2382
2383 err = security_netlink_send(sk, skb);
2384 if (err) {
2385 kfree_skb(skb);
2386 goto out;
2387 }
2388
2389 if (dst_group) {
2390 atomic_inc(&skb->users);
2391 netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL);
2392 }
2393 err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT);
2394
2395 out:
2396 scm_destroy(siocb->scm);
2397 return err;
2398 }
2399
2400 static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
2401 struct msghdr *msg, size_t len,
2402 int flags)
2403 {
2404 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
2405 struct scm_cookie scm;
2406 struct sock *sk = sock->sk;
2407 struct netlink_sock *nlk = nlk_sk(sk);
2408 int noblock = flags&MSG_DONTWAIT;
2409 size_t copied;
2410 struct sk_buff *skb, *data_skb;
2411 int err, ret;
2412
2413 if (flags&MSG_OOB)
2414 return -EOPNOTSUPP;
2415
2416 copied = 0;
2417
2418 skb = skb_recv_datagram(sk, flags, noblock, &err);
2419 if (skb == NULL)
2420 goto out;
2421
2422 data_skb = skb;
2423
2424 #ifdef CONFIG_COMPAT_NETLINK_MESSAGES
2425 if (unlikely(skb_shinfo(skb)->frag_list)) {
2426 /*
2427 * If this skb has a frag_list, it means that we
2428 * will have to use the frag_list skb's data for compat tasks
2429 * and the regular skb's data for normal (non-compat) tasks.
2430 *
2431 * If we need to send the compat skb, assign it to the
2432 * 'data_skb' variable so that it will be used below for data
2433 * copying. We keep 'skb' for everything else, including
2434 * freeing both later.
2435 */
2436 if (flags & MSG_CMSG_COMPAT)
2437 data_skb = skb_shinfo(skb)->frag_list;
2438 }
2439 #endif
2440
2441 /* Record the max length of recvmsg() calls for future allocations */
2442 nlk->max_recvmsg_len = max(nlk->max_recvmsg_len, len);
2443 nlk->max_recvmsg_len = min_t(size_t, nlk->max_recvmsg_len,
2444 16384);
2445
2446 copied = data_skb->len;
2447 if (len < copied) {
2448 msg->msg_flags |= MSG_TRUNC;
2449 copied = len;
2450 }
2451
2452 skb_reset_transport_header(data_skb);
2453 err = skb_copy_datagram_iovec(data_skb, 0, msg->msg_iov, copied);
2454
2455 if (msg->msg_name) {
2456 DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name);
2457 addr->nl_family = AF_NETLINK;
2458 addr->nl_pad = 0;
2459 addr->nl_pid = NETLINK_CB(skb).portid;
2460 addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group);
2461 msg->msg_namelen = sizeof(*addr);
2462 }
2463
2464 if (nlk->flags & NETLINK_RECV_PKTINFO)
2465 netlink_cmsg_recv_pktinfo(msg, skb);
2466
2467 if (NULL == siocb->scm) {
2468 memset(&scm, 0, sizeof(scm));
2469 siocb->scm = &scm;
2470 }
2471 siocb->scm->creds = *NETLINK_CREDS(skb);
2472 if (flags & MSG_TRUNC)
2473 copied = data_skb->len;
2474
2475 skb_free_datagram(sk, skb);
2476
2477 if (nlk->cb_running &&
2478 atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
2479 ret = netlink_dump(sk);
2480 if (ret) {
2481 sk->sk_err = ret;
2482 sk->sk_error_report(sk);
2483 }
2484 }
2485
2486 scm_recv(sock, msg, siocb->scm, flags);
2487 out:
2488 netlink_rcv_wake(sk);
2489 return err ? : copied;
2490 }
2491
2492 static void netlink_data_ready(struct sock *sk)
2493 {
2494 BUG();
2495 }
2496
2497 /*
2498 * We export these functions to other modules. They provide a
2499 * complete set of kernel non-blocking support for message
2500 * queueing.
2501 */
2502
2503 struct sock *
2504 __netlink_kernel_create(struct net *net, int unit, struct module *module,
2505 struct netlink_kernel_cfg *cfg)
2506 {
2507 struct socket *sock;
2508 struct sock *sk;
2509 struct netlink_sock *nlk;
2510 struct listeners *listeners = NULL;
2511 struct mutex *cb_mutex = cfg ? cfg->cb_mutex : NULL;
2512 unsigned int groups;
2513
2514 BUG_ON(!nl_table);
2515
2516 if (unit < 0 || unit >= MAX_LINKS)
2517 return NULL;
2518
2519 if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
2520 return NULL;
2521
2522 /*
2523 * We only need a reference on the net from sk, but must not
2524 * get_net it. Besides, we cannot get and then put the net here.
2525 * So we create the socket inside init_net and then move it to net.
2526 */
2527
2528 if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0)
2529 goto out_sock_release_nosk;
2530
2531 sk = sock->sk;
2532 sk_change_net(sk, net);
2533
2534 if (!cfg || cfg->groups < 32)
2535 groups = 32;
2536 else
2537 groups = cfg->groups;
2538
2539 listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
2540 if (!listeners)
2541 goto out_sock_release;
2542
2543 sk->sk_data_ready = netlink_data_ready;
2544 if (cfg && cfg->input)
2545 nlk_sk(sk)->netlink_rcv = cfg->input;
2546
2547 if (netlink_insert(sk, net, 0))
2548 goto out_sock_release;
2549
2550 nlk = nlk_sk(sk);
2551 nlk->flags |= NETLINK_KERNEL_SOCKET;
2552
2553 netlink_table_grab();
2554 if (!nl_table[unit].registered) {
2555 nl_table[unit].groups = groups;
2556 rcu_assign_pointer(nl_table[unit].listeners, listeners);
2557 nl_table[unit].cb_mutex = cb_mutex;
2558 nl_table[unit].module = module;
2559 if (cfg) {
2560 nl_table[unit].bind = cfg->bind;
2561 nl_table[unit].flags = cfg->flags;
2562 if (cfg->compare)
2563 nl_table[unit].compare = cfg->compare;
2564 }
2565 nl_table[unit].registered = 1;
2566 } else {
2567 kfree(listeners);
2568 nl_table[unit].registered++;
2569 }
2570 netlink_table_ungrab();
2571 return sk;
2572
2573 out_sock_release:
2574 kfree(listeners);
2575 netlink_kernel_release(sk);
2576 return NULL;
2577
2578 out_sock_release_nosk:
2579 sock_release(sock);
2580 return NULL;
2581 }
2582 EXPORT_SYMBOL(__netlink_kernel_create);
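
/*
 * Illustrative sketch, not part of the original file: a subsystem usually
 * reaches the function above through the netlink_kernel_create() wrapper.
 * NETLINK_EXAMPLE and example_input() are hypothetical placeholders.
 */
static void example_input(struct sk_buff *skb)
{
	/* messages sent by userspace land here; a real handler would
	 * typically dispatch them with netlink_rcv_skb()
	 */
}

static struct sock *example_kernel_socket(struct net *net)
{
	struct netlink_kernel_cfg cfg = {
		.groups	= 32,		/* number of multicast groups */
		.input	= example_input,
	};

	return netlink_kernel_create(net, NETLINK_EXAMPLE, &cfg);
}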
2583
2584 void
2585 netlink_kernel_release(struct sock *sk)
2586 {
2587 sk_release_kernel(sk);
2588 }
2589 EXPORT_SYMBOL(netlink_kernel_release);
2590
2591 int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
2592 {
2593 struct listeners *new, *old;
2594 struct netlink_table *tbl = &nl_table[sk->sk_protocol];
2595
2596 if (groups < 32)
2597 groups = 32;
2598
2599 if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
2600 new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC);
2601 if (!new)
2602 return -ENOMEM;
2603 old = nl_deref_protected(tbl->listeners);
2604 memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
2605 rcu_assign_pointer(tbl->listeners, new);
2606
2607 kfree_rcu(old, rcu);
2608 }
2609 tbl->groups = groups;
2610
2611 return 0;
2612 }
2613
2614 /**
2615 * netlink_change_ngroups - change number of multicast groups
2616 *
2617 * This changes the number of multicast groups that are available
2618 * on a certain netlink family. Note that it is not possible to
2619 * change the number of groups to below 32. Also note that it does
2620 * not implicitly call netlink_clear_multicast_users() when the
2621 * number of groups is reduced.
2622 *
2623 * @sk: The kernel netlink socket, as returned by netlink_kernel_create().
2624 * @groups: The new number of groups.
2625 */
2626 int netlink_change_ngroups(struct sock *sk, unsigned int groups)
2627 {
2628 int err;
2629
2630 netlink_table_grab();
2631 err = __netlink_change_ngroups(sk, groups);
2632 netlink_table_ungrab();
2633
2634 return err;
2635 }
2636
2637 void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
2638 {
2639 struct sock *sk;
2640 struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
2641
2642 sk_for_each_bound(sk, &tbl->mc_list)
2643 netlink_update_socket_mc(nlk_sk(sk), group, 0);
2644 }
2645
2646 struct nlmsghdr *
2647 __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags)
2648 {
2649 struct nlmsghdr *nlh;
2650 int size = nlmsg_msg_size(len);
2651
2652 nlh = (struct nlmsghdr *)skb_put(skb, NLMSG_ALIGN(size));
2653 nlh->nlmsg_type = type;
2654 nlh->nlmsg_len = size;
2655 nlh->nlmsg_flags = flags;
2656 nlh->nlmsg_pid = portid;
2657 nlh->nlmsg_seq = seq;
2658 if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0)
2659 memset(nlmsg_data(nlh) + len, 0, NLMSG_ALIGN(size) - size);
2660 return nlh;
2661 }
2662 EXPORT_SYMBOL(__nlmsg_put);
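
/*
 * Illustrative sketch, not part of the original file: callers normally use
 * the nlmsg_new()/nlmsg_put() helpers, which end up in __nlmsg_put() above.
 * MY_MSG_TYPE and struct example_payload are hypothetical placeholders.
 */
struct example_payload {
	u32 ifindex;
	u32 flags;
};

static struct sk_buff *example_build_msg(u32 portid, u32 seq)
{
	struct sk_buff *skb;
	struct nlmsghdr *nlh;
	struct example_payload *p;

	skb = nlmsg_new(sizeof(*p), GFP_KERNEL);
	if (!skb)
		return NULL;

	nlh = nlmsg_put(skb, portid, seq, MY_MSG_TYPE, sizeof(*p), 0);
	if (!nlh) {
		nlmsg_free(skb);
		return NULL;
	}
	p = nlmsg_data(nlh);
	memset(p, 0, sizeof(*p));
	nlmsg_end(skb, nlh);
	return skb;
}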
2663
2664 /*
2665 * It looks a bit ugly.
2666 * It would be better to create a kernel thread.
2667 */
2668
2669 static int netlink_dump(struct sock *sk)
2670 {
2671 struct netlink_sock *nlk = nlk_sk(sk);
2672 struct netlink_callback *cb;
2673 struct sk_buff *skb = NULL;
2674 struct nlmsghdr *nlh;
2675 int len, err = -ENOBUFS;
2676 int alloc_size;
2677
2678 mutex_lock(nlk->cb_mutex);
2679 if (!nlk->cb_running) {
2680 err = -EINVAL;
2681 goto errout_skb;
2682 }
2683
2684 cb = &nlk->cb;
2685 alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
2686
2687 if (!netlink_rx_is_mmaped(sk) &&
2688 atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
2689 goto errout_skb;
2690
2691 /* NLMSG_GOODSIZE is small to avoid high-order allocations being
2692 * required, but it makes sense to _attempt_ a 16K-byte allocation
2693 * to reduce the number of system calls on dump operations, if the
2694 * user ever provided a big enough buffer.
2695 */
2696 if (alloc_size < nlk->max_recvmsg_len) {
2697 skb = netlink_alloc_skb(sk,
2698 nlk->max_recvmsg_len,
2699 nlk->portid,
2700 GFP_KERNEL |
2701 __GFP_NOWARN |
2702 __GFP_NORETRY);
2703 /* available room should be the exact amount to avoid MSG_TRUNC */
2704 if (skb)
2705 skb_reserve(skb, skb_tailroom(skb) -
2706 nlk->max_recvmsg_len);
2707 }
2708 if (!skb)
2709 skb = netlink_alloc_skb(sk, alloc_size, nlk->portid,
2710 GFP_KERNEL);
2711 if (!skb)
2712 goto errout_skb;
2713 netlink_skb_set_owner_r(skb, sk);
2714
2715 len = cb->dump(skb, cb);
2716
2717 if (len > 0) {
2718 mutex_unlock(nlk->cb_mutex);
2719
2720 if (sk_filter(sk, skb))
2721 kfree_skb(skb);
2722 else
2723 __netlink_sendskb(sk, skb);
2724 return 0;
2725 }
2726
2727 nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
2728 if (!nlh)
2729 goto errout_skb;
2730
2731 nl_dump_check_consistent(cb, nlh);
2732
2733 memcpy(nlmsg_data(nlh), &len, sizeof(len));
2734
2735 if (sk_filter(sk, skb))
2736 kfree_skb(skb);
2737 else
2738 __netlink_sendskb(sk, skb);
2739
2740 if (cb->done)
2741 cb->done(cb);
2742
2743 nlk->cb_running = false;
2744 mutex_unlock(nlk->cb_mutex);
2745 module_put(cb->module);
2746 consume_skb(cb->skb);
2747 return 0;
2748
2749 errout_skb:
2750 mutex_unlock(nlk->cb_mutex);
2751 kfree_skb(skb);
2752 return err;
2753 }
2754
2755 int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
2756 const struct nlmsghdr *nlh,
2757 struct netlink_dump_control *control)
2758 {
2759 struct netlink_callback *cb;
2760 struct sock *sk;
2761 struct netlink_sock *nlk;
2762 int ret;
2763
2764 /* Memory mapped dump requests need to be copied to avoid looping
2765 * on the pending state in netlink_mmap_sendmsg() while the CB holds
2766 * a reference to the skb.
2767 */
2768 if (netlink_skb_is_mmaped(skb)) {
2769 skb = skb_copy(skb, GFP_KERNEL);
2770 if (skb == NULL)
2771 return -ENOBUFS;
2772 } else
2773 atomic_inc(&skb->users);
2774
2775 sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
2776 if (sk == NULL) {
2777 ret = -ECONNREFUSED;
2778 goto error_free;
2779 }
2780
2781 nlk = nlk_sk(sk);
2782 mutex_lock(nlk->cb_mutex);
2783 /* A dump is in progress... */
2784 if (nlk->cb_running) {
2785 ret = -EBUSY;
2786 goto error_unlock;
2787 }
2788 /* take a reference on the module that cb->dump belongs to */
2789 if (!try_module_get(control->module)) {
2790 ret = -EPROTONOSUPPORT;
2791 goto error_unlock;
2792 }
2793
2794 cb = &nlk->cb;
2795 memset(cb, 0, sizeof(*cb));
2796 cb->dump = control->dump;
2797 cb->done = control->done;
2798 cb->nlh = nlh;
2799 cb->data = control->data;
2800 cb->module = control->module;
2801 cb->min_dump_alloc = control->min_dump_alloc;
2802 cb->skb = skb;
2803
2804 nlk->cb_running = true;
2805
2806 mutex_unlock(nlk->cb_mutex);
2807
2808 ret = netlink_dump(sk);
2809 sock_put(sk);
2810
2811 if (ret)
2812 return ret;
2813
2814 /* We successfully started a dump; by returning -EINTR we
2815 * signal that no ACK should be sent even if one was requested.
2816 */
2817 return -EINTR;
2818
2819 error_unlock:
2820 sock_put(sk);
2821 mutex_unlock(nlk->cb_mutex);
2822 error_free:
2823 kfree_skb(skb);
2824 return ret;
2825 }
2826 EXPORT_SYMBOL(__netlink_dump_start);
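
/*
 * Illustrative sketch, not part of the original file: a request handler
 * typically starts a dump via the netlink_dump_start() wrapper of the
 * function above.  example_dump(), example_done() and "nls" are hypothetical.
 */
static int example_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	/* fill skb with as many records as fit; return a positive value
	 * while more data remains, 0 once the dump is complete
	 */
	return 0;
}

static int example_done(struct netlink_callback *cb)
{
	return 0;
}

static int example_get_req(struct sock *nls, struct sk_buff *skb,
			   struct nlmsghdr *nlh)
{
	struct netlink_dump_control c = {
		.dump		= example_dump,
		.done		= example_done,
		.min_dump_alloc	= 1024,
	};

	if (!(nlh->nlmsg_flags & NLM_F_DUMP))
		return -EOPNOTSUPP;	/* non-dump requests handled elsewhere */

	return netlink_dump_start(nls, skb, nlh, &c);
}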
2827
2828 void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
2829 {
2830 struct sk_buff *skb;
2831 struct nlmsghdr *rep;
2832 struct nlmsgerr *errmsg;
2833 size_t payload = sizeof(*errmsg);
2834
2835 /* error messages get the original request appended */
2836 if (err)
2837 payload += nlmsg_len(nlh);
2838
2839 skb = netlink_alloc_skb(in_skb->sk, nlmsg_total_size(payload),
2840 NETLINK_CB(in_skb).portid, GFP_KERNEL);
2841 if (!skb) {
2842 struct sock *sk;
2843
2844 sk = netlink_lookup(sock_net(in_skb->sk),
2845 in_skb->sk->sk_protocol,
2846 NETLINK_CB(in_skb).portid);
2847 if (sk) {
2848 sk->sk_err = ENOBUFS;
2849 sk->sk_error_report(sk);
2850 sock_put(sk);
2851 }
2852 return;
2853 }
2854
2855 rep = __nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
2856 NLMSG_ERROR, payload, 0);
2857 errmsg = nlmsg_data(rep);
2858 errmsg->error = err;
2859 memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
2860 netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT);
2861 }
2862 EXPORT_SYMBOL(netlink_ack);
2863
2864 int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
2865 struct nlmsghdr *))
2866 {
2867 struct nlmsghdr *nlh;
2868 int err;
2869
2870 while (skb->len >= nlmsg_total_size(0)) {
2871 int msglen;
2872
2873 nlh = nlmsg_hdr(skb);
2874 err = 0;
2875
2876 if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
2877 return 0;
2878
2879 /* Only requests are handled by the kernel */
2880 if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
2881 goto ack;
2882
2883 /* Skip control messages */
2884 if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
2885 goto ack;
2886
2887 err = cb(skb, nlh);
2888 if (err == -EINTR)
2889 goto skip;
2890
2891 ack:
2892 if (nlh->nlmsg_flags & NLM_F_ACK || err)
2893 netlink_ack(skb, nlh, err);
2894
2895 skip:
2896 msglen = NLMSG_ALIGN(nlh->nlmsg_len);
2897 if (msglen > skb->len)
2898 msglen = skb->len;
2899 skb_pull(skb, msglen);
2900 }
2901
2902 return 0;
2903 }
2904 EXPORT_SYMBOL(netlink_rcv_skb);
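
/*
 * Illustrative sketch, not part of the original file: the usual shape of a
 * kernel socket's ->input callback, letting netlink_rcv_skb() above walk the
 * messages and generate ACKs.  MY_MSG_TYPE and the handlers are hypothetical.
 */
static int example_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	switch (nlh->nlmsg_type) {
	case MY_MSG_TYPE:
		/* validate and act on nlmsg_data(nlh) here */
		return 0;
	default:
		return -EINVAL;
	}
}

static void example_rcv(struct sk_buff *skb)
{
	netlink_rcv_skb(skb, &example_rcv_msg);
}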
2905
2906 /**
2907 * nlmsg_notify - send a notification netlink message
2908 * @sk: netlink socket to use
2909 * @skb: notification message
2910 * @portid: destination netlink portid for reports or 0
2911 * @group: destination multicast group or 0
2912 * @report: 1 to report back, 0 to disable
2913 * @flags: allocation flags
2914 */
2915 int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
2916 unsigned int group, int report, gfp_t flags)
2917 {
2918 int err = 0;
2919
2920 if (group) {
2921 int exclude_portid = 0;
2922
2923 if (report) {
2924 atomic_inc(&skb->users);
2925 exclude_portid = portid;
2926 }
2927
2928 /* errors are reported via the destination sk->sk_err, but delivery
2929 * errors are propagated if the NETLINK_BROADCAST_ERROR flag is set */
2930 err = nlmsg_multicast(sk, skb, exclude_portid, group, flags);
2931 }
2932
2933 if (report) {
2934 int err2;
2935
2936 err2 = nlmsg_unicast(sk, skb, portid);
2937 if (!err || err == -ESRCH)
2938 err = err2;
2939 }
2940
2941 return err;
2942 }
2943 EXPORT_SYMBOL(nlmsg_notify);
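
/*
 * Illustrative sketch, not part of the original file: a typical notification
 * path that multicasts to MY_GROUP and, when requested, also echoes the
 * message back to the requester.  "nls" and MY_GROUP are hypothetical.
 */
static int example_notify(struct sock *nls, struct sk_buff *skb,
			  u32 requester_portid, int report)
{
	return nlmsg_notify(nls, skb, requester_portid, MY_GROUP,
			    report, GFP_KERNEL);
}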
2944
2945 #ifdef CONFIG_PROC_FS
2946 struct nl_seq_iter {
2947 struct seq_net_private p;
2948 int link;
2949 int hash_idx;
2950 };
2951
2952 static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
2953 {
2954 struct nl_seq_iter *iter = seq->private;
2955 int i, j;
2956 struct sock *s;
2957 loff_t off = 0;
2958
2959 for (i = 0; i < MAX_LINKS; i++) {
2960 struct nl_portid_hash *hash = &nl_table[i].hash;
2961
2962 for (j = 0; j <= hash->mask; j++) {
2963 sk_for_each(s, &hash->table[j]) {
2964 if (sock_net(s) != seq_file_net(seq))
2965 continue;
2966 if (off == pos) {
2967 iter->link = i;
2968 iter->hash_idx = j;
2969 return s;
2970 }
2971 ++off;
2972 }
2973 }
2974 }
2975 return NULL;
2976 }
2977
2978 static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
2979 __acquires(nl_table_lock)
2980 {
2981 read_lock(&nl_table_lock);
2982 return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2983 }
2984
2985 static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2986 {
2987 struct sock *s;
2988 struct nl_seq_iter *iter;
2989 struct net *net;
2990 int i, j;
2991
2992 ++*pos;
2993
2994 if (v == SEQ_START_TOKEN)
2995 return netlink_seq_socket_idx(seq, 0);
2996
2997 net = seq_file_net(seq);
2998 iter = seq->private;
2999 s = v;
3000 do {
3001 s = sk_next(s);
3002 } while (s && !nl_table[s->sk_protocol].compare(net, s));
3003 if (s)
3004 return s;
3005
3006 i = iter->link;
3007 j = iter->hash_idx + 1;
3008
3009 do {
3010 struct nl_portid_hash *hash = &nl_table[i].hash;
3011
3012 for (; j <= hash->mask; j++) {
3013 s = sk_head(&hash->table[j]);
3014
3015 while (s && !nl_table[s->sk_protocol].compare(net, s))
3016 s = sk_next(s);
3017 if (s) {
3018 iter->link = i;
3019 iter->hash_idx = j;
3020 return s;
3021 }
3022 }
3023
3024 j = 0;
3025 } while (++i < MAX_LINKS);
3026
3027 return NULL;
3028 }
3029
3030 static void netlink_seq_stop(struct seq_file *seq, void *v)
3031 __releases(nl_table_lock)
3032 {
3033 read_unlock(&nl_table_lock);
3034 }
3035
3036
3037 static int netlink_seq_show(struct seq_file *seq, void *v)
3038 {
3039 if (v == SEQ_START_TOKEN) {
3040 seq_puts(seq,
3041 "sk Eth Pid Groups "
3042 "Rmem Wmem Dump Locks Drops Inode\n");
3043 } else {
3044 struct sock *s = v;
3045 struct netlink_sock *nlk = nlk_sk(s);
3046
3047 seq_printf(seq, "%pK %-3d %-6u %08x %-8d %-8d %d %-8d %-8d %-8lu\n",
3048 s,
3049 s->sk_protocol,
3050 nlk->portid,
3051 nlk->groups ? (u32)nlk->groups[0] : 0,
3052 sk_rmem_alloc_get(s),
3053 sk_wmem_alloc_get(s),
3054 nlk->cb_running,
3055 atomic_read(&s->sk_refcnt),
3056 atomic_read(&s->sk_drops),
3057 sock_i_ino(s)
3058 );
3059
3060 }
3061 return 0;
3062 }
3063
3064 static const struct seq_operations netlink_seq_ops = {
3065 .start = netlink_seq_start,
3066 .next = netlink_seq_next,
3067 .stop = netlink_seq_stop,
3068 .show = netlink_seq_show,
3069 };
3070
3071
3072 static int netlink_seq_open(struct inode *inode, struct file *file)
3073 {
3074 return seq_open_net(inode, file, &netlink_seq_ops,
3075 sizeof(struct nl_seq_iter));
3076 }
3077
3078 static const struct file_operations netlink_seq_fops = {
3079 .owner = THIS_MODULE,
3080 .open = netlink_seq_open,
3081 .read = seq_read,
3082 .llseek = seq_lseek,
3083 .release = seq_release_net,
3084 };
3085
3086 #endif
3087
3088 int netlink_register_notifier(struct notifier_block *nb)
3089 {
3090 return atomic_notifier_chain_register(&netlink_chain, nb);
3091 }
3092 EXPORT_SYMBOL(netlink_register_notifier);
3093
3094 int netlink_unregister_notifier(struct notifier_block *nb)
3095 {
3096 return atomic_notifier_chain_unregister(&netlink_chain, nb);
3097 }
3098 EXPORT_SYMBOL(netlink_unregister_notifier);
3099
3100 static const struct proto_ops netlink_ops = {
3101 .family = PF_NETLINK,
3102 .owner = THIS_MODULE,
3103 .release = netlink_release,
3104 .bind = netlink_bind,
3105 .connect = netlink_connect,
3106 .socketpair = sock_no_socketpair,
3107 .accept = sock_no_accept,
3108 .getname = netlink_getname,
3109 .poll = netlink_poll,
3110 .ioctl = sock_no_ioctl,
3111 .listen = sock_no_listen,
3112 .shutdown = sock_no_shutdown,
3113 .setsockopt = netlink_setsockopt,
3114 .getsockopt = netlink_getsockopt,
3115 .sendmsg = netlink_sendmsg,
3116 .recvmsg = netlink_recvmsg,
3117 .mmap = netlink_mmap,
3118 .sendpage = sock_no_sendpage,
3119 };
3120
3121 static const struct net_proto_family netlink_family_ops = {
3122 .family = PF_NETLINK,
3123 .create = netlink_create,
3124 .owner = THIS_MODULE, /* for consistency 8) */
3125 };
3126
3127 static int __net_init netlink_net_init(struct net *net)
3128 {
3129 #ifdef CONFIG_PROC_FS
3130 if (!proc_create("netlink", 0, net->proc_net, &netlink_seq_fops))
3131 return -ENOMEM;
3132 #endif
3133 return 0;
3134 }
3135
3136 static void __net_exit netlink_net_exit(struct net *net)
3137 {
3138 #ifdef CONFIG_PROC_FS
3139 remove_proc_entry("netlink", net->proc_net);
3140 #endif
3141 }
3142
3143 static void __init netlink_add_usersock_entry(void)
3144 {
3145 struct listeners *listeners;
3146 int groups = 32;
3147
3148 listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
3149 if (!listeners)
3150 panic("netlink_add_usersock_entry: Cannot allocate listeners\n");
3151
3152 netlink_table_grab();
3153
3154 nl_table[NETLINK_USERSOCK].groups = groups;
3155 rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
3156 nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
3157 nl_table[NETLINK_USERSOCK].registered = 1;
3158 nl_table[NETLINK_USERSOCK].flags = NL_CFG_F_NONROOT_SEND;
3159
3160 netlink_table_ungrab();
3161 }
3162
3163 static struct pernet_operations __net_initdata netlink_net_ops = {
3164 .init = netlink_net_init,
3165 .exit = netlink_net_exit,
3166 };
3167
3168 static int __init netlink_proto_init(void)
3169 {
3170 int i;
3171 unsigned long limit;
3172 unsigned int order;
3173 int err = proto_register(&netlink_proto, 0);
3174
3175 if (err != 0)
3176 goto out;
3177
3178 BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
3179
3180 nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
3181 if (!nl_table)
3182 goto panic;
3183
3184 if (totalram_pages >= (128 * 1024))
3185 limit = totalram_pages >> (21 - PAGE_SHIFT);
3186 else
3187 limit = totalram_pages >> (23 - PAGE_SHIFT);
3188
3189 order = get_bitmask_order(limit) - 1 + PAGE_SHIFT;
3190 limit = (1UL << order) / sizeof(struct hlist_head);
3191 order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1;
3192
3193 for (i = 0; i < MAX_LINKS; i++) {
3194 struct nl_portid_hash *hash = &nl_table[i].hash;
3195
3196 hash->table = nl_portid_hash_zalloc(1 * sizeof(*hash->table));
3197 if (!hash->table) {
3198 while (i-- > 0)
3199 nl_portid_hash_free(nl_table[i].hash.table,
3200 1 * sizeof(*hash->table));
3201 kfree(nl_table);
3202 goto panic;
3203 }
3204 hash->max_shift = order;
3205 hash->shift = 0;
3206 hash->mask = 0;
3207 hash->rehash_time = jiffies;
3208
3209 nl_table[i].compare = netlink_compare;
3210 }
3211
3212 INIT_LIST_HEAD(&netlink_tap_all);
3213
3214 netlink_add_usersock_entry();
3215
3216 sock_register(&netlink_family_ops);
3217 register_pernet_subsys(&netlink_net_ops);
3218 /* The netlink device handler may be needed early. */
3219 rtnetlink_init();
3220 out:
3221 return err;
3222 panic:
3223 panic("netlink_init: Cannot allocate nl_table\n");
3224 }
3225
3226 core_initcall(netlink_proto_init);