net: Add support for batching network namespace cleanups
[deliverable/linux.git] / net / core / net_namespace.c
CommitLineData
5f256bec
EB
1#include <linux/workqueue.h>
2#include <linux/rtnetlink.h>
3#include <linux/cache.h>
4#include <linux/slab.h>
5#include <linux/list.h>
6#include <linux/delay.h>
9dd776b6 7#include <linux/sched.h>
c93cf61f 8#include <linux/idr.h>
11a28d37 9#include <linux/rculist.h>
30ffee84 10#include <linux/nsproxy.h>
2b035b39 11#include <linux/netdevice.h>
5f256bec 12#include <net/net_namespace.h>
dec827d1 13#include <net/netns/generic.h>
2b035b39 14#include <net/rtnetlink.h>
5f256bec
EB
15
16/*
17 * Our network namespace constructor/destructor lists
18 */
19
20static LIST_HEAD(pernet_list);
21static struct list_head *first_device = &pernet_list;
22static DEFINE_MUTEX(net_mutex);
23
5f256bec 24LIST_HEAD(net_namespace_list);
b76a461f 25EXPORT_SYMBOL_GPL(net_namespace_list);
5f256bec 26
5f256bec 27struct net init_net;
ff4b9502 28EXPORT_SYMBOL(init_net);
5f256bec 29
dec827d1
PE
30#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
31
2b035b39
EB
32static void unregister_netdevices(struct net *net, struct list_head *list)
33{
34 struct net_device *dev;
35 /* At exit all network devices most be removed from a network
36 * namespace. Do this in the reverse order of registeration.
37 */
38 for_each_netdev_reverse(net, dev) {
39 if (dev->rtnl_link_ops)
40 dev->rtnl_link_ops->dellink(dev, list);
41 else
42 unregister_netdevice_queue(dev, list);
43 }
44}
45
f875bae0
EB
46static int ops_init(const struct pernet_operations *ops, struct net *net)
47{
48 int err;
49 if (ops->id && ops->size) {
50 void *data = kzalloc(ops->size, GFP_KERNEL);
51 if (!data)
52 return -ENOMEM;
53
54 err = net_assign_generic(net, *ops->id, data);
55 if (err) {
56 kfree(data);
57 return err;
58 }
59 }
60 if (ops->init)
61 return ops->init(net);
62 return 0;
63}
64
65static void ops_free(const struct pernet_operations *ops, struct net *net)
66{
67 if (ops->id && ops->size) {
68 int id = *ops->id;
69 kfree(net_generic(net, id));
70 }
71}
72
72ad937a
EB
73static void ops_exit_list(const struct pernet_operations *ops,
74 struct list_head *net_exit_list)
75{
76 struct net *net;
77 if (ops->exit) {
78 list_for_each_entry(net, net_exit_list, exit_list)
79 ops->exit(net);
80 }
81 if (&ops->list == first_device) {
82 LIST_HEAD(dev_kill_list);
83 rtnl_lock();
84 list_for_each_entry(net, net_exit_list, exit_list)
85 unregister_netdevices(net, &dev_kill_list);
86 unregister_netdevice_many(&dev_kill_list);
87 rtnl_unlock();
88 }
89 if (ops->exit_batch)
90 ops->exit_batch(net_exit_list);
91}
92
93static void ops_free_list(const struct pernet_operations *ops,
94 struct list_head *net_exit_list)
95{
96 struct net *net;
97 if (ops->size && ops->id) {
98 list_for_each_entry(net, net_exit_list, exit_list)
99 ops_free(ops, net);
100 }
101}
102
5f256bec
EB
103/*
104 * setup_net runs the initializers for the network namespace object.
105 */
1a2ee93d 106static __net_init int setup_net(struct net *net)
5f256bec
EB
107{
108 /* Must be called with net_mutex held */
f875bae0 109 const struct pernet_operations *ops, *saved_ops;
486a87f1 110 int error = 0;
72ad937a 111 LIST_HEAD(net_exit_list);
5f256bec 112
5f256bec 113 atomic_set(&net->count, 1);
486a87f1 114
5d1e4468 115#ifdef NETNS_REFCNT_DEBUG
5f256bec 116 atomic_set(&net->use_count, 0);
5d1e4468 117#endif
5f256bec 118
768f3591 119 list_for_each_entry(ops, &pernet_list, list) {
f875bae0
EB
120 error = ops_init(ops, net);
121 if (error < 0)
122 goto out_undo;
5f256bec
EB
123 }
124out:
125 return error;
768f3591 126
5f256bec
EB
127out_undo:
128 /* Walk through the list backwards calling the exit functions
129 * for the pernet modules whose init functions did not fail.
130 */
72ad937a 131 list_add(&net->exit_list, &net_exit_list);
f875bae0 132 saved_ops = ops;
72ad937a
EB
133 list_for_each_entry_continue_reverse(ops, &pernet_list, list)
134 ops_exit_list(ops, &net_exit_list);
135
f875bae0
EB
136 ops = saved_ops;
137 list_for_each_entry_continue_reverse(ops, &pernet_list, list)
72ad937a 138 ops_free_list(ops, &net_exit_list);
310928d9
DL
139
140 rcu_barrier();
5f256bec
EB
141 goto out;
142}
143
486a87f1 144static struct net_generic *net_alloc_generic(void)
6a1a3b9f 145{
486a87f1
DL
146 struct net_generic *ng;
147 size_t generic_size = sizeof(struct net_generic) +
148 INITIAL_NET_GEN_PTRS * sizeof(void *);
149
150 ng = kzalloc(generic_size, GFP_KERNEL);
151 if (ng)
152 ng->len = INITIAL_NET_GEN_PTRS;
153
154 return ng;
6a1a3b9f
PE
155}
156
ebe47d47
CN
157#ifdef CONFIG_NET_NS
158static struct kmem_cache *net_cachep;
159static struct workqueue_struct *netns_wq;
160
486a87f1 161static struct net *net_alloc(void)
45a19b0a 162{
486a87f1
DL
163 struct net *net = NULL;
164 struct net_generic *ng;
165
166 ng = net_alloc_generic();
167 if (!ng)
168 goto out;
169
170 net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
45a19b0a 171 if (!net)
486a87f1 172 goto out_free;
45a19b0a 173
486a87f1
DL
174 rcu_assign_pointer(net->gen, ng);
175out:
176 return net;
177
178out_free:
179 kfree(ng);
180 goto out;
181}
182
183static void net_free(struct net *net)
184{
5d1e4468 185#ifdef NETNS_REFCNT_DEBUG
45a19b0a
JFS
186 if (unlikely(atomic_read(&net->use_count) != 0)) {
187 printk(KERN_EMERG "network namespace not free! Usage: %d\n",
188 atomic_read(&net->use_count));
189 return;
190 }
5d1e4468 191#endif
4ef079cc 192 kfree(net->gen);
45a19b0a
JFS
193 kmem_cache_free(net_cachep, net);
194}
195
088eb2d9 196static struct net *net_create(void)
9dd776b6 197{
088eb2d9
AD
198 struct net *net;
199 int rv;
9dd776b6 200
088eb2d9
AD
201 net = net_alloc();
202 if (!net)
203 return ERR_PTR(-ENOMEM);
9dd776b6 204 mutex_lock(&net_mutex);
088eb2d9
AD
205 rv = setup_net(net);
206 if (rv == 0) {
486a87f1 207 rtnl_lock();
11a28d37 208 list_add_tail_rcu(&net->list, &net_namespace_list);
486a87f1
DL
209 rtnl_unlock();
210 }
9dd776b6 211 mutex_unlock(&net_mutex);
088eb2d9
AD
212 if (rv < 0) {
213 net_free(net);
214 return ERR_PTR(rv);
215 }
216 return net;
217}
486a87f1 218
088eb2d9
AD
219struct net *copy_net_ns(unsigned long flags, struct net *old_net)
220{
221 if (!(flags & CLONE_NEWNET))
222 return get_net(old_net);
223 return net_create();
9dd776b6
EB
224}
225
2b035b39
EB
226static DEFINE_SPINLOCK(cleanup_list_lock);
227static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */
228
6a1a3b9f
PE
229static void cleanup_net(struct work_struct *work)
230{
f875bae0 231 const struct pernet_operations *ops;
2b035b39
EB
232 struct net *net, *tmp;
233 LIST_HEAD(net_kill_list);
72ad937a 234 LIST_HEAD(net_exit_list);
6a1a3b9f 235
2b035b39
EB
236 /* Atomically snapshot the list of namespaces to cleanup */
237 spin_lock_irq(&cleanup_list_lock);
238 list_replace_init(&cleanup_list, &net_kill_list);
239 spin_unlock_irq(&cleanup_list_lock);
6a1a3b9f
PE
240
241 mutex_lock(&net_mutex);
242
243 /* Don't let anyone else find us. */
244 rtnl_lock();
72ad937a 245 list_for_each_entry(net, &net_kill_list, cleanup_list) {
2b035b39 246 list_del_rcu(&net->list);
72ad937a
EB
247 list_add_tail(&net->exit_list, &net_exit_list);
248 }
6a1a3b9f
PE
249 rtnl_unlock();
250
11a28d37
JB
251 /*
252 * Another CPU might be rcu-iterating the list, wait for it.
253 * This needs to be before calling the exit() notifiers, so
254 * the rcu_barrier() below isn't sufficient alone.
255 */
256 synchronize_rcu();
257
6a1a3b9f 258 /* Run all of the network namespace exit methods */
72ad937a
EB
259 list_for_each_entry_reverse(ops, &pernet_list, list)
260 ops_exit_list(ops, &net_exit_list);
261
f875bae0 262 /* Free the net generic variables */
72ad937a
EB
263 list_for_each_entry_reverse(ops, &pernet_list, list)
264 ops_free_list(ops, &net_exit_list);
6a1a3b9f
PE
265
266 mutex_unlock(&net_mutex);
267
268 /* Ensure there are no outstanding rcu callbacks using this
269 * network namespace.
270 */
271 rcu_barrier();
272
273 /* Finally it is safe to free my network namespace structure */
72ad937a
EB
274 list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
275 list_del_init(&net->exit_list);
2b035b39
EB
276 net_free(net);
277 }
6a1a3b9f 278}
2b035b39 279static DECLARE_WORK(net_cleanup_work, cleanup_net);
6a1a3b9f
PE
280
281void __put_net(struct net *net)
282{
283 /* Cleanup the network namespace in process context */
2b035b39
EB
284 unsigned long flags;
285
286 spin_lock_irqsave(&cleanup_list_lock, flags);
287 list_add(&net->cleanup_list, &cleanup_list);
288 spin_unlock_irqrestore(&cleanup_list_lock, flags);
289
290 queue_work(netns_wq, &net_cleanup_work);
6a1a3b9f
PE
291}
292EXPORT_SYMBOL_GPL(__put_net);
293
294#else
295struct net *copy_net_ns(unsigned long flags, struct net *old_net)
296{
297 if (flags & CLONE_NEWNET)
298 return ERR_PTR(-EINVAL);
299 return old_net;
300}
301#endif
302
30ffee84
JB
303struct net *get_net_ns_by_pid(pid_t pid)
304{
305 struct task_struct *tsk;
306 struct net *net;
307
308 /* Lookup the network namespace */
309 net = ERR_PTR(-ESRCH);
310 rcu_read_lock();
311 tsk = find_task_by_vpid(pid);
312 if (tsk) {
313 struct nsproxy *nsproxy;
314 nsproxy = task_nsproxy(tsk);
315 if (nsproxy)
316 net = get_net(nsproxy->net_ns);
317 }
318 rcu_read_unlock();
319 return net;
320}
321EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
322
5f256bec
EB
323static int __init net_ns_init(void)
324{
486a87f1 325 struct net_generic *ng;
5f256bec 326
d57a9212 327#ifdef CONFIG_NET_NS
5f256bec
EB
328 net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
329 SMP_CACHE_BYTES,
330 SLAB_PANIC, NULL);
3ef1355d
BT
331
332 /* Create workqueue for cleanup */
333 netns_wq = create_singlethread_workqueue("netns");
334 if (!netns_wq)
335 panic("Could not create netns workq");
d57a9212 336#endif
3ef1355d 337
486a87f1
DL
338 ng = net_alloc_generic();
339 if (!ng)
340 panic("Could not allocate generic netns");
341
342 rcu_assign_pointer(init_net.gen, ng);
343
5f256bec 344 mutex_lock(&net_mutex);
ca0f3112
SH
345 if (setup_net(&init_net))
346 panic("Could not setup the initial network namespace");
5f256bec 347
f4618d39 348 rtnl_lock();
11a28d37 349 list_add_tail_rcu(&init_net.list, &net_namespace_list);
f4618d39 350 rtnl_unlock();
5f256bec
EB
351
352 mutex_unlock(&net_mutex);
5f256bec
EB
353
354 return 0;
355}
356
357pure_initcall(net_ns_init);
358
ed160e83 359#ifdef CONFIG_NET_NS
f875bae0
EB
360static int __register_pernet_operations(struct list_head *list,
361 struct pernet_operations *ops)
5f256bec 362{
72ad937a 363 struct net *net;
5f256bec 364 int error;
72ad937a 365 LIST_HEAD(net_exit_list);
5f256bec 366
5f256bec 367 list_add_tail(&ops->list, list);
f875bae0 368 if (ops->init || (ops->id && ops->size)) {
1dba323b 369 for_each_net(net) {
f875bae0 370 error = ops_init(ops, net);
5f256bec
EB
371 if (error)
372 goto out_undo;
72ad937a 373 list_add_tail(&net->exit_list, &net_exit_list);
5f256bec
EB
374 }
375 }
1dba323b 376 return 0;
5f256bec
EB
377
378out_undo:
379 /* If I have an error cleanup all namespaces I initialized */
380 list_del(&ops->list);
72ad937a
EB
381 ops_exit_list(ops, &net_exit_list);
382 ops_free_list(ops, &net_exit_list);
1dba323b 383 return error;
5f256bec
EB
384}
385
f875bae0 386static void __unregister_pernet_operations(struct pernet_operations *ops)
5f256bec
EB
387{
388 struct net *net;
72ad937a 389 LIST_HEAD(net_exit_list);
5f256bec
EB
390
391 list_del(&ops->list);
72ad937a
EB
392 for_each_net(net)
393 list_add_tail(&net->exit_list, &net_exit_list);
394 ops_exit_list(ops, &net_exit_list);
395 ops_free_list(ops, &net_exit_list);
5f256bec
EB
396}
397
ed160e83
DL
398#else
399
f875bae0
EB
400static int __register_pernet_operations(struct list_head *list,
401 struct pernet_operations *ops)
ed160e83 402{
f875bae0
EB
403 int err = 0;
404 err = ops_init(ops, &init_net);
405 if (err)
406 ops_free(ops, &init_net);
407 return err;
408
ed160e83
DL
409}
410
f875bae0 411static void __unregister_pernet_operations(struct pernet_operations *ops)
ed160e83 412{
72ad937a
EB
413 LIST_HEAD(net_exit_list);
414 list_add(&init_net.exit_list, &net_exit_list);
415 ops_exit_list(ops, &net_exit_list);
416 ops_free_list(ops, &net_exit_list);
ed160e83 417}
f875bae0
EB
418
419#endif /* CONFIG_NET_NS */
ed160e83 420
c93cf61f
PE
421static DEFINE_IDA(net_generic_ids);
422
f875bae0
EB
423static int register_pernet_operations(struct list_head *list,
424 struct pernet_operations *ops)
425{
426 int error;
427
428 if (ops->id) {
429again:
430 error = ida_get_new_above(&net_generic_ids, 1, ops->id);
431 if (error < 0) {
432 if (error == -EAGAIN) {
433 ida_pre_get(&net_generic_ids, GFP_KERNEL);
434 goto again;
435 }
436 return error;
437 }
438 }
439 error = __register_pernet_operations(list, ops);
440 if (error && ops->id)
441 ida_remove(&net_generic_ids, *ops->id);
442
443 return error;
444}
445
446static void unregister_pernet_operations(struct pernet_operations *ops)
447{
448
449 __unregister_pernet_operations(ops);
450 if (ops->id)
451 ida_remove(&net_generic_ids, *ops->id);
452}
453
5f256bec
EB
454/**
455 * register_pernet_subsys - register a network namespace subsystem
456 * @ops: pernet operations structure for the subsystem
457 *
458 * Register a subsystem which has init and exit functions
459 * that are called when network namespaces are created and
460 * destroyed respectively.
461 *
462 * When registered all network namespace init functions are
463 * called for every existing network namespace. Allowing kernel
464 * modules to have a race free view of the set of network namespaces.
465 *
466 * When a new network namespace is created all of the init
467 * methods are called in the order in which they were registered.
468 *
469 * When a network namespace is destroyed all of the exit methods
470 * are called in the reverse of the order with which they were
471 * registered.
472 */
473int register_pernet_subsys(struct pernet_operations *ops)
474{
475 int error;
476 mutex_lock(&net_mutex);
477 error = register_pernet_operations(first_device, ops);
478 mutex_unlock(&net_mutex);
479 return error;
480}
481EXPORT_SYMBOL_GPL(register_pernet_subsys);
482
483/**
484 * unregister_pernet_subsys - unregister a network namespace subsystem
485 * @ops: pernet operations structure to manipulate
486 *
487 * Remove the pernet operations structure from the list to be
53379e57 488 * used when network namespaces are created or destroyed. In
5f256bec
EB
489 * addition run the exit method for all existing network
490 * namespaces.
491 */
492void unregister_pernet_subsys(struct pernet_operations *module)
493{
494 mutex_lock(&net_mutex);
495 unregister_pernet_operations(module);
496 mutex_unlock(&net_mutex);
497}
498EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
499
500/**
501 * register_pernet_device - register a network namespace device
502 * @ops: pernet operations structure for the subsystem
503 *
504 * Register a device which has init and exit functions
505 * that are called when network namespaces are created and
506 * destroyed respectively.
507 *
508 * When registered all network namespace init functions are
509 * called for every existing network namespace. Allowing kernel
510 * modules to have a race free view of the set of network namespaces.
511 *
512 * When a new network namespace is created all of the init
513 * methods are called in the order in which they were registered.
514 *
515 * When a network namespace is destroyed all of the exit methods
516 * are called in the reverse of the order with which they were
517 * registered.
518 */
519int register_pernet_device(struct pernet_operations *ops)
520{
521 int error;
522 mutex_lock(&net_mutex);
523 error = register_pernet_operations(&pernet_list, ops);
524 if (!error && (first_device == &pernet_list))
525 first_device = &ops->list;
526 mutex_unlock(&net_mutex);
527 return error;
528}
529EXPORT_SYMBOL_GPL(register_pernet_device);
530
531/**
532 * unregister_pernet_device - unregister a network namespace netdevice
533 * @ops: pernet operations structure to manipulate
534 *
535 * Remove the pernet operations structure from the list to be
53379e57 536 * used when network namespaces are created or destroyed. In
5f256bec
EB
537 * addition run the exit method for all existing network
538 * namespaces.
539 */
540void unregister_pernet_device(struct pernet_operations *ops)
541{
542 mutex_lock(&net_mutex);
543 if (&ops->list == first_device)
544 first_device = first_device->next;
545 unregister_pernet_operations(ops);
546 mutex_unlock(&net_mutex);
547}
548EXPORT_SYMBOL_GPL(unregister_pernet_device);
c93cf61f 549
dec827d1
PE
550static void net_generic_release(struct rcu_head *rcu)
551{
552 struct net_generic *ng;
553
554 ng = container_of(rcu, struct net_generic, rcu);
555 kfree(ng);
556}
557
558int net_assign_generic(struct net *net, int id, void *data)
559{
560 struct net_generic *ng, *old_ng;
561
562 BUG_ON(!mutex_is_locked(&net_mutex));
563 BUG_ON(id == 0);
564
565 ng = old_ng = net->gen;
566 if (old_ng->len >= id)
567 goto assign;
568
569 ng = kzalloc(sizeof(struct net_generic) +
570 id * sizeof(void *), GFP_KERNEL);
571 if (ng == NULL)
572 return -ENOMEM;
573
574 /*
575 * Some synchronisation notes:
576 *
577 * The net_generic explores the net->gen array inside rcu
578 * read section. Besides once set the net->gen->ptr[x]
579 * pointer never changes (see rules in netns/generic.h).
580 *
581 * That said, we simply duplicate this array and schedule
582 * the old copy for kfree after a grace period.
583 */
584
585 ng->len = id;
14458630 586 memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
dec827d1
PE
587
588 rcu_assign_pointer(net->gen, ng);
589 call_rcu(&old_ng->rcu, net_generic_release);
590assign:
591 ng->ptr[id - 1] = data;
592 return 0;
593}
594EXPORT_SYMBOL_GPL(net_assign_generic);
This page took 0.292472 seconds and 5 git commands to generate.