Merge git://1984.lsi.us.es/nf-next
authorDavid S. Miller <davem@davemloft.net>
Thu, 23 Aug 2012 01:48:21 +0000 (18:48 -0700)
committerDavid S. Miller <davem@davemloft.net>
Thu, 23 Aug 2012 01:48:52 +0000 (18:48 -0700)
Pablo Neira Ayuso says:

====================
This is the first batch of Netfilter and IPVS updates for your
net-next tree. Mostly cleanups for the Netfilter side. They are:

* Remove unnecessary RTNL locking now that we have support
  for namespace in nf_conntrack, from Patrick McHardy.

* Cleanup to eliminate unnecessary goto in the initialization
  path of several Netfilter tables, from Jean Sacren.

* Another cleanup from Wu Fengguang, this time to PTR_RET instead
  of if IS_ERR then return PTR_ERR.

* Use list_for_each_entry_continue_rcu in nf_iterate, from
  Michael Wang.

* Add pmtu_disc sysctl option to disable PMTU in their tunneling
  transmitter, from Julian Anastasov.

* Generalize application protocol registration in IPVS and modify
  IPVS FTP helper to use it, from Julian Anastasov.

* update Kconfig. The IPVS FTP helper depends on the Netfilter FTP
  helper for NAT support, from Julian Anastasov.

* Add logic to update PMTU for IPIP packets in IPVS, again
  from Julian Anastasov.

* A couple of sparse warning fixes for IPVS and Netfilter from
  Claudiu Ghioc and Patrick McHardy respectively.

Patrick's IPv6 NAT changes will follow after this batch, I need
to flush this batch first before refreshing my tree.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
23 files changed:
include/net/ip_vs.h
net/bridge/netfilter/ebtable_filter.c
net/bridge/netfilter/ebtable_nat.c
net/ipv4/netfilter/iptable_filter.c
net/ipv4/netfilter/iptable_mangle.c
net/ipv4/netfilter/iptable_raw.c
net/ipv4/netfilter/iptable_security.c
net/ipv6/netfilter/ip6table_filter.c
net/ipv6/netfilter/ip6table_mangle.c
net/ipv6/netfilter/ip6table_raw.c
net/ipv6/netfilter/ip6table_security.c
net/netfilter/core.c
net/netfilter/ipvs/Kconfig
net/netfilter/ipvs/ip_vs_app.c
net/netfilter/ipvs/ip_vs_core.c
net/netfilter/ipvs/ip_vs_ctl.c
net/netfilter/ipvs/ip_vs_ftp.c
net/netfilter/ipvs/ip_vs_xmit.c
net/netfilter/nf_conntrack_proto.c
net/netfilter/nfnetlink_acct.c
net/netfilter/nfnetlink_cthelper.c
net/netfilter/xt_NFQUEUE.c
net/netfilter/xt_osf.c

index 95374d1696a163a75f8aa006bf99fe958df195aa..ee75ccdf5188cbf4eac840714a22b456d6700ad6 100644 (file)
@@ -808,8 +808,6 @@ struct netns_ipvs {
        struct list_head        rs_table[IP_VS_RTAB_SIZE];
        /* ip_vs_app */
        struct list_head        app_list;
-       /* ip_vs_ftp */
-       struct ip_vs_app        *ftp_app;
        /* ip_vs_proto */
        #define IP_VS_PROTO_TAB_SIZE    32      /* must be power of 2 */
        struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
@@ -890,6 +888,7 @@ struct netns_ipvs {
        unsigned int            sysctl_sync_refresh_period;
        int                     sysctl_sync_retries;
        int                     sysctl_nat_icmp_send;
+       int                     sysctl_pmtu_disc;
 
        /* ip_vs_lblc */
        int                     sysctl_lblc_expiration;
@@ -976,6 +975,11 @@ static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs)
        return ipvs->sysctl_sync_sock_size;
 }
 
+static inline int sysctl_pmtu_disc(struct netns_ipvs *ipvs)
+{
+       return ipvs->sysctl_pmtu_disc;
+}
+
 #else
 
 static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
@@ -1018,6 +1022,11 @@ static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs)
        return 0;
 }
 
+static inline int sysctl_pmtu_disc(struct netns_ipvs *ipvs)
+{
+       return 1;
+}
+
 #endif
 
 /*
@@ -1179,7 +1188,8 @@ extern void ip_vs_service_net_cleanup(struct net *net);
  *      (from ip_vs_app.c)
  */
 #define IP_VS_APP_MAX_PORTS  8
-extern int register_ip_vs_app(struct net *net, struct ip_vs_app *app);
+extern struct ip_vs_app *register_ip_vs_app(struct net *net,
+                                           struct ip_vs_app *app);
 extern void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app);
 extern int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
 extern void ip_vs_unbind_app(struct ip_vs_conn *cp);
index 42e6bd0945745f99ae2cb718b55efed2410f704b..3c2e9dced9e0afd8a5ed3522357eb0faf63e8289 100644 (file)
@@ -100,9 +100,7 @@ static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
 static int __net_init frame_filter_net_init(struct net *net)
 {
        net->xt.frame_filter = ebt_register_table(net, &frame_filter);
-       if (IS_ERR(net->xt.frame_filter))
-               return PTR_ERR(net->xt.frame_filter);
-       return 0;
+       return PTR_RET(net->xt.frame_filter);
 }
 
 static void __net_exit frame_filter_net_exit(struct net *net)
index 6dc2f878ae0533a58455f3b2b76f49681fb42f5f..10871bc77908ec8799a482636995771fd47a6b03 100644 (file)
@@ -100,9 +100,7 @@ static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
 static int __net_init frame_nat_net_init(struct net *net)
 {
        net->xt.frame_nat = ebt_register_table(net, &frame_nat);
-       if (IS_ERR(net->xt.frame_nat))
-               return PTR_ERR(net->xt.frame_nat);
-       return 0;
+       return PTR_RET(net->xt.frame_nat);
 }
 
 static void __net_exit frame_nat_net_exit(struct net *net)
index 851acec852d284bbe61f7fc9fd9b1cf2ab4bb1f1..6b3da5cf54e96d99054170c77a911e3af9e9d139 100644 (file)
@@ -69,9 +69,7 @@ static int __net_init iptable_filter_net_init(struct net *net)
        net->ipv4.iptable_filter =
                ipt_register_table(net, &packet_filter, repl);
        kfree(repl);
-       if (IS_ERR(net->ipv4.iptable_filter))
-               return PTR_ERR(net->ipv4.iptable_filter);
-       return 0;
+       return PTR_RET(net->ipv4.iptable_filter);
 }
 
 static void __net_exit iptable_filter_net_exit(struct net *net)
@@ -96,14 +94,10 @@ static int __init iptable_filter_init(void)
        filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook);
        if (IS_ERR(filter_ops)) {
                ret = PTR_ERR(filter_ops);
-               goto cleanup_table;
+               unregister_pernet_subsys(&iptable_filter_net_ops);
        }
 
        return ret;
-
- cleanup_table:
-       unregister_pernet_subsys(&iptable_filter_net_ops);
-       return ret;
 }
 
 static void __exit iptable_filter_fini(void)
index aef5d1fbe77dc39b5e6b951a97897e02ead47a62..85d88f20644701f63ad9b8250d9fd391c108ee55 100644 (file)
@@ -104,9 +104,7 @@ static int __net_init iptable_mangle_net_init(struct net *net)
        net->ipv4.iptable_mangle =
                ipt_register_table(net, &packet_mangler, repl);
        kfree(repl);
-       if (IS_ERR(net->ipv4.iptable_mangle))
-               return PTR_ERR(net->ipv4.iptable_mangle);
-       return 0;
+       return PTR_RET(net->ipv4.iptable_mangle);
 }
 
 static void __net_exit iptable_mangle_net_exit(struct net *net)
@@ -131,14 +129,10 @@ static int __init iptable_mangle_init(void)
        mangle_ops = xt_hook_link(&packet_mangler, iptable_mangle_hook);
        if (IS_ERR(mangle_ops)) {
                ret = PTR_ERR(mangle_ops);
-               goto cleanup_table;
+               unregister_pernet_subsys(&iptable_mangle_net_ops);
        }
 
        return ret;
-
- cleanup_table:
-       unregister_pernet_subsys(&iptable_mangle_net_ops);
-       return ret;
 }
 
 static void __exit iptable_mangle_fini(void)
index 07fb710cd722f329ea297b764f50cfeb0ad8175e..03d9696d3c6eb27b24eed32ad3fb0c4e069d9bf3 100644 (file)
@@ -48,9 +48,7 @@ static int __net_init iptable_raw_net_init(struct net *net)
        net->ipv4.iptable_raw =
                ipt_register_table(net, &packet_raw, repl);
        kfree(repl);
-       if (IS_ERR(net->ipv4.iptable_raw))
-               return PTR_ERR(net->ipv4.iptable_raw);
-       return 0;
+       return PTR_RET(net->ipv4.iptable_raw);
 }
 
 static void __net_exit iptable_raw_net_exit(struct net *net)
@@ -75,14 +73,10 @@ static int __init iptable_raw_init(void)
        rawtable_ops = xt_hook_link(&packet_raw, iptable_raw_hook);
        if (IS_ERR(rawtable_ops)) {
                ret = PTR_ERR(rawtable_ops);
-               goto cleanup_table;
+               unregister_pernet_subsys(&iptable_raw_net_ops);
        }
 
        return ret;
-
- cleanup_table:
-       unregister_pernet_subsys(&iptable_raw_net_ops);
-       return ret;
 }
 
 static void __exit iptable_raw_fini(void)
index be45bdc4c60251a0936e8e5f7d0c6ea56d0e6eec..b283d8e2601abfadb80c1024fc7558862784f7b2 100644 (file)
@@ -66,10 +66,7 @@ static int __net_init iptable_security_net_init(struct net *net)
        net->ipv4.iptable_security =
                ipt_register_table(net, &security_table, repl);
        kfree(repl);
-       if (IS_ERR(net->ipv4.iptable_security))
-               return PTR_ERR(net->ipv4.iptable_security);
-
-       return 0;
+       return PTR_RET(net->ipv4.iptable_security);
 }
 
 static void __net_exit iptable_security_net_exit(struct net *net)
index 325e59a0224ffa3f0b08e7474628d614e2d3ee49..beb5777d20437321cf19d37ad3e7c630d8f0663c 100644 (file)
@@ -61,9 +61,7 @@ static int __net_init ip6table_filter_net_init(struct net *net)
        net->ipv6.ip6table_filter =
                ip6t_register_table(net, &packet_filter, repl);
        kfree(repl);
-       if (IS_ERR(net->ipv6.ip6table_filter))
-               return PTR_ERR(net->ipv6.ip6table_filter);
-       return 0;
+       return PTR_RET(net->ipv6.ip6table_filter);
 }
 
 static void __net_exit ip6table_filter_net_exit(struct net *net)
index 4d782405f125da5e7819333ce7dfadd501044c12..7431121b87dee6fa628f53d37dfa7f93c546a60b 100644 (file)
@@ -97,9 +97,7 @@ static int __net_init ip6table_mangle_net_init(struct net *net)
        net->ipv6.ip6table_mangle =
                ip6t_register_table(net, &packet_mangler, repl);
        kfree(repl);
-       if (IS_ERR(net->ipv6.ip6table_mangle))
-               return PTR_ERR(net->ipv6.ip6table_mangle);
-       return 0;
+       return PTR_RET(net->ipv6.ip6table_mangle);
 }
 
 static void __net_exit ip6table_mangle_net_exit(struct net *net)
index 5b9926a011bd99faff714042746f161ee58a152a..60d1bddff7a038c54c518e850c64f08745accccb 100644 (file)
@@ -40,9 +40,7 @@ static int __net_init ip6table_raw_net_init(struct net *net)
        net->ipv6.ip6table_raw =
                ip6t_register_table(net, &packet_raw, repl);
        kfree(repl);
-       if (IS_ERR(net->ipv6.ip6table_raw))
-               return PTR_ERR(net->ipv6.ip6table_raw);
-       return 0;
+       return PTR_RET(net->ipv6.ip6table_raw);
 }
 
 static void __net_exit ip6table_raw_net_exit(struct net *net)
index 91aa2b4d83c9c1571d4538ad380dfae5296a180b..db155351339c7c63ed48d23233ef47919192a05c 100644 (file)
@@ -58,10 +58,7 @@ static int __net_init ip6table_security_net_init(struct net *net)
        net->ipv6.ip6table_security =
                ip6t_register_table(net, &security_table, repl);
        kfree(repl);
-       if (IS_ERR(net->ipv6.ip6table_security))
-               return PTR_ERR(net->ipv6.ip6table_security);
-
-       return 0;
+       return PTR_RET(net->ipv6.ip6table_security);
 }
 
 static void __net_exit ip6table_security_net_exit(struct net *net)
index 0bc6b60db4df2fda1a7cd2f1b57747f655d046a3..8f4b0b2b6f8033ed696671f8538a260d19e84466 100644 (file)
@@ -131,14 +131,13 @@ unsigned int nf_iterate(struct list_head *head,
                        int hook_thresh)
 {
        unsigned int verdict;
+       struct nf_hook_ops *elem = list_entry_rcu(*i, struct nf_hook_ops, list);
 
        /*
         * The caller must not block between calls to this
         * function because of risk of continuing from deleted element.
         */
-       list_for_each_continue_rcu(*i, head) {
-               struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
-
+       list_for_each_entry_continue_rcu(elem, head, list) {
                if (hook_thresh > elem->priority)
                        continue;
 
@@ -155,11 +154,14 @@ repeat:
                                continue;
                        }
 #endif
-                       if (verdict != NF_REPEAT)
+                       if (verdict != NF_REPEAT) {
+                               *i = &elem->list;
                                return verdict;
+                       }
                        goto repeat;
                }
        }
+       *i = &elem->list;
        return NF_ACCEPT;
 }
 
index f9871385a65eddca7f9b942f0c83794c5c147339..8b2cffdfdd9985e7397a0b800d5e2c28af20a4b7 100644 (file)
@@ -250,7 +250,8 @@ comment 'IPVS application helper'
 
 config IP_VS_FTP
        tristate "FTP protocol helper"
-        depends on IP_VS_PROTO_TCP && NF_CONNTRACK && NF_NAT
+       depends on IP_VS_PROTO_TCP && NF_CONNTRACK && NF_NAT && \
+               NF_CONNTRACK_FTP
        select IP_VS_NFCT
        ---help---
          FTP is a protocol that transfers IP address and/or port number in
index 64f9e8f13207e94463240b330abc668fd607df70..9713e6e86d472f2e2aefc5c4ebfd8f95a5cda62c 100644 (file)
@@ -180,22 +180,38 @@ register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
 }
 
 
-/*
- *     ip_vs_app registration routine
- */
-int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
+/* Register application for netns */
+struct ip_vs_app *register_ip_vs_app(struct net *net, struct ip_vs_app *app)
 {
        struct netns_ipvs *ipvs = net_ipvs(net);
-       /* increase the module use count */
-       ip_vs_use_count_inc();
+       struct ip_vs_app *a;
+       int err = 0;
+
+       if (!ipvs)
+               return ERR_PTR(-ENOENT);
 
        mutex_lock(&__ip_vs_app_mutex);
 
-       list_add(&app->a_list, &ipvs->app_list);
+       list_for_each_entry(a, &ipvs->app_list, a_list) {
+               if (!strcmp(app->name, a->name)) {
+                       err = -EEXIST;
+                       goto out_unlock;
+               }
+       }
+       a = kmemdup(app, sizeof(*app), GFP_KERNEL);
+       if (!a) {
+               err = -ENOMEM;
+               goto out_unlock;
+       }
+       INIT_LIST_HEAD(&a->incs_list);
+       list_add(&a->a_list, &ipvs->app_list);
+       /* increase the module use count */
+       ip_vs_use_count_inc();
 
+out_unlock:
        mutex_unlock(&__ip_vs_app_mutex);
 
-       return 0;
+       return err ? ERR_PTR(err) : a;
 }
 
 
@@ -205,20 +221,29 @@ int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
  */
 void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
 {
-       struct ip_vs_app *inc, *nxt;
+       struct netns_ipvs *ipvs = net_ipvs(net);
+       struct ip_vs_app *a, *anxt, *inc, *nxt;
+
+       if (!ipvs)
+               return;
 
        mutex_lock(&__ip_vs_app_mutex);
 
-       list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
-               ip_vs_app_inc_release(net, inc);
-       }
+       list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) {
+               if (app && strcmp(app->name, a->name))
+                       continue;
+               list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) {
+                       ip_vs_app_inc_release(net, inc);
+               }
 
-       list_del(&app->a_list);
+               list_del(&a->a_list);
+               kfree(a);
 
-       mutex_unlock(&__ip_vs_app_mutex);
+               /* decrease the module use count */
+               ip_vs_use_count_dec();
+       }
 
-       /* decrease the module use count */
-       ip_vs_use_count_dec();
+       mutex_unlock(&__ip_vs_app_mutex);
 }
 
 
@@ -586,5 +611,6 @@ int __net_init ip_vs_app_net_init(struct net *net)
 
 void __net_exit ip_vs_app_net_cleanup(struct net *net)
 {
+       unregister_ip_vs_app(net, NULL /* all */);
        proc_net_remove(net, "ip_vs_app");
 }
index b54eccef40b5cf7ecb74a1c7f1950f48d7823413..58918e20f9d5b038c2181b893b0e0458dbd3a8f2 100644 (file)
@@ -1303,7 +1303,8 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
        struct ip_vs_conn *cp;
        struct ip_vs_protocol *pp;
        struct ip_vs_proto_data *pd;
-       unsigned int offset, ihl, verdict;
+       unsigned int offset, offset2, ihl, verdict;
+       bool ipip;
 
        *related = 1;
 
@@ -1345,6 +1346,21 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 
        net = skb_net(skb);
 
+       /* Special case for errors for IPIP packets */
+       ipip = false;
+       if (cih->protocol == IPPROTO_IPIP) {
+               if (unlikely(cih->frag_off & htons(IP_OFFSET)))
+                       return NF_ACCEPT;
+               /* Error for our IPIP must arrive at LOCAL_IN */
+               if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL))
+                       return NF_ACCEPT;
+               offset += cih->ihl * 4;
+               cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+               if (cih == NULL)
+                       return NF_ACCEPT; /* The packet looks wrong, ignore */
+               ipip = true;
+       }
+
        pd = ip_vs_proto_data_get(net, cih->protocol);
        if (!pd)
                return NF_ACCEPT;
@@ -1358,11 +1374,14 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
        IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
                      "Checking incoming ICMP for");
 
+       offset2 = offset;
        offset += cih->ihl * 4;
 
        ip_vs_fill_iphdr(AF_INET, cih, &ciph);
-       /* The embedded headers contain source and dest in reverse order */
-       cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1);
+       /* The embedded headers contain source and dest in reverse order.
+        * For IPIP this is error for request, not for reply.
+        */
+       cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, ipip ? 0 : 1);
        if (!cp)
                return NF_ACCEPT;
 
@@ -1376,6 +1395,57 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
                goto out;
        }
 
+       if (ipip) {
+               __be32 info = ic->un.gateway;
+
+               /* Update the MTU */
+               if (ic->type == ICMP_DEST_UNREACH &&
+                   ic->code == ICMP_FRAG_NEEDED) {
+                       struct ip_vs_dest *dest = cp->dest;
+                       u32 mtu = ntohs(ic->un.frag.mtu);
+
+                       /* Strip outer IP and ICMP, go to IPIP header */
+                       __skb_pull(skb, ihl + sizeof(_icmph));
+                       offset2 -= ihl + sizeof(_icmph);
+                       skb_reset_network_header(skb);
+                       IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n",
+                               &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, mtu);
+                       rcu_read_lock();
+                       ipv4_update_pmtu(skb, dev_net(skb->dev),
+                                        mtu, 0, 0, 0, 0);
+                       rcu_read_unlock();
+                       /* Client uses PMTUD? */
+                       if (!(cih->frag_off & htons(IP_DF)))
+                               goto ignore_ipip;
+                       /* Prefer the resulting PMTU */
+                       if (dest) {
+                               spin_lock(&dest->dst_lock);
+                               if (dest->dst_cache)
+                                       mtu = dst_mtu(dest->dst_cache);
+                               spin_unlock(&dest->dst_lock);
+                       }
+                       if (mtu > 68 + sizeof(struct iphdr))
+                               mtu -= sizeof(struct iphdr);
+                       info = htonl(mtu);
+               }
+               /* Strip outer IP, ICMP and IPIP, go to IP header of
+                * original request.
+                */
+               __skb_pull(skb, offset2);
+               skb_reset_network_header(skb);
+               IP_VS_DBG(12, "Sending ICMP for %pI4->%pI4: t=%u, c=%u, i=%u\n",
+                       &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
+                       ic->type, ic->code, ntohl(info));
+               icmp_send(skb, ic->type, ic->code, info);
+               /* ICMP can be shorter but anyways, account it */
+               ip_vs_out_stats(cp, skb);
+
+ignore_ipip:
+               consume_skb(skb);
+               verdict = NF_STOLEN;
+               goto out;
+       }
+
        /* do the statistics and put it back */
        ip_vs_in_stats(cp, skb);
        if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
index 72bf32a84874718927a4bcbdc2e26395be00bdd8..3c601378d27e348291d174b979d8f532d9f8d774 100644 (file)
@@ -1801,6 +1801,12 @@ static struct ctl_table vs_vars[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
+       {
+               .procname       = "pmtu_disc",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
 #ifdef CONFIG_IP_VS_DEBUG
        {
                .procname       = "debug_level",
@@ -3676,7 +3682,7 @@ static void ip_vs_genl_unregister(void)
  * per netns intit/exit func.
  */
 #ifdef CONFIG_SYSCTL
-int __net_init ip_vs_control_net_init_sysctl(struct net *net)
+static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
 {
        int idx;
        struct netns_ipvs *ipvs = net_ipvs(net);
@@ -3727,6 +3733,8 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net)
        ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
        tbl[idx++].data = &ipvs->sysctl_sync_retries;
        tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
+       ipvs->sysctl_pmtu_disc = 1;
+       tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
 
 
        ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
@@ -3744,7 +3752,7 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net)
        return 0;
 }
 
-void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
+static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
 {
        struct netns_ipvs *ipvs = net_ipvs(net);
 
@@ -3755,8 +3763,8 @@ void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
 
 #else
 
-int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; }
-void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { }
+static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; }
+static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { }
 
 #endif
 
index b20b29c903efdc0a0593f90547661446c58b47e6..ad70b7e4ac4a5cf44e91a806e40231cbd9af35fb 100644 (file)
@@ -441,16 +441,10 @@ static int __net_init __ip_vs_ftp_init(struct net *net)
 
        if (!ipvs)
                return -ENOENT;
-       app = kmemdup(&ip_vs_ftp, sizeof(struct ip_vs_app), GFP_KERNEL);
-       if (!app)
-               return -ENOMEM;
-       INIT_LIST_HEAD(&app->a_list);
-       INIT_LIST_HEAD(&app->incs_list);
-       ipvs->ftp_app = app;
 
-       ret = register_ip_vs_app(net, app);
-       if (ret)
-               goto err_exit;
+       app = register_ip_vs_app(net, &ip_vs_ftp);
+       if (IS_ERR(app))
+               return PTR_ERR(app);
 
        for (i = 0; i < ports_count; i++) {
                if (!ports[i])
@@ -464,9 +458,7 @@ static int __net_init __ip_vs_ftp_init(struct net *net)
        return 0;
 
 err_unreg:
-       unregister_ip_vs_app(net, app);
-err_exit:
-       kfree(ipvs->ftp_app);
+       unregister_ip_vs_app(net, &ip_vs_ftp);
        return ret;
 }
 /*
@@ -474,10 +466,7 @@ err_exit:
  */
 static void __ip_vs_ftp_exit(struct net *net)
 {
-       struct netns_ipvs *ipvs = net_ipvs(net);
-
-       unregister_ip_vs_app(net, ipvs->ftp_app);
-       kfree(ipvs->ftp_app);
+       unregister_ip_vs_app(net, &ip_vs_ftp);
 }
 
 static struct pernet_operations ip_vs_ftp_ops = {
index 65b616ae1716366f6437b9e760b22aabe7f9b4d1..543a554008aff712f7685c24faf8374d67b663d4 100644 (file)
@@ -49,6 +49,7 @@ enum {
        IP_VS_RT_MODE_RDR       = 4, /* Allow redirect from remote daddr to
                                      * local
                                      */
+       IP_VS_RT_MODE_CONNECT   = 8, /* Always bind route to saddr */
 };
 
 /*
@@ -84,6 +85,42 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
        return dst;
 }
 
+/* Get route to daddr, update *saddr, optionally bind route to saddr */
+static struct rtable *do_output_route4(struct net *net, __be32 daddr,
+                                      u32 rtos, int rt_mode, __be32 *saddr)
+{
+       struct flowi4 fl4;
+       struct rtable *rt;
+       int loop = 0;
+
+       memset(&fl4, 0, sizeof(fl4));
+       fl4.daddr = daddr;
+       fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0;
+       fl4.flowi4_tos = rtos;
+
+retry:
+       rt = ip_route_output_key(net, &fl4);
+       if (IS_ERR(rt)) {
+               /* Invalid saddr ? */
+               if (PTR_ERR(rt) == -EINVAL && *saddr &&
+                   rt_mode & IP_VS_RT_MODE_CONNECT && !loop) {
+                       *saddr = 0;
+                       flowi4_update_output(&fl4, 0, rtos, daddr, 0);
+                       goto retry;
+               }
+               IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr);
+               return NULL;
+       } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) {
+               ip_rt_put(rt);
+               *saddr = fl4.saddr;
+               flowi4_update_output(&fl4, 0, rtos, daddr, fl4.saddr);
+               loop++;
+               goto retry;
+       }
+       *saddr = fl4.saddr;
+       return rt;
+}
+
 /* Get route to destination or remote server */
 static struct rtable *
 __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
@@ -98,20 +135,13 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
                spin_lock(&dest->dst_lock);
                if (!(rt = (struct rtable *)
                      __ip_vs_dst_check(dest, rtos))) {
-                       struct flowi4 fl4;
-
-                       memset(&fl4, 0, sizeof(fl4));
-                       fl4.daddr = dest->addr.ip;
-                       fl4.flowi4_tos = rtos;
-                       rt = ip_route_output_key(net, &fl4);
-                       if (IS_ERR(rt)) {
+                       rt = do_output_route4(net, dest->addr.ip, rtos,
+                                             rt_mode, &dest->dst_saddr.ip);
+                       if (!rt) {
                                spin_unlock(&dest->dst_lock);
-                               IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
-                                            &dest->addr.ip);
                                return NULL;
                        }
                        __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
-                       dest->dst_saddr.ip = fl4.saddr;
                        IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, "
                                  "rtos=%X\n",
                                  &dest->addr.ip, &dest->dst_saddr.ip,
@@ -122,19 +152,17 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
                        *ret_saddr = dest->dst_saddr.ip;
                spin_unlock(&dest->dst_lock);
        } else {
-               struct flowi4 fl4;
+               __be32 saddr = htonl(INADDR_ANY);
 
-               memset(&fl4, 0, sizeof(fl4));
-               fl4.daddr = daddr;
-               fl4.flowi4_tos = rtos;
-               rt = ip_route_output_key(net, &fl4);
-               if (IS_ERR(rt)) {
-                       IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
-                                    &daddr);
+               /* For such unconfigured boxes avoid many route lookups
+                * for performance reasons because we do not remember saddr
+                */
+               rt_mode &= ~IP_VS_RT_MODE_CONNECT;
+               rt = do_output_route4(net, daddr, rtos, rt_mode, &saddr);
+               if (!rt)
                        return NULL;
-               }
                if (ret_saddr)
-                       *ret_saddr = fl4.saddr;
+                       *ret_saddr = saddr;
        }
 
        local = rt->rt_flags & RTCF_LOCAL;
@@ -331,6 +359,7 @@ ip_vs_dst_reset(struct ip_vs_dest *dest)
        old_dst = dest->dst_cache;
        dest->dst_cache = NULL;
        dst_release(old_dst);
+       dest->dst_saddr.ip = 0;
 }
 
 #define IP_VS_XMIT_TUNNEL(skb, cp)                             \
@@ -766,12 +795,13 @@ int
 ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
                  struct ip_vs_protocol *pp)
 {
+       struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
        struct rtable *rt;                      /* Route to the other host */
        __be32 saddr;                           /* Source for tunnel */
        struct net_device *tdev;                /* Device to other host */
        struct iphdr  *old_iph = ip_hdr(skb);
        u8     tos = old_iph->tos;
-       __be16 df = old_iph->frag_off;
+       __be16 df;
        struct iphdr  *iph;                     /* Our new IP header */
        unsigned int max_headroom;              /* The extra header space needed */
        int    mtu;
@@ -781,7 +811,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
        if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
                                      RT_TOS(tos), IP_VS_RT_MODE_LOCAL |
-                                                  IP_VS_RT_MODE_NON_LOCAL,
+                                                  IP_VS_RT_MODE_NON_LOCAL |
+                                                  IP_VS_RT_MODE_CONNECT,
                                                   &saddr)))
                goto tx_error_icmp;
        if (rt->rt_flags & RTCF_LOCAL) {
@@ -796,13 +827,13 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
                IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
                goto tx_error_put;
        }
-       if (skb_dst(skb))
+       if (rt_is_output_route(skb_rtable(skb)))
                skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
 
-       df |= (old_iph->frag_off & htons(IP_DF));
+       /* Copy DF, reset fragment offset and MF */
+       df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0;
 
-       if ((old_iph->frag_off & htons(IP_DF) &&
-           mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) {
+       if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) {
                icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
                IP_VS_DBG_RL("%s(): frag needed\n", __func__);
                goto tx_error_put;
index 0dc63854390f70f738b85df4c41bbbd2a9f314da..51e928db48c846f469da93ed70ed072807f3359f 100644 (file)
@@ -21,7 +21,6 @@
 #include <linux/notifier.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
-#include <linux/rtnetlink.h>
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_l3proto.h>
@@ -294,9 +293,7 @@ void nf_conntrack_l3proto_unregister(struct net *net,
        nf_ct_l3proto_unregister_sysctl(net, proto);
 
        /* Remove all contrack entries for this protocol */
-       rtnl_lock();
        nf_ct_iterate_cleanup(net, kill_l3proto, proto);
-       rtnl_unlock();
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister);
 
@@ -502,9 +499,7 @@ void nf_conntrack_l4proto_unregister(struct net *net,
        nf_ct_l4proto_unregister_sysctl(net, pn, l4proto);
 
        /* Remove all contrack entries for this protocol */
-       rtnl_lock();
        nf_ct_iterate_cleanup(net, kill_l4proto, l4proto);
-       rtnl_unlock();
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister);
 
index b2e7310ca0b8e05d9835c4898f9670993d5ebed3..d7ec928790717ef6d1f81d26a0e65279615806eb 100644 (file)
@@ -79,11 +79,11 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
 
        if (tb[NFACCT_BYTES]) {
                atomic64_set(&nfacct->bytes,
-                            be64_to_cpu(nla_get_u64(tb[NFACCT_BYTES])));
+                            be64_to_cpu(nla_get_be64(tb[NFACCT_BYTES])));
        }
        if (tb[NFACCT_PKTS]) {
                atomic64_set(&nfacct->pkts,
-                            be64_to_cpu(nla_get_u64(tb[NFACCT_PKTS])));
+                            be64_to_cpu(nla_get_be64(tb[NFACCT_PKTS])));
        }
        atomic_set(&nfacct->refcnt, 1);
        list_add_tail_rcu(&nfacct->head, &nfnl_acct_list);
index d6836193d479a00b155884a259a6d1f2f0245a4c..32a1ba3f3e27dd9b648d918e8b72a31b7ef2a53e 100644 (file)
@@ -74,7 +74,7 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple,
        if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM])
                return -EINVAL;
 
-       tuple->src.l3num = ntohs(nla_get_u16(tb[NFCTH_TUPLE_L3PROTONUM]));
+       tuple->src.l3num = ntohs(nla_get_be16(tb[NFCTH_TUPLE_L3PROTONUM]));
        tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]);
 
        return 0;
index 7babe7d687169d6231fa17149bfd10cb52ee7ebe..817f9e9f2b16c70930df5e51d62f5fd606e76f69 100644 (file)
@@ -43,7 +43,7 @@ static u32 hash_v4(const struct sk_buff *skb)
        const struct iphdr *iph = ip_hdr(skb);
 
        /* packets in either direction go into same queue */
-       if (iph->saddr < iph->daddr)
+       if ((__force u32)iph->saddr < (__force u32)iph->daddr)
                return jhash_3words((__force u32)iph->saddr,
                        (__force u32)iph->daddr, iph->protocol, jhash_initval);
 
@@ -57,7 +57,8 @@ static u32 hash_v6(const struct sk_buff *skb)
        const struct ipv6hdr *ip6h = ipv6_hdr(skb);
        u32 a, b, c;
 
-       if (ip6h->saddr.s6_addr32[3] < ip6h->daddr.s6_addr32[3]) {
+       if ((__force u32)ip6h->saddr.s6_addr32[3] <
+           (__force u32)ip6h->daddr.s6_addr32[3]) {
                a = (__force u32) ip6h->saddr.s6_addr32[3];
                b = (__force u32) ip6h->daddr.s6_addr32[3];
        } else {
@@ -65,7 +66,8 @@ static u32 hash_v6(const struct sk_buff *skb)
                a = (__force u32) ip6h->daddr.s6_addr32[3];
        }
 
-       if (ip6h->saddr.s6_addr32[1] < ip6h->daddr.s6_addr32[1])
+       if ((__force u32)ip6h->saddr.s6_addr32[1] <
+           (__force u32)ip6h->daddr.s6_addr32[1])
                c = (__force u32) ip6h->saddr.s6_addr32[1];
        else
                c = (__force u32) ip6h->daddr.s6_addr32[1];
index 846f895cb656ddf48989d28900c55b2bae9f475e..a5e673d32bdaec2dec9fb7fce26905fff1459b77 100644 (file)
@@ -269,7 +269,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
                                                mss <<= 8;
                                                mss |= optp[2];
 
-                                               mss = ntohs(mss);
+                                               mss = ntohs((__force __be16)mss);
                                                break;
                                        case OSFOPT_TS:
                                                loop_cont = 1;
This page took 0.043404 seconds and 5 git commands to generate.