2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
10 * ip_vs_sync: sync connection info from master load balancer to backups
14 * Alexandre Cassen : Added master & backup support at a time.
15 * Alexandre Cassen : Added SyncID support for incoming sync
17 * Justin Ossevoort : Fix endian problem on sync message size.
20 #define KMSG_COMPONENT "IPVS"
21 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23 #include <linux/module.h>
24 #include <linux/slab.h>
25 #include <linux/inetdevice.h>
26 #include <linux/net.h>
27 #include <linux/completion.h>
28 #include <linux/delay.h>
29 #include <linux/skbuff.h>
31 #include <linux/igmp.h> /* for ip_mc_join_group */
32 #include <linux/udp.h>
33 #include <linux/err.h>
34 #include <linux/kthread.h>
35 #include <linux/wait.h>
36 #include <linux/kernel.h>
41 #include <net/ip_vs.h>
43 #define IP_VS_SYNC_GROUP 0xe0000051 /* multicast addr - 224.0.0.81 */
44 #define IP_VS_SYNC_PORT 8848 /* multicast port */
48 * IPVS sync connection entry
50 struct ip_vs_sync_conn
{
53 /* Protocol, addresses and port numbers */
54 __u8 protocol
; /* Which protocol (TCP/UDP) */
58 __be32 caddr
; /* client address */
59 __be32 vaddr
; /* virtual address */
60 __be32 daddr
; /* destination address */
62 /* Flags and state transition */
63 __be16 flags
; /* status flags */
64 __be16 state
; /* state info */
66 /* The sequence options start here */
69 struct ip_vs_sync_conn_options
{
70 struct ip_vs_seq in_seq
; /* incoming seq. struct */
71 struct ip_vs_seq out_seq
; /* outgoing seq. struct */
74 struct ip_vs_sync_thread_data
{
79 #define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn))
80 #define FULL_CONN_SIZE \
81 (sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options))
85 The master multicasts messages to the backup load balancers in the
89 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
90 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
91 | Count Conns | SyncID | Size |
92 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
94 | IPVS Sync Connection (1) |
95 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
99 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
101 | IPVS Sync Connection (n) |
102 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
105 #define SYNC_MESG_HEADER_LEN 4
106 #define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */
108 struct ip_vs_sync_mesg
{
113 /* ip_vs_sync_conn entries start here */
116 /* the maximum length of sync (sending/receiving) message */
117 static int sync_send_mesg_maxlen
;
118 static int sync_recv_mesg_maxlen
;
120 struct ip_vs_sync_buff
{
121 struct list_head list
;
122 unsigned long firstuse
;
124 /* pointers for the message data */
125 struct ip_vs_sync_mesg
*mesg
;
131 /* the sync_buff list head and the lock */
132 static LIST_HEAD(ip_vs_sync_queue
);
133 static DEFINE_SPINLOCK(ip_vs_sync_lock
);
135 /* current sync_buff for accepting new conn entries */
136 static struct ip_vs_sync_buff
*curr_sb
= NULL
;
137 static DEFINE_SPINLOCK(curr_sb_lock
);
139 /* ipvs sync daemon state */
140 volatile int ip_vs_sync_state
= IP_VS_STATE_NONE
;
141 volatile int ip_vs_master_syncid
= 0;
142 volatile int ip_vs_backup_syncid
= 0;
144 /* multicast interface name */
145 char ip_vs_master_mcast_ifn
[IP_VS_IFNAME_MAXLEN
];
146 char ip_vs_backup_mcast_ifn
[IP_VS_IFNAME_MAXLEN
];
148 /* sync daemon tasks */
149 static struct task_struct
*sync_master_thread
;
150 static struct task_struct
*sync_backup_thread
;
153 static struct sockaddr_in mcast_addr
= {
154 .sin_family
= AF_INET
,
155 .sin_port
= cpu_to_be16(IP_VS_SYNC_PORT
),
156 .sin_addr
.s_addr
= cpu_to_be32(IP_VS_SYNC_GROUP
),
160 static inline struct ip_vs_sync_buff
*sb_dequeue(void)
162 struct ip_vs_sync_buff
*sb
;
164 spin_lock_bh(&ip_vs_sync_lock
);
165 if (list_empty(&ip_vs_sync_queue
)) {
168 sb
= list_entry(ip_vs_sync_queue
.next
,
169 struct ip_vs_sync_buff
,
173 spin_unlock_bh(&ip_vs_sync_lock
);
178 static inline struct ip_vs_sync_buff
* ip_vs_sync_buff_create(void)
180 struct ip_vs_sync_buff
*sb
;
182 if (!(sb
=kmalloc(sizeof(struct ip_vs_sync_buff
), GFP_ATOMIC
)))
185 if (!(sb
->mesg
=kmalloc(sync_send_mesg_maxlen
, GFP_ATOMIC
))) {
189 sb
->mesg
->nr_conns
= 0;
190 sb
->mesg
->syncid
= ip_vs_master_syncid
;
192 sb
->head
= (unsigned char *)sb
->mesg
+ 4;
193 sb
->end
= (unsigned char *)sb
->mesg
+ sync_send_mesg_maxlen
;
194 sb
->firstuse
= jiffies
;
198 static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff
*sb
)
204 static inline void sb_queue_tail(struct ip_vs_sync_buff
*sb
)
206 spin_lock(&ip_vs_sync_lock
);
207 if (ip_vs_sync_state
& IP_VS_STATE_MASTER
)
208 list_add_tail(&sb
->list
, &ip_vs_sync_queue
);
210 ip_vs_sync_buff_release(sb
);
211 spin_unlock(&ip_vs_sync_lock
);
215 * Get the current sync buffer if it has been created for more
216 * than the specified time or the specified time is zero.
218 static inline struct ip_vs_sync_buff
*
219 get_curr_sync_buff(unsigned long time
)
221 struct ip_vs_sync_buff
*sb
;
223 spin_lock_bh(&curr_sb_lock
);
224 if (curr_sb
&& (time
== 0 ||
225 time_before(jiffies
- curr_sb
->firstuse
, time
))) {
230 spin_unlock_bh(&curr_sb_lock
);
236 * Add an ip_vs_conn information into the current sync_buff.
237 * Called by ip_vs_in.
239 void ip_vs_sync_conn(struct ip_vs_conn
*cp
)
241 struct ip_vs_sync_mesg
*m
;
242 struct ip_vs_sync_conn
*s
;
245 spin_lock(&curr_sb_lock
);
247 if (!(curr_sb
=ip_vs_sync_buff_create())) {
248 spin_unlock(&curr_sb_lock
);
249 pr_err("ip_vs_sync_buff_create failed.\n");
254 len
= (cp
->flags
& IP_VS_CONN_F_SEQ_MASK
) ? FULL_CONN_SIZE
:
257 s
= (struct ip_vs_sync_conn
*)curr_sb
->head
;
260 s
->protocol
= cp
->protocol
;
261 s
->cport
= cp
->cport
;
262 s
->vport
= cp
->vport
;
263 s
->dport
= cp
->dport
;
264 s
->caddr
= cp
->caddr
.ip
;
265 s
->vaddr
= cp
->vaddr
.ip
;
266 s
->daddr
= cp
->daddr
.ip
;
267 s
->flags
= htons(cp
->flags
& ~IP_VS_CONN_F_HASHED
);
268 s
->state
= htons(cp
->state
);
269 if (cp
->flags
& IP_VS_CONN_F_SEQ_MASK
) {
270 struct ip_vs_sync_conn_options
*opt
=
271 (struct ip_vs_sync_conn_options
*)&s
[1];
272 memcpy(opt
, &cp
->in_seq
, sizeof(*opt
));
277 curr_sb
->head
+= len
;
279 /* check if there is a space for next one */
280 if (curr_sb
->head
+FULL_CONN_SIZE
> curr_sb
->end
) {
281 sb_queue_tail(curr_sb
);
284 spin_unlock(&curr_sb_lock
);
286 /* synchronize its controller if it has */
288 ip_vs_sync_conn(cp
->control
);
293 * Process received multicast message and create the corresponding
294 * ip_vs_conn entries.
296 static void ip_vs_process_message(const char *buffer
, const size_t buflen
)
298 struct ip_vs_sync_mesg
*m
= (struct ip_vs_sync_mesg
*)buffer
;
299 struct ip_vs_sync_conn
*s
;
300 struct ip_vs_sync_conn_options
*opt
;
301 struct ip_vs_conn
*cp
;
302 struct ip_vs_protocol
*pp
;
303 struct ip_vs_dest
*dest
;
307 if (buflen
< sizeof(struct ip_vs_sync_mesg
)) {
308 IP_VS_ERR_RL("sync message header too short\n");
312 /* Convert size back to host byte order */
313 m
->size
= ntohs(m
->size
);
315 if (buflen
!= m
->size
) {
316 IP_VS_ERR_RL("bogus sync message size\n");
320 /* SyncID sanity check */
321 if (ip_vs_backup_syncid
!= 0 && m
->syncid
!= ip_vs_backup_syncid
) {
322 IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n",
327 p
= (char *)buffer
+ sizeof(struct ip_vs_sync_mesg
);
328 for (i
=0; i
<m
->nr_conns
; i
++) {
329 unsigned flags
, state
;
331 if (p
+ SIMPLE_CONN_SIZE
> buffer
+buflen
) {
332 IP_VS_ERR_RL("bogus conn in sync message\n");
335 s
= (struct ip_vs_sync_conn
*) p
;
336 flags
= ntohs(s
->flags
) | IP_VS_CONN_F_SYNC
;
337 flags
&= ~IP_VS_CONN_F_HASHED
;
338 if (flags
& IP_VS_CONN_F_SEQ_MASK
) {
339 opt
= (struct ip_vs_sync_conn_options
*)&s
[1];
341 if (p
> buffer
+buflen
) {
342 IP_VS_ERR_RL("bogus conn options in sync message\n");
347 p
+= SIMPLE_CONN_SIZE
;
350 state
= ntohs(s
->state
);
351 if (!(flags
& IP_VS_CONN_F_TEMPLATE
)) {
352 pp
= ip_vs_proto_get(s
->protocol
);
354 IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n",
358 if (state
>= pp
->num_states
) {
359 IP_VS_DBG(2, "Invalid %s state %u in sync msg\n",
364 /* protocol in templates is not used for state/timeout */
367 IP_VS_DBG(2, "Invalid template state %u in sync msg\n",
373 if (!(flags
& IP_VS_CONN_F_TEMPLATE
))
374 cp
= ip_vs_conn_in_get(AF_INET
, s
->protocol
,
375 (union nf_inet_addr
*)&s
->caddr
,
377 (union nf_inet_addr
*)&s
->vaddr
,
380 cp
= ip_vs_ct_in_get(AF_INET
, s
->protocol
,
381 (union nf_inet_addr
*)&s
->caddr
,
383 (union nf_inet_addr
*)&s
->vaddr
,
387 * Find the appropriate destination for the connection.
388 * If it is not found the connection will remain unbound
391 dest
= ip_vs_find_dest(AF_INET
,
392 (union nf_inet_addr
*)&s
->daddr
,
394 (union nf_inet_addr
*)&s
->vaddr
,
397 /* Set the appropriate activity flag */
398 if (s
->protocol
== IPPROTO_TCP
) {
399 if (state
!= IP_VS_TCP_S_ESTABLISHED
)
400 flags
|= IP_VS_CONN_F_INACTIVE
;
402 flags
&= ~IP_VS_CONN_F_INACTIVE
;
403 } else if (s
->protocol
== IPPROTO_SCTP
) {
404 if (state
!= IP_VS_SCTP_S_ESTABLISHED
)
405 flags
|= IP_VS_CONN_F_INACTIVE
;
407 flags
&= ~IP_VS_CONN_F_INACTIVE
;
409 cp
= ip_vs_conn_new(AF_INET
, s
->protocol
,
410 (union nf_inet_addr
*)&s
->caddr
,
412 (union nf_inet_addr
*)&s
->vaddr
,
414 (union nf_inet_addr
*)&s
->daddr
,
418 atomic_dec(&dest
->refcnt
);
420 pr_err("ip_vs_conn_new failed\n");
423 } else if (!cp
->dest
) {
424 dest
= ip_vs_try_bind_dest(cp
);
426 atomic_dec(&dest
->refcnt
);
427 } else if ((cp
->dest
) && (cp
->protocol
== IPPROTO_TCP
) &&
428 (cp
->state
!= state
)) {
429 /* update active/inactive flag for the connection */
431 if (!(cp
->flags
& IP_VS_CONN_F_INACTIVE
) &&
432 (state
!= IP_VS_TCP_S_ESTABLISHED
)) {
433 atomic_dec(&dest
->activeconns
);
434 atomic_inc(&dest
->inactconns
);
435 cp
->flags
|= IP_VS_CONN_F_INACTIVE
;
436 } else if ((cp
->flags
& IP_VS_CONN_F_INACTIVE
) &&
437 (state
== IP_VS_TCP_S_ESTABLISHED
)) {
438 atomic_inc(&dest
->activeconns
);
439 atomic_dec(&dest
->inactconns
);
440 cp
->flags
&= ~IP_VS_CONN_F_INACTIVE
;
442 } else if ((cp
->dest
) && (cp
->protocol
== IPPROTO_SCTP
) &&
443 (cp
->state
!= state
)) {
445 if (!(cp
->flags
& IP_VS_CONN_F_INACTIVE
) &&
446 (state
!= IP_VS_SCTP_S_ESTABLISHED
)) {
447 atomic_dec(&dest
->activeconns
);
448 atomic_inc(&dest
->inactconns
);
449 cp
->flags
&= ~IP_VS_CONN_F_INACTIVE
;
454 memcpy(&cp
->in_seq
, opt
, sizeof(*opt
));
455 atomic_set(&cp
->in_pkts
, sysctl_ip_vs_sync_threshold
[0]);
457 cp
->old_state
= cp
->state
;
459 * We can not recover the right timeout for templates
460 * in all cases, we can not find the right fwmark
461 * virtual service. If needed, we can do it for
462 * non-fwmark persistent services.
464 if (!(flags
& IP_VS_CONN_F_TEMPLATE
) && pp
->timeout_table
)
465 cp
->timeout
= pp
->timeout_table
[state
];
467 cp
->timeout
= (3*60*HZ
);
474 * Setup loopback of outgoing multicasts on a sending socket
476 static void set_mcast_loop(struct sock
*sk
, u_char loop
)
478 struct inet_sock
*inet
= inet_sk(sk
);
480 /* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */
482 inet
->mc_loop
= loop
? 1 : 0;
487 * Specify TTL for outgoing multicasts on a sending socket
489 static void set_mcast_ttl(struct sock
*sk
, u_char ttl
)
491 struct inet_sock
*inet
= inet_sk(sk
);
493 /* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */
500 * Specify default interface for outgoing multicasts
502 static int set_mcast_if(struct sock
*sk
, char *ifname
)
504 struct net_device
*dev
;
505 struct inet_sock
*inet
= inet_sk(sk
);
507 if ((dev
= __dev_get_by_name(&init_net
, ifname
)) == NULL
)
510 if (sk
->sk_bound_dev_if
&& dev
->ifindex
!= sk
->sk_bound_dev_if
)
514 inet
->mc_index
= dev
->ifindex
;
515 /* inet->mc_addr = 0; */
523 * Set the maximum length of sync message according to the
524 * specified interface's MTU.
526 static int set_sync_mesg_maxlen(int sync_state
)
528 struct net_device
*dev
;
531 if (sync_state
== IP_VS_STATE_MASTER
) {
532 if ((dev
= __dev_get_by_name(&init_net
, ip_vs_master_mcast_ifn
)) == NULL
)
535 num
= (dev
->mtu
- sizeof(struct iphdr
) -
536 sizeof(struct udphdr
) -
537 SYNC_MESG_HEADER_LEN
- 20) / SIMPLE_CONN_SIZE
;
538 sync_send_mesg_maxlen
= SYNC_MESG_HEADER_LEN
+
539 SIMPLE_CONN_SIZE
* min(num
, MAX_CONNS_PER_SYNCBUFF
);
540 IP_VS_DBG(7, "setting the maximum length of sync sending "
541 "message %d.\n", sync_send_mesg_maxlen
);
542 } else if (sync_state
== IP_VS_STATE_BACKUP
) {
543 if ((dev
= __dev_get_by_name(&init_net
, ip_vs_backup_mcast_ifn
)) == NULL
)
546 sync_recv_mesg_maxlen
= dev
->mtu
-
547 sizeof(struct iphdr
) - sizeof(struct udphdr
);
548 IP_VS_DBG(7, "setting the maximum length of sync receiving "
549 "message %d.\n", sync_recv_mesg_maxlen
);
557 * Join a multicast group.
558 * the group is specified by a class D multicast address 224.0.0.0/4
559 * in the in_addr structure passed in as a parameter.
562 join_mcast_group(struct sock
*sk
, struct in_addr
*addr
, char *ifname
)
564 struct ip_mreqn mreq
;
565 struct net_device
*dev
;
568 memset(&mreq
, 0, sizeof(mreq
));
569 memcpy(&mreq
.imr_multiaddr
, addr
, sizeof(struct in_addr
));
571 if ((dev
= __dev_get_by_name(&init_net
, ifname
)) == NULL
)
573 if (sk
->sk_bound_dev_if
&& dev
->ifindex
!= sk
->sk_bound_dev_if
)
576 mreq
.imr_ifindex
= dev
->ifindex
;
579 ret
= ip_mc_join_group(sk
, &mreq
);
586 static int bind_mcastif_addr(struct socket
*sock
, char *ifname
)
588 struct net_device
*dev
;
590 struct sockaddr_in sin
;
592 if ((dev
= __dev_get_by_name(&init_net
, ifname
)) == NULL
)
595 addr
= inet_select_addr(dev
, 0, RT_SCOPE_UNIVERSE
);
597 pr_err("You probably need to specify IP address on "
598 "multicast interface.\n");
600 IP_VS_DBG(7, "binding socket with (%s) %pI4\n",
603 /* Now bind the socket with the address of multicast interface */
604 sin
.sin_family
= AF_INET
;
605 sin
.sin_addr
.s_addr
= addr
;
608 return sock
->ops
->bind(sock
, (struct sockaddr
*)&sin
, sizeof(sin
));
612 * Set up sending multicast socket over UDP
614 static struct socket
* make_send_sock(void)
619 /* First create a socket */
620 result
= sock_create_kern(PF_INET
, SOCK_DGRAM
, IPPROTO_UDP
, &sock
);
622 pr_err("Error during creation of socket; terminating\n");
623 return ERR_PTR(result
);
626 result
= set_mcast_if(sock
->sk
, ip_vs_master_mcast_ifn
);
628 pr_err("Error setting outbound mcast interface\n");
632 set_mcast_loop(sock
->sk
, 0);
633 set_mcast_ttl(sock
->sk
, 1);
635 result
= bind_mcastif_addr(sock
, ip_vs_master_mcast_ifn
);
637 pr_err("Error binding address of the mcast interface\n");
641 result
= sock
->ops
->connect(sock
, (struct sockaddr
*) &mcast_addr
,
642 sizeof(struct sockaddr
), 0);
644 pr_err("Error connecting to the multicast addr\n");
652 return ERR_PTR(result
);
657 * Set up receiving multicast socket over UDP
659 static struct socket
* make_receive_sock(void)
664 /* First create a socket */
665 result
= sock_create_kern(PF_INET
, SOCK_DGRAM
, IPPROTO_UDP
, &sock
);
667 pr_err("Error during creation of socket; terminating\n");
668 return ERR_PTR(result
);
671 /* it is equivalent to the REUSEADDR option in user-space */
672 sock
->sk
->sk_reuse
= 1;
674 result
= sock
->ops
->bind(sock
, (struct sockaddr
*) &mcast_addr
,
675 sizeof(struct sockaddr
));
677 pr_err("Error binding to the multicast addr\n");
681 /* join the multicast group */
682 result
= join_mcast_group(sock
->sk
,
683 (struct in_addr
*) &mcast_addr
.sin_addr
,
684 ip_vs_backup_mcast_ifn
);
686 pr_err("Error joining to the multicast group\n");
694 return ERR_PTR(result
);
699 ip_vs_send_async(struct socket
*sock
, const char *buffer
, const size_t length
)
701 struct msghdr msg
= {.msg_flags
= MSG_DONTWAIT
|MSG_NOSIGNAL
};
706 iov
.iov_base
= (void *)buffer
;
707 iov
.iov_len
= length
;
709 len
= kernel_sendmsg(sock
, &msg
, &iov
, 1, (size_t)(length
));
716 ip_vs_send_sync_msg(struct socket
*sock
, struct ip_vs_sync_mesg
*msg
)
722 /* Put size in network byte order */
723 msg
->size
= htons(msg
->size
);
725 if (ip_vs_send_async(sock
, (char *)msg
, msize
) != msize
)
726 pr_err("ip_vs_send_async error\n");
730 ip_vs_receive(struct socket
*sock
, char *buffer
, const size_t buflen
)
732 struct msghdr msg
= {NULL
,};
738 /* Receive a packet */
739 iov
.iov_base
= buffer
;
740 iov
.iov_len
= (size_t)buflen
;
742 len
= kernel_recvmsg(sock
, &msg
, &iov
, 1, buflen
, 0);
752 static int sync_thread_master(void *data
)
754 struct ip_vs_sync_thread_data
*tinfo
= data
;
755 struct ip_vs_sync_buff
*sb
;
757 pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
759 ip_vs_master_mcast_ifn
, ip_vs_master_syncid
);
761 while (!kthread_should_stop()) {
762 while ((sb
= sb_dequeue())) {
763 ip_vs_send_sync_msg(tinfo
->sock
, sb
->mesg
);
764 ip_vs_sync_buff_release(sb
);
767 /* check if entries stay in curr_sb for 2 seconds */
768 sb
= get_curr_sync_buff(2 * HZ
);
770 ip_vs_send_sync_msg(tinfo
->sock
, sb
->mesg
);
771 ip_vs_sync_buff_release(sb
);
774 schedule_timeout_interruptible(HZ
);
777 /* clean up the sync_buff queue */
778 while ((sb
=sb_dequeue())) {
779 ip_vs_sync_buff_release(sb
);
782 /* clean up the current sync_buff */
783 if ((sb
= get_curr_sync_buff(0))) {
784 ip_vs_sync_buff_release(sb
);
787 /* release the sending multicast socket */
788 sock_release(tinfo
->sock
);
795 static int sync_thread_backup(void *data
)
797 struct ip_vs_sync_thread_data
*tinfo
= data
;
800 pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
802 ip_vs_backup_mcast_ifn
, ip_vs_backup_syncid
);
804 while (!kthread_should_stop()) {
805 wait_event_interruptible(*tinfo
->sock
->sk
->sk_sleep
,
806 !skb_queue_empty(&tinfo
->sock
->sk
->sk_receive_queue
)
807 || kthread_should_stop());
809 /* do we have data now? */
810 while (!skb_queue_empty(&(tinfo
->sock
->sk
->sk_receive_queue
))) {
811 len
= ip_vs_receive(tinfo
->sock
, tinfo
->buf
,
812 sync_recv_mesg_maxlen
);
814 pr_err("receiving message error\n");
818 /* disable bottom half, because it accesses the data
819 shared by softirq while getting/creating conns */
821 ip_vs_process_message(tinfo
->buf
, len
);
826 /* release the receiving multicast socket */
827 sock_release(tinfo
->sock
);
835 int start_sync_thread(int state
, char *mcast_ifn
, __u8 syncid
)
837 struct ip_vs_sync_thread_data
*tinfo
;
838 struct task_struct
**realtask
, *task
;
840 char *name
, *buf
= NULL
;
841 int (*threadfn
)(void *data
);
842 int result
= -ENOMEM
;
844 IP_VS_DBG(7, "%s(): pid %d\n", __func__
, task_pid_nr(current
));
845 IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
846 sizeof(struct ip_vs_sync_conn
));
848 if (state
== IP_VS_STATE_MASTER
) {
849 if (sync_master_thread
)
852 strlcpy(ip_vs_master_mcast_ifn
, mcast_ifn
,
853 sizeof(ip_vs_master_mcast_ifn
));
854 ip_vs_master_syncid
= syncid
;
855 realtask
= &sync_master_thread
;
856 name
= "ipvs_syncmaster";
857 threadfn
= sync_thread_master
;
858 sock
= make_send_sock();
859 } else if (state
== IP_VS_STATE_BACKUP
) {
860 if (sync_backup_thread
)
863 strlcpy(ip_vs_backup_mcast_ifn
, mcast_ifn
,
864 sizeof(ip_vs_backup_mcast_ifn
));
865 ip_vs_backup_syncid
= syncid
;
866 realtask
= &sync_backup_thread
;
867 name
= "ipvs_syncbackup";
868 threadfn
= sync_thread_backup
;
869 sock
= make_receive_sock();
875 result
= PTR_ERR(sock
);
879 set_sync_mesg_maxlen(state
);
880 if (state
== IP_VS_STATE_BACKUP
) {
881 buf
= kmalloc(sync_recv_mesg_maxlen
, GFP_KERNEL
);
886 tinfo
= kmalloc(sizeof(*tinfo
), GFP_KERNEL
);
893 task
= kthread_run(threadfn
, tinfo
, name
);
895 result
= PTR_ERR(task
);
901 ip_vs_sync_state
|= state
;
903 /* increase the module use count */
904 ip_vs_use_count_inc();
919 int stop_sync_thread(int state
)
921 IP_VS_DBG(7, "%s(): pid %d\n", __func__
, task_pid_nr(current
));
923 if (state
== IP_VS_STATE_MASTER
) {
924 if (!sync_master_thread
)
927 pr_info("stopping master sync thread %d ...\n",
928 task_pid_nr(sync_master_thread
));
931 * The lock synchronizes with sb_queue_tail(), so that we don't
932 * add sync buffers to the queue, when we are already in
933 * progress of stopping the master sync daemon.
936 spin_lock_bh(&ip_vs_sync_lock
);
937 ip_vs_sync_state
&= ~IP_VS_STATE_MASTER
;
938 spin_unlock_bh(&ip_vs_sync_lock
);
939 kthread_stop(sync_master_thread
);
940 sync_master_thread
= NULL
;
941 } else if (state
== IP_VS_STATE_BACKUP
) {
942 if (!sync_backup_thread
)
945 pr_info("stopping backup sync thread %d ...\n",
946 task_pid_nr(sync_backup_thread
));
948 ip_vs_sync_state
&= ~IP_VS_STATE_BACKUP
;
949 kthread_stop(sync_backup_thread
);
950 sync_backup_thread
= NULL
;
955 /* decrease the module use count */
956 ip_vs_use_count_dec();