/*
 * net/netfilter/ipvs/ip_vs_ctl.c
 * Snapshot from the "IPVS: netns awareness to ip_vs_sync" conversion series.
 * (source blob: 03f86312b4bbb6bdaaad592e11354cf56446f36b)
 */
/*
 * IPVS         An implementation of the IP virtual server support for the
 *              LINUX operating system.  IPVS is now implemented as a module
 *              over the NetFilter framework. IPVS can be used to build a
 *              high-performance and highly available server based on a
 *              cluster of servers.
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *              Peter Kese <peter.kese@ijs.si>
 *              Julian Anastasov <ja@ssi.bg>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Changes:
 *
 */
#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/fs.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
#include <linux/workqueue.h>
#include <linux/swap.h>
#include <linux/seq_file.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/mutex.h>

#include <net/net_namespace.h>
#include <linux/nsproxy.h>
#include <net/ip.h>
#ifdef CONFIG_IP_VS_IPV6
#include <net/ipv6.h>
#include <net/ip6_route.h>
#endif
#include <net/route.h>
#include <net/sock.h>
#include <net/genetlink.h>

#include <asm/uaccess.h>

#include <net/ip_vs.h>
/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
static DEFINE_MUTEX(__ip_vs_mutex);

/* lock for service table */
static DEFINE_RWLOCK(__ip_vs_svc_lock);

/* lock for table with the real services */
static DEFINE_RWLOCK(__ip_vs_rs_lock);

/* lock for state and timeout tables */
static DEFINE_SPINLOCK(ip_vs_securetcp_lock);

/* lock for drop entry handling */
static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);

/* lock for drop packet handling */
static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);

/* 1/rate drop and drop-entry variables */
int ip_vs_drop_rate = 0;
int ip_vs_drop_counter = 0;
static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);

/* number of virtual services */
static int ip_vs_num_services = 0;

/* sysctl variables */
static int sysctl_ip_vs_drop_entry = 0;
static int sysctl_ip_vs_drop_packet = 0;
static int sysctl_ip_vs_secure_tcp = 0;
static int sysctl_ip_vs_amemthresh = 1024;
static int sysctl_ip_vs_am_droprate = 10;
int sysctl_ip_vs_cache_bypass = 0;
int sysctl_ip_vs_expire_nodest_conn = 0;
int sysctl_ip_vs_expire_quiescent_template = 0;
int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
int sysctl_ip_vs_nat_icmp_send = 0;
#ifdef CONFIG_IP_VS_NFCT
int sysctl_ip_vs_conntrack;
#endif
int sysctl_ip_vs_snat_reroute = 1;
int sysctl_ip_vs_sync_ver = 1;		/* Default version of sync proto */
#ifdef CONFIG_IP_VS_DEBUG
/* Debug verbosity, settable through the ip_vs debug_level sysctl. */
static int sysctl_ip_vs_debug_level = 0;

/* Return the current IPVS debug level, used by the IP_VS_DBG macros. */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
#ifdef CONFIG_IP_VS_IPV6
/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
/*
 * Return 1 if @addr is a local IPv6 address (the route for it goes via
 * a loopback device), 0 otherwise.
 *
 * Fix: ip6_route_output() returns a dst with a reference held; the
 * previous code never dropped it, leaking a route reference on every
 * call.  Release the dst before returning.
 *
 * NOTE(review): still resolves in init_net; should take the caller's
 * netns once the namespace conversion of this file is complete.
 */
static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
{
	int islocal = 0;
	struct rt6_info *rt;
	struct flowi fl = {
		.oif = 0,
		.fl6_dst = *addr,
		.fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
	};

	rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
	if (rt) {
		if (rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
			islocal = 1;
		dst_release(&rt->dst);
	}

	return islocal;
}
#endif
/*
 *	update_defense_level is called from keventd and from sysctl,
 *	so it needs to protect itself from softirqs
 *
 *	Drives the three defense strategies (drop_entry, drop_packet,
 *	secure_tcp) through their automatic state machines:
 *	  0 = always off, 3 = always on,
 *	  1 = off, arm automatically under memory pressure (moves to 2),
 *	  2 = armed, disarm automatically when pressure goes away (back to 1).
 */
static void update_defense_level(struct netns_ipvs *ipvs)
{
	struct sysinfo i;
	static int old_secure_tcp = 0;
	int availmem;
	int nomem;
	int to_change = -1;

	/* we only count free and buffered memory (in pages) */
	si_meminfo(&i);
	availmem = i.freeram + i.bufferram;
	/* however in linux 2.5 the i.bufferram is total page cache size,
	   we need adjust it */
	/* si_swapinfo(&i); */
	/* availmem = availmem - (i.totalswap - i.freeswap); */

	nomem = (availmem < sysctl_ip_vs_amemthresh);

	local_bh_disable();

	/* drop_entry */
	spin_lock(&__ip_vs_dropentry_lock);
	switch (sysctl_ip_vs_drop_entry) {
	case 0:
		atomic_set(&ip_vs_dropentry, 0);
		break;
	case 1:
		if (nomem) {
			atomic_set(&ip_vs_dropentry, 1);
			sysctl_ip_vs_drop_entry = 2;
		} else {
			atomic_set(&ip_vs_dropentry, 0);
		}
		break;
	case 2:
		if (nomem) {
			atomic_set(&ip_vs_dropentry, 1);
		} else {
			atomic_set(&ip_vs_dropentry, 0);
			sysctl_ip_vs_drop_entry = 1;
		}
		break;
	case 3:
		atomic_set(&ip_vs_dropentry, 1);
		break;
	}
	spin_unlock(&__ip_vs_dropentry_lock);

	/* drop_packet */
	spin_lock(&__ip_vs_droppacket_lock);
	switch (sysctl_ip_vs_drop_packet) {
	case 0:
		ip_vs_drop_rate = 0;
		break;
	case 1:
		if (nomem) {
			/* denominator >= 1 here since nomem implies
			   availmem < amemthresh */
			ip_vs_drop_rate = ip_vs_drop_counter
				= sysctl_ip_vs_amemthresh /
				(sysctl_ip_vs_amemthresh-availmem);
			sysctl_ip_vs_drop_packet = 2;
		} else {
			ip_vs_drop_rate = 0;
		}
		break;
	case 2:
		if (nomem) {
			ip_vs_drop_rate = ip_vs_drop_counter
				= sysctl_ip_vs_amemthresh /
				(sysctl_ip_vs_amemthresh-availmem);
		} else {
			ip_vs_drop_rate = 0;
			sysctl_ip_vs_drop_packet = 1;
		}
		break;
	case 3:
		ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
		break;
	}
	spin_unlock(&__ip_vs_droppacket_lock);

	/* secure_tcp */
	spin_lock(&ip_vs_securetcp_lock);
	switch (sysctl_ip_vs_secure_tcp) {
	case 0:
		if (old_secure_tcp >= 2)
			to_change = 0;
		break;
	case 1:
		if (nomem) {
			if (old_secure_tcp < 2)
				to_change = 1;
			sysctl_ip_vs_secure_tcp = 2;
		} else {
			if (old_secure_tcp >= 2)
				to_change = 0;
		}
		break;
	case 2:
		if (nomem) {
			if (old_secure_tcp < 2)
				to_change = 1;
		} else {
			if (old_secure_tcp >= 2)
				to_change = 0;
			sysctl_ip_vs_secure_tcp = 1;
		}
		break;
	case 3:
		if (old_secure_tcp < 2)
			to_change = 1;
		break;
	}
	old_secure_tcp = sysctl_ip_vs_secure_tcp;
	/* only swap the protocol timeout tables on a real 0<->2 transition */
	if (to_change >= 0)
		ip_vs_protocol_timeout_change(ipvs,
					      sysctl_ip_vs_secure_tcp > 1);
	spin_unlock(&ip_vs_securetcp_lock);

	local_bh_enable();
}
252 * Timer for checking the defense
254 #define DEFENSE_TIMER_PERIOD 1*HZ
255 static void defense_work_handler(struct work_struct *work);
256 static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
258 static void defense_work_handler(struct work_struct *work)
260 struct net *net = &init_net;
261 struct netns_ipvs *ipvs = net_ipvs(net);
263 update_defense_level(ipvs);
264 if (atomic_read(&ip_vs_dropentry))
265 ip_vs_random_dropentry();
267 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
/*
 * Pin the IPVS module while a user-space configuration operation is in
 * progress; returns 0 if the module is already being unloaded.
 */
int
ip_vs_use_count_inc(void)
{
	return try_module_get(THIS_MODULE);
}
/* Release the module reference taken by ip_vs_use_count_inc(). */
void
ip_vs_use_count_dec(void)
{
	module_put(THIS_MODULE);
}
/*
 *	Hash table: for virtual service lookups
 */
#define IP_VS_SVC_TAB_BITS 8
#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)

/* the service table hashed by <protocol, addr, port> */
static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
/* the service table hashed by fwmark */
static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];

/*
 *	Trash for destinations: removed dests still referenced by
 *	connection entries are parked here until their refcnt drops.
 */
static LIST_HEAD(ip_vs_dest_trash);

/*
 *	FTP & NULL virtual service counters
 */
static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
/*
 *	Returns hash value for virtual service.
 *	The netns pointer is folded in (shifted to skip the low bits that
 *	are always zero for slab-allocated pointers) so that equal
 *	services in different namespaces spread over the table.
 */
static inline unsigned
ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
		  const union nf_inet_addr *addr, __be16 port)
{
	register unsigned porth = ntohs(port);
	__be32 addr_fold = addr->ip;

#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6)
		addr_fold = addr->ip6[0]^addr->ip6[1]^
			    addr->ip6[2]^addr->ip6[3];
#endif
	addr_fold ^= ((size_t)net>>8);

	return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
		& IP_VS_SVC_TAB_MASK;
}
/*
 *	Returns hash value of fwmark for virtual service lookup.
 *	The netns pointer is folded in so equal marks in different
 *	namespaces hash differently.
 */
static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
{
	return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
}
/*
 *	Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
 *	or in the ip_vs_svc_fwm_table by fwmark.
 *	Should be called with locked tables.
 *	Returns 0 if the service was already hashed, 1 on success.
 */
static int ip_vs_svc_hash(struct ip_vs_service *svc)
{
	unsigned hash;

	if (svc->flags & IP_VS_SVC_F_HASHED) {
		pr_err("%s(): request for already hashed, called from %pF\n",
		       __func__, __builtin_return_address(0));
		return 0;
	}

	if (svc->fwmark == 0) {
		/*
		 *  Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
		 */
		hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
					 &svc->addr, svc->port);
		list_add(&svc->s_list, &ip_vs_svc_table[hash]);
	} else {
		/*
		 *  Hash it by fwmark in svc_fwm_table
		 */
		hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
		list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
	}

	svc->flags |= IP_VS_SVC_F_HASHED;
	/* increase its refcnt because it is referenced by the svc table */
	atomic_inc(&svc->refcnt);
	return 1;
}
/*
 *	Unhashes a service from svc_table / svc_fwm_table.
 *	Should be called with locked tables.
 *	Returns 0 if the service was not hashed, 1 on success.
 */
static int ip_vs_svc_unhash(struct ip_vs_service *svc)
{
	if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
		pr_err("%s(): request for unhash flagged, called from %pF\n",
		       __func__, __builtin_return_address(0));
		return 0;
	}

	if (svc->fwmark == 0) {
		/* Remove it from the svc_table table */
		list_del(&svc->s_list);
	} else {
		/* Remove it from the svc_fwm_table table */
		list_del(&svc->f_list);
	}

	svc->flags &= ~IP_VS_SVC_F_HASHED;
	/* drop the reference held by the service table */
	atomic_dec(&svc->refcnt);
	return 1;
}
/*
 *	Get service by {netns, proto,addr,port} in the service table.
 *	Caller must hold __ip_vs_svc_lock; no reference is taken.
 */
static inline struct ip_vs_service *
__ip_vs_service_find(struct net *net, int af, __u16 protocol,
		     const union nf_inet_addr *vaddr, __be16 vport)
{
	unsigned hash;
	struct ip_vs_service *svc;

	/* Check for "full" addressed entries */
	hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);

	list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
		if ((svc->af == af)
		    && ip_vs_addr_equal(af, &svc->addr, vaddr)
		    && (svc->port == vport)
		    && (svc->protocol == protocol)
		    && net_eq(svc->net, net)) {
			/* HIT */
			return svc;
		}
	}

	return NULL;
}
/*
 *	Get service by {fwmark} in the service table.
 *	Caller must hold __ip_vs_svc_lock; no reference is taken.
 */
static inline struct ip_vs_service *
__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
{
	unsigned hash;
	struct ip_vs_service *svc;

	/* Check for fwmark addressed entries */
	hash = ip_vs_svc_fwm_hashkey(net, fwmark);

	list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
		if (svc->fwmark == fwmark && svc->af == af
		    && net_eq(svc->net, net)) {
			/* HIT */
			return svc;
		}
	}

	return NULL;
}
/*
 * Look up a virtual service by fwmark first, then by
 * <protocol,addr,port>, with FTP-data and catch-all (port 0)
 * fallbacks.  On a hit, svc->usecnt is bumped; the caller must
 * release it with ip_vs_service_put().
 */
struct ip_vs_service *
ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
		  const union nf_inet_addr *vaddr, __be16 vport)
{
	struct ip_vs_service *svc;

	read_lock(&__ip_vs_svc_lock);

	/*
	 *	Check the table hashed by fwmark first
	 */
	svc = __ip_vs_svc_fwm_find(net, af, fwmark);
	if (fwmark && svc)
		goto out;

	/*
	 *	Check the table hashed by <protocol,addr,port>
	 *	for "full" addressed entries
	 */
	svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);

	if (svc == NULL
	    && protocol == IPPROTO_TCP
	    && atomic_read(&ip_vs_ftpsvc_counter)
	    && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
		/*
		 * Check if ftp service entry exists, the packet
		 * might belong to FTP data connections.
		 */
		svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
	}

	if (svc == NULL
	    && atomic_read(&ip_vs_nullsvc_counter)) {
		/*
		 * Check if the catch-all port (port zero) exists
		 */
		svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
	}

  out:
	if (svc)
		atomic_inc(&svc->usecnt);
	read_unlock(&__ip_vs_svc_lock);

	IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
		      fwmark, ip_vs_proto_name(protocol),
		      IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
		      svc ? "hit" : "not hit");

	return svc;
}
/* Bind @dest to @svc, taking a service reference for the back-pointer. */
static inline void
__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
{
	atomic_inc(&svc->refcnt);
	dest->svc = svc;
}
/*
 * Drop @dest's back-pointer to its service and release the reference;
 * frees the service when this was the last reference.
 */
static void
__ip_vs_unbind_svc(struct ip_vs_dest *dest)
{
	struct ip_vs_service *svc = dest->svc;

	dest->svc = NULL;
	if (atomic_dec_and_test(&svc->refcnt)) {
		IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
			      svc->fwmark,
			      IP_VS_DBG_ADDR(svc->af, &svc->addr),
			      ntohs(svc->port), atomic_read(&svc->usecnt));
		kfree(svc);
	}
}
/*
 *	Returns hash value for real service
 */
static inline unsigned ip_vs_rs_hashkey(int af,
					const union nf_inet_addr *addr,
					__be16 port)
{
	register unsigned porth = ntohs(port);
	__be32 addr_fold = addr->ip;

#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6)
		addr_fold = addr->ip6[0]^addr->ip6[1]^
			    addr->ip6[2]^addr->ip6[3];
#endif

	return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
		& IP_VS_RTAB_MASK;
}
/*
 *	Hashes ip_vs_dest in rs_table by <proto,addr,port>.
 *	should be called with locked tables.
 *	Returns 0 if the dest is already hashed, 1 on success.
 */
static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
{
	unsigned hash;

	if (!list_empty(&dest->d_list)) {
		return 0;
	}

	/*
	 *	Hash by proto,addr,port,
	 *	which are the parameters of the real service.
	 */
	hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);

	list_add(&dest->d_list, &ipvs->rs_table[hash]);

	return 1;
}
/*
 *	UNhashes ip_vs_dest from rs_table.
 *	should be called with locked tables.
 */
static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
{
	/*
	 *	Remove it from the rs_table table.
	 *	Re-init d_list so !list_empty() keeps meaning "hashed".
	 */
	if (!list_empty(&dest->d_list)) {
		list_del(&dest->d_list);
		INIT_LIST_HEAD(&dest->d_list);
	}

	return 1;
}
/*
 *	Lookup real service by <proto,addr,port> in the real service table.
 *	Returns the first match; no reference is taken on the dest.
 */
struct ip_vs_dest *
ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
			  const union nf_inet_addr *daddr,
			  __be16 dport)
{
	struct netns_ipvs *ipvs = net_ipvs(net);
	unsigned hash;
	struct ip_vs_dest *dest;

	/*
	 *	Check for "full" addressed entries
	 *	Return the first found entry
	 */
	hash = ip_vs_rs_hashkey(af, daddr, dport);

	read_lock(&__ip_vs_rs_lock);
	list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
		if ((dest->af == af)
		    && ip_vs_addr_equal(af, &dest->addr, daddr)
		    && (dest->port == dport)
		    && ((dest->protocol == protocol) ||
			dest->vfwmark)) {
			/* HIT */
			read_unlock(&__ip_vs_rs_lock);
			return dest;
		}
	}
	read_unlock(&__ip_vs_rs_lock);

	return NULL;
}
/*
 *	Lookup destination by {addr,port} in the given service.
 *	No reference is taken on the returned dest.
 */
static struct ip_vs_dest *
ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
		  __be16 dport)
{
	struct ip_vs_dest *dest;

	/*
	 * Find the destination for the given service
	 */
	list_for_each_entry(dest, &svc->destinations, n_list) {
		if ((dest->af == svc->af)
		    && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
		    && (dest->port == dport)) {
			/* HIT */
			return dest;
		}
	}

	return NULL;
}
/*
 * Find destination by {daddr,dport,vaddr,protocol}
 * Created to be used in ip_vs_process_message() in
 * the backup synchronization daemon. It finds the
 * destination to be bound to the received connection
 * on the backup.
 *
 * ip_vs_lookup_real_service() looked promising, but
 * seems not working as expected.
 *
 * On a hit the dest's refcnt is incremented; the caller owns that
 * reference.
 */
struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
				   const union nf_inet_addr *daddr,
				   __be16 dport,
				   const union nf_inet_addr *vaddr,
				   __be16 vport, __u16 protocol, __u32 fwmark)
{
	struct ip_vs_dest *dest;
	struct ip_vs_service *svc;

	svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
	if (!svc)
		return NULL;
	dest = ip_vs_lookup_dest(svc, daddr, dport);
	if (dest)
		atomic_inc(&dest->refcnt);
	ip_vs_service_put(svc);
	return dest;
}
/*
 *  Lookup dest by {svc,addr,port} in the destination trash.
 *  The destination trash is used to hold the destinations that are removed
 *  from the service table but are still referenced by some conn entries.
 *  The reason to add the destination trash is when the dest is temporary
 *  down (either by administrator or by monitor program), the dest can be
 *  picked back from the trash, the remaining connections to the dest can
 *  continue, and the counting information of the dest is also useful for
 *  scheduling.
 *
 *  While scanning, entries whose refcnt has dropped to 1 (only the trash
 *  holds them) are purged opportunistically.
 */
static struct ip_vs_dest *
ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
		     __be16 dport)
{
	struct ip_vs_dest *dest, *nxt;

	/*
	 * Find the destination in trash
	 */
	list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
		IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
			      "dest->refcnt=%d\n",
			      dest->vfwmark,
			      IP_VS_DBG_ADDR(svc->af, &dest->addr),
			      ntohs(dest->port),
			      atomic_read(&dest->refcnt));
		if (dest->af == svc->af &&
		    ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
		    dest->port == dport &&
		    dest->vfwmark == svc->fwmark &&
		    dest->protocol == svc->protocol &&
		    (svc->fwmark ||
		     (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
		      dest->vport == svc->port))) {
			/* HIT */
			return dest;
		}

		/*
		 * Try to purge the destination from trash if not referenced
		 */
		if (atomic_read(&dest->refcnt) == 1) {
			IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
				      "from trash\n",
				      dest->vfwmark,
				      IP_VS_DBG_ADDR(svc->af, &dest->addr),
				      ntohs(dest->port));
			list_del(&dest->n_list);
			ip_vs_dst_reset(dest);
			__ip_vs_unbind_svc(dest);
			kfree(dest);
		}
	}

	return NULL;
}
734 * Clean up all the destinations in the trash
735 * Called by the ip_vs_control_cleanup()
737 * When the ip_vs_control_clearup is activated by ipvs module exit,
738 * the service tables must have been flushed and all the connections
739 * are expired, and the refcnt of each destination in the trash must
740 * be 1, so we simply release them here.
742 static void ip_vs_trash_cleanup(void)
744 struct ip_vs_dest *dest, *nxt;
746 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
747 list_del(&dest->n_list);
748 ip_vs_dst_reset(dest);
749 __ip_vs_unbind_svc(dest);
750 kfree(dest);
/* Zero the user-visible counters and the rate estimator state. */
static void
ip_vs_zero_stats(struct ip_vs_stats *stats)
{
	spin_lock_bh(&stats->lock);

	memset(&stats->ustats, 0, sizeof(stats->ustats));
	ip_vs_zero_estimator(stats);

	spin_unlock_bh(&stats->lock);
}
/*
 *	Update a destination in the given service.
 *	Sets weight/flags/thresholds from @udest, (re)binds the dest to
 *	@svc, and when @add is set links the dest into the service's
 *	destination list and starts its rate estimator.
 */
static void
__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
		    struct ip_vs_dest_user_kern *udest, int add)
{
	struct netns_ipvs *ipvs = net_ipvs(svc->net);
	int conn_flags;

	/* set the weight and the flags */
	atomic_set(&dest->weight, udest->weight);
	conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
	conn_flags |= IP_VS_CONN_F_INACTIVE;

	/* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
	if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
		conn_flags |= IP_VS_CONN_F_NOOUTPUT;
	} else {
		/*
		 *    Put the real service in rs_table if not present.
		 *    For now only for NAT!
		 */
		write_lock_bh(&__ip_vs_rs_lock);
		ip_vs_rs_hash(ipvs, dest);
		write_unlock_bh(&__ip_vs_rs_lock);
	}
	atomic_set(&dest->conn_flags, conn_flags);

	/* bind the service */
	if (!dest->svc) {
		__ip_vs_bind_svc(dest, svc);
	} else {
		if (dest->svc != svc) {
			/* moving between services: rebind and restart stats */
			__ip_vs_unbind_svc(dest);
			ip_vs_zero_stats(&dest->stats);
			__ip_vs_bind_svc(dest, svc);
		}
	}

	/* set the dest status flags */
	dest->flags |= IP_VS_DEST_F_AVAILABLE;

	if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
		dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
	dest->u_threshold = udest->u_threshold;
	dest->l_threshold = udest->l_threshold;

	/* drop any cached route so the new settings take effect */
	spin_lock(&dest->dst_lock);
	ip_vs_dst_reset(dest);
	spin_unlock(&dest->dst_lock);

	if (add)
		ip_vs_new_estimator(svc->net, &dest->stats);

	write_lock_bh(&__ip_vs_svc_lock);

	/* Wait until all other svc users go away */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);

	if (add) {
		list_add(&dest->n_list, &svc->destinations);
		svc->num_dests++;
	}

	/* call the update_service, because server weight may be changed */
	if (svc->scheduler->update_service)
		svc->scheduler->update_service(svc);

	write_unlock_bh(&__ip_vs_svc_lock);
}
840 * Create a destination for the given service
842 static int
843 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
844 struct ip_vs_dest **dest_p)
846 struct ip_vs_dest *dest;
847 unsigned atype;
849 EnterFunction(2);
851 #ifdef CONFIG_IP_VS_IPV6
852 if (svc->af == AF_INET6) {
853 atype = ipv6_addr_type(&udest->addr.in6);
854 if ((!(atype & IPV6_ADDR_UNICAST) ||
855 atype & IPV6_ADDR_LINKLOCAL) &&
856 !__ip_vs_addr_is_local_v6(&udest->addr.in6))
857 return -EINVAL;
858 } else
859 #endif
861 atype = inet_addr_type(&init_net, udest->addr.ip);
862 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
863 return -EINVAL;
866 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
867 if (dest == NULL) {
868 pr_err("%s(): no memory.\n", __func__);
869 return -ENOMEM;
872 dest->af = svc->af;
873 dest->protocol = svc->protocol;
874 dest->vaddr = svc->addr;
875 dest->vport = svc->port;
876 dest->vfwmark = svc->fwmark;
877 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
878 dest->port = udest->port;
880 atomic_set(&dest->activeconns, 0);
881 atomic_set(&dest->inactconns, 0);
882 atomic_set(&dest->persistconns, 0);
883 atomic_set(&dest->refcnt, 1);
885 INIT_LIST_HEAD(&dest->d_list);
886 spin_lock_init(&dest->dst_lock);
887 spin_lock_init(&dest->stats.lock);
888 __ip_vs_update_dest(svc, dest, udest, 1);
890 *dest_p = dest;
892 LeaveFunction(2);
893 return 0;
/*
 *	Add a destination into an existing service.
 *	Revives a matching dest from the trash when possible, otherwise
 *	allocates a new one.  Returns 0 or a -errno.
 */
static int
ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
	struct ip_vs_dest *dest;
	union nf_inet_addr daddr;
	__be16 dport = udest->port;
	int ret;

	EnterFunction(2);

	if (udest->weight < 0) {
		pr_err("%s(): server weight less than zero\n", __func__);
		return -ERANGE;
	}

	if (udest->l_threshold > udest->u_threshold) {
		pr_err("%s(): lower threshold is higher than upper threshold\n",
			__func__);
		return -ERANGE;
	}

	ip_vs_addr_copy(svc->af, &daddr, &udest->addr);

	/*
	 * Check if the dest already exists in the list
	 */
	dest = ip_vs_lookup_dest(svc, &daddr, dport);

	if (dest != NULL) {
		IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
		return -EEXIST;
	}

	/*
	 * Check if the dest already exists in the trash and
	 * is from the same service
	 */
	dest = ip_vs_trash_get_dest(svc, &daddr, dport);

	if (dest != NULL) {
		IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
			      "dest->refcnt=%d, service %u/%s:%u\n",
			      IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
			      atomic_read(&dest->refcnt),
			      dest->vfwmark,
			      IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
			      ntohs(dest->vport));

		/*
		 * Get the destination from the trash
		 */
		list_del(&dest->n_list);

		__ip_vs_update_dest(svc, dest, udest, 1);
		ret = 0;
	} else {
		/*
		 * Allocate and initialize the dest structure
		 */
		ret = ip_vs_new_dest(svc, udest, &dest);
	}
	LeaveFunction(2);

	return ret;
}
/*
 *	Edit a destination in the given service.
 *	Validates weight/thresholds, then applies @udest to the existing
 *	dest via __ip_vs_update_dest().  Returns 0 or a -errno.
 */
static int
ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
	struct ip_vs_dest *dest;
	union nf_inet_addr daddr;
	__be16 dport = udest->port;

	EnterFunction(2);

	if (udest->weight < 0) {
		pr_err("%s(): server weight less than zero\n", __func__);
		return -ERANGE;
	}

	if (udest->l_threshold > udest->u_threshold) {
		pr_err("%s(): lower threshold is higher than upper threshold\n",
			__func__);
		return -ERANGE;
	}

	ip_vs_addr_copy(svc->af, &daddr, &udest->addr);

	/*
	 *  Lookup the destination list
	 */
	dest = ip_vs_lookup_dest(svc, &daddr, dport);

	if (dest == NULL) {
		IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
		return -ENOENT;
	}

	__ip_vs_update_dest(svc, dest, udest, 0);
	LeaveFunction(2);

	return 0;
}
/*
 *	Delete a destination (must be already unlinked from the service)
 */
static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
{
	ip_vs_kill_estimator(net, &dest->stats);

	/*
	 *  Remove it from the d-linked list with the real services.
	 */
	write_lock_bh(&__ip_vs_rs_lock);
	ip_vs_rs_unhash(dest);
	write_unlock_bh(&__ip_vs_rs_lock);

	/*
	 *  Decrease the refcnt of the dest, and free the dest
	 *  if nobody refers to it (refcnt=0). Otherwise, throw
	 *  the destination into the trash.
	 */
	if (atomic_dec_and_test(&dest->refcnt)) {
		IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
			      dest->vfwmark,
			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
			      ntohs(dest->port));
		ip_vs_dst_reset(dest);
		/* simply decrease svc->refcnt here, let the caller check
		   and release the service if nobody refers to it.
		   Only user context can release destination and service,
		   and only one user context can update virtual service at a
		   time, so the operation here is OK */
		atomic_dec(&dest->svc->refcnt);
		kfree(dest);
	} else {
		IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
			      "dest->refcnt=%d\n",
			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
			      ntohs(dest->port),
			      atomic_read(&dest->refcnt));
		/* the trash list itself holds a reference */
		list_add(&dest->n_list, &ip_vs_dest_trash);
		atomic_inc(&dest->refcnt);
	}
}
/*
 *	Unlink a destination from the given service.
 *	@svcupd: when non-zero, notify the scheduler of the change.
 */
static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
				struct ip_vs_dest *dest,
				int svcupd)
{
	dest->flags &= ~IP_VS_DEST_F_AVAILABLE;

	/*
	 *  Remove it from the d-linked destination list.
	 */
	list_del(&dest->n_list);
	svc->num_dests--;

	/*
	 *  Call the update_service function of its scheduler
	 */
	if (svcupd && svc->scheduler->update_service)
		svc->scheduler->update_service(svc);
}
/*
 *	Delete a destination server in the given service
 */
static int
ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
	struct ip_vs_dest *dest;
	struct net *net = svc->net;
	__be16 dport = udest->port;

	EnterFunction(2);

	dest = ip_vs_lookup_dest(svc, &udest->addr, dport);

	if (dest == NULL) {
		IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
		return -ENOENT;
	}

	write_lock_bh(&__ip_vs_svc_lock);

	/*
	 *	Wait until all other svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);

	/*
	 *	Unlink dest from the service
	 */
	__ip_vs_unlink_dest(svc, dest, 1);

	write_unlock_bh(&__ip_vs_svc_lock);

	/*
	 *	Delete the destination
	 */
	__ip_vs_del_dest(net, dest);

	LeaveFunction(2);

	return 0;
}
/*
 *	Add a service into the service hash table.
 *	On success stores the new service in *svc_p and returns 0;
 *	on failure unwinds scheduler/pe/module references and returns
 *	a -errno.
 */
static int
ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
		  struct ip_vs_service **svc_p)
{
	int ret = 0;
	struct ip_vs_scheduler *sched = NULL;
	struct ip_vs_pe *pe = NULL;
	struct ip_vs_service *svc = NULL;

	/* increase the module use count */
	ip_vs_use_count_inc();

	/* Lookup the scheduler by 'u->sched_name' */
	sched = ip_vs_scheduler_get(u->sched_name);
	if (sched == NULL) {
		pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
		ret = -ENOENT;
		goto out_err;
	}

	if (u->pe_name && *u->pe_name) {
		pe = ip_vs_pe_getbyname(u->pe_name);
		if (pe == NULL) {
			pr_info("persistence engine module ip_vs_pe_%s "
				"not found\n", u->pe_name);
			ret = -ENOENT;
			goto out_err;
		}
	}

#ifdef CONFIG_IP_VS_IPV6
	if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
		ret = -EINVAL;
		goto out_err;
	}
#endif

	svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
	if (svc == NULL) {
		IP_VS_DBG(1, "%s(): no memory\n", __func__);
		ret = -ENOMEM;
		goto out_err;
	}

	/* I'm the first user of the service */
	atomic_set(&svc->usecnt, 0);
	atomic_set(&svc->refcnt, 0);

	svc->af = u->af;
	svc->protocol = u->protocol;
	ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
	svc->port = u->port;
	svc->fwmark = u->fwmark;
	svc->flags = u->flags;
	svc->timeout = u->timeout * HZ;
	svc->netmask = u->netmask;
	svc->net = net;

	INIT_LIST_HEAD(&svc->destinations);
	rwlock_init(&svc->sched_lock);
	spin_lock_init(&svc->stats.lock);

	/* Bind the scheduler */
	ret = ip_vs_bind_scheduler(svc, sched);
	if (ret)
		goto out_err;
	/* ownership moved into svc; don't put it in the error path */
	sched = NULL;

	/* Bind the ct retriever */
	ip_vs_bind_pe(svc, pe);
	pe = NULL;

	/* Update the virtual service counters */
	if (svc->port == FTPPORT)
		atomic_inc(&ip_vs_ftpsvc_counter);
	else if (svc->port == 0)
		atomic_inc(&ip_vs_nullsvc_counter);

	ip_vs_new_estimator(net, &svc->stats);

	/* Count only IPv4 services for old get/setsockopt interface */
	if (svc->af == AF_INET)
		ip_vs_num_services++;

	/* Hash the service into the service table */
	write_lock_bh(&__ip_vs_svc_lock);
	ip_vs_svc_hash(svc);
	write_unlock_bh(&__ip_vs_svc_lock);

	*svc_p = svc;
	return 0;

 out_err:
	if (svc != NULL) {
		ip_vs_unbind_scheduler(svc);
		if (svc->inc) {
			local_bh_disable();
			ip_vs_app_inc_put(svc->inc);
			local_bh_enable();
		}
		kfree(svc);
	}
	ip_vs_scheduler_put(sched);
	ip_vs_pe_put(pe);

	/* decrease the module use count */
	ip_vs_use_count_dec();

	return ret;
}
/*
 *	Edit a service and bind it with a new scheduler
 */
static int
ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
{
	struct ip_vs_scheduler *sched, *old_sched;
	struct ip_vs_pe *pe = NULL, *old_pe = NULL;
	int ret = 0;

	/*
	 * Lookup the scheduler, by 'u->sched_name'
	 */
	sched = ip_vs_scheduler_get(u->sched_name);
	if (sched == NULL) {
		pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
		return -ENOENT;
	}
	/* old_sched tracks which scheduler reference must be dropped on exit */
	old_sched = sched;

	if (u->pe_name && *u->pe_name) {
		pe = ip_vs_pe_getbyname(u->pe_name);
		if (pe == NULL) {
			pr_info("persistence engine module ip_vs_pe_%s "
				"not found\n", u->pe_name);
			ret = -ENOENT;
			goto out;
		}
		old_pe = pe;
	}

#ifdef CONFIG_IP_VS_IPV6
	if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
		ret = -EINVAL;
		goto out;
	}
#endif

	write_lock_bh(&__ip_vs_svc_lock);

	/*
	 * Wait until all other svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);

	/*
	 * Set the flags and timeout value
	 */
	svc->flags = u->flags | IP_VS_SVC_F_HASHED;
	svc->timeout = u->timeout * HZ;
	svc->netmask = u->netmask;

	old_sched = svc->scheduler;
	if (sched != old_sched) {
		/*
		 * Unbind the old scheduler
		 */
		if ((ret = ip_vs_unbind_scheduler(svc))) {
			old_sched = sched;
			goto out_unlock;
		}

		/*
		 * Bind the new scheduler
		 */
		if ((ret = ip_vs_bind_scheduler(svc, sched))) {
			/*
			 * If ip_vs_bind_scheduler fails, restore the old
			 * scheduler.
			 * The main reason of failure is out of memory.
			 *
			 * The question is if the old scheduler can be
			 * restored all the time. TODO: if it cannot be
			 * restored some time, we must delete the service,
			 * otherwise the system may crash.
			 */
			ip_vs_bind_scheduler(svc, old_sched);
			old_sched = sched;
			goto out_unlock;
		}
	}

	old_pe = svc->pe;
	if (pe != old_pe) {
		ip_vs_unbind_pe(svc);
		ip_vs_bind_pe(svc, pe);
	}

  out_unlock:
	write_unlock_bh(&__ip_vs_svc_lock);
  out:
	ip_vs_scheduler_put(old_sched);
	ip_vs_pe_put(old_pe);
	return ret;
}
1333 * Delete a service from the service list
1334 * - The service must be unlinked, unlocked and not referenced!
1335 * - We are called under _bh lock
1337 static void __ip_vs_del_service(struct ip_vs_service *svc)
1339 struct ip_vs_dest *dest, *nxt;
1340 struct ip_vs_scheduler *old_sched;
1341 struct ip_vs_pe *old_pe;
1343 pr_info("%s: enter\n", __func__);
1345 /* Count only IPv4 services for old get/setsockopt interface */
1346 if (svc->af == AF_INET)
1347 ip_vs_num_services--;
1349 ip_vs_kill_estimator(svc->net, &svc->stats);
1351 /* Unbind scheduler */
1352 old_sched = svc->scheduler;
1353 ip_vs_unbind_scheduler(svc);
1354 ip_vs_scheduler_put(old_sched);
1356 /* Unbind persistence engine */
1357 old_pe = svc->pe;
1358 ip_vs_unbind_pe(svc);
1359 ip_vs_pe_put(old_pe);
1361 /* Unbind app inc */
1362 if (svc->inc) {
1363 ip_vs_app_inc_put(svc->inc);
1364 svc->inc = NULL;
1368 * Unlink the whole destination list
1370 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1371 __ip_vs_unlink_dest(svc, dest, 0);
1372 __ip_vs_del_dest(svc->net, dest);
1376 * Update the virtual service counters
1378 if (svc->port == FTPPORT)
1379 atomic_dec(&ip_vs_ftpsvc_counter);
1380 else if (svc->port == 0)
1381 atomic_dec(&ip_vs_nullsvc_counter);
1384 * Free the service if nobody refers to it
1386 if (atomic_read(&svc->refcnt) == 0) {
1387 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1388 svc->fwmark,
1389 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1390 ntohs(svc->port), atomic_read(&svc->usecnt));
1391 kfree(svc);
1394 /* decrease the module use count */
1395 ip_vs_use_count_dec();
/*
 * ip_vs_unlink_service - unhash a service, wait for users, then delete it.
 * Takes __ip_vs_svc_lock for writing (bh-disabled) around the whole
 * unhash/wait/delete sequence.
 */
1399 * Unlink a service from list and try to delete it if its refcnt reached 0
1401 static void ip_vs_unlink_service(struct ip_vs_service *svc)
1404 * Unhash it from the service table
1406 write_lock_bh(&__ip_vs_svc_lock);
1408 ip_vs_svc_unhash(svc);
1411 * Wait until all the svc users go away.
1413 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1415 __ip_vs_del_service(svc);
1417 write_unlock_bh(&__ip_vs_svc_lock);
/*
 * ip_vs_del_service - delete @svc, or fail if it was not found.
 * NOTE(review): returns -EEXIST for a NULL service, where -ESRCH would
 * read more naturally; kept as-is since userspace may depend on it.
 */
1421 * Delete a service from the service list
1423 static int ip_vs_del_service(struct ip_vs_service *svc)
1425 if (svc == NULL)
1426 return -EEXIST;
1427 ip_vs_unlink_service(svc);
1429 return 0;
/*
 * ip_vs_flush - delete every virtual service belonging to @net.
 * Walks both hash tables (protocol/addr/port and fwmark) and unlinks
 * only entries whose netns matches; always returns 0.
 */
1434 * Flush all the virtual services
1436 static int ip_vs_flush(struct net *net)
1438 int idx;
1439 struct ip_vs_service *svc, *nxt;
1442 * Flush the service table hashed by <netns,protocol,addr,port>
/* _safe iteration: ip_vs_unlink_service removes svc from the list. */
1444 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1445 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1446 s_list) {
1447 if (net_eq(svc->net, net))
1448 ip_vs_unlink_service(svc);
1453 * Flush the service table hashed by fwmark
1455 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1456 list_for_each_entry_safe(svc, nxt,
1457 &ip_vs_svc_fwm_table[idx], f_list) {
1458 if (net_eq(svc->net, net))
1459 ip_vs_unlink_service(svc);
1463 return 0;
/*
 * ip_vs_zero_service - reset the stats of @svc and of each of its
 * destinations, under the service write lock. Always returns 0.
 */
1468 * Zero counters in a service or all services
1470 static int ip_vs_zero_service(struct ip_vs_service *svc)
1472 struct ip_vs_dest *dest;
1474 write_lock_bh(&__ip_vs_svc_lock);
1475 list_for_each_entry(dest, &svc->destinations, n_list) {
1476 ip_vs_zero_stats(&dest->stats);
1478 ip_vs_zero_stats(&svc->stats);
1479 write_unlock_bh(&__ip_vs_svc_lock);
1480 return 0;
/*
 * ip_vs_zero_all - zero the stats of every service in @net (both hash
 * tables) plus the global ip_vs_stats. Always returns 0.
 */
1483 static int ip_vs_zero_all(struct net *net)
1485 int idx;
1486 struct ip_vs_service *svc;
1488 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1489 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1490 if (net_eq(svc->net, net))
1491 ip_vs_zero_service(svc);
1495 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1496 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1497 if (net_eq(svc->net, net))
1498 ip_vs_zero_service(svc);
/* NOTE(review): ip_vs_stats is global, not per-netns here — zeroing it
 * affects all namespaces; presumably addressed by later netns work. */
1502 ip_vs_zero_stats(&ip_vs_stats);
1503 return 0;
/*
 * proc_do_defense_mode - sysctl handler for the defense-mode knobs
 * (drop_entry, drop_packet, secure_tcp). Accepts only 0..3; any other
 * written value is reverted, a valid change triggers an immediate
 * update_defense_level() for the writer's netns.
 */
1507 static int
1508 proc_do_defense_mode(ctl_table *table, int write,
1509 void __user *buffer, size_t *lenp, loff_t *ppos)
1511 struct net *net = current->nsproxy->net_ns;
1512 int *valp = table->data;
/* Snapshot the old value so an out-of-range write can be undone. */
1513 int val = *valp;
1514 int rc;
1516 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1517 if (write && (*valp != val)) {
1518 if ((*valp < 0) || (*valp > 3)) {
1519 /* Restore the correct value */
1520 *valp = val;
1521 } else {
1522 update_defense_level(net_ipvs(net));
1525 return rc;
/*
 * proc_do_sync_threshold - sysctl handler for the sync_threshold pair
 * (threshold, period). Rejects writes where either value is negative or
 * threshold >= period by restoring the previous pair.
 * NOTE(review): the backup/restore is not atomic against readers of the
 * two ints — presumably acceptable for this sysctl; verify upstream.
 */
1529 static int
1530 proc_do_sync_threshold(ctl_table *table, int write,
1531 void __user *buffer, size_t *lenp, loff_t *ppos)
1533 int *valp = table->data;
1534 int val[2];
1535 int rc;
1537 /* backup the value first */
1538 memcpy(val, valp, sizeof(val));
1540 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1541 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1542 /* Restore the correct value */
1543 memcpy(valp, val, sizeof(val));
1545 return rc;
/*
 * proc_do_sync_mode - sysctl handler for sync_version. Accepts only 0
 * or 1; a valid change switches the sync protocol via
 * ip_vs_sync_switch_mode() for the writer's netns.
 */
1548 static int
1549 proc_do_sync_mode(ctl_table *table, int write,
1550 void __user *buffer, size_t *lenp, loff_t *ppos)
1552 int *valp = table->data;
1553 int val = *valp;
1554 int rc;
1556 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1557 if (write && (*valp != val)) {
1558 if ((*valp < 0) || (*valp > 1)) {
1559 /* Restore the correct value */
1560 *valp = val;
1561 } else {
1562 struct net *net = current->nsproxy->net_ns;
/* NOTE(review): passes the OLD value 'val', matching
 * ip_vs_sync_switch_mode()'s expectation here — confirm. */
1563 ip_vs_sync_switch_mode(net, val);
1566 return rc;
/*
 * vs_vars - the IPVS sysctl table, registered under
 * /proc/sys/net/ipv4/vs/. Plain integers use proc_dointvec; the
 * defense/sync knobs use the validating handlers defined above.
 */
1570 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1573 static struct ctl_table vs_vars[] = {
1575 .procname = "amemthresh",
1576 .data = &sysctl_ip_vs_amemthresh,
1577 .maxlen = sizeof(int),
1578 .mode = 0644,
1579 .proc_handler = proc_dointvec,
1581 #ifdef CONFIG_IP_VS_DEBUG
1583 .procname = "debug_level",
1584 .data = &sysctl_ip_vs_debug_level,
1585 .maxlen = sizeof(int),
1586 .mode = 0644,
1587 .proc_handler = proc_dointvec,
1589 #endif
1591 .procname = "am_droprate",
1592 .data = &sysctl_ip_vs_am_droprate,
1593 .maxlen = sizeof(int),
1594 .mode = 0644,
1595 .proc_handler = proc_dointvec,
1598 .procname = "drop_entry",
1599 .data = &sysctl_ip_vs_drop_entry,
1600 .maxlen = sizeof(int),
1601 .mode = 0644,
1602 .proc_handler = proc_do_defense_mode,
1605 .procname = "drop_packet",
1606 .data = &sysctl_ip_vs_drop_packet,
1607 .maxlen = sizeof(int),
1608 .mode = 0644,
1609 .proc_handler = proc_do_defense_mode,
1611 #ifdef CONFIG_IP_VS_NFCT
1613 .procname = "conntrack",
1614 .data = &sysctl_ip_vs_conntrack,
1615 .maxlen = sizeof(int),
1616 .mode = 0644,
1617 .proc_handler = &proc_dointvec,
1619 #endif
1621 .procname = "secure_tcp",
1622 .data = &sysctl_ip_vs_secure_tcp,
1623 .maxlen = sizeof(int),
1624 .mode = 0644,
1625 .proc_handler = proc_do_defense_mode,
1628 .procname = "snat_reroute",
1629 .data = &sysctl_ip_vs_snat_reroute,
1630 .maxlen = sizeof(int),
1631 .mode = 0644,
1632 .proc_handler = &proc_dointvec,
1635 .procname = "sync_version",
1636 .data = &sysctl_ip_vs_sync_ver,
1637 .maxlen = sizeof(int),
1638 .mode = 0644,
1639 .proc_handler = &proc_do_sync_mode,
/* The following DoS-timeout entries are compiled out (#if 0): dead
 * configuration kept for reference only. */
1641 #if 0
1643 .procname = "timeout_established",
1644 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1645 .maxlen = sizeof(int),
1646 .mode = 0644,
1647 .proc_handler = proc_dointvec_jiffies,
1650 .procname = "timeout_synsent",
1651 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1652 .maxlen = sizeof(int),
1653 .mode = 0644,
1654 .proc_handler = proc_dointvec_jiffies,
1657 .procname = "timeout_synrecv",
1658 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1659 .maxlen = sizeof(int),
1660 .mode = 0644,
1661 .proc_handler = proc_dointvec_jiffies,
1664 .procname = "timeout_finwait",
1665 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1666 .maxlen = sizeof(int),
1667 .mode = 0644,
1668 .proc_handler = proc_dointvec_jiffies,
1671 .procname = "timeout_timewait",
1672 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1673 .maxlen = sizeof(int),
1674 .mode = 0644,
1675 .proc_handler = proc_dointvec_jiffies,
1678 .procname = "timeout_close",
1679 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1680 .maxlen = sizeof(int),
1681 .mode = 0644,
1682 .proc_handler = proc_dointvec_jiffies,
1685 .procname = "timeout_closewait",
1686 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1687 .maxlen = sizeof(int),
1688 .mode = 0644,
1689 .proc_handler = proc_dointvec_jiffies,
1692 .procname = "timeout_lastack",
1693 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1694 .maxlen = sizeof(int),
1695 .mode = 0644,
1696 .proc_handler = proc_dointvec_jiffies,
1699 .procname = "timeout_listen",
1700 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1701 .maxlen = sizeof(int),
1702 .mode = 0644,
1703 .proc_handler = proc_dointvec_jiffies,
1706 .procname = "timeout_synack",
1707 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1708 .maxlen = sizeof(int),
1709 .mode = 0644,
1710 .proc_handler = proc_dointvec_jiffies,
1713 .procname = "timeout_udp",
1714 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1715 .maxlen = sizeof(int),
1716 .mode = 0644,
1717 .proc_handler = proc_dointvec_jiffies,
1720 .procname = "timeout_icmp",
1721 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1722 .maxlen = sizeof(int),
1723 .mode = 0644,
1724 .proc_handler = proc_dointvec_jiffies,
1726 #endif
1728 .procname = "cache_bypass",
1729 .data = &sysctl_ip_vs_cache_bypass,
1730 .maxlen = sizeof(int),
1731 .mode = 0644,
1732 .proc_handler = proc_dointvec,
1735 .procname = "expire_nodest_conn",
1736 .data = &sysctl_ip_vs_expire_nodest_conn,
1737 .maxlen = sizeof(int),
1738 .mode = 0644,
1739 .proc_handler = proc_dointvec,
1742 .procname = "expire_quiescent_template",
1743 .data = &sysctl_ip_vs_expire_quiescent_template,
1744 .maxlen = sizeof(int),
1745 .mode = 0644,
1746 .proc_handler = proc_dointvec,
1749 .procname = "sync_threshold",
1750 .data = &sysctl_ip_vs_sync_threshold,
/* maxlen covers the whole two-int array, not a single int. */
1751 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1752 .mode = 0644,
1753 .proc_handler = proc_do_sync_threshold,
1756 .procname = "nat_icmp_send",
1757 .data = &sysctl_ip_vs_nat_icmp_send,
1758 .maxlen = sizeof(int),
1759 .mode = 0644,
1760 .proc_handler = proc_dointvec,
/* Sysctl path net/ipv4/vs, exported for other IPVS modules; the
 * registered table header is kept in sysctl_header for unregistration. */
1765 const struct ctl_path net_vs_ctl_path[] = {
1766 { .procname = "net", },
1767 { .procname = "ipv4", },
1768 { .procname = "vs", },
1771 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1773 static struct ctl_table_header * sysctl_header;
1775 #ifdef CONFIG_PROC_FS
/* Iterator state for the /proc seq_file walk over both service tables:
 * which table we are in and the current hash bucket. */
1777 struct ip_vs_iter {
1778 struct seq_net_private p; /* Do not move this, netns depends upon it*/
1779 struct list_head *table;
1780 int bucket;
/*
 * ip_vs_fwd_name - map a connection's forwarding-method flag bits to the
 * fixed name printed in /proc ("Local"/"Tunnel"/"Route"/"Masq").
 */
1784 * Write the contents of the VS rule table to a PROCfs file.
1785 * (It is kept just for backward compatibility)
1787 static inline const char *ip_vs_fwd_name(unsigned flags)
1789 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1790 case IP_VS_CONN_F_LOCALNODE:
1791 return "Local";
1792 case IP_VS_CONN_F_TUNNEL:
1793 return "Tunnel";
1794 case IP_VS_CONN_F_DROUTE:
1795 return "Route";
1796 default:
1797 return "Masq";
/*
 * ip_vs_info_array - position the seq_file iterator on the pos-th
 * service of this netns, scanning the protocol table first and then the
 * fwmark table. Records table+bucket in the iterator; NULL when past
 * the end. Caller holds __ip_vs_svc_lock for reading.
 */
1802 /* Get the Nth entry in the two lists */
1803 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1805 struct net *net = seq_file_net(seq);
1806 struct ip_vs_iter *iter = seq->private;
1807 int idx;
1808 struct ip_vs_service *svc;
1810 /* look in hash by protocol */
1811 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1812 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1813 if (net_eq(svc->net, net) && pos-- == 0) {
1814 iter->table = ip_vs_svc_table;
1815 iter->bucket = idx;
1816 return svc;
1821 /* keep looking in fwmark */
1822 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1823 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1824 if (net_eq(svc->net, net) && pos-- == 0) {
1825 iter->table = ip_vs_svc_fwm_table;
1826 iter->bucket = idx;
1827 return svc;
1832 return NULL;
/* seq_file .start: take the service read lock; *pos == 0 yields the
 * header token, otherwise the (pos-1)-th service. */
1835 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1836 __acquires(__ip_vs_svc_lock)
1839 read_lock_bh(&__ip_vs_svc_lock);
1840 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
/*
 * seq_file .next: advance within the current bucket, then across
 * buckets, and finally hand over from the protocol table to the fwmark
 * table. NOTE(review): unlike ip_vs_info_array, the bucket scans here do
 * not filter on net_eq — presumably filtered elsewhere; confirm.
 */
1844 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1846 struct list_head *e;
1847 struct ip_vs_iter *iter;
1848 struct ip_vs_service *svc;
1850 ++*pos;
1851 if (v == SEQ_START_TOKEN)
1852 return ip_vs_info_array(seq,0);
1854 svc = v;
1855 iter = seq->private;
1857 if (iter->table == ip_vs_svc_table) {
1858 /* next service in table hashed by protocol */
1859 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1860 return list_entry(e, struct ip_vs_service, s_list);
/* Current bucket exhausted: first entry of the next non-empty bucket. */
1863 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1864 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1865 s_list) {
1866 return svc;
/* Protocol table done: restart at bucket 0 of the fwmark table. */
1870 iter->table = ip_vs_svc_fwm_table;
1871 iter->bucket = -1;
1872 goto scan_fwmark;
1875 /* next service in hashed by fwmark */
1876 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1877 return list_entry(e, struct ip_vs_service, f_list);
1879 scan_fwmark:
1880 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1881 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1882 f_list)
1883 return svc;
1886 return NULL;
/* seq_file .stop: drop the read lock taken in .start. */
1889 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1890 __releases(__ip_vs_svc_lock)
1892 read_unlock_bh(&__ip_vs_svc_lock);
/*
 * seq_file .show: print either the /proc header (for SEQ_START_TOKEN)
 * or one service line followed by one line per destination, in the
 * legacy ipvsadm-compatible format.
 */
1896 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1898 if (v == SEQ_START_TOKEN) {
1899 seq_printf(seq,
1900 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1901 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1902 seq_puts(seq,
1903 "Prot LocalAddress:Port Scheduler Flags\n");
1904 seq_puts(seq,
1905 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1906 } else {
1907 const struct ip_vs_service *svc = v;
1908 const struct ip_vs_iter *iter = seq->private;
1909 const struct ip_vs_dest *dest;
/* Address-based services print proto/addr/port; fwmark services
 * print "FWM <mark>". */
1911 if (iter->table == ip_vs_svc_table) {
1912 #ifdef CONFIG_IP_VS_IPV6
1913 if (svc->af == AF_INET6)
1914 seq_printf(seq, "%s [%pI6]:%04X %s ",
1915 ip_vs_proto_name(svc->protocol),
1916 &svc->addr.in6,
1917 ntohs(svc->port),
1918 svc->scheduler->name);
1919 else
1920 #endif
1921 seq_printf(seq, "%s %08X:%04X %s %s ",
1922 ip_vs_proto_name(svc->protocol),
1923 ntohl(svc->addr.ip),
1924 ntohs(svc->port),
1925 svc->scheduler->name,
1926 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1927 } else {
1928 seq_printf(seq, "FWM %08X %s %s",
1929 svc->fwmark, svc->scheduler->name,
1930 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1933 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1934 seq_printf(seq, "persistent %d %08X\n",
1935 svc->timeout,
1936 ntohl(svc->netmask));
1937 else
1938 seq_putc(seq, '\n');
/* One indented line per real server. */
1940 list_for_each_entry(dest, &svc->destinations, n_list) {
1941 #ifdef CONFIG_IP_VS_IPV6
1942 if (dest->af == AF_INET6)
1943 seq_printf(seq,
1944 " -> [%pI6]:%04X"
1945 " %-7s %-6d %-10d %-10d\n",
1946 &dest->addr.in6,
1947 ntohs(dest->port),
1948 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1949 atomic_read(&dest->weight),
1950 atomic_read(&dest->activeconns),
1951 atomic_read(&dest->inactconns));
1952 else
1953 #endif
1954 seq_printf(seq,
1955 " -> %08X:%04X "
1956 "%-7s %-6d %-10d %-10d\n",
1957 ntohl(dest->addr.ip),
1958 ntohs(dest->port),
1959 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1960 atomic_read(&dest->weight),
1961 atomic_read(&dest->activeconns),
1962 atomic_read(&dest->inactconns));
1966 return 0;
/* seq_file plumbing for /proc/net/ip_vs: ops table, netns-aware open
 * (allocates struct ip_vs_iter as private data), and file_operations. */
1969 static const struct seq_operations ip_vs_info_seq_ops = {
1970 .start = ip_vs_info_seq_start,
1971 .next = ip_vs_info_seq_next,
1972 .stop = ip_vs_info_seq_stop,
1973 .show = ip_vs_info_seq_show,
1976 static int ip_vs_info_open(struct inode *inode, struct file *file)
1978 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
1979 sizeof(struct ip_vs_iter))
1982 static const struct file_operations ip_vs_info_fops = {
1983 .owner = THIS_MODULE,
1984 .open = ip_vs_info_open,
1985 .read = seq_read,
1986 .llseek = seq_lseek,
1987 .release = seq_release_private,
1990 #endif
/* Global IPVS statistics; the embedded spinlock guards ustats. */
1992 struct ip_vs_stats ip_vs_stats = {
1993 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1996 #ifdef CONFIG_PROC_FS
/*
 * ip_vs_stats_show - /proc/net/ip_vs_stats: dump the global totals and
 * per-second rates in hex, under the stats spinlock.
 */
1997 static int ip_vs_stats_show(struct seq_file *seq, void *v)
2000 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2001 seq_puts(seq,
2002 " Total Incoming Outgoing Incoming Outgoing\n");
2003 seq_printf(seq,
2004 " Conns Packets Packets Bytes Bytes\n");
2006 spin_lock_bh(&ip_vs_stats.lock);
2007 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
2008 ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
2009 (unsigned long long) ip_vs_stats.ustats.inbytes,
2010 (unsigned long long) ip_vs_stats.ustats.outbytes);
2012 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2013 seq_puts(seq,
2014 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2015 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
2016 ip_vs_stats.ustats.cps,
2017 ip_vs_stats.ustats.inpps,
2018 ip_vs_stats.ustats.outpps,
2019 ip_vs_stats.ustats.inbps,
2020 ip_vs_stats.ustats.outbps);
2021 spin_unlock_bh(&ip_vs_stats.lock);
2023 return 0;
/* single_open_net wrapper and file_operations for /proc/net/ip_vs_stats. */
2026 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2028 return single_open_net(inode, file, ip_vs_stats_show);
2031 static const struct file_operations ip_vs_stats_fops = {
2032 .owner = THIS_MODULE,
2033 .open = ip_vs_stats_seq_open,
2034 .read = seq_read,
2035 .llseek = seq_lseek,
2036 .release = single_release,
2039 #endif
/*
 * ip_vs_set_timeout - apply user-supplied TCP/TCP-FIN/UDP timeouts (in
 * seconds, converted to jiffies) to @net's per-protocol timeout tables.
 * A zero value means "leave unchanged". Always returns 0.
 */
2042 * Set timeout values for tcp tcpfin udp in the timeout_table.
2044 static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
2046 struct ip_vs_proto_data *pd;
2048 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2049 u->tcp_timeout,
2050 u->tcp_fin_timeout,
2051 u->udp_timeout);
2053 #ifdef CONFIG_IP_VS_PROTO_TCP
2054 if (u->tcp_timeout) {
2055 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2056 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
2057 = u->tcp_timeout * HZ;
2060 if (u->tcp_fin_timeout) {
2061 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2062 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
2063 = u->tcp_fin_timeout * HZ;
2065 #endif
2067 #ifdef CONFIG_IP_VS_PROTO_UDP
2068 if (u->udp_timeout) {
2069 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2070 pd->timeout_table[IP_VS_UDP_S_NORMAL]
2071 = u->udp_timeout * HZ;
2073 #endif
2074 return 0;
/* Expected setsockopt argument length per IP_VS_SO_SET_* command; used
 * by do_ip_vs_set_ctl to validate the user-supplied 'len'. */
2078 #define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2079 #define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
2080 #define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
2081 sizeof(struct ip_vs_dest_user))
2082 #define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2083 #define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
2084 #define MAX_ARG_LEN SVCDEST_ARG_LEN
2086 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2087 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2088 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2089 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2090 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2091 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2092 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2093 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2094 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2095 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2096 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2097 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
/*
 * ip_vs_copy_usvc_compat - widen the legacy (IPv4-only) sockopt service
 * struct into the kernel-internal representation. sched_name is shared
 * by pointer (no deep copy); pe_name/af stay at their zeroed/AF_INET
 * defaults for the old interface.
 */
2100 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2101 struct ip_vs_service_user *usvc_compat)
2103 memset(usvc, 0, sizeof(*usvc));
2105 usvc->af = AF_INET;
2106 usvc->protocol = usvc_compat->protocol;
2107 usvc->addr.ip = usvc_compat->addr;
2108 usvc->port = usvc_compat->port;
2109 usvc->fwmark = usvc_compat->fwmark;
2111 /* Deep copy of sched_name is not needed here */
2112 usvc->sched_name = usvc_compat->sched_name;
2114 usvc->flags = usvc_compat->flags;
2115 usvc->timeout = usvc_compat->timeout;
2116 usvc->netmask = usvc_compat->netmask;
/* Widen the legacy (IPv4-only) sockopt destination struct into the
 * kernel-internal representation; unset fields stay zeroed. */
2119 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2120 struct ip_vs_dest_user *udest_compat)
2122 memset(udest, 0, sizeof(*udest));
2124 udest->addr.ip = udest_compat->addr;
2125 udest->port = udest_compat->port;
2126 udest->conn_flags = udest_compat->conn_flags;
2127 udest->weight = udest_compat->weight;
2128 udest->u_threshold = udest_compat->u_threshold;
2129 udest->l_threshold = udest_compat->l_threshold;
/*
 * do_ip_vs_set_ctl - legacy setsockopt entry point for all
 * IP_VS_SO_SET_* commands. Validates capability and argument length,
 * copies the argument in, then dispatches under __ip_vs_mutex:
 * flush/timeout/sync-daemon commands directly, service/dest commands
 * after a service lookup. Module use count is held for the duration.
 */
2132 static int
2133 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2135 struct net *net = sock_net(sk);
2136 int ret;
2137 unsigned char arg[MAX_ARG_LEN];
2138 struct ip_vs_service_user *usvc_compat;
2139 struct ip_vs_service_user_kern usvc;
2140 struct ip_vs_service *svc;
2141 struct ip_vs_dest_user *udest_compat;
2142 struct ip_vs_dest_user_kern udest;
2144 if (!capable(CAP_NET_ADMIN))
2145 return -EPERM;
2147 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2148 return -EINVAL;
2149 if (len < 0 || len > MAX_ARG_LEN)
2150 return -EINVAL;
/* Exact-length check per command table. */
2151 if (len != set_arglen[SET_CMDID(cmd)]) {
2152 pr_err("set_ctl: len %u != %u\n",
2153 len, set_arglen[SET_CMDID(cmd)]);
2154 return -EINVAL;
2157 if (copy_from_user(arg, user, len) != 0)
2158 return -EFAULT;
2160 /* increase the module use count */
2161 ip_vs_use_count_inc();
2163 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2164 ret = -ERESTARTSYS;
2165 goto out_dec;
/* Commands that need no service lookup. */
2168 if (cmd == IP_VS_SO_SET_FLUSH) {
2169 /* Flush the virtual service */
2170 ret = ip_vs_flush(net);
2171 goto out_unlock;
2172 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2173 /* Set timeout values for (tcp tcpfin udp) */
2174 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
2175 goto out_unlock;
2176 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2177 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2178 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2179 dm->syncid);
2180 goto out_unlock;
2181 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2182 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2183 ret = stop_sync_thread(net, dm->state);
2184 goto out_unlock;
/* For SVCDEST commands the dest struct follows the service struct. */
2187 usvc_compat = (struct ip_vs_service_user *)arg;
2188 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2190 /* We only use the new structs internally, so copy userspace compat
2191 * structs to extended internal versions */
2192 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2193 ip_vs_copy_udest_compat(&udest, udest_compat);
2195 if (cmd == IP_VS_SO_SET_ZERO) {
2196 /* if no service address is set, zero counters in all */
2197 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2198 ret = ip_vs_zero_all(net);
2199 goto out_unlock;
2203 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2204 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2205 usvc.protocol != IPPROTO_SCTP) {
2206 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2207 usvc.protocol, &usvc.addr.ip,
2208 ntohs(usvc.port), usvc.sched_name);
2209 ret = -EFAULT;
2210 goto out_unlock;
2213 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2214 if (usvc.fwmark == 0)
2215 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
2216 &usvc.addr, usvc.port);
2217 else
2218 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
/* Every command except ADD requires an existing, matching service. */
2220 if (cmd != IP_VS_SO_SET_ADD
2221 && (svc == NULL || svc->protocol != usvc.protocol)) {
2222 ret = -ESRCH;
2223 goto out_unlock;
2226 switch (cmd) {
2227 case IP_VS_SO_SET_ADD:
2228 if (svc != NULL)
2229 ret = -EEXIST;
2230 else
2231 ret = ip_vs_add_service(net, &usvc, &svc);
2232 break;
2233 case IP_VS_SO_SET_EDIT:
2234 ret = ip_vs_edit_service(svc, &usvc);
2235 break;
2236 case IP_VS_SO_SET_DEL:
2237 ret = ip_vs_del_service(svc);
2238 if (!ret)
2239 goto out_unlock;
2240 break;
2241 case IP_VS_SO_SET_ZERO:
2242 ret = ip_vs_zero_service(svc);
2243 break;
2244 case IP_VS_SO_SET_ADDDEST:
2245 ret = ip_vs_add_dest(svc, &udest);
2246 break;
2247 case IP_VS_SO_SET_EDITDEST:
2248 ret = ip_vs_edit_dest(svc, &udest);
2249 break;
2250 case IP_VS_SO_SET_DELDEST:
2251 ret = ip_vs_del_dest(svc, &udest);
2252 break;
2253 default:
2254 ret = -EINVAL;
2257 out_unlock:
2258 mutex_unlock(&__ip_vs_mutex);
2259 out_dec:
2260 /* decrease the module use count */
2261 ip_vs_use_count_dec();
2263 return ret;
/* Snapshot src->ustats into the userspace stats struct under the
 * stats spinlock. */
2267 static void
2268 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2270 spin_lock_bh(&src->lock);
2271 memcpy(dst, &src->ustats, sizeof(*dst));
2272 spin_unlock_bh(&src->lock);
/* Fill a legacy (IPv4-only) ip_vs_service_entry from a kernel service;
 * timeout is converted back from jiffies to seconds. */
2275 static void
2276 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2278 dst->protocol = src->protocol;
2279 dst->addr = src->addr.ip;
2280 dst->port = src->port;
2281 dst->fwmark = src->fwmark;
2282 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2283 dst->flags = src->flags;
2284 dst->timeout = src->timeout / HZ;
2285 dst->netmask = src->netmask;
2286 dst->num_dests = src->num_dests;
2287 ip_vs_copy_stats(&dst->stats, &src->stats);
/*
 * __ip_vs_get_service_entries - copy up to get->num_services IPv4
 * services of @net to userspace, walking the protocol table then the
 * fwmark table. Returns 0 or -EFAULT.
 */
2290 static inline int
2291 __ip_vs_get_service_entries(struct net *net,
2292 const struct ip_vs_get_services *get,
2293 struct ip_vs_get_services __user *uptr)
2295 int idx, count=0;
2296 struct ip_vs_service *svc;
2297 struct ip_vs_service_entry entry;
2298 int ret = 0;
2300 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2301 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2302 /* Only expose IPv4 entries to old interface */
2303 if (svc->af != AF_INET || !net_eq(svc->net, net))
2304 continue;
2306 if (count >= get->num_services)
2307 goto out;
/* memset avoids leaking kernel stack padding to userspace. */
2308 memset(&entry, 0, sizeof(entry));
2309 ip_vs_copy_service(&entry, svc);
2310 if (copy_to_user(&uptr->entrytable[count],
2311 &entry, sizeof(entry))) {
2312 ret = -EFAULT;
2313 goto out;
2315 count++;
2319 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2320 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2321 /* Only expose IPv4 entries to old interface */
2322 if (svc->af != AF_INET || !net_eq(svc->net, net))
2323 continue;
2325 if (count >= get->num_services)
2326 goto out;
2327 memset(&entry, 0, sizeof(entry));
2328 ip_vs_copy_service(&entry, svc);
2329 if (copy_to_user(&uptr->entrytable[count],
2330 &entry, sizeof(entry))) {
2331 ret = -EFAULT;
2332 goto out;
2334 count++;
2337 out:
2338 return ret;
/*
 * __ip_vs_get_dest_entries - look up one IPv4 service (by fwmark or
 * proto/addr/port) and copy up to get->num_dests of its destinations to
 * userspace. Returns 0, -ESRCH (no such service) or -EFAULT.
 * NOTE(review): 'entry' is not memset here (unlike the service path), so
 * struct padding may leak to userspace — candidate for a memset fix.
 */
2341 static inline int
2342 __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
2343 struct ip_vs_get_dests __user *uptr)
2345 struct ip_vs_service *svc;
2346 union nf_inet_addr addr = { .ip = get->addr };
2347 int ret = 0;
2349 if (get->fwmark)
2350 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
2351 else
2352 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
2353 get->port);
2355 if (svc) {
2356 int count = 0;
2357 struct ip_vs_dest *dest;
2358 struct ip_vs_dest_entry entry;
2360 list_for_each_entry(dest, &svc->destinations, n_list) {
2361 if (count >= get->num_dests)
2362 break;
2364 entry.addr = dest->addr.ip;
2365 entry.port = dest->port;
2366 entry.conn_flags = atomic_read(&dest->conn_flags);
2367 entry.weight = atomic_read(&dest->weight);
2368 entry.u_threshold = dest->u_threshold;
2369 entry.l_threshold = dest->l_threshold;
2370 entry.activeconns = atomic_read(&dest->activeconns);
2371 entry.inactconns = atomic_read(&dest->inactconns);
2372 entry.persistconns = atomic_read(&dest->persistconns);
2373 ip_vs_copy_stats(&entry.stats, &dest->stats);
2374 if (copy_to_user(&uptr->entrytable[count],
2375 &entry, sizeof(entry))) {
2376 ret = -EFAULT;
2377 break;
2379 count++;
2381 } else
2382 ret = -ESRCH;
2383 return ret;
/* Report @net's current TCP/TCP-FIN/UDP timeouts to userspace, in
 * seconds (jiffies / HZ). Fields are only set for compiled-in protos. */
2386 static inline void
2387 __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
2389 struct ip_vs_proto_data *pd;
2391 #ifdef CONFIG_IP_VS_PROTO_TCP
2392 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2393 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2394 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2395 #endif
2396 #ifdef CONFIG_IP_VS_PROTO_UDP
2397 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2398 u->udp_timeout =
2399 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2400 #endif
/* Minimum getsockopt argument length per IP_VS_SO_GET_* command; used
 * by do_ip_vs_get_ctl to validate the user-supplied *len. */
2404 #define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2405 #define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2406 #define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2407 #define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2408 #define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2409 #define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2410 #define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2412 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2413 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2414 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2415 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2416 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2417 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2418 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2419 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
/*
 * do_ip_vs_get_ctl - legacy getsockopt entry point for all
 * IP_VS_SO_GET_* commands. Validates capability and minimum length,
 * copies the request header in, then serves each query under
 * __ip_vs_mutex. The variable-size SERVICES/DESTS replies require *len
 * to match exactly the header plus num_* entries.
 */
2422 static int
2423 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2425 unsigned char arg[128];
2426 int ret = 0;
2427 unsigned int copylen;
2428 struct net *net = sock_net(sk);
2429 struct netns_ipvs *ipvs = net_ipvs(net);
2431 BUG_ON(!net);
2432 if (!capable(CAP_NET_ADMIN))
2433 return -EPERM;
2435 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2436 return -EINVAL;
2438 if (*len < get_arglen[GET_CMDID(cmd)]) {
2439 pr_err("get_ctl: len %u < %u\n",
2440 *len, get_arglen[GET_CMDID(cmd)]);
2441 return -EINVAL;
/* Copy in at most the fixed request header (bounded by arg[128]). */
2444 copylen = get_arglen[GET_CMDID(cmd)];
2445 if (copylen > 128)
2446 return -EINVAL;
2448 if (copy_from_user(arg, user, copylen) != 0)
2449 return -EFAULT;
2451 if (mutex_lock_interruptible(&__ip_vs_mutex))
2452 return -ERESTARTSYS;
2454 switch (cmd) {
2455 case IP_VS_SO_GET_VERSION:
2457 char buf[64];
2459 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2460 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2461 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2462 ret = -EFAULT;
2463 goto out;
2465 *len = strlen(buf)+1;
2467 break;
2469 case IP_VS_SO_GET_INFO:
2471 struct ip_vs_getinfo info;
2472 info.version = IP_VS_VERSION_CODE;
2473 info.size = ip_vs_conn_tab_size;
2474 info.num_services = ip_vs_num_services;
2475 if (copy_to_user(user, &info, sizeof(info)) != 0)
2476 ret = -EFAULT;
2478 break;
2480 case IP_VS_SO_GET_SERVICES:
2482 struct ip_vs_get_services *get;
2483 int size;
2485 get = (struct ip_vs_get_services *)arg;
2486 size = sizeof(*get) +
2487 sizeof(struct ip_vs_service_entry) * get->num_services;
2488 if (*len != size) {
2489 pr_err("length: %u != %u\n", *len, size);
2490 ret = -EINVAL;
2491 goto out;
2493 ret = __ip_vs_get_service_entries(net, get, user);
2495 break;
2497 case IP_VS_SO_GET_SERVICE:
2499 struct ip_vs_service_entry *entry;
2500 struct ip_vs_service *svc;
2501 union nf_inet_addr addr;
2503 entry = (struct ip_vs_service_entry *)arg;
2504 addr.ip = entry->addr;
2505 if (entry->fwmark)
2506 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
2507 else
2508 svc = __ip_vs_service_find(net, AF_INET,
2509 entry->protocol, &addr,
2510 entry->port);
2511 if (svc) {
2512 ip_vs_copy_service(entry, svc);
2513 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2514 ret = -EFAULT;
2515 } else
2516 ret = -ESRCH;
2518 break;
2520 case IP_VS_SO_GET_DESTS:
2522 struct ip_vs_get_dests *get;
2523 int size;
2525 get = (struct ip_vs_get_dests *)arg;
2526 size = sizeof(*get) +
2527 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2528 if (*len != size) {
2529 pr_err("length: %u != %u\n", *len, size);
2530 ret = -EINVAL;
2531 goto out;
2533 ret = __ip_vs_get_dest_entries(net, get, user);
2535 break;
2537 case IP_VS_SO_GET_TIMEOUT:
2539 struct ip_vs_timeout_user t;
2541 __ip_vs_get_timeouts(net, &t);
2542 if (copy_to_user(user, &t, sizeof(t)) != 0)
2543 ret = -EFAULT;
2545 break;
2547 case IP_VS_SO_GET_DAEMON:
2549 struct ip_vs_daemon_user d[2];
/* d[0] describes the master daemon, d[1] the backup daemon;
 * unset slots stay zeroed. */
2551 memset(&d, 0, sizeof(d));
2552 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2553 d[0].state = IP_VS_STATE_MASTER;
2554 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2555 sizeof(d[0].mcast_ifn));
2556 d[0].syncid = ipvs->master_syncid;
2558 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2559 d[1].state = IP_VS_STATE_BACKUP;
2560 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2561 sizeof(d[1].mcast_ifn));
2562 d[1].syncid = ipvs->backup_syncid;
2564 if (copy_to_user(user, &d, sizeof(d)) != 0)
2565 ret = -EFAULT;
2567 break;
2569 default:
2570 ret = -EINVAL;
2573 out:
2574 mutex_unlock(&__ip_vs_mutex);
2575 return ret;
2579 static struct nf_sockopt_ops ip_vs_sockopts = {
2580 .pf = PF_INET,
2581 .set_optmin = IP_VS_BASE_CTL,
2582 .set_optmax = IP_VS_SO_SET_MAX+1,
2583 .set = do_ip_vs_set_ctl,
2584 .get_optmin = IP_VS_BASE_CTL,
2585 .get_optmax = IP_VS_SO_GET_MAX+1,
2586 .get = do_ip_vs_get_ctl,
2587 .owner = THIS_MODULE,
2591 * Generic Netlink interface
2594 /* IPVS genetlink family */
2595 static struct genl_family ip_vs_genl_family = {
2596 .id = GENL_ID_GENERATE,
2597 .hdrsize = 0,
2598 .name = IPVS_GENL_NAME,
2599 .version = IPVS_GENL_VERSION,
2600 .maxattr = IPVS_CMD_MAX,
2603 /* Policy used for first-level command attributes */
2604 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2605 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2606 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2607 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2608 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2609 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2610 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2613 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2614 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2615 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2616 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2617 .len = IP_VS_IFNAME_MAXLEN },
2618 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2621 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2622 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2623 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2624 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2625 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2626 .len = sizeof(union nf_inet_addr) },
2627 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2628 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2629 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2630 .len = IP_VS_SCHEDNAME_MAXLEN },
2631 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2632 .len = IP_VS_PENAME_MAXLEN },
2633 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2634 .len = sizeof(struct ip_vs_flags) },
2635 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2636 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2637 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2640 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2641 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2642 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2643 .len = sizeof(union nf_inet_addr) },
2644 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2645 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2646 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2647 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2648 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2649 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2650 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2651 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2652 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
/*
 * ip_vs_genl_fill_stats - serialise an ip_vs_stats block into @skb as a
 * nested attribute of type @container_type.
 * Returns 0 on success, or -EMSGSIZE when the skb has no room left (the
 * partially built nest is cancelled before returning).
 */
2655 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2656 struct ip_vs_stats *stats)
2658 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2659 if (!nl_stats)
2660 return -EMSGSIZE;
/* hold the stats lock so all counters are read as one snapshot */
2662 spin_lock_bh(&stats->lock);
/* the NLA_PUT* macros jump to nla_put_failure when the skb is full */
2664 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2665 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2666 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2667 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2668 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2669 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2670 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2671 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2672 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2673 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
2675 spin_unlock_bh(&stats->lock);
2677 nla_nest_end(skb, nl_stats);
2679 return 0;
/* reached only via the NLA_PUT* macros above */
2681 nla_put_failure:
2682 spin_unlock_bh(&stats->lock);
2683 nla_nest_cancel(skb, nl_stats);
2684 return -EMSGSIZE;
/*
 * ip_vs_genl_fill_service - serialise one virtual service as a nested
 * IPVS_CMD_ATTR_SERVICE attribute, including its stats block.
 * Returns 0 on success, -EMSGSIZE when @skb is full (nest cancelled).
 */
2687 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2688 struct ip_vs_service *svc)
2690 struct nlattr *nl_service;
/* report all current flags; the full mask tells userspace all are valid */
2691 struct ip_vs_flags flags = { .flags = svc->flags,
2692 .mask = ~0 };
2694 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2695 if (!nl_service)
2696 return -EMSGSIZE;
2698 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
/* a service is identified either by fwmark or by proto/addr/port */
2700 if (svc->fwmark) {
2701 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2702 } else {
2703 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2704 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2705 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2708 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
/* the persistence engine is optional */
2709 if (svc->pe)
2710 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
2711 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
/* timeout is kept in jiffies internally, exported in seconds */
2712 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2713 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2715 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2716 goto nla_put_failure;
2718 nla_nest_end(skb, nl_service);
2720 return 0;
2722 nla_put_failure:
2723 nla_nest_cancel(skb, nl_service);
2724 return -EMSGSIZE;
/*
 * ip_vs_genl_dump_service - emit one NLM_F_MULTI message describing @svc
 * as part of a dump.  Returns the message length on success or -EMSGSIZE
 * on failure (the half-built message is cancelled).
 */
2727 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2728 struct ip_vs_service *svc,
2729 struct netlink_callback *cb)
2731 void *hdr;
2733 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2734 &ip_vs_genl_family, NLM_F_MULTI,
2735 IPVS_CMD_NEW_SERVICE);
2736 if (!hdr)
2737 return -EMSGSIZE;
2739 if (ip_vs_genl_fill_service(skb, svc) < 0)
2740 goto nla_put_failure;
2742 return genlmsg_end(skb, hdr);
2744 nla_put_failure:
2745 genlmsg_cancel(skb, hdr);
2746 return -EMSGSIZE;
/*
 * ip_vs_genl_dump_services - netlink dump callback for IPVS_CMD_GET_SERVICE.
 * Walks the address-hashed table first, then the fwmark-hashed table,
 * skipping services of other netns and entries already delivered in a
 * previous pass (cb->args[0] holds the resume index).
 */
2749 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2750 struct netlink_callback *cb)
2752 int idx = 0, i;
2753 int start = cb->args[0];
2754 struct ip_vs_service *svc;
2755 struct net *net = skb_sknet(skb);
2757 mutex_lock(&__ip_vs_mutex);
2758 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2759 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2760 if (++idx <= start || !net_eq(svc->net, net))
2761 continue;
2762 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
/* skb full: back off one so this entry is retried next pass */
2763 idx--;
2764 goto nla_put_failure;
2769 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2770 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2771 if (++idx <= start || !net_eq(svc->net, net))
2772 continue;
2773 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2774 idx--;
2775 goto nla_put_failure;
/* fall through on success too: record progress and return */
2780 nla_put_failure:
2781 mutex_unlock(&__ip_vs_mutex);
2782 cb->args[0] = idx;
2784 return skb->len;
/*
 * ip_vs_genl_parse_service - parse a nested IPVS_CMD_ATTR_SERVICE
 * attribute into @usvc and look up any matching existing service.
 *
 * @full_entry: when non-zero, also require and parse the configuration
 *	attributes (scheduler, flags, timeout, netmask, optional pe).
 * @ret_svc: set to the existing service or NULL if none matches.
 *
 * Returns 0 on success, -EINVAL on malformed/missing attributes, or
 * -EAFNOSUPPORT for an unsupported address family.
 * Note: usvc->sched_name/pe_name point into the netlink message, so
 * they are only valid while the message is.
 */
2787 static int ip_vs_genl_parse_service(struct net *net,
2788 struct ip_vs_service_user_kern *usvc,
2789 struct nlattr *nla, int full_entry,
2790 struct ip_vs_service **ret_svc)
2792 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2793 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2794 struct ip_vs_service *svc;
2796 /* Parse mandatory identifying service fields first */
2797 if (nla == NULL ||
2798 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2799 return -EINVAL;
2801 nla_af = attrs[IPVS_SVC_ATTR_AF];
2802 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2803 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2804 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2805 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
/* need AF plus either a fwmark or the full proto/addr/port triple */
2807 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2808 return -EINVAL;
2810 memset(usvc, 0, sizeof(*usvc));
2812 usvc->af = nla_get_u16(nla_af);
2813 #ifdef CONFIG_IP_VS_IPV6
2814 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2815 #else
2816 if (usvc->af != AF_INET)
2817 #endif
2818 return -EAFNOSUPPORT;
2820 if (nla_fwmark) {
/* fwmark services carry no protocol; TCP is a placeholder */
2821 usvc->protocol = IPPROTO_TCP;
2822 usvc->fwmark = nla_get_u32(nla_fwmark);
2823 } else {
2824 usvc->protocol = nla_get_u16(nla_protocol);
2825 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2826 usvc->port = nla_get_u16(nla_port);
2827 usvc->fwmark = 0;
/* look up an existing service by the identity just parsed */
2830 if (usvc->fwmark)
2831 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
2832 else
2833 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
2834 &usvc->addr, usvc->port);
2835 *ret_svc = svc;
2837 /* If a full entry was requested, check for the additional fields */
2838 if (full_entry) {
2839 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
2840 *nla_netmask;
2841 struct ip_vs_flags flags;
2843 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2844 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
2845 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2846 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2847 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2849 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2850 return -EINVAL;
2852 nla_memcpy(&flags, nla_flags, sizeof(flags));
2854 /* prefill flags from service if it already exists */
2855 if (svc)
2856 usvc->flags = svc->flags;
2858 /* set new flags from userland */
2859 usvc->flags = (usvc->flags & ~flags.mask) |
2860 (flags.flags & flags.mask);
2861 usvc->sched_name = nla_data(nla_sched);
2862 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
2863 usvc->timeout = nla_get_u32(nla_timeout);
2864 usvc->netmask = nla_get_u32(nla_netmask);
2867 return 0;
2870 static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
2871 struct nlattr *nla)
2873 struct ip_vs_service_user_kern usvc;
2874 struct ip_vs_service *svc;
2875 int ret;
2877 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
2878 return ret ? ERR_PTR(ret) : svc;
/*
 * ip_vs_genl_fill_dest - serialise one real server as a nested
 * IPVS_CMD_ATTR_DEST attribute, including thresholds, connection
 * counters and the stats block.
 * Returns 0 on success, -EMSGSIZE when @skb is full (nest cancelled).
 */
2881 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2883 struct nlattr *nl_dest;
2885 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2886 if (!nl_dest)
2887 return -EMSGSIZE;
2889 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2890 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
/* only the forwarding-method bits of conn_flags are exported */
2892 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2893 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2894 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2895 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2896 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2897 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2898 atomic_read(&dest->activeconns));
2899 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2900 atomic_read(&dest->inactconns));
2901 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2902 atomic_read(&dest->persistconns));
2904 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2905 goto nla_put_failure;
2907 nla_nest_end(skb, nl_dest);
2909 return 0;
2911 nla_put_failure:
2912 nla_nest_cancel(skb, nl_dest);
2913 return -EMSGSIZE;
/*
 * ip_vs_genl_dump_dest - emit one NLM_F_MULTI message describing @dest
 * as part of a dump.  Returns the message length on success or -EMSGSIZE
 * on failure (the half-built message is cancelled).
 */
2916 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2917 struct netlink_callback *cb)
2919 void *hdr;
2921 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2922 &ip_vs_genl_family, NLM_F_MULTI,
2923 IPVS_CMD_NEW_DEST);
2924 if (!hdr)
2925 return -EMSGSIZE;
2927 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2928 goto nla_put_failure;
2930 return genlmsg_end(skb, hdr);
2932 nla_put_failure:
2933 genlmsg_cancel(skb, hdr);
2934 return -EMSGSIZE;
/*
 * ip_vs_genl_dump_dests - netlink dump callback for IPVS_CMD_GET_DEST.
 * Re-parses the request to locate the owning service, then walks its
 * destination list; cb->args[0] holds the resume index between passes.
 */
2937 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2938 struct netlink_callback *cb)
2940 int idx = 0;
2941 int start = cb->args[0];
2942 struct ip_vs_service *svc;
2943 struct ip_vs_dest *dest;
2944 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2945 struct net *net;
2947 mutex_lock(&__ip_vs_mutex);
2949 /* Try to find the service for which to dump destinations */
2950 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2951 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2952 goto out_err;
2954 net = skb_sknet(skb);
2955 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
2956 if (IS_ERR(svc) || svc == NULL)
2957 goto out_err;
2959 /* Dump the destinations */
2960 list_for_each_entry(dest, &svc->destinations, n_list) {
2961 if (++idx <= start)
2962 continue;
2963 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
/* skb full: back off one so this entry is retried next pass */
2964 idx--;
2965 goto nla_put_failure;
2969 nla_put_failure:
2970 cb->args[0] = idx;
2972 out_err:
2973 mutex_unlock(&__ip_vs_mutex);
2975 return skb->len;
/*
 * ip_vs_genl_parse_dest - parse a nested IPVS_CMD_ATTR_DEST attribute
 * into @udest.  Address and port are always mandatory; when @full_entry
 * is set, forwarding method, weight and both thresholds are required
 * too (used when adding/editing rather than deleting a destination).
 * Returns 0 on success, -EINVAL on missing/malformed attributes.
 */
2978 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
2979 struct nlattr *nla, int full_entry)
2981 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2982 struct nlattr *nla_addr, *nla_port;
2984 /* Parse mandatory identifying destination fields first */
2985 if (nla == NULL ||
2986 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2987 return -EINVAL;
2989 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
2990 nla_port = attrs[IPVS_DEST_ATTR_PORT];
2992 if (!(nla_addr && nla_port))
2993 return -EINVAL;
2995 memset(udest, 0, sizeof(*udest));
2997 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2998 udest->port = nla_get_u16(nla_port);
3000 /* If a full entry was requested, check for the additional fields */
3001 if (full_entry) {
3002 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3003 *nla_l_thresh;
3005 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3006 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
3007 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
3008 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3010 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3011 return -EINVAL;
/* only the forwarding-method bits may be set via netlink */
3013 udest->conn_flags = nla_get_u32(nla_fwd)
3014 & IP_VS_CONN_F_FWD_MASK;
3015 udest->weight = nla_get_u32(nla_weight);
3016 udest->u_threshold = nla_get_u32(nla_u_thresh);
3017 udest->l_threshold = nla_get_u32(nla_l_thresh);
3020 return 0;
3023 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3024 const char *mcast_ifn, __be32 syncid)
3026 struct nlattr *nl_daemon;
3028 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3029 if (!nl_daemon)
3030 return -EMSGSIZE;
3032 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3033 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3034 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3036 nla_nest_end(skb, nl_daemon);
3038 return 0;
3040 nla_put_failure:
3041 nla_nest_cancel(skb, nl_daemon);
3042 return -EMSGSIZE;
3045 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3046 const char *mcast_ifn, __be32 syncid,
3047 struct netlink_callback *cb)
3049 void *hdr;
3050 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3051 &ip_vs_genl_family, NLM_F_MULTI,
3052 IPVS_CMD_NEW_DAEMON);
3053 if (!hdr)
3054 return -EMSGSIZE;
3056 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3057 goto nla_put_failure;
3059 return genlmsg_end(skb, hdr);
3061 nla_put_failure:
3062 genlmsg_cancel(skb, hdr);
3063 return -EMSGSIZE;
/*
 * ip_vs_genl_dump_daemons - netlink dump callback for IPVS_CMD_GET_DAEMON.
 * Reports the master and/or backup sync daemon if running; cb->args[0]
 * and cb->args[1] remember which of the two was already delivered.
 * NOTE(review): this uses skb_net() while the other dump/doit handlers
 * use skb_sknet() — confirm the intended namespace source.
 */
3066 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3067 struct netlink_callback *cb)
3069 struct net *net = skb_net(skb);
3070 struct netns_ipvs *ipvs = net_ipvs(net);
3072 mutex_lock(&__ip_vs_mutex);
3073 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3074 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3075 ipvs->master_mcast_ifn,
3076 ipvs->master_syncid, cb) < 0)
3077 goto nla_put_failure;
3079 cb->args[0] = 1;
3082 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3083 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3084 ipvs->backup_mcast_ifn,
3085 ipvs->backup_syncid, cb) < 0)
3086 goto nla_put_failure;
3088 cb->args[1] = 1;
3091 nla_put_failure:
3092 mutex_unlock(&__ip_vs_mutex);
3094 return skb->len;
3097 static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
3099 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3100 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3101 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3102 return -EINVAL;
3104 return start_sync_thread(net,
3105 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3106 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3107 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3110 static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
3112 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3113 return -EINVAL;
3115 return stop_sync_thread(net,
3116 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3119 static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
3121 struct ip_vs_timeout_user t;
3123 __ip_vs_get_timeouts(net, &t);
3125 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3126 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3128 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3129 t.tcp_fin_timeout =
3130 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3132 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3133 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3135 return ip_vs_set_timeout(net, &t);
/*
 * ip_vs_genl_set_cmd - doit handler for all state-changing IPVS genl
 * commands (flush, config, daemons, service and destination updates).
 * Dispatches on the command id, parsing service/destination attributes
 * as needed, all under __ip_vs_mutex.  Returns 0 or a negative errno.
 */
3138 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3140 struct ip_vs_service *svc = NULL;
3141 struct ip_vs_service_user_kern usvc;
3142 struct ip_vs_dest_user_kern udest;
3143 int ret = 0, cmd;
3144 int need_full_svc = 0, need_full_dest = 0;
3145 struct net *net;
3147 net = skb_sknet(skb);
3148 cmd = info->genlhdr->cmd;
3150 mutex_lock(&__ip_vs_mutex);
/* commands that take no service argument are handled first */
3152 if (cmd == IPVS_CMD_FLUSH) {
3153 ret = ip_vs_flush(net);
3154 goto out;
3155 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3156 ret = ip_vs_genl_set_config(net, info->attrs);
3157 goto out;
3158 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3159 cmd == IPVS_CMD_DEL_DAEMON) {
3161 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3163 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3164 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3165 info->attrs[IPVS_CMD_ATTR_DAEMON],
3166 ip_vs_daemon_policy)) {
3167 ret = -EINVAL;
3168 goto out;
3171 if (cmd == IPVS_CMD_NEW_DAEMON)
3172 ret = ip_vs_genl_new_daemon(net, daemon_attrs);
3173 else
3174 ret = ip_vs_genl_del_daemon(net, daemon_attrs);
3175 goto out;
3176 } else if (cmd == IPVS_CMD_ZERO &&
3177 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
/* ZERO without a service argument zeroes all counters */
3178 ret = ip_vs_zero_all(net);
3179 goto out;
3182 /* All following commands require a service argument, so check if we
3183 * received a valid one. We need a full service specification when
3184 * adding / editing a service. Only identifying members otherwise. */
3185 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3186 need_full_svc = 1;
3188 ret = ip_vs_genl_parse_service(net, &usvc,
3189 info->attrs[IPVS_CMD_ATTR_SERVICE],
3190 need_full_svc, &svc);
3191 if (ret)
3192 goto out;
3194 /* Unless we're adding a new service, the service must already exist */
3195 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3196 ret = -ESRCH;
3197 goto out;
3200 /* Destination commands require a valid destination argument. For
3201 * adding / editing a destination, we need a full destination
3202 * specification. */
3203 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3204 cmd == IPVS_CMD_DEL_DEST) {
3205 if (cmd != IPVS_CMD_DEL_DEST)
3206 need_full_dest = 1;
3208 ret = ip_vs_genl_parse_dest(&udest,
3209 info->attrs[IPVS_CMD_ATTR_DEST],
3210 need_full_dest);
3211 if (ret)
3212 goto out;
3215 switch (cmd) {
3216 case IPVS_CMD_NEW_SERVICE:
3217 if (svc == NULL)
3218 ret = ip_vs_add_service(net, &usvc, &svc);
3219 else
3220 ret = -EEXIST;
3221 break;
3222 case IPVS_CMD_SET_SERVICE:
3223 ret = ip_vs_edit_service(svc, &usvc);
3224 break;
3225 case IPVS_CMD_DEL_SERVICE:
3226 ret = ip_vs_del_service(svc);
3227 /* do not use svc, it can be freed */
3228 break;
3229 case IPVS_CMD_NEW_DEST:
3230 ret = ip_vs_add_dest(svc, &udest);
3231 break;
3232 case IPVS_CMD_SET_DEST:
3233 ret = ip_vs_edit_dest(svc, &udest);
3234 break;
3235 case IPVS_CMD_DEL_DEST:
3236 ret = ip_vs_del_dest(svc, &udest);
3237 break;
3238 case IPVS_CMD_ZERO:
3239 ret = ip_vs_zero_service(svc);
3240 break;
3241 default:
3242 ret = -EINVAL;
3245 out:
3246 mutex_unlock(&__ip_vs_mutex);
3248 return ret;
/*
 * ip_vs_genl_get_cmd - doit handler for the non-dump read commands
 * (GET_SERVICE for a single service, GET_CONFIG, GET_INFO).  Builds a
 * unicast reply message under __ip_vs_mutex.
 * Returns 0 or a negative errno.
 */
3251 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3253 struct sk_buff *msg;
3254 void *reply;
3255 int ret, cmd, reply_cmd;
3256 struct net *net;
3258 net = skb_sknet(skb);
3259 cmd = info->genlhdr->cmd;
/* map each GET command onto the command id used in the reply */
3261 if (cmd == IPVS_CMD_GET_SERVICE)
3262 reply_cmd = IPVS_CMD_NEW_SERVICE;
3263 else if (cmd == IPVS_CMD_GET_INFO)
3264 reply_cmd = IPVS_CMD_SET_INFO;
3265 else if (cmd == IPVS_CMD_GET_CONFIG)
3266 reply_cmd = IPVS_CMD_SET_CONFIG;
3267 else {
3268 pr_err("unknown Generic Netlink command\n");
3269 return -EINVAL;
3272 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3273 if (!msg)
3274 return -ENOMEM;
3276 mutex_lock(&__ip_vs_mutex);
3278 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3279 if (reply == NULL)
3280 goto nla_put_failure;
3282 switch (cmd) {
3283 case IPVS_CMD_GET_SERVICE:
3285 struct ip_vs_service *svc;
3287 svc = ip_vs_genl_find_service(net,
3288 info->attrs[IPVS_CMD_ATTR_SERVICE]);
3289 if (IS_ERR(svc)) {
3290 ret = PTR_ERR(svc);
3291 goto out_err;
3292 } else if (svc) {
3293 ret = ip_vs_genl_fill_service(msg, svc);
3294 if (ret)
3295 goto nla_put_failure;
3296 } else {
3297 ret = -ESRCH;
3298 goto out_err;
3301 break;
3304 case IPVS_CMD_GET_CONFIG:
3306 struct ip_vs_timeout_user t;
3308 __ip_vs_get_timeouts(net, &t);
/* timeout attributes exist only for the compiled-in protocols */
3309 #ifdef CONFIG_IP_VS_PROTO_TCP
3310 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3311 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3312 t.tcp_fin_timeout);
3313 #endif
3314 #ifdef CONFIG_IP_VS_PROTO_UDP
3315 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3316 #endif
3318 break;
3321 case IPVS_CMD_GET_INFO:
3322 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3323 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3324 ip_vs_conn_tab_size);
3325 break;
3328 genlmsg_end(msg, reply);
3329 ret = genlmsg_reply(msg, info);
3330 goto out;
3332 nla_put_failure:
3333 pr_err("not enough space in Netlink message\n");
3334 ret = -EMSGSIZE;
3336 out_err:
3337 nlmsg_free(msg);
3338 out:
3339 mutex_unlock(&__ip_vs_mutex);
3341 return ret;
3345 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3347 .cmd = IPVS_CMD_NEW_SERVICE,
3348 .flags = GENL_ADMIN_PERM,
3349 .policy = ip_vs_cmd_policy,
3350 .doit = ip_vs_genl_set_cmd,
3353 .cmd = IPVS_CMD_SET_SERVICE,
3354 .flags = GENL_ADMIN_PERM,
3355 .policy = ip_vs_cmd_policy,
3356 .doit = ip_vs_genl_set_cmd,
3359 .cmd = IPVS_CMD_DEL_SERVICE,
3360 .flags = GENL_ADMIN_PERM,
3361 .policy = ip_vs_cmd_policy,
3362 .doit = ip_vs_genl_set_cmd,
3365 .cmd = IPVS_CMD_GET_SERVICE,
3366 .flags = GENL_ADMIN_PERM,
3367 .doit = ip_vs_genl_get_cmd,
3368 .dumpit = ip_vs_genl_dump_services,
3369 .policy = ip_vs_cmd_policy,
3372 .cmd = IPVS_CMD_NEW_DEST,
3373 .flags = GENL_ADMIN_PERM,
3374 .policy = ip_vs_cmd_policy,
3375 .doit = ip_vs_genl_set_cmd,
3378 .cmd = IPVS_CMD_SET_DEST,
3379 .flags = GENL_ADMIN_PERM,
3380 .policy = ip_vs_cmd_policy,
3381 .doit = ip_vs_genl_set_cmd,
3384 .cmd = IPVS_CMD_DEL_DEST,
3385 .flags = GENL_ADMIN_PERM,
3386 .policy = ip_vs_cmd_policy,
3387 .doit = ip_vs_genl_set_cmd,
3390 .cmd = IPVS_CMD_GET_DEST,
3391 .flags = GENL_ADMIN_PERM,
3392 .policy = ip_vs_cmd_policy,
3393 .dumpit = ip_vs_genl_dump_dests,
3396 .cmd = IPVS_CMD_NEW_DAEMON,
3397 .flags = GENL_ADMIN_PERM,
3398 .policy = ip_vs_cmd_policy,
3399 .doit = ip_vs_genl_set_cmd,
3402 .cmd = IPVS_CMD_DEL_DAEMON,
3403 .flags = GENL_ADMIN_PERM,
3404 .policy = ip_vs_cmd_policy,
3405 .doit = ip_vs_genl_set_cmd,
3408 .cmd = IPVS_CMD_GET_DAEMON,
3409 .flags = GENL_ADMIN_PERM,
3410 .dumpit = ip_vs_genl_dump_daemons,
3413 .cmd = IPVS_CMD_SET_CONFIG,
3414 .flags = GENL_ADMIN_PERM,
3415 .policy = ip_vs_cmd_policy,
3416 .doit = ip_vs_genl_set_cmd,
3419 .cmd = IPVS_CMD_GET_CONFIG,
3420 .flags = GENL_ADMIN_PERM,
3421 .doit = ip_vs_genl_get_cmd,
3424 .cmd = IPVS_CMD_GET_INFO,
3425 .flags = GENL_ADMIN_PERM,
3426 .doit = ip_vs_genl_get_cmd,
3429 .cmd = IPVS_CMD_ZERO,
3430 .flags = GENL_ADMIN_PERM,
3431 .policy = ip_vs_cmd_policy,
3432 .doit = ip_vs_genl_set_cmd,
3435 .cmd = IPVS_CMD_FLUSH,
3436 .flags = GENL_ADMIN_PERM,
3437 .doit = ip_vs_genl_set_cmd,
3441 static int __init ip_vs_genl_register(void)
3443 return genl_register_family_with_ops(&ip_vs_genl_family,
3444 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3447 static void ip_vs_genl_unregister(void)
3449 genl_unregister_family(&ip_vs_genl_family);
3452 /* End of Generic Netlink interface definitions */
3455  * per netns init/exit func.
3457 int __net_init __ip_vs_control_init(struct net *net)
3459 int idx;
3460 struct netns_ipvs *ipvs = net_ipvs(net);
3462 if (!net_eq(net, &init_net)) /* netns not enabled yet */
3463 return -EPERM;
3465 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3466 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3468 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3469 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
3470 sysctl_header = register_net_sysctl_table(net, net_vs_ctl_path,
3471 vs_vars);
3472 if (sysctl_header == NULL)
3473 goto err_reg;
3474 ip_vs_new_estimator(net, &ip_vs_stats);
3475 return 0;
3477 err_reg:
3478 return -ENOMEM;
3481 static void __net_exit __ip_vs_control_cleanup(struct net *net)
3483 if (!net_eq(net, &init_net)) /* netns not enabled yet */
3484 return;
3486 ip_vs_kill_estimator(net, &ip_vs_stats);
3487 unregister_net_sysctl_table(sysctl_header);
3488 proc_net_remove(net, "ip_vs_stats");
3489 proc_net_remove(net, "ip_vs");
3492 static struct pernet_operations ipvs_control_ops = {
3493 .init = __ip_vs_control_init,
3494 .exit = __ip_vs_control_cleanup,
/*
 * ip_vs_control_init - module init for the control code: initialise the
 * global service hash tables, then register (in order) the pernet
 * subsystem, the sockopt interface and the genetlink interface, and
 * finally arm the periodic defense timer.
 * Returns 0 on success or a negative errno; on failure everything
 * registered so far is rolled back.
 */
3497 int __init ip_vs_control_init(void)
3499 int idx;
3500 int ret;
3502 EnterFunction(2);
3504 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
3505 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3506 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3507 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3510 ret = register_pernet_subsys(&ipvs_control_ops);
3511 if (ret) {
3512 pr_err("cannot register namespace.\n");
3513 goto err;
3516 smp_wmb(); /* Do we really need it now ? */
3518 ret = nf_register_sockopt(&ip_vs_sockopts);
3519 if (ret) {
3520 pr_err("cannot register sockopt.\n");
3521 goto err_net;
3524 ret = ip_vs_genl_register();
3525 if (ret) {
3526 pr_err("cannot register Generic Netlink interface.\n");
/* sockopt was already registered above; roll it back here */
3527 nf_unregister_sockopt(&ip_vs_sockopts);
3528 goto err_net;
3531 /* Hook the defense timer */
3532 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3534 LeaveFunction(2);
3535 return 0;
3537 err_net:
3538 unregister_pernet_subsys(&ipvs_control_ops);
3539 err:
3540 return ret;
/*
 * ip_vs_control_cleanup - module exit: stop the defense timer, then
 * unregister the pernet subsystem, genetlink and sockopt interfaces in
 * reverse order of ip_vs_control_init().
 */
3544 void ip_vs_control_cleanup(void)
3546 EnterFunction(2);
3547 ip_vs_trash_cleanup();
/*
 * NOTE(review): cancel_delayed_work_sync() already cancels and waits
 * for the work item; the extra cancel_work_sync() on the embedded work
 * looks redundant — confirm whether it guards a self-rearming race.
 */
3548 cancel_delayed_work_sync(&defense_work);
3549 cancel_work_sync(&defense_work.work);
3550 unregister_pernet_subsys(&ipvs_control_ops);
3551 ip_vs_genl_unregister();
3552 nf_unregister_sockopt(&ip_vs_sockopts);
3553 LeaveFunction(2);