ipv6: Convert to use flowi6 where applicable.
[linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git] / net / netfilter / ipvs / ip_vs_ctl.c
bloba60b20fa142e721e1805963c24d8abb00565dc62
1 /*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
17 * Changes:
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
28 #include <linux/fs.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
40 #include <net/net_namespace.h>
41 #include <linux/nsproxy.h>
42 #include <net/ip.h>
43 #ifdef CONFIG_IP_VS_IPV6
44 #include <net/ipv6.h>
45 #include <net/ip6_route.h>
46 #endif
47 #include <net/route.h>
48 #include <net/sock.h>
49 #include <net/genetlink.h>
51 #include <asm/uaccess.h>
53 #include <net/ip_vs.h>
/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
static DEFINE_MUTEX(__ip_vs_mutex);

/* lock for service table (both ip_vs_svc_table and ip_vs_svc_fwm_table) */
static DEFINE_RWLOCK(__ip_vs_svc_lock);
/* sysctl variables */

#ifdef CONFIG_IP_VS_DEBUG
static int sysctl_ip_vs_debug_level = 0;

/* Return the current IPVS debug level (0 means no debug output). */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
#ifdef CONFIG_IP_VS_IPV6
/*
 * Check whether an IPv6 address is configured on the local host by
 * looking up its output route and testing whether it goes via the
 * loopback device.
 * Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way?
 *
 * Returns 1 if @addr is local, 0 otherwise.
 */
static int __ip_vs_addr_is_local_v6(struct net *net,
				    const struct in6_addr *addr)
{
	struct rt6_info *rt;
	struct flowi6 fl6 = {
		.daddr = *addr,
	};
	int local = 0;

	rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
	if (rt) {
		if (rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
			local = 1;
		/* Release the route: the original code returned without
		 * dropping the reference taken by ip6_route_output(),
		 * leaking a dst entry on every lookup. */
		dst_release(&rt->dst);
	}
	return local;
}
#endif
/*
 * update_defense_level is called from keventd and from sysctl,
 * so it needs to protect itself from softirqs
 *
 * Drives the three defense strategies (drop_entry, drop_packet,
 * secure_tcp) of this netns based on available memory.  Each strategy
 * is a small state machine keyed on its sysctl value:
 *   0 = never, 1 = enable when low on memory (auto-advance to 2),
 *   2 = enabled while low on memory (auto-fall back to 1),
 *   3 = always on.
 */
static void update_defense_level(struct netns_ipvs *ipvs)
{
	struct sysinfo i;
	/* NOTE(review): function-static, so this state is shared across
	 * all network namespaces even though the rest of the state is
	 * per-netns (ipvs) -- verify this is intended. */
	static int old_secure_tcp = 0;
	int availmem;
	int nomem;
	int to_change = -1;

	/* we only count free and buffered memory (in pages) */
	si_meminfo(&i);
	availmem = i.freeram + i.bufferram;
	/* however in linux 2.5 the i.bufferram is total page cache size,
	   we need adjust it */
	/* si_swapinfo(&i); */
	/* availmem = availmem - (i.totalswap - i.freeswap); */

	nomem = (availmem < ipvs->sysctl_amemthresh);

	local_bh_disable();

	/* drop_entry */
	spin_lock(&ipvs->dropentry_lock);
	switch (ipvs->sysctl_drop_entry) {
	case 0:
		atomic_set(&ipvs->dropentry, 0);
		break;
	case 1:
		if (nomem) {
			atomic_set(&ipvs->dropentry, 1);
			ipvs->sysctl_drop_entry = 2;
		} else {
			atomic_set(&ipvs->dropentry, 0);
		}
		break;
	case 2:
		if (nomem) {
			atomic_set(&ipvs->dropentry, 1);
		} else {
			atomic_set(&ipvs->dropentry, 0);
			ipvs->sysctl_drop_entry = 1;
		}
		break;
	case 3:
		atomic_set(&ipvs->dropentry, 1);
		break;
	}
	spin_unlock(&ipvs->dropentry_lock);

	/* drop_packet: drop_rate is derived from how far below the
	 * memory threshold we are (closer to the threshold -> higher
	 * rate divisor -> fewer drops). */
	spin_lock(&ipvs->droppacket_lock);
	switch (ipvs->sysctl_drop_packet) {
	case 0:
		ipvs->drop_rate = 0;
		break;
	case 1:
		if (nomem) {
			ipvs->drop_rate = ipvs->drop_counter
				= ipvs->sysctl_amemthresh /
				(ipvs->sysctl_amemthresh-availmem);
			ipvs->sysctl_drop_packet = 2;
		} else {
			ipvs->drop_rate = 0;
		}
		break;
	case 2:
		if (nomem) {
			ipvs->drop_rate = ipvs->drop_counter
				= ipvs->sysctl_amemthresh /
				(ipvs->sysctl_amemthresh-availmem);
		} else {
			ipvs->drop_rate = 0;
			ipvs->sysctl_drop_packet = 1;
		}
		break;
	case 3:
		ipvs->drop_rate = ipvs->sysctl_am_droprate;
		break;
	}
	spin_unlock(&ipvs->droppacket_lock);

	/* secure_tcp: only switch protocol timeout tables when crossing
	 * the enabled (>= 2) / disabled (< 2) boundary (to_change). */
	spin_lock(&ipvs->securetcp_lock);
	switch (ipvs->sysctl_secure_tcp) {
	case 0:
		if (old_secure_tcp >= 2)
			to_change = 0;
		break;
	case 1:
		if (nomem) {
			if (old_secure_tcp < 2)
				to_change = 1;
			ipvs->sysctl_secure_tcp = 2;
		} else {
			if (old_secure_tcp >= 2)
				to_change = 0;
		}
		break;
	case 2:
		if (nomem) {
			if (old_secure_tcp < 2)
				to_change = 1;
		} else {
			if (old_secure_tcp >= 2)
				to_change = 0;
			ipvs->sysctl_secure_tcp = 1;
		}
		break;
	case 3:
		if (old_secure_tcp < 2)
			to_change = 1;
		break;
	}
	old_secure_tcp = ipvs->sysctl_secure_tcp;
	if (to_change >= 0)
		ip_vs_protocol_timeout_change(ipvs,
					      ipvs->sysctl_secure_tcp > 1);
	spin_unlock(&ipvs->securetcp_lock);

	local_bh_enable();
}
/*
 * Timer for checking the defense
 */
#define DEFENSE_TIMER_PERIOD	1*HZ

/*
 * Periodic (every DEFENSE_TIMER_PERIOD) worker: re-evaluates the
 * defense levels for this netns, randomly drops connection entries if
 * drop_entry is active, and re-arms itself.
 */
static void defense_work_handler(struct work_struct *work)
{
	struct netns_ipvs *ipvs =
		container_of(work, struct netns_ipvs, defense_work.work);

	update_defense_level(ipvs);
	if (atomic_read(&ipvs->dropentry))
		ip_vs_random_dropentry(ipvs->net);
	schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
}
/* Take a reference on the ip_vs module; returns 0 if the module is
 * being removed, non-zero on success. */
int
ip_vs_use_count_inc(void)
{
	return try_module_get(THIS_MODULE);
}

/* Drop a reference taken with ip_vs_use_count_inc(). */
void
ip_vs_use_count_dec(void)
{
	module_put(THIS_MODULE);
}
/*
 * Hash table: for virtual service lookups
 */
#define IP_VS_SVC_TAB_BITS 8
#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)

/* the service table hashed by <protocol, addr, port> */
static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
/* the service table hashed by fwmark */
static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
258 * Returns hash value for virtual service
260 static inline unsigned
261 ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
262 const union nf_inet_addr *addr, __be16 port)
264 register unsigned porth = ntohs(port);
265 __be32 addr_fold = addr->ip;
267 #ifdef CONFIG_IP_VS_IPV6
268 if (af == AF_INET6)
269 addr_fold = addr->ip6[0]^addr->ip6[1]^
270 addr->ip6[2]^addr->ip6[3];
271 #endif
272 addr_fold ^= ((size_t)net>>8);
274 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
275 & IP_VS_SVC_TAB_MASK;
279 * Returns hash value of fwmark for virtual service lookup
281 static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
283 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
/*
 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
 * or in the ip_vs_svc_fwm_table by fwmark.
 * Should be called with locked tables.
 * Returns 0 if the service was already hashed, 1 on success.
 */
static int ip_vs_svc_hash(struct ip_vs_service *svc)
{
	unsigned hash;

	if (svc->flags & IP_VS_SVC_F_HASHED) {
		pr_err("%s(): request for already hashed, called from %pF\n",
		       __func__, __builtin_return_address(0));
		return 0;
	}

	if (svc->fwmark == 0) {
		/*
		 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
		 */
		hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
					 &svc->addr, svc->port);
		list_add(&svc->s_list, &ip_vs_svc_table[hash]);
	} else {
		/*
		 * Hash it by fwmark in svc_fwm_table
		 */
		hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
		list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
	}

	svc->flags |= IP_VS_SVC_F_HASHED;
	/* increase its refcnt because it is referenced by the svc table */
	atomic_inc(&svc->refcnt);
	return 1;
}
/*
 * Unhashes a service from svc_table / svc_fwm_table.
 * Should be called with locked tables.
 * Returns 0 if the service was not hashed, 1 on success.
 */
static int ip_vs_svc_unhash(struct ip_vs_service *svc)
{
	if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
		pr_err("%s(): request for unhash flagged, called from %pF\n",
		       __func__, __builtin_return_address(0));
		return 0;
	}

	if (svc->fwmark == 0) {
		/* Remove it from the svc_table table */
		list_del(&svc->s_list);
	} else {
		/* Remove it from the svc_fwm_table table */
		list_del(&svc->f_list);
	}

	svc->flags &= ~IP_VS_SVC_F_HASHED;
	/* drop the reference held by the table */
	atomic_dec(&svc->refcnt);
	return 1;
}
/*
 * Get service by {netns, proto,addr,port} in the service table.
 * Caller must hold __ip_vs_svc_lock; no reference is taken on the
 * returned service.
 */
static inline struct ip_vs_service *
__ip_vs_service_find(struct net *net, int af, __u16 protocol,
		     const union nf_inet_addr *vaddr, __be16 vport)
{
	unsigned hash;
	struct ip_vs_service *svc;

	/* Check for "full" addressed entries */
	hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);

	list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
		if ((svc->af == af)
		    && ip_vs_addr_equal(af, &svc->addr, vaddr)
		    && (svc->port == vport)
		    && (svc->protocol == protocol)
		    && net_eq(svc->net, net)) {
			/* HIT */
			return svc;
		}
	}

	return NULL;
}
/*
 * Get service by {fwmark} in the service table.
 * Caller must hold __ip_vs_svc_lock; no reference is taken on the
 * returned service.
 */
static inline struct ip_vs_service *
__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
{
	unsigned hash;
	struct ip_vs_service *svc;

	/* Check for fwmark addressed entries */
	hash = ip_vs_svc_fwm_hashkey(net, fwmark);

	list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
		if (svc->fwmark == fwmark && svc->af == af
		    && net_eq(svc->net, net)) {
			/* HIT */
			return svc;
		}
	}

	return NULL;
}
/*
 * Look up a virtual service, first by fwmark and then by
 * <protocol,addr,port>, with fallbacks to the FTP control port and the
 * catch-all (port 0) service.  On a hit, svc->usecnt is incremented;
 * the caller must release it with ip_vs_service_put().
 */
struct ip_vs_service *
ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
		  const union nf_inet_addr *vaddr, __be16 vport)
{
	struct ip_vs_service *svc;
	struct netns_ipvs *ipvs = net_ipvs(net);

	read_lock(&__ip_vs_svc_lock);

	/*
	 * Check the table hashed by fwmark first
	 */
	svc = __ip_vs_svc_fwm_find(net, af, fwmark);
	if (fwmark && svc)
		goto out;

	/*
	 * Check the table hashed by <protocol,addr,port>
	 * for "full" addressed entries
	 */
	svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);

	if (svc == NULL
	    && protocol == IPPROTO_TCP
	    && atomic_read(&ipvs->ftpsvc_counter)
	    && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
		/*
		 * Check if ftp service entry exists, the packet
		 * might belong to FTP data connections.
		 */
		svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
	}

	if (svc == NULL
	    && atomic_read(&ipvs->nullsvc_counter)) {
		/*
		 * Check if the catch-all port (port zero) exists
		 */
		svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
	}

  out:
	if (svc)
		atomic_inc(&svc->usecnt);
	read_unlock(&__ip_vs_svc_lock);

	IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
		      fwmark, ip_vs_proto_name(protocol),
		      IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
		      svc ? "hit" : "not hit");

	return svc;
}
/* Bind a real server to its virtual service, taking a reference on
 * the service for dest->svc. */
static inline void
__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
{
	atomic_inc(&svc->refcnt);
	dest->svc = svc;
}
/*
 * Unbind a real server from its virtual service and drop the service
 * reference; frees the service when this was the last reference.
 */
static void
__ip_vs_unbind_svc(struct ip_vs_dest *dest)
{
	struct ip_vs_service *svc = dest->svc;

	dest->svc = NULL;
	if (atomic_dec_and_test(&svc->refcnt)) {
		IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
			      svc->fwmark,
			      IP_VS_DBG_ADDR(svc->af, &svc->addr),
			      ntohs(svc->port), atomic_read(&svc->usecnt));
		free_percpu(svc->stats.cpustats);
		kfree(svc);
	}
}
480 * Returns hash value for real service
482 static inline unsigned ip_vs_rs_hashkey(int af,
483 const union nf_inet_addr *addr,
484 __be16 port)
486 register unsigned porth = ntohs(port);
487 __be32 addr_fold = addr->ip;
489 #ifdef CONFIG_IP_VS_IPV6
490 if (af == AF_INET6)
491 addr_fold = addr->ip6[0]^addr->ip6[1]^
492 addr->ip6[2]^addr->ip6[3];
493 #endif
495 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
496 & IP_VS_RTAB_MASK;
/*
 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
 * should be called with locked tables.
 * Returns 0 if the dest is already hashed (d_list non-empty),
 * 1 on success.
 */
static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
{
	unsigned hash;

	if (!list_empty(&dest->d_list)) {
		return 0;
	}

	/*
	 * Hash by proto,addr,port,
	 * which are the parameters of the real service.
	 */
	hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);

	list_add(&dest->d_list, &ipvs->rs_table[hash]);

	return 1;
}
/*
 * UNhashes ip_vs_dest from rs_table.
 * should be called with locked tables.
 * Always returns 1; re-initializes d_list so the dest can be re-hashed.
 */
static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
{
	/*
	 * Remove it from the rs_table table.
	 */
	if (!list_empty(&dest->d_list)) {
		list_del(&dest->d_list);
		INIT_LIST_HEAD(&dest->d_list);
	}

	return 1;
}
/*
 * Lookup real service by <proto,addr,port> in the real service table.
 * No reference is taken on the returned dest; the rs_lock is only
 * held for the duration of the walk.
 */
struct ip_vs_dest *
ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
			  const union nf_inet_addr *daddr,
			  __be16 dport)
{
	struct netns_ipvs *ipvs = net_ipvs(net);
	unsigned hash;
	struct ip_vs_dest *dest;

	/*
	 * Check for "full" addressed entries
	 * Return the first found entry
	 */
	hash = ip_vs_rs_hashkey(af, daddr, dport);

	read_lock(&ipvs->rs_lock);
	list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
		if ((dest->af == af)
		    && ip_vs_addr_equal(af, &dest->addr, daddr)
		    && (dest->port == dport)
		    && ((dest->protocol == protocol) ||
			dest->vfwmark)) {
			/* HIT */
			read_unlock(&ipvs->rs_lock);
			return dest;
		}
	}
	read_unlock(&ipvs->rs_lock);

	return NULL;
}
/*
 * Lookup destination by {addr,port} in the given service.
 * Walks svc->destinations; no reference is taken on the result.
 */
static struct ip_vs_dest *
ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
		  __be16 dport)
{
	struct ip_vs_dest *dest;

	/*
	 * Find the destination for the given service
	 */
	list_for_each_entry(dest, &svc->destinations, n_list) {
		if ((dest->af == svc->af)
		    && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
		    && (dest->port == dport)) {
			/* HIT */
			return dest;
		}
	}

	return NULL;
}
/*
 * Find destination by {daddr,dport,vaddr,protocol}
 * Created to be used in ip_vs_process_message() in
 * the backup synchronization daemon. It finds the
 * destination to be bound to the received connection
 * on the backup.
 *
 * ip_vs_lookup_real_service() looked promising, but
 * seems not working as expected.
 *
 * On a hit, dest->refcnt is incremented; the caller must drop it.
 */
struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
				   const union nf_inet_addr *daddr,
				   __be16 dport,
				   const union nf_inet_addr *vaddr,
				   __be16 vport, __u16 protocol, __u32 fwmark)
{
	struct ip_vs_dest *dest;
	struct ip_vs_service *svc;

	svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
	if (!svc)
		return NULL;
	dest = ip_vs_lookup_dest(svc, daddr, dport);
	if (dest)
		atomic_inc(&dest->refcnt);
	ip_vs_service_put(svc);
	return dest;
}
/*
 * Lookup dest by {svc,addr,port} in the destination trash.
 * The destination trash is used to hold the destinations that are removed
 * from the service table but are still referenced by some conn entries.
 * The reason to add the destination trash is when the dest is temporary
 * down (either by administrator or by monitor program), the dest can be
 * picked back from the trash, the remaining connections to the dest can
 * continue, and the counting information of the dest is also useful for
 * scheduling.
 *
 * While scanning, also frees trash entries whose refcnt dropped to 1
 * (no conn references left).
 */
static struct ip_vs_dest *
ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
		     __be16 dport)
{
	struct ip_vs_dest *dest, *nxt;
	struct netns_ipvs *ipvs = net_ipvs(svc->net);

	/*
	 * Find the destination in trash
	 */
	list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
		IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
			      "dest->refcnt=%d\n",
			      dest->vfwmark,
			      IP_VS_DBG_ADDR(svc->af, &dest->addr),
			      ntohs(dest->port),
			      atomic_read(&dest->refcnt));
		if (dest->af == svc->af &&
		    ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
		    dest->port == dport &&
		    dest->vfwmark == svc->fwmark &&
		    dest->protocol == svc->protocol &&
		    (svc->fwmark ||
		     (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
		      dest->vport == svc->port))) {
			/* HIT */
			return dest;
		}

		/*
		 * Try to purge the destination from trash if not referenced
		 */
		if (atomic_read(&dest->refcnt) == 1) {
			IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
				      "from trash\n",
				      dest->vfwmark,
				      IP_VS_DBG_ADDR(svc->af, &dest->addr),
				      ntohs(dest->port));
			list_del(&dest->n_list);
			ip_vs_dst_reset(dest);
			__ip_vs_unbind_svc(dest);
			free_percpu(dest->stats.cpustats);
			kfree(dest);
		}
	}

	return NULL;
}
/*
 * Clean up all the destinations in the trash
 * Called by the ip_vs_control_cleanup()
 *
 * When the ip_vs_control_clearup is activated by ipvs module exit,
 * the service tables must have been flushed and all the connections
 * are expired, and the refcnt of each destination in the trash must
 * be 1, so we simply release them here.
 */
static void ip_vs_trash_cleanup(struct net *net)
{
	struct ip_vs_dest *dest, *nxt;
	struct netns_ipvs *ipvs = net_ipvs(net);

	list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
		list_del(&dest->n_list);
		ip_vs_dst_reset(dest);
		__ip_vs_unbind_svc(dest);
		free_percpu(dest->stats.cpustats);
		kfree(dest);
	}
}
/* Reset the user-visible counters and the rate estimator of @stats,
 * under the stats lock. */
static void
ip_vs_zero_stats(struct ip_vs_stats *stats)
{
	spin_lock_bh(&stats->lock);

	memset(&stats->ustats, 0, sizeof(stats->ustats));
	ip_vs_zero_estimator(stats);

	spin_unlock_bh(&stats->lock);
}
/*
 * Update a destination in the given service.
 * Applies the user-supplied weight, forwarding method, thresholds and
 * (when @add is set) links the dest into the service list while all
 * other service users are excluded via __ip_vs_svc_lock.
 */
static void
__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
		    struct ip_vs_dest_user_kern *udest, int add)
{
	struct netns_ipvs *ipvs = net_ipvs(svc->net);
	int conn_flags;

	/* set the weight and the flags */
	atomic_set(&dest->weight, udest->weight);
	conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
	conn_flags |= IP_VS_CONN_F_INACTIVE;

	/* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
	if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
		conn_flags |= IP_VS_CONN_F_NOOUTPUT;
	} else {
		/*
		 * Put the real service in rs_table if not present.
		 * For now only for NAT!
		 */
		write_lock_bh(&ipvs->rs_lock);
		ip_vs_rs_hash(ipvs, dest);
		write_unlock_bh(&ipvs->rs_lock);
	}
	atomic_set(&dest->conn_flags, conn_flags);

	/* bind the service */
	if (!dest->svc) {
		__ip_vs_bind_svc(dest, svc);
	} else {
		if (dest->svc != svc) {
			/* dest moved to another service: rebind and
			 * restart its statistics */
			__ip_vs_unbind_svc(dest);
			ip_vs_zero_stats(&dest->stats);
			__ip_vs_bind_svc(dest, svc);
		}
	}

	/* set the dest status flags */
	dest->flags |= IP_VS_DEST_F_AVAILABLE;

	/* u_threshold == 0 disables the upper limit, so clear OVERLOAD */
	if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
		dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
	dest->u_threshold = udest->u_threshold;
	dest->l_threshold = udest->l_threshold;

	/* invalidate any cached route to the dest */
	spin_lock_bh(&dest->dst_lock);
	ip_vs_dst_reset(dest);
	spin_unlock_bh(&dest->dst_lock);

	if (add)
		ip_vs_new_estimator(svc->net, &dest->stats);

	write_lock_bh(&__ip_vs_svc_lock);

	/* Wait until all other svc users go away */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);

	if (add) {
		list_add(&dest->n_list, &svc->destinations);
		svc->num_dests++;
	}

	/* call the update_service, because server weight may be changed */
	if (svc->scheduler->update_service)
		svc->scheduler->update_service(svc);

	write_unlock_bh(&__ip_vs_svc_lock);
}
/*
 * Create a destination for the given service.
 * Validates that the address is usable (unicast/local), allocates and
 * initializes the dest, and links it into the service via
 * __ip_vs_update_dest().  Returns 0 and stores the new dest in
 * *dest_p on success, -EINVAL or -ENOMEM on failure.
 */
static int
ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
	       struct ip_vs_dest **dest_p)
{
	struct ip_vs_dest *dest;
	unsigned atype;

	EnterFunction(2);

#ifdef CONFIG_IP_VS_IPV6
	if (svc->af == AF_INET6) {
		atype = ipv6_addr_type(&udest->addr.in6);
		if ((!(atype & IPV6_ADDR_UNICAST) ||
		     atype & IPV6_ADDR_LINKLOCAL) &&
		    !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
			return -EINVAL;
	} else
#endif
	{
		atype = inet_addr_type(svc->net, udest->addr.ip);
		if (atype != RTN_LOCAL && atype != RTN_UNICAST)
			return -EINVAL;
	}

	dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
	if (dest == NULL) {
		pr_err("%s(): no memory.\n", __func__);
		return -ENOMEM;
	}
	dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
	if (!dest->stats.cpustats) {
		pr_err("%s() alloc_percpu failed\n", __func__);
		goto err_alloc;
	}

	dest->af = svc->af;
	dest->protocol = svc->protocol;
	dest->vaddr = svc->addr;
	dest->vport = svc->port;
	dest->vfwmark = svc->fwmark;
	ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
	dest->port = udest->port;

	atomic_set(&dest->activeconns, 0);
	atomic_set(&dest->inactconns, 0);
	atomic_set(&dest->persistconns, 0);
	/* one reference for the caller / service table */
	atomic_set(&dest->refcnt, 1);

	INIT_LIST_HEAD(&dest->d_list);
	spin_lock_init(&dest->dst_lock);
	spin_lock_init(&dest->stats.lock);
	__ip_vs_update_dest(svc, dest, udest, 1);

	*dest_p = dest;

	LeaveFunction(2);
	return 0;

  err_alloc:
	kfree(dest);
	return -ENOMEM;
}
/*
 * Add a destination into an existing service.
 * Validates user parameters, refuses duplicates, revives a matching
 * dest from the trash when possible, otherwise allocates a new one.
 * Returns 0 on success or a negative errno.
 */
static int
ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
	struct ip_vs_dest *dest;
	union nf_inet_addr daddr;
	__be16 dport = udest->port;
	int ret;

	EnterFunction(2);

	if (udest->weight < 0) {
		pr_err("%s(): server weight less than zero\n", __func__);
		return -ERANGE;
	}

	if (udest->l_threshold > udest->u_threshold) {
		pr_err("%s(): lower threshold is higher than upper threshold\n",
		       __func__);
		return -ERANGE;
	}

	ip_vs_addr_copy(svc->af, &daddr, &udest->addr);

	/*
	 * Check if the dest already exists in the list
	 */
	dest = ip_vs_lookup_dest(svc, &daddr, dport);

	if (dest != NULL) {
		IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
		return -EEXIST;
	}

	/*
	 * Check if the dest already exists in the trash and
	 * is from the same service
	 */
	dest = ip_vs_trash_get_dest(svc, &daddr, dport);

	if (dest != NULL) {
		IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
			      "dest->refcnt=%d, service %u/%s:%u\n",
			      IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
			      atomic_read(&dest->refcnt),
			      dest->vfwmark,
			      IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
			      ntohs(dest->vport));

		/*
		 * Get the destination from the trash
		 */
		list_del(&dest->n_list);

		__ip_vs_update_dest(svc, dest, udest, 1);
		ret = 0;
	} else {
		/*
		 * Allocate and initialize the dest structure
		 */
		ret = ip_vs_new_dest(svc, udest, &dest);
	}
	LeaveFunction(2);

	return ret;
}
/*
 * Edit a destination in the given service.
 * Validates user parameters and applies them to the existing dest.
 * Returns 0 on success, -ERANGE on bad parameters, -ENOENT if the
 * dest does not exist.
 */
static int
ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
	struct ip_vs_dest *dest;
	union nf_inet_addr daddr;
	__be16 dport = udest->port;

	EnterFunction(2);

	if (udest->weight < 0) {
		pr_err("%s(): server weight less than zero\n", __func__);
		return -ERANGE;
	}

	if (udest->l_threshold > udest->u_threshold) {
		pr_err("%s(): lower threshold is higher than upper threshold\n",
		       __func__);
		return -ERANGE;
	}

	ip_vs_addr_copy(svc->af, &daddr, &udest->addr);

	/*
	 * Lookup the destination list
	 */
	dest = ip_vs_lookup_dest(svc, &daddr, dport);

	if (dest == NULL) {
		IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
		return -ENOENT;
	}

	__ip_vs_update_dest(svc, dest, udest, 0);
	LeaveFunction(2);

	return 0;
}
/*
 * Delete a destination (must be already unlinked from the service).
 * Frees the dest when this was the last reference, otherwise moves it
 * to the trash so lingering connections can finish.
 */
static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
{
	struct netns_ipvs *ipvs = net_ipvs(net);

	ip_vs_kill_estimator(net, &dest->stats);

	/*
	 * Remove it from the d-linked list with the real services.
	 */
	write_lock_bh(&ipvs->rs_lock);
	ip_vs_rs_unhash(dest);
	write_unlock_bh(&ipvs->rs_lock);

	/*
	 * Decrease the refcnt of the dest, and free the dest
	 * if nobody refers to it (refcnt=0). Otherwise, throw
	 * the destination into the trash.
	 */
	if (atomic_dec_and_test(&dest->refcnt)) {
		IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
			      dest->vfwmark,
			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
			      ntohs(dest->port));
		ip_vs_dst_reset(dest);
		/* simply decrease svc->refcnt here, let the caller check
		   and release the service if nobody refers to it.
		   Only user context can release destination and service,
		   and only one user context can update virtual service at a
		   time, so the operation here is OK */
		atomic_dec(&dest->svc->refcnt);
		free_percpu(dest->stats.cpustats);
		kfree(dest);
	} else {
		IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
			      "dest->refcnt=%d\n",
			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
			      ntohs(dest->port),
			      atomic_read(&dest->refcnt));
		list_add(&dest->n_list, &ipvs->dest_trash);
		/* the trash list itself holds a reference */
		atomic_inc(&dest->refcnt);
	}
}
/*
 * Unlink a destination from the given service.
 * Marks the dest unavailable and notifies the scheduler when
 * @svcupd is set.
 */
static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
				struct ip_vs_dest *dest,
				int svcupd)
{
	dest->flags &= ~IP_VS_DEST_F_AVAILABLE;

	/*
	 * Remove it from the d-linked destination list.
	 */
	list_del(&dest->n_list);
	svc->num_dests--;

	/*
	 * Call the update_service function of its scheduler
	 */
	if (svcupd && svc->scheduler->update_service)
		svc->scheduler->update_service(svc);
}
/*
 * Delete a destination server in the given service.
 * Returns 0 on success, -ENOENT if the dest is not found.
 */
static int
ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
	struct ip_vs_dest *dest;
	__be16 dport = udest->port;

	EnterFunction(2);

	dest = ip_vs_lookup_dest(svc, &udest->addr, dport);

	if (dest == NULL) {
		IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
		return -ENOENT;
	}

	write_lock_bh(&__ip_vs_svc_lock);

	/*
	 * Wait until all other svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);

	/*
	 * Unlink dest from the service
	 */
	__ip_vs_unlink_dest(svc, dest, 1);

	write_unlock_bh(&__ip_vs_svc_lock);

	/*
	 * Delete the destination
	 */
	__ip_vs_del_dest(svc->net, dest);

	LeaveFunction(2);

	return 0;
}
1088 * Add a service into the service hash table
1090 static int
1091 ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1092 struct ip_vs_service **svc_p)
1094 int ret = 0;
1095 struct ip_vs_scheduler *sched = NULL;
1096 struct ip_vs_pe *pe = NULL;
1097 struct ip_vs_service *svc = NULL;
1098 struct netns_ipvs *ipvs = net_ipvs(net);
1100 /* increase the module use count */
1101 ip_vs_use_count_inc();
1103 /* Lookup the scheduler by 'u->sched_name' */
1104 sched = ip_vs_scheduler_get(u->sched_name);
1105 if (sched == NULL) {
1106 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1107 ret = -ENOENT;
1108 goto out_err;
1111 if (u->pe_name && *u->pe_name) {
1112 pe = ip_vs_pe_getbyname(u->pe_name);
1113 if (pe == NULL) {
1114 pr_info("persistence engine module ip_vs_pe_%s "
1115 "not found\n", u->pe_name);
1116 ret = -ENOENT;
1117 goto out_err;
1121 #ifdef CONFIG_IP_VS_IPV6
1122 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1123 ret = -EINVAL;
1124 goto out_err;
1126 #endif
1128 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1129 if (svc == NULL) {
1130 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1131 ret = -ENOMEM;
1132 goto out_err;
1134 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1135 if (!svc->stats.cpustats) {
1136 pr_err("%s() alloc_percpu failed\n", __func__);
1137 goto out_err;
1140 /* I'm the first user of the service */
1141 atomic_set(&svc->usecnt, 0);
1142 atomic_set(&svc->refcnt, 0);
1144 svc->af = u->af;
1145 svc->protocol = u->protocol;
1146 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1147 svc->port = u->port;
1148 svc->fwmark = u->fwmark;
1149 svc->flags = u->flags;
1150 svc->timeout = u->timeout * HZ;
1151 svc->netmask = u->netmask;
1152 svc->net = net;
1154 INIT_LIST_HEAD(&svc->destinations);
1155 rwlock_init(&svc->sched_lock);
1156 spin_lock_init(&svc->stats.lock);
1158 /* Bind the scheduler */
1159 ret = ip_vs_bind_scheduler(svc, sched);
1160 if (ret)
1161 goto out_err;
1162 sched = NULL;
1164 /* Bind the ct retriever */
1165 ip_vs_bind_pe(svc, pe);
1166 pe = NULL;
1168 /* Update the virtual service counters */
1169 if (svc->port == FTPPORT)
1170 atomic_inc(&ipvs->ftpsvc_counter);
1171 else if (svc->port == 0)
1172 atomic_inc(&ipvs->nullsvc_counter);
1174 ip_vs_new_estimator(net, &svc->stats);
1176 /* Count only IPv4 services for old get/setsockopt interface */
1177 if (svc->af == AF_INET)
1178 ipvs->num_services++;
1180 /* Hash the service into the service table */
1181 write_lock_bh(&__ip_vs_svc_lock);
1182 ip_vs_svc_hash(svc);
1183 write_unlock_bh(&__ip_vs_svc_lock);
1185 *svc_p = svc;
1186 return 0;
1189 out_err:
1190 if (svc != NULL) {
1191 ip_vs_unbind_scheduler(svc);
1192 if (svc->inc) {
1193 local_bh_disable();
1194 ip_vs_app_inc_put(svc->inc);
1195 local_bh_enable();
1197 if (svc->stats.cpustats)
1198 free_percpu(svc->stats.cpustats);
1199 kfree(svc);
1201 ip_vs_scheduler_put(sched);
1202 ip_vs_pe_put(pe);
1204 /* decrease the module use count */
1205 ip_vs_use_count_dec();
1207 return ret;
/*
 * Edit a service and bind it with a new scheduler.
 * Updates flags/timeout/netmask and rebinds the scheduler and
 * persistence engine if they changed.  Returns 0 on success or a
 * negative errno.
 */
static int
ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
{
	struct ip_vs_scheduler *sched, *old_sched;
	struct ip_vs_pe *pe = NULL, *old_pe = NULL;
	int ret = 0;

	/*
	 * Lookup the scheduler, by 'u->sched_name'
	 */
	sched = ip_vs_scheduler_get(u->sched_name);
	if (sched == NULL) {
		pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
		return -ENOENT;
	}
	/* old_sched points at whatever must be released on exit */
	old_sched = sched;

	if (u->pe_name && *u->pe_name) {
		pe = ip_vs_pe_getbyname(u->pe_name);
		if (pe == NULL) {
			pr_info("persistence engine module ip_vs_pe_%s "
				"not found\n", u->pe_name);
			ret = -ENOENT;
			goto out;
		}
		old_pe = pe;
	}

#ifdef CONFIG_IP_VS_IPV6
	if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
		ret = -EINVAL;
		goto out;
	}
#endif

	write_lock_bh(&__ip_vs_svc_lock);

	/*
	 * Wait until all other svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);

	/*
	 * Set the flags and timeout value
	 */
	svc->flags = u->flags | IP_VS_SVC_F_HASHED;
	svc->timeout = u->timeout * HZ;
	svc->netmask = u->netmask;

	old_sched = svc->scheduler;
	if (sched != old_sched) {
		/*
		 * Unbind the old scheduler
		 */
		if ((ret = ip_vs_unbind_scheduler(svc))) {
			old_sched = sched;
			goto out_unlock;
		}

		/*
		 * Bind the new scheduler
		 */
		if ((ret = ip_vs_bind_scheduler(svc, sched))) {
			/*
			 * If ip_vs_bind_scheduler fails, restore the old
			 * scheduler.
			 * The main reason of failure is out of memory.
			 *
			 * The question is if the old scheduler can be
			 * restored all the time. TODO: if it cannot be
			 * restored some time, we must delete the service,
			 * otherwise the system may crash.
			 */
			ip_vs_bind_scheduler(svc, old_sched);
			old_sched = sched;
			goto out_unlock;
		}
	}

	old_pe = svc->pe;
	if (pe != old_pe) {
		ip_vs_unbind_pe(svc);
		ip_vs_bind_pe(svc, pe);
	}

 out_unlock:
	write_unlock_bh(&__ip_vs_svc_lock);
 out:
	ip_vs_scheduler_put(old_sched);
	ip_vs_pe_put(old_pe);
	return ret;
}
1309 * Delete a service from the service list
1310 * - The service must be unlinked, unlocked and not referenced!
1311 * - We are called under _bh lock
1313 static void __ip_vs_del_service(struct ip_vs_service *svc)
1315 struct ip_vs_dest *dest, *nxt;
1316 struct ip_vs_scheduler *old_sched;
1317 struct ip_vs_pe *old_pe;
1318 struct netns_ipvs *ipvs = net_ipvs(svc->net);
1320 pr_info("%s: enter\n", __func__);
1322 /* Count only IPv4 services for old get/setsockopt interface */
1323 if (svc->af == AF_INET)
1324 ipvs->num_services--;
1326 ip_vs_kill_estimator(svc->net, &svc->stats);
1328 /* Unbind scheduler */
1329 old_sched = svc->scheduler;
1330 ip_vs_unbind_scheduler(svc);
1331 ip_vs_scheduler_put(old_sched);
1333 /* Unbind persistence engine */
1334 old_pe = svc->pe;
1335 ip_vs_unbind_pe(svc);
1336 ip_vs_pe_put(old_pe);
1338 /* Unbind app inc */
1339 if (svc->inc) {
1340 ip_vs_app_inc_put(svc->inc);
1341 svc->inc = NULL;
1345 * Unlink the whole destination list
1347 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1348 __ip_vs_unlink_dest(svc, dest, 0);
1349 __ip_vs_del_dest(svc->net, dest);
1353 * Update the virtual service counters
1355 if (svc->port == FTPPORT)
1356 atomic_dec(&ipvs->ftpsvc_counter);
1357 else if (svc->port == 0)
1358 atomic_dec(&ipvs->nullsvc_counter);
1361 * Free the service if nobody refers to it
1363 if (atomic_read(&svc->refcnt) == 0) {
1364 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1365 svc->fwmark,
1366 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1367 ntohs(svc->port), atomic_read(&svc->usecnt));
1368 free_percpu(svc->stats.cpustats);
1369 kfree(svc);
1372 /* decrease the module use count */
1373 ip_vs_use_count_dec();
1377 * Unlink a service from list and try to delete it if its refcnt reached 0
1379 static void ip_vs_unlink_service(struct ip_vs_service *svc)
1382 * Unhash it from the service table
1384 write_lock_bh(&__ip_vs_svc_lock);
1386 ip_vs_svc_unhash(svc);
1389 * Wait until all the svc users go away.
1391 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1393 __ip_vs_del_service(svc);
1395 write_unlock_bh(&__ip_vs_svc_lock);
1399 * Delete a service from the service list
1401 static int ip_vs_del_service(struct ip_vs_service *svc)
1403 if (svc == NULL)
1404 return -EEXIST;
1405 ip_vs_unlink_service(svc);
1407 return 0;
1412 * Flush all the virtual services
1414 static int ip_vs_flush(struct net *net)
1416 int idx;
1417 struct ip_vs_service *svc, *nxt;
1420 * Flush the service table hashed by <netns,protocol,addr,port>
1422 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1423 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1424 s_list) {
1425 if (net_eq(svc->net, net))
1426 ip_vs_unlink_service(svc);
1431 * Flush the service table hashed by fwmark
1433 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1434 list_for_each_entry_safe(svc, nxt,
1435 &ip_vs_svc_fwm_table[idx], f_list) {
1436 if (net_eq(svc->net, net))
1437 ip_vs_unlink_service(svc);
1441 return 0;
1446 * Zero counters in a service or all services
1448 static int ip_vs_zero_service(struct ip_vs_service *svc)
1450 struct ip_vs_dest *dest;
1452 write_lock_bh(&__ip_vs_svc_lock);
1453 list_for_each_entry(dest, &svc->destinations, n_list) {
1454 ip_vs_zero_stats(&dest->stats);
1456 ip_vs_zero_stats(&svc->stats);
1457 write_unlock_bh(&__ip_vs_svc_lock);
1458 return 0;
1461 static int ip_vs_zero_all(struct net *net)
1463 int idx;
1464 struct ip_vs_service *svc;
1466 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1467 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1468 if (net_eq(svc->net, net))
1469 ip_vs_zero_service(svc);
1473 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1474 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1475 if (net_eq(svc->net, net))
1476 ip_vs_zero_service(svc);
1480 ip_vs_zero_stats(net_ipvs(net)->tot_stats);
1481 return 0;
1485 static int
1486 proc_do_defense_mode(ctl_table *table, int write,
1487 void __user *buffer, size_t *lenp, loff_t *ppos)
1489 struct net *net = current->nsproxy->net_ns;
1490 int *valp = table->data;
1491 int val = *valp;
1492 int rc;
1494 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1495 if (write && (*valp != val)) {
1496 if ((*valp < 0) || (*valp > 3)) {
1497 /* Restore the correct value */
1498 *valp = val;
1499 } else {
1500 update_defense_level(net_ipvs(net));
1503 return rc;
1507 static int
1508 proc_do_sync_threshold(ctl_table *table, int write,
1509 void __user *buffer, size_t *lenp, loff_t *ppos)
1511 int *valp = table->data;
1512 int val[2];
1513 int rc;
1515 /* backup the value first */
1516 memcpy(val, valp, sizeof(val));
1518 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1519 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1520 /* Restore the correct value */
1521 memcpy(valp, val, sizeof(val));
1523 return rc;
1526 static int
1527 proc_do_sync_mode(ctl_table *table, int write,
1528 void __user *buffer, size_t *lenp, loff_t *ppos)
1530 int *valp = table->data;
1531 int val = *valp;
1532 int rc;
1534 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1535 if (write && (*valp != val)) {
1536 if ((*valp < 0) || (*valp > 1)) {
1537 /* Restore the correct value */
1538 *valp = val;
1539 } else {
1540 struct net *net = current->nsproxy->net_ns;
1541 ip_vs_sync_switch_mode(net, val);
1544 return rc;
1548 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1549 * Do not change order or insert new entries without
1550 * align with netns init in __ip_vs_control_init()
1553 static struct ctl_table vs_vars[] = {
1555 .procname = "amemthresh",
1556 .maxlen = sizeof(int),
1557 .mode = 0644,
1558 .proc_handler = proc_dointvec,
1561 .procname = "am_droprate",
1562 .maxlen = sizeof(int),
1563 .mode = 0644,
1564 .proc_handler = proc_dointvec,
1567 .procname = "drop_entry",
1568 .maxlen = sizeof(int),
1569 .mode = 0644,
1570 .proc_handler = proc_do_defense_mode,
1573 .procname = "drop_packet",
1574 .maxlen = sizeof(int),
1575 .mode = 0644,
1576 .proc_handler = proc_do_defense_mode,
1578 #ifdef CONFIG_IP_VS_NFCT
1580 .procname = "conntrack",
1581 .maxlen = sizeof(int),
1582 .mode = 0644,
1583 .proc_handler = &proc_dointvec,
1585 #endif
1587 .procname = "secure_tcp",
1588 .maxlen = sizeof(int),
1589 .mode = 0644,
1590 .proc_handler = proc_do_defense_mode,
1593 .procname = "snat_reroute",
1594 .maxlen = sizeof(int),
1595 .mode = 0644,
1596 .proc_handler = &proc_dointvec,
1599 .procname = "sync_version",
1600 .maxlen = sizeof(int),
1601 .mode = 0644,
1602 .proc_handler = &proc_do_sync_mode,
1605 .procname = "cache_bypass",
1606 .maxlen = sizeof(int),
1607 .mode = 0644,
1608 .proc_handler = proc_dointvec,
1611 .procname = "expire_nodest_conn",
1612 .maxlen = sizeof(int),
1613 .mode = 0644,
1614 .proc_handler = proc_dointvec,
1617 .procname = "expire_quiescent_template",
1618 .maxlen = sizeof(int),
1619 .mode = 0644,
1620 .proc_handler = proc_dointvec,
1623 .procname = "sync_threshold",
1624 .maxlen =
1625 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1626 .mode = 0644,
1627 .proc_handler = proc_do_sync_threshold,
1630 .procname = "nat_icmp_send",
1631 .maxlen = sizeof(int),
1632 .mode = 0644,
1633 .proc_handler = proc_dointvec,
1635 #ifdef CONFIG_IP_VS_DEBUG
1637 .procname = "debug_level",
1638 .data = &sysctl_ip_vs_debug_level,
1639 .maxlen = sizeof(int),
1640 .mode = 0644,
1641 .proc_handler = proc_dointvec,
1643 #endif
1644 #if 0
1646 .procname = "timeout_established",
1647 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1648 .maxlen = sizeof(int),
1649 .mode = 0644,
1650 .proc_handler = proc_dointvec_jiffies,
1653 .procname = "timeout_synsent",
1654 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1655 .maxlen = sizeof(int),
1656 .mode = 0644,
1657 .proc_handler = proc_dointvec_jiffies,
1660 .procname = "timeout_synrecv",
1661 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1662 .maxlen = sizeof(int),
1663 .mode = 0644,
1664 .proc_handler = proc_dointvec_jiffies,
1667 .procname = "timeout_finwait",
1668 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1669 .maxlen = sizeof(int),
1670 .mode = 0644,
1671 .proc_handler = proc_dointvec_jiffies,
1674 .procname = "timeout_timewait",
1675 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1676 .maxlen = sizeof(int),
1677 .mode = 0644,
1678 .proc_handler = proc_dointvec_jiffies,
1681 .procname = "timeout_close",
1682 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1683 .maxlen = sizeof(int),
1684 .mode = 0644,
1685 .proc_handler = proc_dointvec_jiffies,
1688 .procname = "timeout_closewait",
1689 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1690 .maxlen = sizeof(int),
1691 .mode = 0644,
1692 .proc_handler = proc_dointvec_jiffies,
1695 .procname = "timeout_lastack",
1696 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1697 .maxlen = sizeof(int),
1698 .mode = 0644,
1699 .proc_handler = proc_dointvec_jiffies,
1702 .procname = "timeout_listen",
1703 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1704 .maxlen = sizeof(int),
1705 .mode = 0644,
1706 .proc_handler = proc_dointvec_jiffies,
1709 .procname = "timeout_synack",
1710 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1711 .maxlen = sizeof(int),
1712 .mode = 0644,
1713 .proc_handler = proc_dointvec_jiffies,
1716 .procname = "timeout_udp",
1717 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1718 .maxlen = sizeof(int),
1719 .mode = 0644,
1720 .proc_handler = proc_dointvec_jiffies,
1723 .procname = "timeout_icmp",
1724 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1725 .maxlen = sizeof(int),
1726 .mode = 0644,
1727 .proc_handler = proc_dointvec_jiffies,
1729 #endif
1733 const struct ctl_path net_vs_ctl_path[] = {
1734 { .procname = "net", },
1735 { .procname = "ipv4", },
1736 { .procname = "vs", },
1739 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1741 #ifdef CONFIG_PROC_FS
1743 struct ip_vs_iter {
1744 struct seq_net_private p; /* Do not move this, netns depends upon it*/
1745 struct list_head *table;
1746 int bucket;
1750 * Write the contents of the VS rule table to a PROCfs file.
1751 * (It is kept just for backward compatibility)
1753 static inline const char *ip_vs_fwd_name(unsigned flags)
1755 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1756 case IP_VS_CONN_F_LOCALNODE:
1757 return "Local";
1758 case IP_VS_CONN_F_TUNNEL:
1759 return "Tunnel";
1760 case IP_VS_CONN_F_DROUTE:
1761 return "Route";
1762 default:
1763 return "Masq";
1768 /* Get the Nth entry in the two lists */
1769 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1771 struct net *net = seq_file_net(seq);
1772 struct ip_vs_iter *iter = seq->private;
1773 int idx;
1774 struct ip_vs_service *svc;
1776 /* look in hash by protocol */
1777 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1778 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1779 if (net_eq(svc->net, net) && pos-- == 0) {
1780 iter->table = ip_vs_svc_table;
1781 iter->bucket = idx;
1782 return svc;
1787 /* keep looking in fwmark */
1788 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1789 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1790 if (net_eq(svc->net, net) && pos-- == 0) {
1791 iter->table = ip_vs_svc_fwm_table;
1792 iter->bucket = idx;
1793 return svc;
1798 return NULL;
1801 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1802 __acquires(__ip_vs_svc_lock)
1805 read_lock_bh(&__ip_vs_svc_lock);
1806 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1810 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1812 struct list_head *e;
1813 struct ip_vs_iter *iter;
1814 struct ip_vs_service *svc;
1816 ++*pos;
1817 if (v == SEQ_START_TOKEN)
1818 return ip_vs_info_array(seq,0);
1820 svc = v;
1821 iter = seq->private;
1823 if (iter->table == ip_vs_svc_table) {
1824 /* next service in table hashed by protocol */
1825 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1826 return list_entry(e, struct ip_vs_service, s_list);
1829 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1830 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1831 s_list) {
1832 return svc;
1836 iter->table = ip_vs_svc_fwm_table;
1837 iter->bucket = -1;
1838 goto scan_fwmark;
1841 /* next service in hashed by fwmark */
1842 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1843 return list_entry(e, struct ip_vs_service, f_list);
1845 scan_fwmark:
1846 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1847 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1848 f_list)
1849 return svc;
1852 return NULL;
1855 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1856 __releases(__ip_vs_svc_lock)
1858 read_unlock_bh(&__ip_vs_svc_lock);
1862 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1864 if (v == SEQ_START_TOKEN) {
1865 seq_printf(seq,
1866 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1867 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1868 seq_puts(seq,
1869 "Prot LocalAddress:Port Scheduler Flags\n");
1870 seq_puts(seq,
1871 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1872 } else {
1873 const struct ip_vs_service *svc = v;
1874 const struct ip_vs_iter *iter = seq->private;
1875 const struct ip_vs_dest *dest;
1877 if (iter->table == ip_vs_svc_table) {
1878 #ifdef CONFIG_IP_VS_IPV6
1879 if (svc->af == AF_INET6)
1880 seq_printf(seq, "%s [%pI6]:%04X %s ",
1881 ip_vs_proto_name(svc->protocol),
1882 &svc->addr.in6,
1883 ntohs(svc->port),
1884 svc->scheduler->name);
1885 else
1886 #endif
1887 seq_printf(seq, "%s %08X:%04X %s %s ",
1888 ip_vs_proto_name(svc->protocol),
1889 ntohl(svc->addr.ip),
1890 ntohs(svc->port),
1891 svc->scheduler->name,
1892 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1893 } else {
1894 seq_printf(seq, "FWM %08X %s %s",
1895 svc->fwmark, svc->scheduler->name,
1896 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1899 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1900 seq_printf(seq, "persistent %d %08X\n",
1901 svc->timeout,
1902 ntohl(svc->netmask));
1903 else
1904 seq_putc(seq, '\n');
1906 list_for_each_entry(dest, &svc->destinations, n_list) {
1907 #ifdef CONFIG_IP_VS_IPV6
1908 if (dest->af == AF_INET6)
1909 seq_printf(seq,
1910 " -> [%pI6]:%04X"
1911 " %-7s %-6d %-10d %-10d\n",
1912 &dest->addr.in6,
1913 ntohs(dest->port),
1914 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1915 atomic_read(&dest->weight),
1916 atomic_read(&dest->activeconns),
1917 atomic_read(&dest->inactconns));
1918 else
1919 #endif
1920 seq_printf(seq,
1921 " -> %08X:%04X "
1922 "%-7s %-6d %-10d %-10d\n",
1923 ntohl(dest->addr.ip),
1924 ntohs(dest->port),
1925 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1926 atomic_read(&dest->weight),
1927 atomic_read(&dest->activeconns),
1928 atomic_read(&dest->inactconns));
1932 return 0;
1935 static const struct seq_operations ip_vs_info_seq_ops = {
1936 .start = ip_vs_info_seq_start,
1937 .next = ip_vs_info_seq_next,
1938 .stop = ip_vs_info_seq_stop,
1939 .show = ip_vs_info_seq_show,
1942 static int ip_vs_info_open(struct inode *inode, struct file *file)
1944 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
1945 sizeof(struct ip_vs_iter));
1948 static const struct file_operations ip_vs_info_fops = {
1949 .owner = THIS_MODULE,
1950 .open = ip_vs_info_open,
1951 .read = seq_read,
1952 .llseek = seq_lseek,
1953 .release = seq_release_private,
1956 #endif
1958 #ifdef CONFIG_PROC_FS
1959 static int ip_vs_stats_show(struct seq_file *seq, void *v)
1961 struct net *net = seq_file_single_net(seq);
1962 struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
1964 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1965 seq_puts(seq,
1966 " Total Incoming Outgoing Incoming Outgoing\n");
1967 seq_printf(seq,
1968 " Conns Packets Packets Bytes Bytes\n");
1970 spin_lock_bh(&tot_stats->lock);
1971 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns,
1972 tot_stats->ustats.inpkts, tot_stats->ustats.outpkts,
1973 (unsigned long long) tot_stats->ustats.inbytes,
1974 (unsigned long long) tot_stats->ustats.outbytes);
1976 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1977 seq_puts(seq,
1978 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1979 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1980 tot_stats->ustats.cps,
1981 tot_stats->ustats.inpps,
1982 tot_stats->ustats.outpps,
1983 tot_stats->ustats.inbps,
1984 tot_stats->ustats.outbps);
1985 spin_unlock_bh(&tot_stats->lock);
1987 return 0;
1990 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1992 return single_open_net(inode, file, ip_vs_stats_show);
1995 static const struct file_operations ip_vs_stats_fops = {
1996 .owner = THIS_MODULE,
1997 .open = ip_vs_stats_seq_open,
1998 .read = seq_read,
1999 .llseek = seq_lseek,
2000 .release = single_release,
2003 static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2005 struct net *net = seq_file_single_net(seq);
2006 struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
2007 int i;
2009 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2010 seq_puts(seq,
2011 " Total Incoming Outgoing Incoming Outgoing\n");
2012 seq_printf(seq,
2013 "CPU Conns Packets Packets Bytes Bytes\n");
2015 for_each_possible_cpu(i) {
2016 struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i);
2017 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2018 i, u->ustats.conns, u->ustats.inpkts,
2019 u->ustats.outpkts, (__u64)u->ustats.inbytes,
2020 (__u64)u->ustats.outbytes);
2023 spin_lock_bh(&tot_stats->lock);
2024 seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
2025 tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2026 tot_stats->ustats.outpkts,
2027 (unsigned long long) tot_stats->ustats.inbytes,
2028 (unsigned long long) tot_stats->ustats.outbytes);
2030 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2031 seq_puts(seq,
2032 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2033 seq_printf(seq, " %8X %8X %8X %16X %16X\n",
2034 tot_stats->ustats.cps,
2035 tot_stats->ustats.inpps,
2036 tot_stats->ustats.outpps,
2037 tot_stats->ustats.inbps,
2038 tot_stats->ustats.outbps);
2039 spin_unlock_bh(&tot_stats->lock);
2041 return 0;
2044 static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2046 return single_open_net(inode, file, ip_vs_stats_percpu_show);
2049 static const struct file_operations ip_vs_stats_percpu_fops = {
2050 .owner = THIS_MODULE,
2051 .open = ip_vs_stats_percpu_seq_open,
2052 .read = seq_read,
2053 .llseek = seq_lseek,
2054 .release = single_release,
2056 #endif
2059 * Set timeout values for tcp tcpfin udp in the timeout_table.
2061 static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
2063 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2064 struct ip_vs_proto_data *pd;
2065 #endif
2067 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2068 u->tcp_timeout,
2069 u->tcp_fin_timeout,
2070 u->udp_timeout);
2072 #ifdef CONFIG_IP_VS_PROTO_TCP
2073 if (u->tcp_timeout) {
2074 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2075 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
2076 = u->tcp_timeout * HZ;
2079 if (u->tcp_fin_timeout) {
2080 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2081 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
2082 = u->tcp_fin_timeout * HZ;
2084 #endif
2086 #ifdef CONFIG_IP_VS_PROTO_UDP
2087 if (u->udp_timeout) {
2088 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2089 pd->timeout_table[IP_VS_UDP_S_NORMAL]
2090 = u->udp_timeout * HZ;
2092 #endif
2093 return 0;
2097 #define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2098 #define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
2099 #define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
2100 sizeof(struct ip_vs_dest_user))
2101 #define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2102 #define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
2103 #define MAX_ARG_LEN SVCDEST_ARG_LEN
2105 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2106 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2107 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2108 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2109 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2110 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2111 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2112 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2113 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2114 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2115 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2116 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2119 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2120 struct ip_vs_service_user *usvc_compat)
2122 memset(usvc, 0, sizeof(*usvc));
2124 usvc->af = AF_INET;
2125 usvc->protocol = usvc_compat->protocol;
2126 usvc->addr.ip = usvc_compat->addr;
2127 usvc->port = usvc_compat->port;
2128 usvc->fwmark = usvc_compat->fwmark;
2130 /* Deep copy of sched_name is not needed here */
2131 usvc->sched_name = usvc_compat->sched_name;
2133 usvc->flags = usvc_compat->flags;
2134 usvc->timeout = usvc_compat->timeout;
2135 usvc->netmask = usvc_compat->netmask;
2138 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2139 struct ip_vs_dest_user *udest_compat)
2141 memset(udest, 0, sizeof(*udest));
2143 udest->addr.ip = udest_compat->addr;
2144 udest->port = udest_compat->port;
2145 udest->conn_flags = udest_compat->conn_flags;
2146 udest->weight = udest_compat->weight;
2147 udest->u_threshold = udest_compat->u_threshold;
2148 udest->l_threshold = udest_compat->l_threshold;
2151 static int
2152 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2154 struct net *net = sock_net(sk);
2155 int ret;
2156 unsigned char arg[MAX_ARG_LEN];
2157 struct ip_vs_service_user *usvc_compat;
2158 struct ip_vs_service_user_kern usvc;
2159 struct ip_vs_service *svc;
2160 struct ip_vs_dest_user *udest_compat;
2161 struct ip_vs_dest_user_kern udest;
2163 if (!capable(CAP_NET_ADMIN))
2164 return -EPERM;
2166 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2167 return -EINVAL;
2168 if (len < 0 || len > MAX_ARG_LEN)
2169 return -EINVAL;
2170 if (len != set_arglen[SET_CMDID(cmd)]) {
2171 pr_err("set_ctl: len %u != %u\n",
2172 len, set_arglen[SET_CMDID(cmd)]);
2173 return -EINVAL;
2176 if (copy_from_user(arg, user, len) != 0)
2177 return -EFAULT;
2179 /* increase the module use count */
2180 ip_vs_use_count_inc();
2182 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2183 ret = -ERESTARTSYS;
2184 goto out_dec;
2187 if (cmd == IP_VS_SO_SET_FLUSH) {
2188 /* Flush the virtual service */
2189 ret = ip_vs_flush(net);
2190 goto out_unlock;
2191 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2192 /* Set timeout values for (tcp tcpfin udp) */
2193 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
2194 goto out_unlock;
2195 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2196 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2197 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2198 dm->syncid);
2199 goto out_unlock;
2200 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2201 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2202 ret = stop_sync_thread(net, dm->state);
2203 goto out_unlock;
2206 usvc_compat = (struct ip_vs_service_user *)arg;
2207 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2209 /* We only use the new structs internally, so copy userspace compat
2210 * structs to extended internal versions */
2211 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2212 ip_vs_copy_udest_compat(&udest, udest_compat);
2214 if (cmd == IP_VS_SO_SET_ZERO) {
2215 /* if no service address is set, zero counters in all */
2216 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2217 ret = ip_vs_zero_all(net);
2218 goto out_unlock;
2222 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2223 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2224 usvc.protocol != IPPROTO_SCTP) {
2225 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2226 usvc.protocol, &usvc.addr.ip,
2227 ntohs(usvc.port), usvc.sched_name);
2228 ret = -EFAULT;
2229 goto out_unlock;
2232 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2233 if (usvc.fwmark == 0)
2234 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
2235 &usvc.addr, usvc.port);
2236 else
2237 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
2239 if (cmd != IP_VS_SO_SET_ADD
2240 && (svc == NULL || svc->protocol != usvc.protocol)) {
2241 ret = -ESRCH;
2242 goto out_unlock;
2245 switch (cmd) {
2246 case IP_VS_SO_SET_ADD:
2247 if (svc != NULL)
2248 ret = -EEXIST;
2249 else
2250 ret = ip_vs_add_service(net, &usvc, &svc);
2251 break;
2252 case IP_VS_SO_SET_EDIT:
2253 ret = ip_vs_edit_service(svc, &usvc);
2254 break;
2255 case IP_VS_SO_SET_DEL:
2256 ret = ip_vs_del_service(svc);
2257 if (!ret)
2258 goto out_unlock;
2259 break;
2260 case IP_VS_SO_SET_ZERO:
2261 ret = ip_vs_zero_service(svc);
2262 break;
2263 case IP_VS_SO_SET_ADDDEST:
2264 ret = ip_vs_add_dest(svc, &udest);
2265 break;
2266 case IP_VS_SO_SET_EDITDEST:
2267 ret = ip_vs_edit_dest(svc, &udest);
2268 break;
2269 case IP_VS_SO_SET_DELDEST:
2270 ret = ip_vs_del_dest(svc, &udest);
2271 break;
2272 default:
2273 ret = -EINVAL;
2276 out_unlock:
2277 mutex_unlock(&__ip_vs_mutex);
2278 out_dec:
2279 /* decrease the module use count */
2280 ip_vs_use_count_dec();
2282 return ret;
2286 static void
2287 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2289 spin_lock_bh(&src->lock);
2290 memcpy(dst, &src->ustats, sizeof(*dst));
2291 spin_unlock_bh(&src->lock);
2294 static void
2295 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2297 dst->protocol = src->protocol;
2298 dst->addr = src->addr.ip;
2299 dst->port = src->port;
2300 dst->fwmark = src->fwmark;
2301 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2302 dst->flags = src->flags;
2303 dst->timeout = src->timeout / HZ;
2304 dst->netmask = src->netmask;
2305 dst->num_dests = src->num_dests;
2306 ip_vs_copy_stats(&dst->stats, &src->stats);
2309 static inline int
2310 __ip_vs_get_service_entries(struct net *net,
2311 const struct ip_vs_get_services *get,
2312 struct ip_vs_get_services __user *uptr)
2314 int idx, count=0;
2315 struct ip_vs_service *svc;
2316 struct ip_vs_service_entry entry;
2317 int ret = 0;
2319 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2320 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2321 /* Only expose IPv4 entries to old interface */
2322 if (svc->af != AF_INET || !net_eq(svc->net, net))
2323 continue;
2325 if (count >= get->num_services)
2326 goto out;
2327 memset(&entry, 0, sizeof(entry));
2328 ip_vs_copy_service(&entry, svc);
2329 if (copy_to_user(&uptr->entrytable[count],
2330 &entry, sizeof(entry))) {
2331 ret = -EFAULT;
2332 goto out;
2334 count++;
2338 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2339 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2340 /* Only expose IPv4 entries to old interface */
2341 if (svc->af != AF_INET || !net_eq(svc->net, net))
2342 continue;
2344 if (count >= get->num_services)
2345 goto out;
2346 memset(&entry, 0, sizeof(entry));
2347 ip_vs_copy_service(&entry, svc);
2348 if (copy_to_user(&uptr->entrytable[count],
2349 &entry, sizeof(entry))) {
2350 ret = -EFAULT;
2351 goto out;
2353 count++;
2356 out:
2357 return ret;
2360 static inline int
2361 __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
2362 struct ip_vs_get_dests __user *uptr)
2364 struct ip_vs_service *svc;
2365 union nf_inet_addr addr = { .ip = get->addr };
2366 int ret = 0;
2368 if (get->fwmark)
2369 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
2370 else
2371 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
2372 get->port);
2374 if (svc) {
2375 int count = 0;
2376 struct ip_vs_dest *dest;
2377 struct ip_vs_dest_entry entry;
2379 list_for_each_entry(dest, &svc->destinations, n_list) {
2380 if (count >= get->num_dests)
2381 break;
2383 entry.addr = dest->addr.ip;
2384 entry.port = dest->port;
2385 entry.conn_flags = atomic_read(&dest->conn_flags);
2386 entry.weight = atomic_read(&dest->weight);
2387 entry.u_threshold = dest->u_threshold;
2388 entry.l_threshold = dest->l_threshold;
2389 entry.activeconns = atomic_read(&dest->activeconns);
2390 entry.inactconns = atomic_read(&dest->inactconns);
2391 entry.persistconns = atomic_read(&dest->persistconns);
2392 ip_vs_copy_stats(&entry.stats, &dest->stats);
2393 if (copy_to_user(&uptr->entrytable[count],
2394 &entry, sizeof(entry))) {
2395 ret = -EFAULT;
2396 break;
2398 count++;
2400 } else
2401 ret = -ESRCH;
2402 return ret;
/* Report the current tcp/tcpfin/udp timeouts (in seconds) for the
 * protocols compiled in; missing protocols leave their fields as the
 * caller initialized them. */
static inline void
__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
{
#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
	struct ip_vs_proto_data *pd;
#endif

#ifdef CONFIG_IP_VS_PROTO_TCP
	pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
	u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
	u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
	pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
	u->udp_timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
#endif
}
2425 #define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2426 #define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2427 #define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2428 #define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2429 #define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2430 #define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2431 #define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2433 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2434 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2435 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2436 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2437 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2438 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2439 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2440 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2443 static int
2444 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2446 unsigned char arg[128];
2447 int ret = 0;
2448 unsigned int copylen;
2449 struct net *net = sock_net(sk);
2450 struct netns_ipvs *ipvs = net_ipvs(net);
2452 BUG_ON(!net);
2453 if (!capable(CAP_NET_ADMIN))
2454 return -EPERM;
2456 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2457 return -EINVAL;
2459 if (*len < get_arglen[GET_CMDID(cmd)]) {
2460 pr_err("get_ctl: len %u < %u\n",
2461 *len, get_arglen[GET_CMDID(cmd)]);
2462 return -EINVAL;
2465 copylen = get_arglen[GET_CMDID(cmd)];
2466 if (copylen > 128)
2467 return -EINVAL;
2469 if (copy_from_user(arg, user, copylen) != 0)
2470 return -EFAULT;
2472 if (mutex_lock_interruptible(&__ip_vs_mutex))
2473 return -ERESTARTSYS;
2475 switch (cmd) {
2476 case IP_VS_SO_GET_VERSION:
2478 char buf[64];
2480 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2481 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2482 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2483 ret = -EFAULT;
2484 goto out;
2486 *len = strlen(buf)+1;
2488 break;
2490 case IP_VS_SO_GET_INFO:
2492 struct ip_vs_getinfo info;
2493 info.version = IP_VS_VERSION_CODE;
2494 info.size = ip_vs_conn_tab_size;
2495 info.num_services = ipvs->num_services;
2496 if (copy_to_user(user, &info, sizeof(info)) != 0)
2497 ret = -EFAULT;
2499 break;
2501 case IP_VS_SO_GET_SERVICES:
2503 struct ip_vs_get_services *get;
2504 int size;
2506 get = (struct ip_vs_get_services *)arg;
2507 size = sizeof(*get) +
2508 sizeof(struct ip_vs_service_entry) * get->num_services;
2509 if (*len != size) {
2510 pr_err("length: %u != %u\n", *len, size);
2511 ret = -EINVAL;
2512 goto out;
2514 ret = __ip_vs_get_service_entries(net, get, user);
2516 break;
2518 case IP_VS_SO_GET_SERVICE:
2520 struct ip_vs_service_entry *entry;
2521 struct ip_vs_service *svc;
2522 union nf_inet_addr addr;
2524 entry = (struct ip_vs_service_entry *)arg;
2525 addr.ip = entry->addr;
2526 if (entry->fwmark)
2527 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
2528 else
2529 svc = __ip_vs_service_find(net, AF_INET,
2530 entry->protocol, &addr,
2531 entry->port);
2532 if (svc) {
2533 ip_vs_copy_service(entry, svc);
2534 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2535 ret = -EFAULT;
2536 } else
2537 ret = -ESRCH;
2539 break;
2541 case IP_VS_SO_GET_DESTS:
2543 struct ip_vs_get_dests *get;
2544 int size;
2546 get = (struct ip_vs_get_dests *)arg;
2547 size = sizeof(*get) +
2548 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2549 if (*len != size) {
2550 pr_err("length: %u != %u\n", *len, size);
2551 ret = -EINVAL;
2552 goto out;
2554 ret = __ip_vs_get_dest_entries(net, get, user);
2556 break;
2558 case IP_VS_SO_GET_TIMEOUT:
2560 struct ip_vs_timeout_user t;
2562 __ip_vs_get_timeouts(net, &t);
2563 if (copy_to_user(user, &t, sizeof(t)) != 0)
2564 ret = -EFAULT;
2566 break;
2568 case IP_VS_SO_GET_DAEMON:
2570 struct ip_vs_daemon_user d[2];
2572 memset(&d, 0, sizeof(d));
2573 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2574 d[0].state = IP_VS_STATE_MASTER;
2575 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2576 sizeof(d[0].mcast_ifn));
2577 d[0].syncid = ipvs->master_syncid;
2579 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2580 d[1].state = IP_VS_STATE_BACKUP;
2581 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2582 sizeof(d[1].mcast_ifn));
2583 d[1].syncid = ipvs->backup_syncid;
2585 if (copy_to_user(user, &d, sizeof(d)) != 0)
2586 ret = -EFAULT;
2588 break;
2590 default:
2591 ret = -EINVAL;
2594 out:
2595 mutex_unlock(&__ip_vs_mutex);
2596 return ret;
2600 static struct nf_sockopt_ops ip_vs_sockopts = {
2601 .pf = PF_INET,
2602 .set_optmin = IP_VS_BASE_CTL,
2603 .set_optmax = IP_VS_SO_SET_MAX+1,
2604 .set = do_ip_vs_set_ctl,
2605 .get_optmin = IP_VS_BASE_CTL,
2606 .get_optmax = IP_VS_SO_GET_MAX+1,
2607 .get = do_ip_vs_get_ctl,
2608 .owner = THIS_MODULE,
2612 * Generic Netlink interface
2615 /* IPVS genetlink family */
2616 static struct genl_family ip_vs_genl_family = {
2617 .id = GENL_ID_GENERATE,
2618 .hdrsize = 0,
2619 .name = IPVS_GENL_NAME,
2620 .version = IPVS_GENL_VERSION,
2621 .maxattr = IPVS_CMD_MAX,
2622 .netnsok = true, /* Make ipvsadm to work on netns */
2625 /* Policy used for first-level command attributes */
2626 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2627 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2628 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2629 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2630 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2631 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2632 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2635 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2636 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2637 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2638 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2639 .len = IP_VS_IFNAME_MAXLEN },
2640 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2643 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2644 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2645 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2646 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2647 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2648 .len = sizeof(union nf_inet_addr) },
2649 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2650 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2651 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2652 .len = IP_VS_SCHEDNAME_MAXLEN },
2653 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2654 .len = IP_VS_PENAME_MAXLEN },
2655 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2656 .len = sizeof(struct ip_vs_flags) },
2657 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2658 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2659 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2662 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2663 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2664 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2665 .len = sizeof(union nf_inet_addr) },
2666 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2667 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2668 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2669 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2670 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2671 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2672 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2673 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2674 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2677 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2678 struct ip_vs_stats *stats)
2680 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2681 if (!nl_stats)
2682 return -EMSGSIZE;
2684 spin_lock_bh(&stats->lock);
2686 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2687 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2688 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2689 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2690 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2691 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2692 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2693 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2694 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2695 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
2697 spin_unlock_bh(&stats->lock);
2699 nla_nest_end(skb, nl_stats);
2701 return 0;
2703 nla_put_failure:
2704 spin_unlock_bh(&stats->lock);
2705 nla_nest_cancel(skb, nl_stats);
2706 return -EMSGSIZE;
2709 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2710 struct ip_vs_service *svc)
2712 struct nlattr *nl_service;
2713 struct ip_vs_flags flags = { .flags = svc->flags,
2714 .mask = ~0 };
2716 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2717 if (!nl_service)
2718 return -EMSGSIZE;
2720 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
2722 if (svc->fwmark) {
2723 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2724 } else {
2725 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2726 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2727 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2730 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2731 if (svc->pe)
2732 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
2733 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2734 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2735 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2737 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2738 goto nla_put_failure;
2740 nla_nest_end(skb, nl_service);
2742 return 0;
2744 nla_put_failure:
2745 nla_nest_cancel(skb, nl_service);
2746 return -EMSGSIZE;
2749 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2750 struct ip_vs_service *svc,
2751 struct netlink_callback *cb)
2753 void *hdr;
2755 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2756 &ip_vs_genl_family, NLM_F_MULTI,
2757 IPVS_CMD_NEW_SERVICE);
2758 if (!hdr)
2759 return -EMSGSIZE;
2761 if (ip_vs_genl_fill_service(skb, svc) < 0)
2762 goto nla_put_failure;
2764 return genlmsg_end(skb, hdr);
2766 nla_put_failure:
2767 genlmsg_cancel(skb, hdr);
2768 return -EMSGSIZE;
2771 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2772 struct netlink_callback *cb)
2774 int idx = 0, i;
2775 int start = cb->args[0];
2776 struct ip_vs_service *svc;
2777 struct net *net = skb_sknet(skb);
2779 mutex_lock(&__ip_vs_mutex);
2780 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2781 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2782 if (++idx <= start || !net_eq(svc->net, net))
2783 continue;
2784 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2785 idx--;
2786 goto nla_put_failure;
2791 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2792 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2793 if (++idx <= start || !net_eq(svc->net, net))
2794 continue;
2795 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2796 idx--;
2797 goto nla_put_failure;
2802 nla_put_failure:
2803 mutex_unlock(&__ip_vs_mutex);
2804 cb->args[0] = idx;
2806 return skb->len;
2809 static int ip_vs_genl_parse_service(struct net *net,
2810 struct ip_vs_service_user_kern *usvc,
2811 struct nlattr *nla, int full_entry,
2812 struct ip_vs_service **ret_svc)
2814 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2815 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2816 struct ip_vs_service *svc;
2818 /* Parse mandatory identifying service fields first */
2819 if (nla == NULL ||
2820 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2821 return -EINVAL;
2823 nla_af = attrs[IPVS_SVC_ATTR_AF];
2824 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2825 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2826 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2827 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2829 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2830 return -EINVAL;
2832 memset(usvc, 0, sizeof(*usvc));
2834 usvc->af = nla_get_u16(nla_af);
2835 #ifdef CONFIG_IP_VS_IPV6
2836 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2837 #else
2838 if (usvc->af != AF_INET)
2839 #endif
2840 return -EAFNOSUPPORT;
2842 if (nla_fwmark) {
2843 usvc->protocol = IPPROTO_TCP;
2844 usvc->fwmark = nla_get_u32(nla_fwmark);
2845 } else {
2846 usvc->protocol = nla_get_u16(nla_protocol);
2847 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2848 usvc->port = nla_get_u16(nla_port);
2849 usvc->fwmark = 0;
2852 if (usvc->fwmark)
2853 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
2854 else
2855 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
2856 &usvc->addr, usvc->port);
2857 *ret_svc = svc;
2859 /* If a full entry was requested, check for the additional fields */
2860 if (full_entry) {
2861 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
2862 *nla_netmask;
2863 struct ip_vs_flags flags;
2865 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2866 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
2867 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2868 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2869 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2871 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2872 return -EINVAL;
2874 nla_memcpy(&flags, nla_flags, sizeof(flags));
2876 /* prefill flags from service if it already exists */
2877 if (svc)
2878 usvc->flags = svc->flags;
2880 /* set new flags from userland */
2881 usvc->flags = (usvc->flags & ~flags.mask) |
2882 (flags.flags & flags.mask);
2883 usvc->sched_name = nla_data(nla_sched);
2884 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
2885 usvc->timeout = nla_get_u32(nla_timeout);
2886 usvc->netmask = nla_get_u32(nla_netmask);
2889 return 0;
2892 static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
2893 struct nlattr *nla)
2895 struct ip_vs_service_user_kern usvc;
2896 struct ip_vs_service *svc;
2897 int ret;
2899 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
2900 return ret ? ERR_PTR(ret) : svc;
2903 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2905 struct nlattr *nl_dest;
2907 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2908 if (!nl_dest)
2909 return -EMSGSIZE;
2911 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2912 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2914 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2915 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2916 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2917 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2918 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2919 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2920 atomic_read(&dest->activeconns));
2921 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2922 atomic_read(&dest->inactconns));
2923 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2924 atomic_read(&dest->persistconns));
2926 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2927 goto nla_put_failure;
2929 nla_nest_end(skb, nl_dest);
2931 return 0;
2933 nla_put_failure:
2934 nla_nest_cancel(skb, nl_dest);
2935 return -EMSGSIZE;
2938 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2939 struct netlink_callback *cb)
2941 void *hdr;
2943 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2944 &ip_vs_genl_family, NLM_F_MULTI,
2945 IPVS_CMD_NEW_DEST);
2946 if (!hdr)
2947 return -EMSGSIZE;
2949 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2950 goto nla_put_failure;
2952 return genlmsg_end(skb, hdr);
2954 nla_put_failure:
2955 genlmsg_cancel(skb, hdr);
2956 return -EMSGSIZE;
2959 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2960 struct netlink_callback *cb)
2962 int idx = 0;
2963 int start = cb->args[0];
2964 struct ip_vs_service *svc;
2965 struct ip_vs_dest *dest;
2966 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2967 struct net *net = skb_sknet(skb);
2969 mutex_lock(&__ip_vs_mutex);
2971 /* Try to find the service for which to dump destinations */
2972 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2973 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2974 goto out_err;
2977 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
2978 if (IS_ERR(svc) || svc == NULL)
2979 goto out_err;
2981 /* Dump the destinations */
2982 list_for_each_entry(dest, &svc->destinations, n_list) {
2983 if (++idx <= start)
2984 continue;
2985 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2986 idx--;
2987 goto nla_put_failure;
2991 nla_put_failure:
2992 cb->args[0] = idx;
2994 out_err:
2995 mutex_unlock(&__ip_vs_mutex);
2997 return skb->len;
3000 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
3001 struct nlattr *nla, int full_entry)
3003 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3004 struct nlattr *nla_addr, *nla_port;
3006 /* Parse mandatory identifying destination fields first */
3007 if (nla == NULL ||
3008 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3009 return -EINVAL;
3011 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
3012 nla_port = attrs[IPVS_DEST_ATTR_PORT];
3014 if (!(nla_addr && nla_port))
3015 return -EINVAL;
3017 memset(udest, 0, sizeof(*udest));
3019 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3020 udest->port = nla_get_u16(nla_port);
3022 /* If a full entry was requested, check for the additional fields */
3023 if (full_entry) {
3024 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3025 *nla_l_thresh;
3027 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3028 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
3029 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
3030 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3032 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3033 return -EINVAL;
3035 udest->conn_flags = nla_get_u32(nla_fwd)
3036 & IP_VS_CONN_F_FWD_MASK;
3037 udest->weight = nla_get_u32(nla_weight);
3038 udest->u_threshold = nla_get_u32(nla_u_thresh);
3039 udest->l_threshold = nla_get_u32(nla_l_thresh);
3042 return 0;
3045 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3046 const char *mcast_ifn, __be32 syncid)
3048 struct nlattr *nl_daemon;
3050 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3051 if (!nl_daemon)
3052 return -EMSGSIZE;
3054 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3055 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3056 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3058 nla_nest_end(skb, nl_daemon);
3060 return 0;
3062 nla_put_failure:
3063 nla_nest_cancel(skb, nl_daemon);
3064 return -EMSGSIZE;
3067 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3068 const char *mcast_ifn, __be32 syncid,
3069 struct netlink_callback *cb)
3071 void *hdr;
3072 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3073 &ip_vs_genl_family, NLM_F_MULTI,
3074 IPVS_CMD_NEW_DAEMON);
3075 if (!hdr)
3076 return -EMSGSIZE;
3078 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3079 goto nla_put_failure;
3081 return genlmsg_end(skb, hdr);
3083 nla_put_failure:
3084 genlmsg_cancel(skb, hdr);
3085 return -EMSGSIZE;
3088 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3089 struct netlink_callback *cb)
3091 struct net *net = skb_net(skb);
3092 struct netns_ipvs *ipvs = net_ipvs(net);
3094 mutex_lock(&__ip_vs_mutex);
3095 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3096 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3097 ipvs->master_mcast_ifn,
3098 ipvs->master_syncid, cb) < 0)
3099 goto nla_put_failure;
3101 cb->args[0] = 1;
3104 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3105 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3106 ipvs->backup_mcast_ifn,
3107 ipvs->backup_syncid, cb) < 0)
3108 goto nla_put_failure;
3110 cb->args[1] = 1;
3113 nla_put_failure:
3114 mutex_unlock(&__ip_vs_mutex);
3116 return skb->len;
3119 static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
3121 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3122 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3123 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3124 return -EINVAL;
3126 return start_sync_thread(net,
3127 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3128 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3129 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3132 static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
3134 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3135 return -EINVAL;
3137 return stop_sync_thread(net,
3138 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3141 static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
3143 struct ip_vs_timeout_user t;
3145 __ip_vs_get_timeouts(net, &t);
3147 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3148 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3150 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3151 t.tcp_fin_timeout =
3152 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3154 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3155 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3157 return ip_vs_set_timeout(net, &t);
3160 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3162 struct ip_vs_service *svc = NULL;
3163 struct ip_vs_service_user_kern usvc;
3164 struct ip_vs_dest_user_kern udest;
3165 int ret = 0, cmd;
3166 int need_full_svc = 0, need_full_dest = 0;
3167 struct net *net;
3168 struct netns_ipvs *ipvs;
3170 net = skb_sknet(skb);
3171 ipvs = net_ipvs(net);
3172 cmd = info->genlhdr->cmd;
3174 mutex_lock(&__ip_vs_mutex);
3176 if (cmd == IPVS_CMD_FLUSH) {
3177 ret = ip_vs_flush(net);
3178 goto out;
3179 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3180 ret = ip_vs_genl_set_config(net, info->attrs);
3181 goto out;
3182 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3183 cmd == IPVS_CMD_DEL_DAEMON) {
3185 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3187 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3188 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3189 info->attrs[IPVS_CMD_ATTR_DAEMON],
3190 ip_vs_daemon_policy)) {
3191 ret = -EINVAL;
3192 goto out;
3195 if (cmd == IPVS_CMD_NEW_DAEMON)
3196 ret = ip_vs_genl_new_daemon(net, daemon_attrs);
3197 else
3198 ret = ip_vs_genl_del_daemon(net, daemon_attrs);
3199 goto out;
3200 } else if (cmd == IPVS_CMD_ZERO &&
3201 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3202 ret = ip_vs_zero_all(net);
3203 goto out;
3206 /* All following commands require a service argument, so check if we
3207 * received a valid one. We need a full service specification when
3208 * adding / editing a service. Only identifying members otherwise. */
3209 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3210 need_full_svc = 1;
3212 ret = ip_vs_genl_parse_service(net, &usvc,
3213 info->attrs[IPVS_CMD_ATTR_SERVICE],
3214 need_full_svc, &svc);
3215 if (ret)
3216 goto out;
3218 /* Unless we're adding a new service, the service must already exist */
3219 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3220 ret = -ESRCH;
3221 goto out;
3224 /* Destination commands require a valid destination argument. For
3225 * adding / editing a destination, we need a full destination
3226 * specification. */
3227 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3228 cmd == IPVS_CMD_DEL_DEST) {
3229 if (cmd != IPVS_CMD_DEL_DEST)
3230 need_full_dest = 1;
3232 ret = ip_vs_genl_parse_dest(&udest,
3233 info->attrs[IPVS_CMD_ATTR_DEST],
3234 need_full_dest);
3235 if (ret)
3236 goto out;
3239 switch (cmd) {
3240 case IPVS_CMD_NEW_SERVICE:
3241 if (svc == NULL)
3242 ret = ip_vs_add_service(net, &usvc, &svc);
3243 else
3244 ret = -EEXIST;
3245 break;
3246 case IPVS_CMD_SET_SERVICE:
3247 ret = ip_vs_edit_service(svc, &usvc);
3248 break;
3249 case IPVS_CMD_DEL_SERVICE:
3250 ret = ip_vs_del_service(svc);
3251 /* do not use svc, it can be freed */
3252 break;
3253 case IPVS_CMD_NEW_DEST:
3254 ret = ip_vs_add_dest(svc, &udest);
3255 break;
3256 case IPVS_CMD_SET_DEST:
3257 ret = ip_vs_edit_dest(svc, &udest);
3258 break;
3259 case IPVS_CMD_DEL_DEST:
3260 ret = ip_vs_del_dest(svc, &udest);
3261 break;
3262 case IPVS_CMD_ZERO:
3263 ret = ip_vs_zero_service(svc);
3264 break;
3265 default:
3266 ret = -EINVAL;
3269 out:
3270 mutex_unlock(&__ip_vs_mutex);
3272 return ret;
3275 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3277 struct sk_buff *msg;
3278 void *reply;
3279 int ret, cmd, reply_cmd;
3280 struct net *net;
3281 struct netns_ipvs *ipvs;
3283 net = skb_sknet(skb);
3284 ipvs = net_ipvs(net);
3285 cmd = info->genlhdr->cmd;
3287 if (cmd == IPVS_CMD_GET_SERVICE)
3288 reply_cmd = IPVS_CMD_NEW_SERVICE;
3289 else if (cmd == IPVS_CMD_GET_INFO)
3290 reply_cmd = IPVS_CMD_SET_INFO;
3291 else if (cmd == IPVS_CMD_GET_CONFIG)
3292 reply_cmd = IPVS_CMD_SET_CONFIG;
3293 else {
3294 pr_err("unknown Generic Netlink command\n");
3295 return -EINVAL;
3298 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3299 if (!msg)
3300 return -ENOMEM;
3302 mutex_lock(&__ip_vs_mutex);
3304 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3305 if (reply == NULL)
3306 goto nla_put_failure;
3308 switch (cmd) {
3309 case IPVS_CMD_GET_SERVICE:
3311 struct ip_vs_service *svc;
3313 svc = ip_vs_genl_find_service(net,
3314 info->attrs[IPVS_CMD_ATTR_SERVICE]);
3315 if (IS_ERR(svc)) {
3316 ret = PTR_ERR(svc);
3317 goto out_err;
3318 } else if (svc) {
3319 ret = ip_vs_genl_fill_service(msg, svc);
3320 if (ret)
3321 goto nla_put_failure;
3322 } else {
3323 ret = -ESRCH;
3324 goto out_err;
3327 break;
3330 case IPVS_CMD_GET_CONFIG:
3332 struct ip_vs_timeout_user t;
3334 __ip_vs_get_timeouts(net, &t);
3335 #ifdef CONFIG_IP_VS_PROTO_TCP
3336 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3337 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3338 t.tcp_fin_timeout);
3339 #endif
3340 #ifdef CONFIG_IP_VS_PROTO_UDP
3341 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3342 #endif
3344 break;
3347 case IPVS_CMD_GET_INFO:
3348 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3349 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3350 ip_vs_conn_tab_size);
3351 break;
3354 genlmsg_end(msg, reply);
3355 ret = genlmsg_reply(msg, info);
3356 goto out;
3358 nla_put_failure:
3359 pr_err("not enough space in Netlink message\n");
3360 ret = -EMSGSIZE;
3362 out_err:
3363 nlmsg_free(msg);
3364 out:
3365 mutex_unlock(&__ip_vs_mutex);
3367 return ret;
3371 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3373 .cmd = IPVS_CMD_NEW_SERVICE,
3374 .flags = GENL_ADMIN_PERM,
3375 .policy = ip_vs_cmd_policy,
3376 .doit = ip_vs_genl_set_cmd,
3379 .cmd = IPVS_CMD_SET_SERVICE,
3380 .flags = GENL_ADMIN_PERM,
3381 .policy = ip_vs_cmd_policy,
3382 .doit = ip_vs_genl_set_cmd,
3385 .cmd = IPVS_CMD_DEL_SERVICE,
3386 .flags = GENL_ADMIN_PERM,
3387 .policy = ip_vs_cmd_policy,
3388 .doit = ip_vs_genl_set_cmd,
3391 .cmd = IPVS_CMD_GET_SERVICE,
3392 .flags = GENL_ADMIN_PERM,
3393 .doit = ip_vs_genl_get_cmd,
3394 .dumpit = ip_vs_genl_dump_services,
3395 .policy = ip_vs_cmd_policy,
3398 .cmd = IPVS_CMD_NEW_DEST,
3399 .flags = GENL_ADMIN_PERM,
3400 .policy = ip_vs_cmd_policy,
3401 .doit = ip_vs_genl_set_cmd,
3404 .cmd = IPVS_CMD_SET_DEST,
3405 .flags = GENL_ADMIN_PERM,
3406 .policy = ip_vs_cmd_policy,
3407 .doit = ip_vs_genl_set_cmd,
3410 .cmd = IPVS_CMD_DEL_DEST,
3411 .flags = GENL_ADMIN_PERM,
3412 .policy = ip_vs_cmd_policy,
3413 .doit = ip_vs_genl_set_cmd,
3416 .cmd = IPVS_CMD_GET_DEST,
3417 .flags = GENL_ADMIN_PERM,
3418 .policy = ip_vs_cmd_policy,
3419 .dumpit = ip_vs_genl_dump_dests,
3422 .cmd = IPVS_CMD_NEW_DAEMON,
3423 .flags = GENL_ADMIN_PERM,
3424 .policy = ip_vs_cmd_policy,
3425 .doit = ip_vs_genl_set_cmd,
3428 .cmd = IPVS_CMD_DEL_DAEMON,
3429 .flags = GENL_ADMIN_PERM,
3430 .policy = ip_vs_cmd_policy,
3431 .doit = ip_vs_genl_set_cmd,
3434 .cmd = IPVS_CMD_GET_DAEMON,
3435 .flags = GENL_ADMIN_PERM,
3436 .dumpit = ip_vs_genl_dump_daemons,
3439 .cmd = IPVS_CMD_SET_CONFIG,
3440 .flags = GENL_ADMIN_PERM,
3441 .policy = ip_vs_cmd_policy,
3442 .doit = ip_vs_genl_set_cmd,
3445 .cmd = IPVS_CMD_GET_CONFIG,
3446 .flags = GENL_ADMIN_PERM,
3447 .doit = ip_vs_genl_get_cmd,
3450 .cmd = IPVS_CMD_GET_INFO,
3451 .flags = GENL_ADMIN_PERM,
3452 .doit = ip_vs_genl_get_cmd,
3455 .cmd = IPVS_CMD_ZERO,
3456 .flags = GENL_ADMIN_PERM,
3457 .policy = ip_vs_cmd_policy,
3458 .doit = ip_vs_genl_set_cmd,
3461 .cmd = IPVS_CMD_FLUSH,
3462 .flags = GENL_ADMIN_PERM,
3463 .doit = ip_vs_genl_set_cmd,
3467 static int __init ip_vs_genl_register(void)
3469 return genl_register_family_with_ops(&ip_vs_genl_family,
3470 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3473 static void ip_vs_genl_unregister(void)
3475 genl_unregister_family(&ip_vs_genl_family);
3478 /* End of Generic Netlink interface definitions */
3481  * per netns init/exit func.
3483 int __net_init __ip_vs_control_init(struct net *net)
3485 int idx;
3486 struct netns_ipvs *ipvs = net_ipvs(net);
3487 struct ctl_table *tbl;
3489 atomic_set(&ipvs->dropentry, 0);
3490 spin_lock_init(&ipvs->dropentry_lock);
3491 spin_lock_init(&ipvs->droppacket_lock);
3492 spin_lock_init(&ipvs->securetcp_lock);
3493 ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
3495 /* Initialize rs_table */
3496 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3497 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3499 INIT_LIST_HEAD(&ipvs->dest_trash);
3500 atomic_set(&ipvs->ftpsvc_counter, 0);
3501 atomic_set(&ipvs->nullsvc_counter, 0);
3503 /* procfs stats */
3504 ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
3505 if (ipvs->tot_stats == NULL) {
3506 pr_err("%s(): no memory.\n", __func__);
3507 return -ENOMEM;
3509 ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3510 if (!ipvs->cpustats) {
3511 pr_err("%s() alloc_percpu failed\n", __func__);
3512 goto err_alloc;
3514 spin_lock_init(&ipvs->tot_stats->lock);
3516 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3517 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
3518 proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3519 &ip_vs_stats_percpu_fops);
3521 if (!net_eq(net, &init_net)) {
3522 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3523 if (tbl == NULL)
3524 goto err_dup;
3525 } else
3526 tbl = vs_vars;
3527 /* Initialize sysctl defaults */
3528 idx = 0;
3529 ipvs->sysctl_amemthresh = 1024;
3530 tbl[idx++].data = &ipvs->sysctl_amemthresh;
3531 ipvs->sysctl_am_droprate = 10;
3532 tbl[idx++].data = &ipvs->sysctl_am_droprate;
3533 tbl[idx++].data = &ipvs->sysctl_drop_entry;
3534 tbl[idx++].data = &ipvs->sysctl_drop_packet;
3535 #ifdef CONFIG_IP_VS_NFCT
3536 tbl[idx++].data = &ipvs->sysctl_conntrack;
3537 #endif
3538 tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3539 ipvs->sysctl_snat_reroute = 1;
3540 tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3541 ipvs->sysctl_sync_ver = 1;
3542 tbl[idx++].data = &ipvs->sysctl_sync_ver;
3543 tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3544 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3545 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3546 ipvs->sysctl_sync_threshold[0] = 3;
3547 ipvs->sysctl_sync_threshold[1] = 50;
3548 tbl[idx].data = &ipvs->sysctl_sync_threshold;
3549 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3550 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3553 #ifdef CONFIG_SYSCTL
3554 ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
3555 tbl);
3556 if (ipvs->sysctl_hdr == NULL) {
3557 if (!net_eq(net, &init_net))
3558 kfree(tbl);
3559 goto err_dup;
3561 #endif
3562 ip_vs_new_estimator(net, ipvs->tot_stats);
3563 ipvs->sysctl_tbl = tbl;
3564 /* Schedule defense work */
3565 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3566 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
3567 return 0;
3569 err_dup:
3570 free_percpu(ipvs->cpustats);
3571 err_alloc:
3572 kfree(ipvs->tot_stats);
3573 return -ENOMEM;
3576 static void __net_exit __ip_vs_control_cleanup(struct net *net)
3578 struct netns_ipvs *ipvs = net_ipvs(net);
3580 ip_vs_trash_cleanup(net);
3581 ip_vs_kill_estimator(net, ipvs->tot_stats);
3582 cancel_delayed_work_sync(&ipvs->defense_work);
3583 cancel_work_sync(&ipvs->defense_work.work);
3584 #ifdef CONFIG_SYSCTL
3585 unregister_net_sysctl_table(ipvs->sysctl_hdr);
3586 #endif
3587 proc_net_remove(net, "ip_vs_stats_percpu");
3588 proc_net_remove(net, "ip_vs_stats");
3589 proc_net_remove(net, "ip_vs");
3590 free_percpu(ipvs->cpustats);
3591 kfree(ipvs->tot_stats);
3594 static struct pernet_operations ipvs_control_ops = {
3595 .init = __ip_vs_control_init,
3596 .exit = __ip_vs_control_cleanup,
3599 int __init ip_vs_control_init(void)
3601 int idx;
3602 int ret;
3604 EnterFunction(2);
3606 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
3607 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3608 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3609 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3612 ret = register_pernet_subsys(&ipvs_control_ops);
3613 if (ret) {
3614 pr_err("cannot register namespace.\n");
3615 goto err;
3618 smp_wmb(); /* Do we really need it now ? */
3620 ret = nf_register_sockopt(&ip_vs_sockopts);
3621 if (ret) {
3622 pr_err("cannot register sockopt.\n");
3623 goto err_net;
3626 ret = ip_vs_genl_register();
3627 if (ret) {
3628 pr_err("cannot register Generic Netlink interface.\n");
3629 nf_unregister_sockopt(&ip_vs_sockopts);
3630 goto err_net;
3633 LeaveFunction(2);
3634 return 0;
3636 err_net:
3637 unregister_pernet_subsys(&ipvs_control_ops);
3638 err:
3639 return ret;
3643 void ip_vs_control_cleanup(void)
3645 EnterFunction(2);
3646 unregister_pernet_subsys(&ipvs_control_ops);
3647 ip_vs_genl_unregister();
3648 nf_unregister_sockopt(&ip_vs_sockopts);
3649 LeaveFunction(2);