/*
 * IPVS         An implementation of the IP virtual server support for the
 *              LINUX operating system.  IPVS is now implemented as a module
 *              over the NetFilter framework. IPVS can be used to build a
 *              high-performance and highly available server based on a
 *              cluster of servers.
 *
 * Version:     $Id: ip_vs_ctl.c,v 1.36 2003/06/08 09:31:19 wensong Exp $
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *              Peter Kese <peter.kese@ijs.si>
 *              Julian Anastasov <ja@ssi.bg>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
#include <linux/workqueue.h>
#include <linux/swap.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/mutex.h>

#include <net/route.h>

#include <asm/uaccess.h>

#include <net/ip_vs.h>

/* mutex serialising the IPVS sockopts; [gs]etsockopt may sleep. */
static DEFINE_MUTEX(__ip_vs_mutex);

/* lock for service table */
static DEFINE_RWLOCK(__ip_vs_svc_lock);

/* lock for table with the real services */
static DEFINE_RWLOCK(__ip_vs_rs_lock);

/* lock for state and timeout tables */
static DEFINE_RWLOCK(__ip_vs_securetcp_lock);

/* lock for drop entry handling */
static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);

/* lock for drop packet handling */
static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);

65 /* 1/rate drop and drop-entry variables */
66 int ip_vs_drop_rate
= 0;
67 int ip_vs_drop_counter
= 0;
68 static atomic_t ip_vs_dropentry
= ATOMIC_INIT(0);
70 /* number of virtual services */
71 static int ip_vs_num_services
= 0;
73 /* sysctl variables */
74 static int sysctl_ip_vs_drop_entry
= 0;
75 static int sysctl_ip_vs_drop_packet
= 0;
76 static int sysctl_ip_vs_secure_tcp
= 0;
77 static int sysctl_ip_vs_amemthresh
= 1024;
78 static int sysctl_ip_vs_am_droprate
= 10;
79 int sysctl_ip_vs_cache_bypass
= 0;
80 int sysctl_ip_vs_expire_nodest_conn
= 0;
81 int sysctl_ip_vs_expire_quiescent_template
= 0;
82 int sysctl_ip_vs_sync_threshold
[2] = { 3, 50 };
83 int sysctl_ip_vs_nat_icmp_send
= 0;
#ifdef CONFIG_IP_VS_DEBUG
static int sysctl_ip_vs_debug_level = 0;

/*
 *	Return the current value of the debug_level sysctl variable.
 *	Only built when CONFIG_IP_VS_DEBUG is defined.
 */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif

96 * update_defense_level is called from keventd and from sysctl,
97 * so it needs to protect itself from softirqs
99 static void update_defense_level(void)
102 static int old_secure_tcp
= 0;
107 /* we only count free and buffered memory (in pages) */
109 availmem
= i
.freeram
+ i
.bufferram
;
110 /* however in linux 2.5 the i.bufferram is total page cache size,
112 /* si_swapinfo(&i); */
113 /* availmem = availmem - (i.totalswap - i.freeswap); */
115 nomem
= (availmem
< sysctl_ip_vs_amemthresh
);
120 spin_lock(&__ip_vs_dropentry_lock
);
121 switch (sysctl_ip_vs_drop_entry
) {
123 atomic_set(&ip_vs_dropentry
, 0);
127 atomic_set(&ip_vs_dropentry
, 1);
128 sysctl_ip_vs_drop_entry
= 2;
130 atomic_set(&ip_vs_dropentry
, 0);
135 atomic_set(&ip_vs_dropentry
, 1);
137 atomic_set(&ip_vs_dropentry
, 0);
138 sysctl_ip_vs_drop_entry
= 1;
142 atomic_set(&ip_vs_dropentry
, 1);
145 spin_unlock(&__ip_vs_dropentry_lock
);
148 spin_lock(&__ip_vs_droppacket_lock
);
149 switch (sysctl_ip_vs_drop_packet
) {
155 ip_vs_drop_rate
= ip_vs_drop_counter
156 = sysctl_ip_vs_amemthresh
/
157 (sysctl_ip_vs_amemthresh
-availmem
);
158 sysctl_ip_vs_drop_packet
= 2;
165 ip_vs_drop_rate
= ip_vs_drop_counter
166 = sysctl_ip_vs_amemthresh
/
167 (sysctl_ip_vs_amemthresh
-availmem
);
170 sysctl_ip_vs_drop_packet
= 1;
174 ip_vs_drop_rate
= sysctl_ip_vs_am_droprate
;
177 spin_unlock(&__ip_vs_droppacket_lock
);
180 write_lock(&__ip_vs_securetcp_lock
);
181 switch (sysctl_ip_vs_secure_tcp
) {
183 if (old_secure_tcp
>= 2)
188 if (old_secure_tcp
< 2)
190 sysctl_ip_vs_secure_tcp
= 2;
192 if (old_secure_tcp
>= 2)
198 if (old_secure_tcp
< 2)
201 if (old_secure_tcp
>= 2)
203 sysctl_ip_vs_secure_tcp
= 1;
207 if (old_secure_tcp
< 2)
211 old_secure_tcp
= sysctl_ip_vs_secure_tcp
;
213 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp
>1);
214 write_unlock(&__ip_vs_securetcp_lock
);
221 * Timer for checking the defense
223 #define DEFENSE_TIMER_PERIOD 1*HZ
224 static void defense_work_handler(void *data
);
225 static DECLARE_WORK(defense_work
, defense_work_handler
, NULL
);
227 static void defense_work_handler(void *data
)
229 update_defense_level();
230 if (atomic_read(&ip_vs_dropentry
))
231 ip_vs_random_dropentry();
233 schedule_delayed_work(&defense_work
, DEFENSE_TIMER_PERIOD
);
237 ip_vs_use_count_inc(void)
239 return try_module_get(THIS_MODULE
);
243 ip_vs_use_count_dec(void)
245 module_put(THIS_MODULE
);
250 * Hash table: for virtual service lookups
252 #define IP_VS_SVC_TAB_BITS 8
253 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
254 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
256 /* the service table hashed by <protocol, addr, port> */
257 static struct list_head ip_vs_svc_table
[IP_VS_SVC_TAB_SIZE
];
258 /* the service table hashed by fwmark */
259 static struct list_head ip_vs_svc_fwm_table
[IP_VS_SVC_TAB_SIZE
];
262 * Hash table: for real service lookups
264 #define IP_VS_RTAB_BITS 4
265 #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
266 #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
268 static struct list_head ip_vs_rtable
[IP_VS_RTAB_SIZE
];
271 * Trash for destinations
273 static LIST_HEAD(ip_vs_dest_trash
);
276 * FTP & NULL virtual service counters
278 static atomic_t ip_vs_ftpsvc_counter
= ATOMIC_INIT(0);
279 static atomic_t ip_vs_nullsvc_counter
= ATOMIC_INIT(0);
283 * Returns hash value for virtual service
285 static __inline__
unsigned
286 ip_vs_svc_hashkey(unsigned proto
, __u32 addr
, __u16 port
)
288 register unsigned porth
= ntohs(port
);
290 return (proto
^ntohl(addr
)^(porth
>>IP_VS_SVC_TAB_BITS
)^porth
)
291 & IP_VS_SVC_TAB_MASK
;
295 * Returns hash value of fwmark for virtual service lookup
297 static __inline__
unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark
)
299 return fwmark
& IP_VS_SVC_TAB_MASK
;
303 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
304 * or in the ip_vs_svc_fwm_table by fwmark.
305 * Should be called with locked tables.
307 static int ip_vs_svc_hash(struct ip_vs_service
*svc
)
311 if (svc
->flags
& IP_VS_SVC_F_HASHED
) {
312 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
313 "called from %p\n", __builtin_return_address(0));
317 if (svc
->fwmark
== 0) {
319 * Hash it by <protocol,addr,port> in ip_vs_svc_table
321 hash
= ip_vs_svc_hashkey(svc
->protocol
, svc
->addr
, svc
->port
);
322 list_add(&svc
->s_list
, &ip_vs_svc_table
[hash
]);
325 * Hash it by fwmark in ip_vs_svc_fwm_table
327 hash
= ip_vs_svc_fwm_hashkey(svc
->fwmark
);
328 list_add(&svc
->f_list
, &ip_vs_svc_fwm_table
[hash
]);
331 svc
->flags
|= IP_VS_SVC_F_HASHED
;
332 /* increase its refcnt because it is referenced by the svc table */
333 atomic_inc(&svc
->refcnt
);
339 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
340 * Should be called with locked tables.
342 static int ip_vs_svc_unhash(struct ip_vs_service
*svc
)
344 if (!(svc
->flags
& IP_VS_SVC_F_HASHED
)) {
345 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
346 "called from %p\n", __builtin_return_address(0));
350 if (svc
->fwmark
== 0) {
351 /* Remove it from the ip_vs_svc_table table */
352 list_del(&svc
->s_list
);
354 /* Remove it from the ip_vs_svc_fwm_table table */
355 list_del(&svc
->f_list
);
358 svc
->flags
&= ~IP_VS_SVC_F_HASHED
;
359 atomic_dec(&svc
->refcnt
);
365 * Get service by {proto,addr,port} in the service table.
367 static __inline__
struct ip_vs_service
*
368 __ip_vs_service_get(__u16 protocol
, __u32 vaddr
, __u16 vport
)
371 struct ip_vs_service
*svc
;
373 /* Check for "full" addressed entries */
374 hash
= ip_vs_svc_hashkey(protocol
, vaddr
, vport
);
376 list_for_each_entry(svc
, &ip_vs_svc_table
[hash
], s_list
){
377 if ((svc
->addr
== vaddr
)
378 && (svc
->port
== vport
)
379 && (svc
->protocol
== protocol
)) {
381 atomic_inc(&svc
->usecnt
);
391 * Get service by {fwmark} in the service table.
393 static __inline__
struct ip_vs_service
*__ip_vs_svc_fwm_get(__u32 fwmark
)
396 struct ip_vs_service
*svc
;
398 /* Check for fwmark addressed entries */
399 hash
= ip_vs_svc_fwm_hashkey(fwmark
);
401 list_for_each_entry(svc
, &ip_vs_svc_fwm_table
[hash
], f_list
) {
402 if (svc
->fwmark
== fwmark
) {
404 atomic_inc(&svc
->usecnt
);
412 struct ip_vs_service
*
413 ip_vs_service_get(__u32 fwmark
, __u16 protocol
, __u32 vaddr
, __u16 vport
)
415 struct ip_vs_service
*svc
;
417 read_lock(&__ip_vs_svc_lock
);
420 * Check the table hashed by fwmark first
422 if (fwmark
&& (svc
= __ip_vs_svc_fwm_get(fwmark
)))
426 * Check the table hashed by <protocol,addr,port>
427 * for "full" addressed entries
429 svc
= __ip_vs_service_get(protocol
, vaddr
, vport
);
432 && protocol
== IPPROTO_TCP
433 && atomic_read(&ip_vs_ftpsvc_counter
)
434 && (vport
== FTPDATA
|| ntohs(vport
) >= PROT_SOCK
)) {
436 * Check if ftp service entry exists, the packet
437 * might belong to FTP data connections.
439 svc
= __ip_vs_service_get(protocol
, vaddr
, FTPPORT
);
443 && atomic_read(&ip_vs_nullsvc_counter
)) {
445 * Check if the catch-all port (port zero) exists
447 svc
= __ip_vs_service_get(protocol
, vaddr
, 0);
451 read_unlock(&__ip_vs_svc_lock
);
453 IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
454 fwmark
, ip_vs_proto_name(protocol
),
455 NIPQUAD(vaddr
), ntohs(vport
),
456 svc
?"hit":"not hit");
463 __ip_vs_bind_svc(struct ip_vs_dest
*dest
, struct ip_vs_service
*svc
)
465 atomic_inc(&svc
->refcnt
);
470 __ip_vs_unbind_svc(struct ip_vs_dest
*dest
)
472 struct ip_vs_service
*svc
= dest
->svc
;
475 if (atomic_dec_and_test(&svc
->refcnt
))
481 * Returns hash value for real service
483 static __inline__
unsigned ip_vs_rs_hashkey(__u32 addr
, __u16 port
)
485 register unsigned porth
= ntohs(port
);
487 return (ntohl(addr
)^(porth
>>IP_VS_RTAB_BITS
)^porth
)
492 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
493 * should be called with locked tables.
495 static int ip_vs_rs_hash(struct ip_vs_dest
*dest
)
499 if (!list_empty(&dest
->d_list
)) {
504 * Hash by proto,addr,port,
505 * which are the parameters of the real service.
507 hash
= ip_vs_rs_hashkey(dest
->addr
, dest
->port
);
508 list_add(&dest
->d_list
, &ip_vs_rtable
[hash
]);
514 * UNhashes ip_vs_dest from ip_vs_rtable.
515 * should be called with locked tables.
517 static int ip_vs_rs_unhash(struct ip_vs_dest
*dest
)
520 * Remove it from the ip_vs_rtable table.
522 if (!list_empty(&dest
->d_list
)) {
523 list_del(&dest
->d_list
);
524 INIT_LIST_HEAD(&dest
->d_list
);
531 * Lookup real service by <proto,addr,port> in the real service table.
534 ip_vs_lookup_real_service(__u16 protocol
, __u32 daddr
, __u16 dport
)
537 struct ip_vs_dest
*dest
;
540 * Check for "full" addressed entries
541 * Return the first found entry
543 hash
= ip_vs_rs_hashkey(daddr
, dport
);
545 read_lock(&__ip_vs_rs_lock
);
546 list_for_each_entry(dest
, &ip_vs_rtable
[hash
], d_list
) {
547 if ((dest
->addr
== daddr
)
548 && (dest
->port
== dport
)
549 && ((dest
->protocol
== protocol
) ||
552 read_unlock(&__ip_vs_rs_lock
);
556 read_unlock(&__ip_vs_rs_lock
);
562 * Lookup destination by {addr,port} in the given service
564 static struct ip_vs_dest
*
565 ip_vs_lookup_dest(struct ip_vs_service
*svc
, __u32 daddr
, __u16 dport
)
567 struct ip_vs_dest
*dest
;
570 * Find the destination for the given service
572 list_for_each_entry(dest
, &svc
->destinations
, n_list
) {
573 if ((dest
->addr
== daddr
) && (dest
->port
== dport
)) {
584 * Lookup dest by {svc,addr,port} in the destination trash.
585 * The destination trash is used to hold the destinations that are removed
586 * from the service table but are still referenced by some conn entries.
587 * The reason to add the destination trash is when the dest is temporary
588 * down (either by administrator or by monitor program), the dest can be
589 * picked back from the trash, the remaining connections to the dest can
590 * continue, and the counting information of the dest is also useful for
593 static struct ip_vs_dest
*
594 ip_vs_trash_get_dest(struct ip_vs_service
*svc
, __u32 daddr
, __u16 dport
)
596 struct ip_vs_dest
*dest
, *nxt
;
599 * Find the destination in trash
601 list_for_each_entry_safe(dest
, nxt
, &ip_vs_dest_trash
, n_list
) {
602 IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
605 NIPQUAD(dest
->addr
), ntohs(dest
->port
),
606 atomic_read(&dest
->refcnt
));
607 if (dest
->addr
== daddr
&&
608 dest
->port
== dport
&&
609 dest
->vfwmark
== svc
->fwmark
&&
610 dest
->protocol
== svc
->protocol
&&
612 (dest
->vaddr
== svc
->addr
&&
613 dest
->vport
== svc
->port
))) {
619 * Try to purge the destination from trash if not referenced
621 if (atomic_read(&dest
->refcnt
) == 1) {
622 IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
625 NIPQUAD(dest
->addr
), ntohs(dest
->port
));
626 list_del(&dest
->n_list
);
627 ip_vs_dst_reset(dest
);
628 __ip_vs_unbind_svc(dest
);
638 * Clean up all the destinations in the trash
639 * Called by the ip_vs_control_cleanup()
641 * When the ip_vs_control_clearup is activated by ipvs module exit,
642 * the service tables must have been flushed and all the connections
643 * are expired, and the refcnt of each destination in the trash must
644 * be 1, so we simply release them here.
646 static void ip_vs_trash_cleanup(void)
648 struct ip_vs_dest
*dest
, *nxt
;
650 list_for_each_entry_safe(dest
, nxt
, &ip_vs_dest_trash
, n_list
) {
651 list_del(&dest
->n_list
);
652 ip_vs_dst_reset(dest
);
653 __ip_vs_unbind_svc(dest
);
660 ip_vs_zero_stats(struct ip_vs_stats
*stats
)
662 spin_lock_bh(&stats
->lock
);
663 memset(stats
, 0, (char *)&stats
->lock
- (char *)stats
);
664 spin_unlock_bh(&stats
->lock
);
665 ip_vs_zero_estimator(stats
);
669 * Update a destination in the given service
672 __ip_vs_update_dest(struct ip_vs_service
*svc
,
673 struct ip_vs_dest
*dest
, struct ip_vs_dest_user
*udest
)
677 /* set the weight and the flags */
678 atomic_set(&dest
->weight
, udest
->weight
);
679 conn_flags
= udest
->conn_flags
| IP_VS_CONN_F_INACTIVE
;
681 /* check if local node and update the flags */
682 if (inet_addr_type(udest
->addr
) == RTN_LOCAL
) {
683 conn_flags
= (conn_flags
& ~IP_VS_CONN_F_FWD_MASK
)
684 | IP_VS_CONN_F_LOCALNODE
;
687 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
688 if ((conn_flags
& IP_VS_CONN_F_FWD_MASK
) != 0) {
689 conn_flags
|= IP_VS_CONN_F_NOOUTPUT
;
692 * Put the real service in ip_vs_rtable if not present.
693 * For now only for NAT!
695 write_lock_bh(&__ip_vs_rs_lock
);
697 write_unlock_bh(&__ip_vs_rs_lock
);
699 atomic_set(&dest
->conn_flags
, conn_flags
);
701 /* bind the service */
703 __ip_vs_bind_svc(dest
, svc
);
705 if (dest
->svc
!= svc
) {
706 __ip_vs_unbind_svc(dest
);
707 ip_vs_zero_stats(&dest
->stats
);
708 __ip_vs_bind_svc(dest
, svc
);
712 /* set the dest status flags */
713 dest
->flags
|= IP_VS_DEST_F_AVAILABLE
;
715 if (udest
->u_threshold
== 0 || udest
->u_threshold
> dest
->u_threshold
)
716 dest
->flags
&= ~IP_VS_DEST_F_OVERLOAD
;
717 dest
->u_threshold
= udest
->u_threshold
;
718 dest
->l_threshold
= udest
->l_threshold
;
723 * Create a destination for the given service
726 ip_vs_new_dest(struct ip_vs_service
*svc
, struct ip_vs_dest_user
*udest
,
727 struct ip_vs_dest
**dest_p
)
729 struct ip_vs_dest
*dest
;
734 atype
= inet_addr_type(udest
->addr
);
735 if (atype
!= RTN_LOCAL
&& atype
!= RTN_UNICAST
)
738 dest
= kmalloc(sizeof(struct ip_vs_dest
), GFP_ATOMIC
);
740 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
743 memset(dest
, 0, sizeof(struct ip_vs_dest
));
745 dest
->protocol
= svc
->protocol
;
746 dest
->vaddr
= svc
->addr
;
747 dest
->vport
= svc
->port
;
748 dest
->vfwmark
= svc
->fwmark
;
749 dest
->addr
= udest
->addr
;
750 dest
->port
= udest
->port
;
752 atomic_set(&dest
->activeconns
, 0);
753 atomic_set(&dest
->inactconns
, 0);
754 atomic_set(&dest
->persistconns
, 0);
755 atomic_set(&dest
->refcnt
, 0);
757 INIT_LIST_HEAD(&dest
->d_list
);
758 spin_lock_init(&dest
->dst_lock
);
759 spin_lock_init(&dest
->stats
.lock
);
760 __ip_vs_update_dest(svc
, dest
, udest
);
761 ip_vs_new_estimator(&dest
->stats
);
771 * Add a destination into an existing service
774 ip_vs_add_dest(struct ip_vs_service
*svc
, struct ip_vs_dest_user
*udest
)
776 struct ip_vs_dest
*dest
;
777 __u32 daddr
= udest
->addr
;
778 __u16 dport
= udest
->port
;
783 if (udest
->weight
< 0) {
784 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
788 if (udest
->l_threshold
> udest
->u_threshold
) {
789 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
790 "upper threshold\n");
795 * Check if the dest already exists in the list
797 dest
= ip_vs_lookup_dest(svc
, daddr
, dport
);
799 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
804 * Check if the dest already exists in the trash and
805 * is from the same service
807 dest
= ip_vs_trash_get_dest(svc
, daddr
, dport
);
809 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
810 "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
811 NIPQUAD(daddr
), ntohs(dport
),
812 atomic_read(&dest
->refcnt
),
814 NIPQUAD(dest
->vaddr
),
816 __ip_vs_update_dest(svc
, dest
, udest
);
819 * Get the destination from the trash
821 list_del(&dest
->n_list
);
823 ip_vs_new_estimator(&dest
->stats
);
825 write_lock_bh(&__ip_vs_svc_lock
);
828 * Wait until all other svc users go away.
830 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 1);
832 list_add(&dest
->n_list
, &svc
->destinations
);
835 /* call the update_service function of its scheduler */
836 svc
->scheduler
->update_service(svc
);
838 write_unlock_bh(&__ip_vs_svc_lock
);
843 * Allocate and initialize the dest structure
845 ret
= ip_vs_new_dest(svc
, udest
, &dest
);
851 * Add the dest entry into the list
853 atomic_inc(&dest
->refcnt
);
855 write_lock_bh(&__ip_vs_svc_lock
);
858 * Wait until all other svc users go away.
860 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 1);
862 list_add(&dest
->n_list
, &svc
->destinations
);
865 /* call the update_service function of its scheduler */
866 svc
->scheduler
->update_service(svc
);
868 write_unlock_bh(&__ip_vs_svc_lock
);
877 * Edit a destination in the given service
880 ip_vs_edit_dest(struct ip_vs_service
*svc
, struct ip_vs_dest_user
*udest
)
882 struct ip_vs_dest
*dest
;
883 __u32 daddr
= udest
->addr
;
884 __u16 dport
= udest
->port
;
888 if (udest
->weight
< 0) {
889 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
893 if (udest
->l_threshold
> udest
->u_threshold
) {
894 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
895 "upper threshold\n");
900 * Lookup the destination list
902 dest
= ip_vs_lookup_dest(svc
, daddr
, dport
);
904 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
908 __ip_vs_update_dest(svc
, dest
, udest
);
910 write_lock_bh(&__ip_vs_svc_lock
);
912 /* Wait until all other svc users go away */
913 while (atomic_read(&svc
->usecnt
) > 1) {};
915 /* call the update_service, because server weight may be changed */
916 svc
->scheduler
->update_service(svc
);
918 write_unlock_bh(&__ip_vs_svc_lock
);
927 * Delete a destination (must be already unlinked from the service)
929 static void __ip_vs_del_dest(struct ip_vs_dest
*dest
)
931 ip_vs_kill_estimator(&dest
->stats
);
934 * Remove it from the d-linked list with the real services.
936 write_lock_bh(&__ip_vs_rs_lock
);
937 ip_vs_rs_unhash(dest
);
938 write_unlock_bh(&__ip_vs_rs_lock
);
941 * Decrease the refcnt of the dest, and free the dest
942 * if nobody refers to it (refcnt=0). Otherwise, throw
943 * the destination into the trash.
945 if (atomic_dec_and_test(&dest
->refcnt
)) {
946 ip_vs_dst_reset(dest
);
947 /* simply decrease svc->refcnt here, let the caller check
948 and release the service if nobody refers to it.
949 Only user context can release destination and service,
950 and only one user context can update virtual service at a
951 time, so the operation here is OK */
952 atomic_dec(&dest
->svc
->refcnt
);
955 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
957 NIPQUAD(dest
->addr
), ntohs(dest
->port
),
958 atomic_read(&dest
->refcnt
));
959 list_add(&dest
->n_list
, &ip_vs_dest_trash
);
960 atomic_inc(&dest
->refcnt
);
966 * Unlink a destination from the given service
968 static void __ip_vs_unlink_dest(struct ip_vs_service
*svc
,
969 struct ip_vs_dest
*dest
,
972 dest
->flags
&= ~IP_VS_DEST_F_AVAILABLE
;
975 * Remove it from the d-linked destination list.
977 list_del(&dest
->n_list
);
981 * Call the update_service function of its scheduler
983 svc
->scheduler
->update_service(svc
);
989 * Delete a destination server in the given service
992 ip_vs_del_dest(struct ip_vs_service
*svc
,struct ip_vs_dest_user
*udest
)
994 struct ip_vs_dest
*dest
;
995 __u32 daddr
= udest
->addr
;
996 __u16 dport
= udest
->port
;
1000 dest
= ip_vs_lookup_dest(svc
, daddr
, dport
);
1002 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1006 write_lock_bh(&__ip_vs_svc_lock
);
1009 * Wait until all other svc users go away.
1011 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 1);
1014 * Unlink dest from the service
1016 __ip_vs_unlink_dest(svc
, dest
, 1);
1018 write_unlock_bh(&__ip_vs_svc_lock
);
1021 * Delete the destination
1023 __ip_vs_del_dest(dest
);
1032 * Add a service into the service hash table
1035 ip_vs_add_service(struct ip_vs_service_user
*u
, struct ip_vs_service
**svc_p
)
1038 struct ip_vs_scheduler
*sched
= NULL
;
1039 struct ip_vs_service
*svc
= NULL
;
1041 /* increase the module use count */
1042 ip_vs_use_count_inc();
1044 /* Lookup the scheduler by 'u->sched_name' */
1045 sched
= ip_vs_scheduler_get(u
->sched_name
);
1046 if (sched
== NULL
) {
1047 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1053 svc
= (struct ip_vs_service
*)
1054 kmalloc(sizeof(struct ip_vs_service
), GFP_ATOMIC
);
1056 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1060 memset(svc
, 0, sizeof(struct ip_vs_service
));
1062 /* I'm the first user of the service */
1063 atomic_set(&svc
->usecnt
, 1);
1064 atomic_set(&svc
->refcnt
, 0);
1066 svc
->protocol
= u
->protocol
;
1067 svc
->addr
= u
->addr
;
1068 svc
->port
= u
->port
;
1069 svc
->fwmark
= u
->fwmark
;
1070 svc
->flags
= u
->flags
;
1071 svc
->timeout
= u
->timeout
* HZ
;
1072 svc
->netmask
= u
->netmask
;
1074 INIT_LIST_HEAD(&svc
->destinations
);
1075 rwlock_init(&svc
->sched_lock
);
1076 spin_lock_init(&svc
->stats
.lock
);
1078 /* Bind the scheduler */
1079 ret
= ip_vs_bind_scheduler(svc
, sched
);
1084 /* Update the virtual service counters */
1085 if (svc
->port
== FTPPORT
)
1086 atomic_inc(&ip_vs_ftpsvc_counter
);
1087 else if (svc
->port
== 0)
1088 atomic_inc(&ip_vs_nullsvc_counter
);
1090 ip_vs_new_estimator(&svc
->stats
);
1091 ip_vs_num_services
++;
1093 /* Hash the service into the service table */
1094 write_lock_bh(&__ip_vs_svc_lock
);
1095 ip_vs_svc_hash(svc
);
1096 write_unlock_bh(&__ip_vs_svc_lock
);
1104 ip_vs_unbind_scheduler(svc
);
1107 ip_vs_app_inc_put(svc
->inc
);
1112 ip_vs_scheduler_put(sched
);
1115 /* decrease the module use count */
1116 ip_vs_use_count_dec();
1123 * Edit a service and bind it with a new scheduler
1126 ip_vs_edit_service(struct ip_vs_service
*svc
, struct ip_vs_service_user
*u
)
1128 struct ip_vs_scheduler
*sched
, *old_sched
;
1132 * Lookup the scheduler, by 'u->sched_name'
1134 sched
= ip_vs_scheduler_get(u
->sched_name
);
1135 if (sched
== NULL
) {
1136 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1142 write_lock_bh(&__ip_vs_svc_lock
);
1145 * Wait until all other svc users go away.
1147 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 1);
1150 * Set the flags and timeout value
1152 svc
->flags
= u
->flags
| IP_VS_SVC_F_HASHED
;
1153 svc
->timeout
= u
->timeout
* HZ
;
1154 svc
->netmask
= u
->netmask
;
1156 old_sched
= svc
->scheduler
;
1157 if (sched
!= old_sched
) {
1159 * Unbind the old scheduler
1161 if ((ret
= ip_vs_unbind_scheduler(svc
))) {
1167 * Bind the new scheduler
1169 if ((ret
= ip_vs_bind_scheduler(svc
, sched
))) {
1171 * If ip_vs_bind_scheduler fails, restore the old
1173 * The main reason of failure is out of memory.
1175 * The question is if the old scheduler can be
1176 * restored all the time. TODO: if it cannot be
1177 * restored some time, we must delete the service,
1178 * otherwise the system may crash.
1180 ip_vs_bind_scheduler(svc
, old_sched
);
1187 write_unlock_bh(&__ip_vs_svc_lock
);
1190 ip_vs_scheduler_put(old_sched
);
1197 * Delete a service from the service list
1198 * - The service must be unlinked, unlocked and not referenced!
1199 * - We are called under _bh lock
1201 static void __ip_vs_del_service(struct ip_vs_service
*svc
)
1203 struct ip_vs_dest
*dest
, *nxt
;
1204 struct ip_vs_scheduler
*old_sched
;
1206 ip_vs_num_services
--;
1207 ip_vs_kill_estimator(&svc
->stats
);
1209 /* Unbind scheduler */
1210 old_sched
= svc
->scheduler
;
1211 ip_vs_unbind_scheduler(svc
);
1213 ip_vs_scheduler_put(old_sched
);
1215 /* Unbind app inc */
1217 ip_vs_app_inc_put(svc
->inc
);
1222 * Unlink the whole destination list
1224 list_for_each_entry_safe(dest
, nxt
, &svc
->destinations
, n_list
) {
1225 __ip_vs_unlink_dest(svc
, dest
, 0);
1226 __ip_vs_del_dest(dest
);
1230 * Update the virtual service counters
1232 if (svc
->port
== FTPPORT
)
1233 atomic_dec(&ip_vs_ftpsvc_counter
);
1234 else if (svc
->port
== 0)
1235 atomic_dec(&ip_vs_nullsvc_counter
);
1238 * Free the service if nobody refers to it
1240 if (atomic_read(&svc
->refcnt
) == 0)
1243 /* decrease the module use count */
1244 ip_vs_use_count_dec();
1248 * Delete a service from the service list
1250 static int ip_vs_del_service(struct ip_vs_service
*svc
)
1256 * Unhash it from the service table
1258 write_lock_bh(&__ip_vs_svc_lock
);
1260 ip_vs_svc_unhash(svc
);
1263 * Wait until all the svc users go away.
1265 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 1);
1267 __ip_vs_del_service(svc
);
1269 write_unlock_bh(&__ip_vs_svc_lock
);
1276 * Flush all the virtual services
1278 static int ip_vs_flush(void)
1281 struct ip_vs_service
*svc
, *nxt
;
1284 * Flush the service table hashed by <protocol,addr,port>
1286 for(idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1287 list_for_each_entry_safe(svc
, nxt
, &ip_vs_svc_table
[idx
], s_list
) {
1288 write_lock_bh(&__ip_vs_svc_lock
);
1289 ip_vs_svc_unhash(svc
);
1291 * Wait until all the svc users go away.
1293 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 0);
1294 __ip_vs_del_service(svc
);
1295 write_unlock_bh(&__ip_vs_svc_lock
);
1300 * Flush the service table hashed by fwmark
1302 for(idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1303 list_for_each_entry_safe(svc
, nxt
,
1304 &ip_vs_svc_fwm_table
[idx
], f_list
) {
1305 write_lock_bh(&__ip_vs_svc_lock
);
1306 ip_vs_svc_unhash(svc
);
1308 * Wait until all the svc users go away.
1310 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 0);
1311 __ip_vs_del_service(svc
);
1312 write_unlock_bh(&__ip_vs_svc_lock
);
1321 * Zero counters in a service or all services
1323 static int ip_vs_zero_service(struct ip_vs_service
*svc
)
1325 struct ip_vs_dest
*dest
;
1327 write_lock_bh(&__ip_vs_svc_lock
);
1328 list_for_each_entry(dest
, &svc
->destinations
, n_list
) {
1329 ip_vs_zero_stats(&dest
->stats
);
1331 ip_vs_zero_stats(&svc
->stats
);
1332 write_unlock_bh(&__ip_vs_svc_lock
);
1336 static int ip_vs_zero_all(void)
1339 struct ip_vs_service
*svc
;
1341 for(idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1342 list_for_each_entry(svc
, &ip_vs_svc_table
[idx
], s_list
) {
1343 ip_vs_zero_service(svc
);
1347 for(idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1348 list_for_each_entry(svc
, &ip_vs_svc_fwm_table
[idx
], f_list
) {
1349 ip_vs_zero_service(svc
);
1353 ip_vs_zero_stats(&ip_vs_stats
);
1359 proc_do_defense_mode(ctl_table
*table
, int write
, struct file
* filp
,
1360 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
1362 int *valp
= table
->data
;
1366 rc
= proc_dointvec(table
, write
, filp
, buffer
, lenp
, ppos
);
1367 if (write
&& (*valp
!= val
)) {
1368 if ((*valp
< 0) || (*valp
> 3)) {
1369 /* Restore the correct value */
1372 update_defense_level();
1380 proc_do_sync_threshold(ctl_table
*table
, int write
, struct file
*filp
,
1381 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
1383 int *valp
= table
->data
;
1387 /* backup the value first */
1388 memcpy(val
, valp
, sizeof(val
));
1390 rc
= proc_dointvec(table
, write
, filp
, buffer
, lenp
, ppos
);
1391 if (write
&& (valp
[0] < 0 || valp
[1] < 0 || valp
[0] >= valp
[1])) {
1392 /* Restore the correct value */
1393 memcpy(valp
, val
, sizeof(val
));
1400 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1403 static struct ctl_table vs_vars
[] = {
1405 .ctl_name
= NET_IPV4_VS_AMEMTHRESH
,
1406 .procname
= "amemthresh",
1407 .data
= &sysctl_ip_vs_amemthresh
,
1408 .maxlen
= sizeof(int),
1410 .proc_handler
= &proc_dointvec
,
1412 #ifdef CONFIG_IP_VS_DEBUG
1414 .ctl_name
= NET_IPV4_VS_DEBUG_LEVEL
,
1415 .procname
= "debug_level",
1416 .data
= &sysctl_ip_vs_debug_level
,
1417 .maxlen
= sizeof(int),
1419 .proc_handler
= &proc_dointvec
,
1423 .ctl_name
= NET_IPV4_VS_AMDROPRATE
,
1424 .procname
= "am_droprate",
1425 .data
= &sysctl_ip_vs_am_droprate
,
1426 .maxlen
= sizeof(int),
1428 .proc_handler
= &proc_dointvec
,
1431 .ctl_name
= NET_IPV4_VS_DROP_ENTRY
,
1432 .procname
= "drop_entry",
1433 .data
= &sysctl_ip_vs_drop_entry
,
1434 .maxlen
= sizeof(int),
1436 .proc_handler
= &proc_do_defense_mode
,
1439 .ctl_name
= NET_IPV4_VS_DROP_PACKET
,
1440 .procname
= "drop_packet",
1441 .data
= &sysctl_ip_vs_drop_packet
,
1442 .maxlen
= sizeof(int),
1444 .proc_handler
= &proc_do_defense_mode
,
1447 .ctl_name
= NET_IPV4_VS_SECURE_TCP
,
1448 .procname
= "secure_tcp",
1449 .data
= &sysctl_ip_vs_secure_tcp
,
1450 .maxlen
= sizeof(int),
1452 .proc_handler
= &proc_do_defense_mode
,
1456 .ctl_name
= NET_IPV4_VS_TO_ES
,
1457 .procname
= "timeout_established",
1458 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_ESTABLISHED
],
1459 .maxlen
= sizeof(int),
1461 .proc_handler
= &proc_dointvec_jiffies
,
1464 .ctl_name
= NET_IPV4_VS_TO_SS
,
1465 .procname
= "timeout_synsent",
1466 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_SYN_SENT
],
1467 .maxlen
= sizeof(int),
1469 .proc_handler
= &proc_dointvec_jiffies
,
1472 .ctl_name
= NET_IPV4_VS_TO_SR
,
1473 .procname
= "timeout_synrecv",
1474 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_SYN_RECV
],
1475 .maxlen
= sizeof(int),
1477 .proc_handler
= &proc_dointvec_jiffies
,
1480 .ctl_name
= NET_IPV4_VS_TO_FW
,
1481 .procname
= "timeout_finwait",
1482 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_FIN_WAIT
],
1483 .maxlen
= sizeof(int),
1485 .proc_handler
= &proc_dointvec_jiffies
,
1488 .ctl_name
= NET_IPV4_VS_TO_TW
,
1489 .procname
= "timeout_timewait",
1490 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_TIME_WAIT
],
1491 .maxlen
= sizeof(int),
1493 .proc_handler
= &proc_dointvec_jiffies
,
1496 .ctl_name
= NET_IPV4_VS_TO_CL
,
1497 .procname
= "timeout_close",
1498 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_CLOSE
],
1499 .maxlen
= sizeof(int),
1501 .proc_handler
= &proc_dointvec_jiffies
,
1504 .ctl_name
= NET_IPV4_VS_TO_CW
,
1505 .procname
= "timeout_closewait",
1506 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_CLOSE_WAIT
],
1507 .maxlen
= sizeof(int),
1509 .proc_handler
= &proc_dointvec_jiffies
,
1512 .ctl_name
= NET_IPV4_VS_TO_LA
,
1513 .procname
= "timeout_lastack",
1514 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_LAST_ACK
],
1515 .maxlen
= sizeof(int),
1517 .proc_handler
= &proc_dointvec_jiffies
,
1520 .ctl_name
= NET_IPV4_VS_TO_LI
,
1521 .procname
= "timeout_listen",
1522 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_LISTEN
],
1523 .maxlen
= sizeof(int),
1525 .proc_handler
= &proc_dointvec_jiffies
,
1528 .ctl_name
= NET_IPV4_VS_TO_SA
,
1529 .procname
= "timeout_synack",
1530 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_SYNACK
],
1531 .maxlen
= sizeof(int),
1533 .proc_handler
= &proc_dointvec_jiffies
,
1536 .ctl_name
= NET_IPV4_VS_TO_UDP
,
1537 .procname
= "timeout_udp",
1538 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_UDP
],
1539 .maxlen
= sizeof(int),
1541 .proc_handler
= &proc_dointvec_jiffies
,
1544 .ctl_name
= NET_IPV4_VS_TO_ICMP
,
1545 .procname
= "timeout_icmp",
1546 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_ICMP
],
1547 .maxlen
= sizeof(int),
1549 .proc_handler
= &proc_dointvec_jiffies
,
1553 .ctl_name
= NET_IPV4_VS_CACHE_BYPASS
,
1554 .procname
= "cache_bypass",
1555 .data
= &sysctl_ip_vs_cache_bypass
,
1556 .maxlen
= sizeof(int),
1558 .proc_handler
= &proc_dointvec
,
1561 .ctl_name
= NET_IPV4_VS_EXPIRE_NODEST_CONN
,
1562 .procname
= "expire_nodest_conn",
1563 .data
= &sysctl_ip_vs_expire_nodest_conn
,
1564 .maxlen
= sizeof(int),
1566 .proc_handler
= &proc_dointvec
,
1569 .ctl_name
= NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE
,
1570 .procname
= "expire_quiescent_template",
1571 .data
= &sysctl_ip_vs_expire_quiescent_template
,
1572 .maxlen
= sizeof(int),
1574 .proc_handler
= &proc_dointvec
,
1577 .ctl_name
= NET_IPV4_VS_SYNC_THRESHOLD
,
1578 .procname
= "sync_threshold",
1579 .data
= &sysctl_ip_vs_sync_threshold
,
1580 .maxlen
= sizeof(sysctl_ip_vs_sync_threshold
),
1582 .proc_handler
= &proc_do_sync_threshold
,
1585 .ctl_name
= NET_IPV4_VS_NAT_ICMP_SEND
,
1586 .procname
= "nat_icmp_send",
1587 .data
= &sysctl_ip_vs_nat_icmp_send
,
1588 .maxlen
= sizeof(int),
1590 .proc_handler
= &proc_dointvec
,
/*
 * sysctl table whose visible entry is keyed by NET_IPV4_VS.
 * NOTE(review): only the .ctl_name initializer is visible in this extract;
 * the other fields and the table terminator are not shown -- confirm against
 * the full file.
 */
1595 static ctl_table vs_table
[] = {
1597 .ctl_name
= NET_IPV4_VS
,
/*
 * sysctl table whose visible entry is keyed by NET_IPV4; it is referenced as
 * the .child of vs_root_table.
 * NOTE(review): only the .ctl_name initializer is visible in this extract.
 */
1605 static ctl_table ipvs_ipv4_table
[] = {
1607 .ctl_name
= NET_IPV4
,
/*
 * Root of the IPVS sysctl hierarchy: a CTL_NET entry whose child is
 * ipvs_ipv4_table.  This is the table handed to register_sysctl_table()
 * by ip_vs_control_init().
 * NOTE(review): remaining fields and the terminator are not visible here.
 */
1615 static ctl_table vs_root_table
[] = {
1617 .ctl_name
= CTL_NET
,
1620 .child
= ipvs_ipv4_table
,
/*
 * Handle returned by register_sysctl_table() in ip_vs_control_init();
 * passed to unregister_sysctl_table() in ip_vs_control_cleanup().
 */
1625 static struct ctl_table_header
* sysctl_header
;
1627 #ifdef CONFIG_PROC_FS
/*
 * Hash table the /proc iterator is currently walking; the seq callbacks set
 * it to, and compare it against, ip_vs_svc_table and ip_vs_svc_fwm_table.
 * NOTE(review): the enclosing struct declaration is not visible in this
 * extract -- presumably struct ip_vs_iter, which the seq callbacks also
 * access through a `bucket` member; confirm against the full file.
 */
1630 struct list_head
*table
;
1635 * Write the contents of the VS rule table to a PROCfs file.
1636 * (It is kept just for backward compatibility)
/*
 * Translate the forwarding-method bits of a connection flag word
 * (flags & IP_VS_CONN_F_FWD_MASK) into a constant string.  The result is
 * printed in the "Forward" column by ip_vs_info_seq_show().
 * NOTE(review): the return statements (and any default case) are not visible
 * in this extract, so the exact strings cannot be confirmed from here.
 */
1638 static inline const char *ip_vs_fwd_name(unsigned flags
)
1640 switch (flags
& IP_VS_CONN_F_FWD_MASK
) {
1641 case IP_VS_CONN_F_LOCALNODE
:
1643 case IP_VS_CONN_F_TUNNEL
:
1645 case IP_VS_CONN_F_DROUTE
:
/*
 * Positional lookup used by the /proc iterator.  Walks every bucket of
 * ip_vs_svc_table (services linked through s_list) and then every bucket of
 * ip_vs_svc_fwm_table (services linked through f_list), noting in the
 * iterator stored at seq->private which of the two tables the walk is in.
 * NOTE(review): the position test, the bucket bookkeeping and the return
 * statements are not visible in this extract.
 */
1653 /* Get the Nth entry in the two lists */
1654 static struct ip_vs_service
*ip_vs_info_array(struct seq_file
*seq
, loff_t pos
)
1656 struct ip_vs_iter
*iter
= seq
->private;
1658 struct ip_vs_service
*svc
;
1660 /* look in hash by protocol */
1661 for (idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1662 list_for_each_entry(svc
, &ip_vs_svc_table
[idx
], s_list
) {
1664 iter
->table
= ip_vs_svc_table
;
1671 /* keep looking in fwmark */
1672 for (idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1673 list_for_each_entry(svc
, &ip_vs_svc_fwm_table
[idx
], f_list
) {
1675 iter
->table
= ip_vs_svc_fwm_table
;
/*
 * seq_file ->start callback.  Takes __ip_vs_svc_lock for reading (with
 * bottom halves disabled); the matching unlock is in ip_vs_info_seq_stop().
 * Position 0 yields SEQ_START_TOKEN (the header row); any other position N
 * yields the service found by ip_vs_info_array(seq, N - 1).
 */
1685 static void *ip_vs_info_seq_start(struct seq_file
*seq
, loff_t
*pos
)
1688 read_lock_bh(&__ip_vs_svc_lock
);
1689 return *pos
? ip_vs_info_array(seq
, *pos
- 1) : SEQ_START_TOKEN
;
/*
 * seq_file ->next callback.
 *
 * After the header token the first service is fetched with
 * ip_vs_info_array(seq,0).  Otherwise, while the iterator is in
 * ip_vs_svc_table, the successor on the current bucket's s_list is returned
 * unless it is the bucket head; failing that, later buckets are scanned.
 * The iterator is then switched to ip_vs_svc_fwm_table and the same steps
 * are applied using f_list.
 * NOTE(review): the *pos update, the assignment of svc from v, the handling
 * of iter->bucket at the table switch and the final return are not visible
 * in this extract.
 */
1693 static void *ip_vs_info_seq_next(struct seq_file
*seq
, void *v
, loff_t
*pos
)
1695 struct list_head
*e
;
1696 struct ip_vs_iter
*iter
;
1697 struct ip_vs_service
*svc
;
1700 if (v
== SEQ_START_TOKEN
)
1701 return ip_vs_info_array(seq
,0);
1704 iter
= seq
->private;
1706 if (iter
->table
== ip_vs_svc_table
) {
1707 /* next service in table hashed by protocol */
1708 if ((e
= svc
->s_list
.next
) != &ip_vs_svc_table
[iter
->bucket
])
1709 return list_entry(e
, struct ip_vs_service
, s_list
);
1712 while (++iter
->bucket
< IP_VS_SVC_TAB_SIZE
) {
1713 list_for_each_entry(svc
,&ip_vs_svc_table
[iter
->bucket
],
1719 iter
->table
= ip_vs_svc_fwm_table
;
1724 /* next service in hashed by fwmark */
1725 if ((e
= svc
->f_list
.next
) != &ip_vs_svc_fwm_table
[iter
->bucket
])
1726 return list_entry(e
, struct ip_vs_service
, f_list
);
1729 while (++iter
->bucket
< IP_VS_SVC_TAB_SIZE
) {
1730 list_for_each_entry(svc
, &ip_vs_svc_fwm_table
[iter
->bucket
],
/*
 * seq_file ->stop callback: drops the read lock on __ip_vs_svc_lock that
 * ip_vs_info_seq_start() acquired.
 */
1738 static void ip_vs_info_seq_stop(struct seq_file
*seq
, void *v
)
1740 read_unlock_bh(&__ip_vs_svc_lock
);
/*
 * seq_file ->show callback for the "ip_vs" proc file.
 *
 * For SEQ_START_TOKEN it emits the version banner and the two column
 * heading lines.  For a service it emits one service line -- protocol name,
 * hex address:port and scheduler name when the iterator is in
 * ip_vs_svc_table, otherwise "FWM" with the firewall mark and scheduler
 * name -- followed by a "persistent" suffix when IP_VS_SVC_F_PERSISTENT is
 * set, and then one "->" line per entry on svc->destinations.
 * NOTE(review): several call heads and some printf arguments are not visible
 * in this extract.
 */
1744 static int ip_vs_info_seq_show(struct seq_file
*seq
, void *v
)
1746 if (v
== SEQ_START_TOKEN
) {
1748 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1749 NVERSION(IP_VS_VERSION_CODE
), IP_VS_CONN_TAB_SIZE
);
1751 "Prot LocalAddress:Port Scheduler Flags\n");
1753 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1755 const struct ip_vs_service
*svc
= v
;
1756 const struct ip_vs_iter
*iter
= seq
->private;
1757 const struct ip_vs_dest
*dest
;
1759 if (iter
->table
== ip_vs_svc_table
)
1760 seq_printf(seq
, "%s %08X:%04X %s ",
1761 ip_vs_proto_name(svc
->protocol
),
1764 svc
->scheduler
->name
);
1766 seq_printf(seq
, "FWM %08X %s ",
1767 svc
->fwmark
, svc
->scheduler
->name
);
1769 if (svc
->flags
& IP_VS_SVC_F_PERSISTENT
)
1770 seq_printf(seq
, "persistent %d %08X\n",
1772 ntohl(svc
->netmask
));
1774 seq_putc(seq
, '\n');
1776 list_for_each_entry(dest
, &svc
->destinations
, n_list
) {
1778 " -> %08X:%04X %-7s %-6d %-10d %-10d\n",
1779 ntohl(dest
->addr
), ntohs(dest
->port
),
1780 ip_vs_fwd_name(atomic_read(&dest
->conn_flags
)),
1781 atomic_read(&dest
->weight
),
1782 atomic_read(&dest
->activeconns
),
1783 atomic_read(&dest
->inactconns
));
/*
 * Iterator callbacks for the "ip_vs" proc file; installed by
 * ip_vs_info_open() through seq_open().
 */
1789 static struct seq_operations ip_vs_info_seq_ops
= {
1790 .start
= ip_vs_info_seq_start
,
1791 .next
= ip_vs_info_seq_next
,
1792 .stop
= ip_vs_info_seq_stop
,
1793 .show
= ip_vs_info_seq_show
,
1796 static int ip_vs_info_open(struct inode
*inode
, struct file
*file
)
1798 struct seq_file
*seq
;
1800 struct ip_vs_iter
*s
= kmalloc(sizeof(*s
), GFP_KERNEL
);
1805 rc
= seq_open(file
, &ip_vs_info_seq_ops
);
1809 seq
= file
->private_data
;
1811 memset(s
, 0, sizeof(*s
));
/*
 * File operations for the "ip_vs" proc entry created in
 * ip_vs_control_init().  ->release is seq_release_private.
 * NOTE(review): the ->read initializer is not visible in this extract.
 */
1819 static struct file_operations ip_vs_info_fops
= {
1820 .owner
= THIS_MODULE
,
1821 .open
= ip_vs_info_open
,
1823 .llseek
= seq_lseek
,
1824 .release
= seq_release_private
,
/*
 * Global IPVS statistics object.  Zeroed and given an initialised lock in
 * ip_vs_control_init(), read under ip_vs_stats.lock by ip_vs_stats_show().
 */
1829 struct ip_vs_stats ip_vs_stats
;
1831 #ifdef CONFIG_PROC_FS
/*
 * single_open() show routine for the "ip_vs_stats" proc file.
 *
 * Prints two heading lines, then -- holding ip_vs_stats.lock with bottom
 * halves disabled -- the totals (conns, inpkts, outpkts, inbytes, outbytes)
 * in hexadecimal, a rate heading and a line of rate values, and finally
 * releases the lock.
 * NOTE(review): the seq_puts() call heads and most of the rate arguments are
 * not visible in this extract.
 */
1832 static int ip_vs_stats_show(struct seq_file
*seq
, void *v
)
1835 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1837 " Total Incoming Outgoing Incoming Outgoing\n");
1839 " Conns Packets Packets Bytes Bytes\n");
1841 spin_lock_bh(&ip_vs_stats
.lock
);
1842 seq_printf(seq
, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats
.conns
,
1843 ip_vs_stats
.inpkts
, ip_vs_stats
.outpkts
,
1844 (unsigned long long) ip_vs_stats
.inbytes
,
1845 (unsigned long long) ip_vs_stats
.outbytes
);
1847 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1849 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1850 seq_printf(seq
,"%8X %8X %8X %16X %16X\n",
1855 ip_vs_stats
.outbps
);
1856 spin_unlock_bh(&ip_vs_stats
.lock
);
/*
 * open() handler for the "ip_vs_stats" proc file: binds ip_vs_stats_show()
 * through single_open() with no private data (NULL).
 */
1861 static int ip_vs_stats_seq_open(struct inode
*inode
, struct file
*file
)
1863 return single_open(file
, ip_vs_stats_show
, NULL
);
/*
 * File operations for the "ip_vs_stats" proc entry created in
 * ip_vs_control_init().  ->release is single_release, pairing with the
 * single_open() call in ip_vs_stats_seq_open().
 * NOTE(review): the ->read initializer is not visible in this extract.
 */
1866 static struct file_operations ip_vs_stats_fops
= {
1867 .owner
= THIS_MODULE
,
1868 .open
= ip_vs_stats_seq_open
,
1870 .llseek
= seq_lseek
,
1871 .release
= single_release
,
1877 * Set timeout values for tcp tcpfin udp in the timeout_table.
/*
 * Apply the timeouts requested through IP_VS_SO_SET_TIMEOUT.
 *
 * Each field of *u is tested for non-zero before use, so a zero field leaves
 * the corresponding entry untouched.  Non-zero values are multiplied by HZ
 * and stored into:
 *   tcp_timeout     -> ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
 *   tcp_fin_timeout -> ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
 *   udp_timeout     -> ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
 * The TCP and UDP parts are compiled only under CONFIG_IP_VS_PROTO_TCP and
 * CONFIG_IP_VS_PROTO_UDP respectively.
 * NOTE(review): the debug-print arguments, the #endif lines and the return
 * statement are not visible in this extract.
 */
1879 static int ip_vs_set_timeout(struct ip_vs_timeout_user
*u
)
1881 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1886 #ifdef CONFIG_IP_VS_PROTO_TCP
1887 if (u
->tcp_timeout
) {
1888 ip_vs_protocol_tcp
.timeout_table
[IP_VS_TCP_S_ESTABLISHED
]
1889 = u
->tcp_timeout
* HZ
;
1892 if (u
->tcp_fin_timeout
) {
1893 ip_vs_protocol_tcp
.timeout_table
[IP_VS_TCP_S_FIN_WAIT
]
1894 = u
->tcp_fin_timeout
* HZ
;
1898 #ifdef CONFIG_IP_VS_PROTO_UDP
1899 if (u
->udp_timeout
) {
1900 ip_vs_protocol_udp
.timeout_table
[IP_VS_UDP_S_NORMAL
]
1901 = u
->udp_timeout
* HZ
;
/*
 * Helpers for validating setsockopt() arguments.
 * SET_CMDID() turns a command number into a zero-based index into
 * set_arglen[]; the *_ARG_LEN macros are the exact payload sizes expected
 * for each family of commands.  MAX_ARG_LEN (the service+destination
 * payload) sizes the stack buffer in do_ip_vs_set_ctl().
 */
1908 #define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
1909 #define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
1910 #define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
1911 sizeof(struct ip_vs_dest_user))
1912 #define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
1913 #define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
1914 #define MAX_ARG_LEN SVCDEST_ARG_LEN
/*
 * Expected setsockopt() payload length for every set command, indexed by
 * SET_CMDID(cmd).  do_ip_vs_set_ctl() rejects a request whose length differs
 * from the entry for its command.  Entries are stored as unsigned char, so
 * every length listed here has to fit in one byte.
 */
1916 static const unsigned char set_arglen
[SET_CMDID(IP_VS_SO_SET_MAX
)+1] = {
1917 [SET_CMDID(IP_VS_SO_SET_ADD
)] = SERVICE_ARG_LEN
,
1918 [SET_CMDID(IP_VS_SO_SET_EDIT
)] = SERVICE_ARG_LEN
,
1919 [SET_CMDID(IP_VS_SO_SET_DEL
)] = SERVICE_ARG_LEN
,
1920 [SET_CMDID(IP_VS_SO_SET_FLUSH
)] = 0,
1921 [SET_CMDID(IP_VS_SO_SET_ADDDEST
)] = SVCDEST_ARG_LEN
,
1922 [SET_CMDID(IP_VS_SO_SET_DELDEST
)] = SVCDEST_ARG_LEN
,
1923 [SET_CMDID(IP_VS_SO_SET_EDITDEST
)] = SVCDEST_ARG_LEN
,
1924 [SET_CMDID(IP_VS_SO_SET_TIMEOUT
)] = TIMEOUT_ARG_LEN
,
1925 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON
)] = DAEMON_ARG_LEN
,
1926 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON
)] = DAEMON_ARG_LEN
,
1927 [SET_CMDID(IP_VS_SO_SET_ZERO
)] = SERVICE_ARG_LEN
,
1931 do_ip_vs_set_ctl(struct sock
*sk
, int cmd
, void __user
*user
, unsigned int len
)
1934 unsigned char arg
[MAX_ARG_LEN
];
1935 struct ip_vs_service_user
*usvc
;
1936 struct ip_vs_service
*svc
;
1937 struct ip_vs_dest_user
*udest
;
1939 if (!capable(CAP_NET_ADMIN
))
1942 if (len
!= set_arglen
[SET_CMDID(cmd
)]) {
1943 IP_VS_ERR("set_ctl: len %u != %u\n",
1944 len
, set_arglen
[SET_CMDID(cmd
)]);
1948 if (copy_from_user(arg
, user
, len
) != 0)
1951 /* increase the module use count */
1952 ip_vs_use_count_inc();
1954 if (mutex_lock_interruptible(&__ip_vs_mutex
)) {
1959 if (cmd
== IP_VS_SO_SET_FLUSH
) {
1960 /* Flush the virtual service */
1961 ret
= ip_vs_flush();
1963 } else if (cmd
== IP_VS_SO_SET_TIMEOUT
) {
1964 /* Set timeout values for (tcp tcpfin udp) */
1965 ret
= ip_vs_set_timeout((struct ip_vs_timeout_user
*)arg
);
1967 } else if (cmd
== IP_VS_SO_SET_STARTDAEMON
) {
1968 struct ip_vs_daemon_user
*dm
= (struct ip_vs_daemon_user
*)arg
;
1969 ret
= start_sync_thread(dm
->state
, dm
->mcast_ifn
, dm
->syncid
);
1971 } else if (cmd
== IP_VS_SO_SET_STOPDAEMON
) {
1972 struct ip_vs_daemon_user
*dm
= (struct ip_vs_daemon_user
*)arg
;
1973 ret
= stop_sync_thread(dm
->state
);
1977 usvc
= (struct ip_vs_service_user
*)arg
;
1978 udest
= (struct ip_vs_dest_user
*)(usvc
+ 1);
1980 if (cmd
== IP_VS_SO_SET_ZERO
) {
1981 /* if no service address is set, zero counters in all */
1982 if (!usvc
->fwmark
&& !usvc
->addr
&& !usvc
->port
) {
1983 ret
= ip_vs_zero_all();
1988 /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
1989 if (usvc
->protocol
!=IPPROTO_TCP
&& usvc
->protocol
!=IPPROTO_UDP
) {
1990 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
1991 usvc
->protocol
, NIPQUAD(usvc
->addr
),
1992 ntohs(usvc
->port
), usvc
->sched_name
);
1997 /* Lookup the exact service by <protocol, addr, port> or fwmark */
1998 if (usvc
->fwmark
== 0)
1999 svc
= __ip_vs_service_get(usvc
->protocol
,
2000 usvc
->addr
, usvc
->port
);
2002 svc
= __ip_vs_svc_fwm_get(usvc
->fwmark
);
2004 if (cmd
!= IP_VS_SO_SET_ADD
2005 && (svc
== NULL
|| svc
->protocol
!= usvc
->protocol
)) {
2011 case IP_VS_SO_SET_ADD
:
2015 ret
= ip_vs_add_service(usvc
, &svc
);
2017 case IP_VS_SO_SET_EDIT
:
2018 ret
= ip_vs_edit_service(svc
, usvc
);
2020 case IP_VS_SO_SET_DEL
:
2021 ret
= ip_vs_del_service(svc
);
2025 case IP_VS_SO_SET_ZERO
:
2026 ret
= ip_vs_zero_service(svc
);
2028 case IP_VS_SO_SET_ADDDEST
:
2029 ret
= ip_vs_add_dest(svc
, udest
);
2031 case IP_VS_SO_SET_EDITDEST
:
2032 ret
= ip_vs_edit_dest(svc
, udest
);
2034 case IP_VS_SO_SET_DELDEST
:
2035 ret
= ip_vs_del_dest(svc
, udest
);
2042 ip_vs_service_put(svc
);
2045 mutex_unlock(&__ip_vs_mutex
);
2047 /* decrease the module use count */
2048 ip_vs_use_count_dec();
/*
 * Snapshot kernel statistics into the user-visible layout.
 *
 * While holding src->lock (bottom halves disabled) it copies the leading
 * bytes of *src -- everything up to, but not including, the lock member --
 * into *dst.
 * NOTE(review): this relies on struct ip_vs_stats beginning with the same
 * fields as struct ip_vs_stats_user; neither layout is visible here --
 * confirm against the header.
 */
2055 ip_vs_copy_stats(struct ip_vs_stats_user
*dst
, struct ip_vs_stats
*src
)
2057 spin_lock_bh(&src
->lock
);
2058 memcpy(dst
, src
, (char*)&src
->lock
- (char*)src
);
2059 spin_unlock_bh(&src
->lock
);
/*
 * Fill a user-visible service entry from a kernel service.
 *
 * Copies protocol, addr, port, fwmark, flags, netmask and num_dests
 * verbatim, copies the scheduler name with strlcpy() bounded by the size of
 * dst->sched_name, stores src->timeout divided by HZ (presumably converting
 * jiffies to seconds -- confirm), and snapshots the statistics through
 * ip_vs_copy_stats().
 */
2063 ip_vs_copy_service(struct ip_vs_service_entry
*dst
, struct ip_vs_service
*src
)
2065 dst
->protocol
= src
->protocol
;
2066 dst
->addr
= src
->addr
;
2067 dst
->port
= src
->port
;
2068 dst
->fwmark
= src
->fwmark
;
2069 strlcpy(dst
->sched_name
, src
->scheduler
->name
, sizeof(dst
->sched_name
));
2070 dst
->flags
= src
->flags
;
2071 dst
->timeout
= src
->timeout
/ HZ
;
2072 dst
->netmask
= src
->netmask
;
2073 dst
->num_dests
= src
->num_dests
;
2074 ip_vs_copy_stats(&dst
->stats
, &src
->stats
);
/*
 * Copy service entries to user space for IP_VS_SO_GET_SERVICES.
 *
 * Walks ip_vs_svc_table and then ip_vs_svc_fwm_table.  For each service,
 * as long as fewer than get->num_services entries have been produced, a
 * zeroed stack entry is filled by ip_vs_copy_service() and written to
 * uptr->entrytable[count] with copy_to_user().  The entry is cleared with
 * memset() first, so no stale stack bytes reach user space.
 * NOTE(review): the loop exits, the counter update, the error path after a
 * failed copy_to_user() and the return statement are not visible in this
 * extract.
 */
2078 __ip_vs_get_service_entries(const struct ip_vs_get_services
*get
,
2079 struct ip_vs_get_services __user
*uptr
)
2082 struct ip_vs_service
*svc
;
2083 struct ip_vs_service_entry entry
;
2086 for (idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
2087 list_for_each_entry(svc
, &ip_vs_svc_table
[idx
], s_list
) {
2088 if (count
>= get
->num_services
)
2090 memset(&entry
, 0, sizeof(entry
));
2091 ip_vs_copy_service(&entry
, svc
);
2092 if (copy_to_user(&uptr
->entrytable
[count
],
2093 &entry
, sizeof(entry
))) {
2101 for (idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
2102 list_for_each_entry(svc
, &ip_vs_svc_fwm_table
[idx
], f_list
) {
2103 if (count
>= get
->num_services
)
2105 memset(&entry
, 0, sizeof(entry
));
2106 ip_vs_copy_service(&entry
, svc
);
2107 if (copy_to_user(&uptr
->entrytable
[count
],
2108 &entry
, sizeof(entry
))) {
2120 __ip_vs_get_dest_entries(const struct ip_vs_get_dests
*get
,
2121 struct ip_vs_get_dests __user
*uptr
)
2123 struct ip_vs_service
*svc
;
2127 svc
= __ip_vs_svc_fwm_get(get
->fwmark
);
2129 svc
= __ip_vs_service_get(get
->protocol
,
2130 get
->addr
, get
->port
);
2133 struct ip_vs_dest
*dest
;
2134 struct ip_vs_dest_entry entry
;
2136 list_for_each_entry(dest
, &svc
->destinations
, n_list
) {
2137 if (count
>= get
->num_dests
)
2140 entry
.addr
= dest
->addr
;
2141 entry
.port
= dest
->port
;
2142 entry
.conn_flags
= atomic_read(&dest
->conn_flags
);
2143 entry
.weight
= atomic_read(&dest
->weight
);
2144 entry
.u_threshold
= dest
->u_threshold
;
2145 entry
.l_threshold
= dest
->l_threshold
;
2146 entry
.activeconns
= atomic_read(&dest
->activeconns
);
2147 entry
.inactconns
= atomic_read(&dest
->inactconns
);
2148 entry
.persistconns
= atomic_read(&dest
->persistconns
);
2149 ip_vs_copy_stats(&entry
.stats
, &dest
->stats
);
2150 if (copy_to_user(&uptr
->entrytable
[count
],
2151 &entry
, sizeof(entry
))) {
2157 ip_vs_service_put(svc
);
2164 __ip_vs_get_timeouts(struct ip_vs_timeout_user
*u
)
2166 #ifdef CONFIG_IP_VS_PROTO_TCP
2168 ip_vs_protocol_tcp
.timeout_table
[IP_VS_TCP_S_ESTABLISHED
] / HZ
;
2169 u
->tcp_fin_timeout
=
2170 ip_vs_protocol_tcp
.timeout_table
[IP_VS_TCP_S_FIN_WAIT
] / HZ
;
2172 #ifdef CONFIG_IP_VS_PROTO_UDP
2174 ip_vs_protocol_udp
.timeout_table
[IP_VS_UDP_S_NORMAL
] / HZ
;
/*
 * Helpers for validating getsockopt() arguments.
 * GET_CMDID() turns a command number into a zero-based index into
 * get_arglen[]; the GET_*_ARG_LEN macros are the fixed-size request headers
 * for each command.  GET_DAEMON_ARG_LEN covers two daemon records.
 */
2179 #define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2180 #define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2181 #define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2182 #define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2183 #define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2184 #define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2185 #define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
/*
 * Minimum getsockopt() buffer length for every get command, indexed by
 * GET_CMDID(cmd).  do_ip_vs_get_ctl() rejects shorter buffers and copies
 * exactly this many bytes of the request into its stack buffer.  Entries are
 * stored as unsigned char, so every length listed here has to fit in one
 * byte.
 */
2187 static const unsigned char get_arglen
[GET_CMDID(IP_VS_SO_GET_MAX
)+1] = {
2188 [GET_CMDID(IP_VS_SO_GET_VERSION
)] = 64,
2189 [GET_CMDID(IP_VS_SO_GET_INFO
)] = GET_INFO_ARG_LEN
,
2190 [GET_CMDID(IP_VS_SO_GET_SERVICES
)] = GET_SERVICES_ARG_LEN
,
2191 [GET_CMDID(IP_VS_SO_GET_SERVICE
)] = GET_SERVICE_ARG_LEN
,
2192 [GET_CMDID(IP_VS_SO_GET_DESTS
)] = GET_DESTS_ARG_LEN
,
2193 [GET_CMDID(IP_VS_SO_GET_TIMEOUT
)] = GET_TIMEOUT_ARG_LEN
,
2194 [GET_CMDID(IP_VS_SO_GET_DAEMON
)] = GET_DAEMON_ARG_LEN
,
2198 do_ip_vs_get_ctl(struct sock
*sk
, int cmd
, void __user
*user
, int *len
)
2200 unsigned char arg
[128];
2203 if (!capable(CAP_NET_ADMIN
))
2206 if (*len
< get_arglen
[GET_CMDID(cmd
)]) {
2207 IP_VS_ERR("get_ctl: len %u < %u\n",
2208 *len
, get_arglen
[GET_CMDID(cmd
)]);
2212 if (copy_from_user(arg
, user
, get_arglen
[GET_CMDID(cmd
)]) != 0)
2215 if (mutex_lock_interruptible(&__ip_vs_mutex
))
2216 return -ERESTARTSYS
;
2219 case IP_VS_SO_GET_VERSION
:
2223 sprintf(buf
, "IP Virtual Server version %d.%d.%d (size=%d)",
2224 NVERSION(IP_VS_VERSION_CODE
), IP_VS_CONN_TAB_SIZE
);
2225 if (copy_to_user(user
, buf
, strlen(buf
)+1) != 0) {
2229 *len
= strlen(buf
)+1;
2233 case IP_VS_SO_GET_INFO
:
2235 struct ip_vs_getinfo info
;
2236 info
.version
= IP_VS_VERSION_CODE
;
2237 info
.size
= IP_VS_CONN_TAB_SIZE
;
2238 info
.num_services
= ip_vs_num_services
;
2239 if (copy_to_user(user
, &info
, sizeof(info
)) != 0)
2244 case IP_VS_SO_GET_SERVICES
:
2246 struct ip_vs_get_services
*get
;
2249 get
= (struct ip_vs_get_services
*)arg
;
2250 size
= sizeof(*get
) +
2251 sizeof(struct ip_vs_service_entry
) * get
->num_services
;
2253 IP_VS_ERR("length: %u != %u\n", *len
, size
);
2257 ret
= __ip_vs_get_service_entries(get
, user
);
2261 case IP_VS_SO_GET_SERVICE
:
2263 struct ip_vs_service_entry
*entry
;
2264 struct ip_vs_service
*svc
;
2266 entry
= (struct ip_vs_service_entry
*)arg
;
2268 svc
= __ip_vs_svc_fwm_get(entry
->fwmark
);
2270 svc
= __ip_vs_service_get(entry
->protocol
,
2271 entry
->addr
, entry
->port
);
2273 ip_vs_copy_service(entry
, svc
);
2274 if (copy_to_user(user
, entry
, sizeof(*entry
)) != 0)
2276 ip_vs_service_put(svc
);
2282 case IP_VS_SO_GET_DESTS
:
2284 struct ip_vs_get_dests
*get
;
2287 get
= (struct ip_vs_get_dests
*)arg
;
2288 size
= sizeof(*get
) +
2289 sizeof(struct ip_vs_dest_entry
) * get
->num_dests
;
2291 IP_VS_ERR("length: %u != %u\n", *len
, size
);
2295 ret
= __ip_vs_get_dest_entries(get
, user
);
2299 case IP_VS_SO_GET_TIMEOUT
:
2301 struct ip_vs_timeout_user t
;
2303 __ip_vs_get_timeouts(&t
);
2304 if (copy_to_user(user
, &t
, sizeof(t
)) != 0)
2309 case IP_VS_SO_GET_DAEMON
:
2311 struct ip_vs_daemon_user d
[2];
2313 memset(&d
, 0, sizeof(d
));
2314 if (ip_vs_sync_state
& IP_VS_STATE_MASTER
) {
2315 d
[0].state
= IP_VS_STATE_MASTER
;
2316 strlcpy(d
[0].mcast_ifn
, ip_vs_master_mcast_ifn
, sizeof(d
[0].mcast_ifn
));
2317 d
[0].syncid
= ip_vs_master_syncid
;
2319 if (ip_vs_sync_state
& IP_VS_STATE_BACKUP
) {
2320 d
[1].state
= IP_VS_STATE_BACKUP
;
2321 strlcpy(d
[1].mcast_ifn
, ip_vs_backup_mcast_ifn
, sizeof(d
[1].mcast_ifn
));
2322 d
[1].syncid
= ip_vs_backup_syncid
;
2324 if (copy_to_user(user
, &d
, sizeof(d
)) != 0)
2334 mutex_unlock(&__ip_vs_mutex
);
/*
 * Socket-option registration record for IPVS, passed to
 * nf_register_sockopt() by ip_vs_control_init().  Set and get commands both
 * start at IP_VS_BASE_CTL; each optmax is given as the highest command
 * number plus one.
 * NOTE(review): at least one initializer preceding .set_optmin is not
 * visible in this extract.
 */
2339 static struct nf_sockopt_ops ip_vs_sockopts
= {
2341 .set_optmin
= IP_VS_BASE_CTL
,
2342 .set_optmax
= IP_VS_SO_SET_MAX
+1,
2343 .set
= do_ip_vs_set_ctl
,
2344 .get_optmin
= IP_VS_BASE_CTL
,
2345 .get_optmax
= IP_VS_SO_GET_MAX
+1,
2346 .get
= do_ip_vs_get_ctl
,
/*
 * Bring up the IPVS control plane.
 *
 * In order: registers the socket options (logging an error if that fails),
 * creates the "ip_vs" and "ip_vs_stats" proc entries, registers the sysctl
 * tree rooted at vs_root_table, initialises every bucket of
 * ip_vs_svc_table, ip_vs_svc_fwm_table and ip_vs_rtable, zeroes the global
 * statistics and initialises their lock, hands the statistics to
 * ip_vs_new_estimator(), and schedules defense_work to run after
 * DEFENSE_TIMER_PERIOD.
 * NOTE(review): the results of the proc and sysctl registrations are not
 * checked in the visible lines; the error return after a failed sockopt
 * registration and the final return are not visible in this extract.
 */
2350 int ip_vs_control_init(void)
2357 ret
= nf_register_sockopt(&ip_vs_sockopts
);
2359 IP_VS_ERR("cannot register sockopt.\n");
2363 proc_net_fops_create("ip_vs", 0, &ip_vs_info_fops
);
2364 proc_net_fops_create("ip_vs_stats",0, &ip_vs_stats_fops
);
2366 sysctl_header
= register_sysctl_table(vs_root_table
, 0);
2368 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
2369 for(idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
2370 INIT_LIST_HEAD(&ip_vs_svc_table
[idx
]);
2371 INIT_LIST_HEAD(&ip_vs_svc_fwm_table
[idx
]);
2373 for(idx
= 0; idx
< IP_VS_RTAB_SIZE
; idx
++) {
2374 INIT_LIST_HEAD(&ip_vs_rtable
[idx
]);
2377 memset(&ip_vs_stats
, 0, sizeof(ip_vs_stats
));
2378 spin_lock_init(&ip_vs_stats
.lock
);
2379 ip_vs_new_estimator(&ip_vs_stats
);
2381 /* Hook the defense timer */
2382 schedule_delayed_work(&defense_work
, DEFENSE_TIMER_PERIOD
);
/*
 * Tear down the IPVS control plane.
 *
 * Cleans the destination trash, cancels the self-rearming defense work,
 * stops the estimator for the global statistics, then removes what
 * ip_vs_control_init() registered: the sysctl table, the "ip_vs_stats" and
 * "ip_vs" proc entries, and finally the socket options.
 */
2389 void ip_vs_control_cleanup(void)
2392 ip_vs_trash_cleanup();
2393 cancel_rearming_delayed_work(&defense_work
);
2394 ip_vs_kill_estimator(&ip_vs_stats
);
2395 unregister_sysctl_table(sysctl_header
);
2396 proc_net_remove("ip_vs_stats");
2397 proc_net_remove("ip_vs");
2398 nf_unregister_sockopt(&ip_vs_sockopts
);