2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
40 #include <net/net_namespace.h>
41 #include <linux/nsproxy.h>
43 #ifdef CONFIG_IP_VS_IPV6
45 #include <net/ip6_route.h>
47 #include <net/route.h>
49 #include <net/genetlink.h>
51 #include <asm/uaccess.h>
53 #include <net/ip_vs.h>
55 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
56 static DEFINE_MUTEX(__ip_vs_mutex
);
58 /* lock for service table */
59 static DEFINE_RWLOCK(__ip_vs_svc_lock
);
61 /* lock for table with the real services */
62 static DEFINE_RWLOCK(__ip_vs_rs_lock
);
64 /* lock for state and timeout tables */
65 static DEFINE_SPINLOCK(ip_vs_securetcp_lock
);
67 /* lock for drop entry handling */
68 static DEFINE_SPINLOCK(__ip_vs_dropentry_lock
);
70 /* lock for drop packet handling */
71 static DEFINE_SPINLOCK(__ip_vs_droppacket_lock
);
73 /* 1/rate drop and drop-entry variables */
74 int ip_vs_drop_rate
= 0;
75 int ip_vs_drop_counter
= 0;
76 static atomic_t ip_vs_dropentry
= ATOMIC_INIT(0);
78 /* number of virtual services */
79 static int ip_vs_num_services
= 0;
81 /* sysctl variables */
82 static int sysctl_ip_vs_drop_entry
= 0;
83 static int sysctl_ip_vs_drop_packet
= 0;
84 static int sysctl_ip_vs_secure_tcp
= 0;
85 static int sysctl_ip_vs_amemthresh
= 1024;
86 static int sysctl_ip_vs_am_droprate
= 10;
87 int sysctl_ip_vs_cache_bypass
= 0;
88 int sysctl_ip_vs_expire_nodest_conn
= 0;
89 int sysctl_ip_vs_expire_quiescent_template
= 0;
90 int sysctl_ip_vs_sync_threshold
[2] = { 3, 50 };
91 int sysctl_ip_vs_nat_icmp_send
= 0;
92 #ifdef CONFIG_IP_VS_NFCT
93 int sysctl_ip_vs_conntrack
;
95 int sysctl_ip_vs_snat_reroute
= 1;
96 int sysctl_ip_vs_sync_ver
= 1; /* Default version of sync proto */
98 #ifdef CONFIG_IP_VS_DEBUG
99 static int sysctl_ip_vs_debug_level
= 0;
101 int ip_vs_get_debug_level(void)
103 return sysctl_ip_vs_debug_level
;
107 #ifdef CONFIG_IP_VS_IPV6
108 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
109 static int __ip_vs_addr_is_local_v6(const struct in6_addr
*addr
)
115 .fl6_src
= { .s6_addr32
= {0, 0, 0, 0} },
118 rt
= (struct rt6_info
*)ip6_route_output(&init_net
, NULL
, &fl
);
119 if (rt
&& rt
->rt6i_dev
&& (rt
->rt6i_dev
->flags
& IFF_LOOPBACK
))
126 * update_defense_level is called from keventd and from sysctl,
127 * so it needs to protect itself from softirqs
129 static void update_defense_level(struct netns_ipvs
*ipvs
)
132 static int old_secure_tcp
= 0;
137 /* we only count free and buffered memory (in pages) */
139 availmem
= i
.freeram
+ i
.bufferram
;
140 /* however in linux 2.5 the i.bufferram is total page cache size,
142 /* si_swapinfo(&i); */
143 /* availmem = availmem - (i.totalswap - i.freeswap); */
145 nomem
= (availmem
< sysctl_ip_vs_amemthresh
);
150 spin_lock(&__ip_vs_dropentry_lock
);
151 switch (sysctl_ip_vs_drop_entry
) {
153 atomic_set(&ip_vs_dropentry
, 0);
157 atomic_set(&ip_vs_dropentry
, 1);
158 sysctl_ip_vs_drop_entry
= 2;
160 atomic_set(&ip_vs_dropentry
, 0);
165 atomic_set(&ip_vs_dropentry
, 1);
167 atomic_set(&ip_vs_dropentry
, 0);
168 sysctl_ip_vs_drop_entry
= 1;
172 atomic_set(&ip_vs_dropentry
, 1);
175 spin_unlock(&__ip_vs_dropentry_lock
);
178 spin_lock(&__ip_vs_droppacket_lock
);
179 switch (sysctl_ip_vs_drop_packet
) {
185 ip_vs_drop_rate
= ip_vs_drop_counter
186 = sysctl_ip_vs_amemthresh
/
187 (sysctl_ip_vs_amemthresh
-availmem
);
188 sysctl_ip_vs_drop_packet
= 2;
195 ip_vs_drop_rate
= ip_vs_drop_counter
196 = sysctl_ip_vs_amemthresh
/
197 (sysctl_ip_vs_amemthresh
-availmem
);
200 sysctl_ip_vs_drop_packet
= 1;
204 ip_vs_drop_rate
= sysctl_ip_vs_am_droprate
;
207 spin_unlock(&__ip_vs_droppacket_lock
);
210 spin_lock(&ip_vs_securetcp_lock
);
211 switch (sysctl_ip_vs_secure_tcp
) {
213 if (old_secure_tcp
>= 2)
218 if (old_secure_tcp
< 2)
220 sysctl_ip_vs_secure_tcp
= 2;
222 if (old_secure_tcp
>= 2)
228 if (old_secure_tcp
< 2)
231 if (old_secure_tcp
>= 2)
233 sysctl_ip_vs_secure_tcp
= 1;
237 if (old_secure_tcp
< 2)
241 old_secure_tcp
= sysctl_ip_vs_secure_tcp
;
243 ip_vs_protocol_timeout_change(ipvs
,
244 sysctl_ip_vs_secure_tcp
> 1);
245 spin_unlock(&ip_vs_securetcp_lock
);
252 * Timer for checking the defense
254 #define DEFENSE_TIMER_PERIOD 1*HZ
255 static void defense_work_handler(struct work_struct
*work
);
256 static DECLARE_DELAYED_WORK(defense_work
, defense_work_handler
);
258 static void defense_work_handler(struct work_struct
*work
)
260 struct net
*net
= &init_net
;
261 struct netns_ipvs
*ipvs
= net_ipvs(net
);
263 update_defense_level(ipvs
);
264 if (atomic_read(&ip_vs_dropentry
))
265 ip_vs_random_dropentry();
267 schedule_delayed_work(&defense_work
, DEFENSE_TIMER_PERIOD
);
271 ip_vs_use_count_inc(void)
273 return try_module_get(THIS_MODULE
);
277 ip_vs_use_count_dec(void)
279 module_put(THIS_MODULE
);
284 * Hash table: for virtual service lookups
286 #define IP_VS_SVC_TAB_BITS 8
287 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
288 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
290 /* the service table hashed by <protocol, addr, port> */
291 static struct list_head ip_vs_svc_table
[IP_VS_SVC_TAB_SIZE
];
292 /* the service table hashed by fwmark */
293 static struct list_head ip_vs_svc_fwm_table
[IP_VS_SVC_TAB_SIZE
];
296 * Trash for destinations
298 static LIST_HEAD(ip_vs_dest_trash
);
301 * FTP & NULL virtual service counters
303 static atomic_t ip_vs_ftpsvc_counter
= ATOMIC_INIT(0);
304 static atomic_t ip_vs_nullsvc_counter
= ATOMIC_INIT(0);
308 * Returns hash value for virtual service
310 static inline unsigned
311 ip_vs_svc_hashkey(struct net
*net
, int af
, unsigned proto
,
312 const union nf_inet_addr
*addr
, __be16 port
)
314 register unsigned porth
= ntohs(port
);
315 __be32 addr_fold
= addr
->ip
;
317 #ifdef CONFIG_IP_VS_IPV6
319 addr_fold
= addr
->ip6
[0]^addr
->ip6
[1]^
320 addr
->ip6
[2]^addr
->ip6
[3];
322 addr_fold
^= ((size_t)net
>>8);
324 return (proto
^ntohl(addr_fold
)^(porth
>>IP_VS_SVC_TAB_BITS
)^porth
)
325 & IP_VS_SVC_TAB_MASK
;
329 * Returns hash value of fwmark for virtual service lookup
331 static inline unsigned ip_vs_svc_fwm_hashkey(struct net
*net
, __u32 fwmark
)
333 return (((size_t)net
>>8) ^ fwmark
) & IP_VS_SVC_TAB_MASK
;
337 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
338 * or in the ip_vs_svc_fwm_table by fwmark.
339 * Should be called with locked tables.
341 static int ip_vs_svc_hash(struct ip_vs_service
*svc
)
345 if (svc
->flags
& IP_VS_SVC_F_HASHED
) {
346 pr_err("%s(): request for already hashed, called from %pF\n",
347 __func__
, __builtin_return_address(0));
351 if (svc
->fwmark
== 0) {
353 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
355 hash
= ip_vs_svc_hashkey(svc
->net
, svc
->af
, svc
->protocol
,
356 &svc
->addr
, svc
->port
);
357 list_add(&svc
->s_list
, &ip_vs_svc_table
[hash
]);
360 * Hash it by fwmark in svc_fwm_table
362 hash
= ip_vs_svc_fwm_hashkey(svc
->net
, svc
->fwmark
);
363 list_add(&svc
->f_list
, &ip_vs_svc_fwm_table
[hash
]);
366 svc
->flags
|= IP_VS_SVC_F_HASHED
;
367 /* increase its refcnt because it is referenced by the svc table */
368 atomic_inc(&svc
->refcnt
);
374 * Unhashes a service from svc_table / svc_fwm_table.
375 * Should be called with locked tables.
377 static int ip_vs_svc_unhash(struct ip_vs_service
*svc
)
379 if (!(svc
->flags
& IP_VS_SVC_F_HASHED
)) {
380 pr_err("%s(): request for unhash flagged, called from %pF\n",
381 __func__
, __builtin_return_address(0));
385 if (svc
->fwmark
== 0) {
386 /* Remove it from the svc_table table */
387 list_del(&svc
->s_list
);
389 /* Remove it from the svc_fwm_table table */
390 list_del(&svc
->f_list
);
393 svc
->flags
&= ~IP_VS_SVC_F_HASHED
;
394 atomic_dec(&svc
->refcnt
);
400 * Get service by {netns, proto,addr,port} in the service table.
402 static inline struct ip_vs_service
*
403 __ip_vs_service_find(struct net
*net
, int af
, __u16 protocol
,
404 const union nf_inet_addr
*vaddr
, __be16 vport
)
407 struct ip_vs_service
*svc
;
409 /* Check for "full" addressed entries */
410 hash
= ip_vs_svc_hashkey(net
, af
, protocol
, vaddr
, vport
);
412 list_for_each_entry(svc
, &ip_vs_svc_table
[hash
], s_list
){
414 && ip_vs_addr_equal(af
, &svc
->addr
, vaddr
)
415 && (svc
->port
== vport
)
416 && (svc
->protocol
== protocol
)
417 && net_eq(svc
->net
, net
)) {
428 * Get service by {fwmark} in the service table.
430 static inline struct ip_vs_service
*
431 __ip_vs_svc_fwm_find(struct net
*net
, int af
, __u32 fwmark
)
434 struct ip_vs_service
*svc
;
436 /* Check for fwmark addressed entries */
437 hash
= ip_vs_svc_fwm_hashkey(net
, fwmark
);
439 list_for_each_entry(svc
, &ip_vs_svc_fwm_table
[hash
], f_list
) {
440 if (svc
->fwmark
== fwmark
&& svc
->af
== af
441 && net_eq(svc
->net
, net
)) {
450 struct ip_vs_service
*
451 ip_vs_service_get(struct net
*net
, int af
, __u32 fwmark
, __u16 protocol
,
452 const union nf_inet_addr
*vaddr
, __be16 vport
)
454 struct ip_vs_service
*svc
;
456 read_lock(&__ip_vs_svc_lock
);
459 * Check the table hashed by fwmark first
461 svc
= __ip_vs_svc_fwm_find(net
, af
, fwmark
);
466 * Check the table hashed by <protocol,addr,port>
467 * for "full" addressed entries
469 svc
= __ip_vs_service_find(net
, af
, protocol
, vaddr
, vport
);
472 && protocol
== IPPROTO_TCP
473 && atomic_read(&ip_vs_ftpsvc_counter
)
474 && (vport
== FTPDATA
|| ntohs(vport
) >= PROT_SOCK
)) {
476 * Check if ftp service entry exists, the packet
477 * might belong to FTP data connections.
479 svc
= __ip_vs_service_find(net
, af
, protocol
, vaddr
, FTPPORT
);
483 && atomic_read(&ip_vs_nullsvc_counter
)) {
485 * Check if the catch-all port (port zero) exists
487 svc
= __ip_vs_service_find(net
, af
, protocol
, vaddr
, 0);
492 atomic_inc(&svc
->usecnt
);
493 read_unlock(&__ip_vs_svc_lock
);
495 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
496 fwmark
, ip_vs_proto_name(protocol
),
497 IP_VS_DBG_ADDR(af
, vaddr
), ntohs(vport
),
498 svc
? "hit" : "not hit");
505 __ip_vs_bind_svc(struct ip_vs_dest
*dest
, struct ip_vs_service
*svc
)
507 atomic_inc(&svc
->refcnt
);
512 __ip_vs_unbind_svc(struct ip_vs_dest
*dest
)
514 struct ip_vs_service
*svc
= dest
->svc
;
517 if (atomic_dec_and_test(&svc
->refcnt
)) {
518 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
520 IP_VS_DBG_ADDR(svc
->af
, &svc
->addr
),
521 ntohs(svc
->port
), atomic_read(&svc
->usecnt
));
528 * Returns hash value for real service
530 static inline unsigned ip_vs_rs_hashkey(int af
,
531 const union nf_inet_addr
*addr
,
534 register unsigned porth
= ntohs(port
);
535 __be32 addr_fold
= addr
->ip
;
537 #ifdef CONFIG_IP_VS_IPV6
539 addr_fold
= addr
->ip6
[0]^addr
->ip6
[1]^
540 addr
->ip6
[2]^addr
->ip6
[3];
543 return (ntohl(addr_fold
)^(porth
>>IP_VS_RTAB_BITS
)^porth
)
548 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
549 * should be called with locked tables.
551 static int ip_vs_rs_hash(struct netns_ipvs
*ipvs
, struct ip_vs_dest
*dest
)
555 if (!list_empty(&dest
->d_list
)) {
560 * Hash by proto,addr,port,
561 * which are the parameters of the real service.
563 hash
= ip_vs_rs_hashkey(dest
->af
, &dest
->addr
, dest
->port
);
565 list_add(&dest
->d_list
, &ipvs
->rs_table
[hash
]);
571 * UNhashes ip_vs_dest from rs_table.
572 * should be called with locked tables.
574 static int ip_vs_rs_unhash(struct ip_vs_dest
*dest
)
577 * Remove it from the rs_table table.
579 if (!list_empty(&dest
->d_list
)) {
580 list_del(&dest
->d_list
);
581 INIT_LIST_HEAD(&dest
->d_list
);
588 * Lookup real service by <proto,addr,port> in the real service table.
591 ip_vs_lookup_real_service(struct net
*net
, int af
, __u16 protocol
,
592 const union nf_inet_addr
*daddr
,
595 struct netns_ipvs
*ipvs
= net_ipvs(net
);
597 struct ip_vs_dest
*dest
;
600 * Check for "full" addressed entries
601 * Return the first found entry
603 hash
= ip_vs_rs_hashkey(af
, daddr
, dport
);
605 read_lock(&__ip_vs_rs_lock
);
606 list_for_each_entry(dest
, &ipvs
->rs_table
[hash
], d_list
) {
608 && ip_vs_addr_equal(af
, &dest
->addr
, daddr
)
609 && (dest
->port
== dport
)
610 && ((dest
->protocol
== protocol
) ||
613 read_unlock(&__ip_vs_rs_lock
);
617 read_unlock(&__ip_vs_rs_lock
);
623 * Lookup destination by {addr,port} in the given service
625 static struct ip_vs_dest
*
626 ip_vs_lookup_dest(struct ip_vs_service
*svc
, const union nf_inet_addr
*daddr
,
629 struct ip_vs_dest
*dest
;
632 * Find the destination for the given service
634 list_for_each_entry(dest
, &svc
->destinations
, n_list
) {
635 if ((dest
->af
== svc
->af
)
636 && ip_vs_addr_equal(svc
->af
, &dest
->addr
, daddr
)
637 && (dest
->port
== dport
)) {
647 * Find destination by {daddr,dport,vaddr,protocol}
648 * Created to be used in ip_vs_process_message() in
649 * the backup synchronization daemon. It finds the
650 * destination to be bound to the received connection
653 * ip_vs_lookup_real_service() looked promising, but
654 * seems not working as expected.
656 struct ip_vs_dest
*ip_vs_find_dest(struct net
*net
, int af
,
657 const union nf_inet_addr
*daddr
,
659 const union nf_inet_addr
*vaddr
,
660 __be16 vport
, __u16 protocol
, __u32 fwmark
)
662 struct ip_vs_dest
*dest
;
663 struct ip_vs_service
*svc
;
665 svc
= ip_vs_service_get(net
, af
, fwmark
, protocol
, vaddr
, vport
);
668 dest
= ip_vs_lookup_dest(svc
, daddr
, dport
);
670 atomic_inc(&dest
->refcnt
);
671 ip_vs_service_put(svc
);
676 * Lookup dest by {svc,addr,port} in the destination trash.
677 * The destination trash is used to hold the destinations that are removed
678 * from the service table but are still referenced by some conn entries.
679 * The reason to add the destination trash is when the dest is temporary
680 * down (either by administrator or by monitor program), the dest can be
681 * picked back from the trash, the remaining connections to the dest can
682 * continue, and the counting information of the dest is also useful for
685 static struct ip_vs_dest
*
686 ip_vs_trash_get_dest(struct ip_vs_service
*svc
, const union nf_inet_addr
*daddr
,
689 struct ip_vs_dest
*dest
, *nxt
;
692 * Find the destination in trash
694 list_for_each_entry_safe(dest
, nxt
, &ip_vs_dest_trash
, n_list
) {
695 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
698 IP_VS_DBG_ADDR(svc
->af
, &dest
->addr
),
700 atomic_read(&dest
->refcnt
));
701 if (dest
->af
== svc
->af
&&
702 ip_vs_addr_equal(svc
->af
, &dest
->addr
, daddr
) &&
703 dest
->port
== dport
&&
704 dest
->vfwmark
== svc
->fwmark
&&
705 dest
->protocol
== svc
->protocol
&&
707 (ip_vs_addr_equal(svc
->af
, &dest
->vaddr
, &svc
->addr
) &&
708 dest
->vport
== svc
->port
))) {
714 * Try to purge the destination from trash if not referenced
716 if (atomic_read(&dest
->refcnt
) == 1) {
717 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
720 IP_VS_DBG_ADDR(svc
->af
, &dest
->addr
),
722 list_del(&dest
->n_list
);
723 ip_vs_dst_reset(dest
);
724 __ip_vs_unbind_svc(dest
);
734 * Clean up all the destinations in the trash
735 * Called by the ip_vs_control_cleanup()
737 * When the ip_vs_control_cleanup is activated by ipvs module exit,
738 * the service tables must have been flushed and all the connections
739 * are expired, and the refcnt of each destination in the trash must
740 * be 1, so we simply release them here.
742 static void ip_vs_trash_cleanup(void)
744 struct ip_vs_dest
*dest
, *nxt
;
746 list_for_each_entry_safe(dest
, nxt
, &ip_vs_dest_trash
, n_list
) {
747 list_del(&dest
->n_list
);
748 ip_vs_dst_reset(dest
);
749 __ip_vs_unbind_svc(dest
);
756 ip_vs_zero_stats(struct ip_vs_stats
*stats
)
758 spin_lock_bh(&stats
->lock
);
760 memset(&stats
->ustats
, 0, sizeof(stats
->ustats
));
761 ip_vs_zero_estimator(stats
);
763 spin_unlock_bh(&stats
->lock
);
767 * Update a destination in the given service
770 __ip_vs_update_dest(struct ip_vs_service
*svc
, struct ip_vs_dest
*dest
,
771 struct ip_vs_dest_user_kern
*udest
, int add
)
773 struct netns_ipvs
*ipvs
= net_ipvs(svc
->net
);
776 /* set the weight and the flags */
777 atomic_set(&dest
->weight
, udest
->weight
);
778 conn_flags
= udest
->conn_flags
& IP_VS_CONN_F_DEST_MASK
;
779 conn_flags
|= IP_VS_CONN_F_INACTIVE
;
781 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
782 if ((conn_flags
& IP_VS_CONN_F_FWD_MASK
) != IP_VS_CONN_F_MASQ
) {
783 conn_flags
|= IP_VS_CONN_F_NOOUTPUT
;
786 * Put the real service in rs_table if not present.
787 * For now only for NAT!
789 write_lock_bh(&__ip_vs_rs_lock
);
790 ip_vs_rs_hash(ipvs
, dest
);
791 write_unlock_bh(&__ip_vs_rs_lock
);
793 atomic_set(&dest
->conn_flags
, conn_flags
);
795 /* bind the service */
797 __ip_vs_bind_svc(dest
, svc
);
799 if (dest
->svc
!= svc
) {
800 __ip_vs_unbind_svc(dest
);
801 ip_vs_zero_stats(&dest
->stats
);
802 __ip_vs_bind_svc(dest
, svc
);
806 /* set the dest status flags */
807 dest
->flags
|= IP_VS_DEST_F_AVAILABLE
;
809 if (udest
->u_threshold
== 0 || udest
->u_threshold
> dest
->u_threshold
)
810 dest
->flags
&= ~IP_VS_DEST_F_OVERLOAD
;
811 dest
->u_threshold
= udest
->u_threshold
;
812 dest
->l_threshold
= udest
->l_threshold
;
814 spin_lock(&dest
->dst_lock
);
815 ip_vs_dst_reset(dest
);
816 spin_unlock(&dest
->dst_lock
);
819 ip_vs_new_estimator(svc
->net
, &dest
->stats
);
821 write_lock_bh(&__ip_vs_svc_lock
);
823 /* Wait until all other svc users go away */
824 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 0);
827 list_add(&dest
->n_list
, &svc
->destinations
);
831 /* call the update_service, because server weight may be changed */
832 if (svc
->scheduler
->update_service
)
833 svc
->scheduler
->update_service(svc
);
835 write_unlock_bh(&__ip_vs_svc_lock
);
840 * Create a destination for the given service
843 ip_vs_new_dest(struct ip_vs_service
*svc
, struct ip_vs_dest_user_kern
*udest
,
844 struct ip_vs_dest
**dest_p
)
846 struct ip_vs_dest
*dest
;
851 #ifdef CONFIG_IP_VS_IPV6
852 if (svc
->af
== AF_INET6
) {
853 atype
= ipv6_addr_type(&udest
->addr
.in6
);
854 if ((!(atype
& IPV6_ADDR_UNICAST
) ||
855 atype
& IPV6_ADDR_LINKLOCAL
) &&
856 !__ip_vs_addr_is_local_v6(&udest
->addr
.in6
))
861 atype
= inet_addr_type(&init_net
, udest
->addr
.ip
);
862 if (atype
!= RTN_LOCAL
&& atype
!= RTN_UNICAST
)
866 dest
= kzalloc(sizeof(struct ip_vs_dest
), GFP_KERNEL
);
868 pr_err("%s(): no memory.\n", __func__
);
873 dest
->protocol
= svc
->protocol
;
874 dest
->vaddr
= svc
->addr
;
875 dest
->vport
= svc
->port
;
876 dest
->vfwmark
= svc
->fwmark
;
877 ip_vs_addr_copy(svc
->af
, &dest
->addr
, &udest
->addr
);
878 dest
->port
= udest
->port
;
880 atomic_set(&dest
->activeconns
, 0);
881 atomic_set(&dest
->inactconns
, 0);
882 atomic_set(&dest
->persistconns
, 0);
883 atomic_set(&dest
->refcnt
, 1);
885 INIT_LIST_HEAD(&dest
->d_list
);
886 spin_lock_init(&dest
->dst_lock
);
887 spin_lock_init(&dest
->stats
.lock
);
888 __ip_vs_update_dest(svc
, dest
, udest
, 1);
898 * Add a destination into an existing service
901 ip_vs_add_dest(struct ip_vs_service
*svc
, struct ip_vs_dest_user_kern
*udest
)
903 struct ip_vs_dest
*dest
;
904 union nf_inet_addr daddr
;
905 __be16 dport
= udest
->port
;
910 if (udest
->weight
< 0) {
911 pr_err("%s(): server weight less than zero\n", __func__
);
915 if (udest
->l_threshold
> udest
->u_threshold
) {
916 pr_err("%s(): lower threshold is higher than upper threshold\n",
921 ip_vs_addr_copy(svc
->af
, &daddr
, &udest
->addr
);
924 * Check if the dest already exists in the list
926 dest
= ip_vs_lookup_dest(svc
, &daddr
, dport
);
929 IP_VS_DBG(1, "%s(): dest already exists\n", __func__
);
934 * Check if the dest already exists in the trash and
935 * is from the same service
937 dest
= ip_vs_trash_get_dest(svc
, &daddr
, dport
);
940 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
941 "dest->refcnt=%d, service %u/%s:%u\n",
942 IP_VS_DBG_ADDR(svc
->af
, &daddr
), ntohs(dport
),
943 atomic_read(&dest
->refcnt
),
945 IP_VS_DBG_ADDR(svc
->af
, &dest
->vaddr
),
949 * Get the destination from the trash
951 list_del(&dest
->n_list
);
953 __ip_vs_update_dest(svc
, dest
, udest
, 1);
957 * Allocate and initialize the dest structure
959 ret
= ip_vs_new_dest(svc
, udest
, &dest
);
968 * Edit a destination in the given service
971 ip_vs_edit_dest(struct ip_vs_service
*svc
, struct ip_vs_dest_user_kern
*udest
)
973 struct ip_vs_dest
*dest
;
974 union nf_inet_addr daddr
;
975 __be16 dport
= udest
->port
;
979 if (udest
->weight
< 0) {
980 pr_err("%s(): server weight less than zero\n", __func__
);
984 if (udest
->l_threshold
> udest
->u_threshold
) {
985 pr_err("%s(): lower threshold is higher than upper threshold\n",
990 ip_vs_addr_copy(svc
->af
, &daddr
, &udest
->addr
);
993 * Lookup the destination list
995 dest
= ip_vs_lookup_dest(svc
, &daddr
, dport
);
998 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__
);
1002 __ip_vs_update_dest(svc
, dest
, udest
, 0);
1010 * Delete a destination (must be already unlinked from the service)
1012 static void __ip_vs_del_dest(struct net
*net
, struct ip_vs_dest
*dest
)
1014 ip_vs_kill_estimator(net
, &dest
->stats
);
1017 * Remove it from the d-linked list with the real services.
1019 write_lock_bh(&__ip_vs_rs_lock
);
1020 ip_vs_rs_unhash(dest
);
1021 write_unlock_bh(&__ip_vs_rs_lock
);
1024 * Decrease the refcnt of the dest, and free the dest
1025 * if nobody refers to it (refcnt=0). Otherwise, throw
1026 * the destination into the trash.
1028 if (atomic_dec_and_test(&dest
->refcnt
)) {
1029 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1031 IP_VS_DBG_ADDR(dest
->af
, &dest
->addr
),
1033 ip_vs_dst_reset(dest
);
1034 /* simply decrease svc->refcnt here, let the caller check
1035 and release the service if nobody refers to it.
1036 Only user context can release destination and service,
1037 and only one user context can update virtual service at a
1038 time, so the operation here is OK */
1039 atomic_dec(&dest
->svc
->refcnt
);
1042 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1043 "dest->refcnt=%d\n",
1044 IP_VS_DBG_ADDR(dest
->af
, &dest
->addr
),
1046 atomic_read(&dest
->refcnt
));
1047 list_add(&dest
->n_list
, &ip_vs_dest_trash
);
1048 atomic_inc(&dest
->refcnt
);
1054 * Unlink a destination from the given service
1056 static void __ip_vs_unlink_dest(struct ip_vs_service
*svc
,
1057 struct ip_vs_dest
*dest
,
1060 dest
->flags
&= ~IP_VS_DEST_F_AVAILABLE
;
1063 * Remove it from the d-linked destination list.
1065 list_del(&dest
->n_list
);
1069 * Call the update_service function of its scheduler
1071 if (svcupd
&& svc
->scheduler
->update_service
)
1072 svc
->scheduler
->update_service(svc
);
1077 * Delete a destination server in the given service
1080 ip_vs_del_dest(struct ip_vs_service
*svc
, struct ip_vs_dest_user_kern
*udest
)
1082 struct ip_vs_dest
*dest
;
1083 struct net
*net
= svc
->net
;
1084 __be16 dport
= udest
->port
;
1088 dest
= ip_vs_lookup_dest(svc
, &udest
->addr
, dport
);
1091 IP_VS_DBG(1, "%s(): destination not found!\n", __func__
);
1095 write_lock_bh(&__ip_vs_svc_lock
);
1098 * Wait until all other svc users go away.
1100 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 0);
1103 * Unlink dest from the service
1105 __ip_vs_unlink_dest(svc
, dest
, 1);
1107 write_unlock_bh(&__ip_vs_svc_lock
);
1110 * Delete the destination
1112 __ip_vs_del_dest(net
, dest
);
1121 * Add a service into the service hash table
1124 ip_vs_add_service(struct net
*net
, struct ip_vs_service_user_kern
*u
,
1125 struct ip_vs_service
**svc_p
)
1128 struct ip_vs_scheduler
*sched
= NULL
;
1129 struct ip_vs_pe
*pe
= NULL
;
1130 struct ip_vs_service
*svc
= NULL
;
1132 /* increase the module use count */
1133 ip_vs_use_count_inc();
1135 /* Lookup the scheduler by 'u->sched_name' */
1136 sched
= ip_vs_scheduler_get(u
->sched_name
);
1137 if (sched
== NULL
) {
1138 pr_info("Scheduler module ip_vs_%s not found\n", u
->sched_name
);
1143 if (u
->pe_name
&& *u
->pe_name
) {
1144 pe
= ip_vs_pe_getbyname(u
->pe_name
);
1146 pr_info("persistence engine module ip_vs_pe_%s "
1147 "not found\n", u
->pe_name
);
1153 #ifdef CONFIG_IP_VS_IPV6
1154 if (u
->af
== AF_INET6
&& (u
->netmask
< 1 || u
->netmask
> 128)) {
1160 svc
= kzalloc(sizeof(struct ip_vs_service
), GFP_KERNEL
);
1162 IP_VS_DBG(1, "%s(): no memory\n", __func__
);
1167 /* I'm the first user of the service */
1168 atomic_set(&svc
->usecnt
, 0);
1169 atomic_set(&svc
->refcnt
, 0);
1172 svc
->protocol
= u
->protocol
;
1173 ip_vs_addr_copy(svc
->af
, &svc
->addr
, &u
->addr
);
1174 svc
->port
= u
->port
;
1175 svc
->fwmark
= u
->fwmark
;
1176 svc
->flags
= u
->flags
;
1177 svc
->timeout
= u
->timeout
* HZ
;
1178 svc
->netmask
= u
->netmask
;
1181 INIT_LIST_HEAD(&svc
->destinations
);
1182 rwlock_init(&svc
->sched_lock
);
1183 spin_lock_init(&svc
->stats
.lock
);
1185 /* Bind the scheduler */
1186 ret
= ip_vs_bind_scheduler(svc
, sched
);
1191 /* Bind the ct retriever */
1192 ip_vs_bind_pe(svc
, pe
);
1195 /* Update the virtual service counters */
1196 if (svc
->port
== FTPPORT
)
1197 atomic_inc(&ip_vs_ftpsvc_counter
);
1198 else if (svc
->port
== 0)
1199 atomic_inc(&ip_vs_nullsvc_counter
);
1201 ip_vs_new_estimator(net
, &svc
->stats
);
1203 /* Count only IPv4 services for old get/setsockopt interface */
1204 if (svc
->af
== AF_INET
)
1205 ip_vs_num_services
++;
1207 /* Hash the service into the service table */
1208 write_lock_bh(&__ip_vs_svc_lock
);
1209 ip_vs_svc_hash(svc
);
1210 write_unlock_bh(&__ip_vs_svc_lock
);
1217 ip_vs_unbind_scheduler(svc
);
1220 ip_vs_app_inc_put(svc
->inc
);
1225 ip_vs_scheduler_put(sched
);
1228 /* decrease the module use count */
1229 ip_vs_use_count_dec();
1236 * Edit a service and bind it with a new scheduler
1239 ip_vs_edit_service(struct ip_vs_service
*svc
, struct ip_vs_service_user_kern
*u
)
1241 struct ip_vs_scheduler
*sched
, *old_sched
;
1242 struct ip_vs_pe
*pe
= NULL
, *old_pe
= NULL
;
1246 * Lookup the scheduler, by 'u->sched_name'
1248 sched
= ip_vs_scheduler_get(u
->sched_name
);
1249 if (sched
== NULL
) {
1250 pr_info("Scheduler module ip_vs_%s not found\n", u
->sched_name
);
1255 if (u
->pe_name
&& *u
->pe_name
) {
1256 pe
= ip_vs_pe_getbyname(u
->pe_name
);
1258 pr_info("persistence engine module ip_vs_pe_%s "
1259 "not found\n", u
->pe_name
);
1266 #ifdef CONFIG_IP_VS_IPV6
1267 if (u
->af
== AF_INET6
&& (u
->netmask
< 1 || u
->netmask
> 128)) {
1273 write_lock_bh(&__ip_vs_svc_lock
);
1276 * Wait until all other svc users go away.
1278 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 0);
1281 * Set the flags and timeout value
1283 svc
->flags
= u
->flags
| IP_VS_SVC_F_HASHED
;
1284 svc
->timeout
= u
->timeout
* HZ
;
1285 svc
->netmask
= u
->netmask
;
1287 old_sched
= svc
->scheduler
;
1288 if (sched
!= old_sched
) {
1290 * Unbind the old scheduler
1292 if ((ret
= ip_vs_unbind_scheduler(svc
))) {
1298 * Bind the new scheduler
1300 if ((ret
= ip_vs_bind_scheduler(svc
, sched
))) {
1302 * If ip_vs_bind_scheduler fails, restore the old
1304 * The main reason of failure is out of memory.
1306 * The question is if the old scheduler can be
1307 * restored all the time. TODO: if it cannot be
1308 * restored some time, we must delete the service,
1309 * otherwise the system may crash.
1311 ip_vs_bind_scheduler(svc
, old_sched
);
1319 ip_vs_unbind_pe(svc
);
1320 ip_vs_bind_pe(svc
, pe
);
1324 write_unlock_bh(&__ip_vs_svc_lock
);
1326 ip_vs_scheduler_put(old_sched
);
1327 ip_vs_pe_put(old_pe
);
1333 * Delete a service from the service list
1334 * - The service must be unlinked, unlocked and not referenced!
1335 * - We are called under _bh lock
1337 static void __ip_vs_del_service(struct ip_vs_service
*svc
)
1339 struct ip_vs_dest
*dest
, *nxt
;
1340 struct ip_vs_scheduler
*old_sched
;
1341 struct ip_vs_pe
*old_pe
;
1343 pr_info("%s: enter\n", __func__
);
1345 /* Count only IPv4 services for old get/setsockopt interface */
1346 if (svc
->af
== AF_INET
)
1347 ip_vs_num_services
--;
1349 ip_vs_kill_estimator(svc
->net
, &svc
->stats
);
1351 /* Unbind scheduler */
1352 old_sched
= svc
->scheduler
;
1353 ip_vs_unbind_scheduler(svc
);
1354 ip_vs_scheduler_put(old_sched
);
1356 /* Unbind persistence engine */
1358 ip_vs_unbind_pe(svc
);
1359 ip_vs_pe_put(old_pe
);
1361 /* Unbind app inc */
1363 ip_vs_app_inc_put(svc
->inc
);
1368 * Unlink the whole destination list
1370 list_for_each_entry_safe(dest
, nxt
, &svc
->destinations
, n_list
) {
1371 __ip_vs_unlink_dest(svc
, dest
, 0);
1372 __ip_vs_del_dest(svc
->net
, dest
);
1376 * Update the virtual service counters
1378 if (svc
->port
== FTPPORT
)
1379 atomic_dec(&ip_vs_ftpsvc_counter
);
1380 else if (svc
->port
== 0)
1381 atomic_dec(&ip_vs_nullsvc_counter
);
1384 * Free the service if nobody refers to it
1386 if (atomic_read(&svc
->refcnt
) == 0) {
1387 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1389 IP_VS_DBG_ADDR(svc
->af
, &svc
->addr
),
1390 ntohs(svc
->port
), atomic_read(&svc
->usecnt
));
1394 /* decrease the module use count */
1395 ip_vs_use_count_dec();
1399 * Unlink a service from list and try to delete it if its refcnt reached 0
1401 static void ip_vs_unlink_service(struct ip_vs_service
*svc
)
1404 * Unhash it from the service table
1406 write_lock_bh(&__ip_vs_svc_lock
);
1408 ip_vs_svc_unhash(svc
);
1411 * Wait until all the svc users go away.
1413 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 0);
1415 __ip_vs_del_service(svc
);
1417 write_unlock_bh(&__ip_vs_svc_lock
);
1421 * Delete a service from the service list
1423 static int ip_vs_del_service(struct ip_vs_service
*svc
)
1427 ip_vs_unlink_service(svc
);
1434 * Flush all the virtual services
1436 static int ip_vs_flush(struct net
*net
)
1439 struct ip_vs_service
*svc
, *nxt
;
1442 * Flush the service table hashed by <netns,protocol,addr,port>
1444 for(idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1445 list_for_each_entry_safe(svc
, nxt
, &ip_vs_svc_table
[idx
],
1447 if (net_eq(svc
->net
, net
))
1448 ip_vs_unlink_service(svc
);
1453 * Flush the service table hashed by fwmark
1455 for(idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1456 list_for_each_entry_safe(svc
, nxt
,
1457 &ip_vs_svc_fwm_table
[idx
], f_list
) {
1458 if (net_eq(svc
->net
, net
))
1459 ip_vs_unlink_service(svc
);
1468 * Zero counters in a service or all services
1470 static int ip_vs_zero_service(struct ip_vs_service
*svc
)
1472 struct ip_vs_dest
*dest
;
1474 write_lock_bh(&__ip_vs_svc_lock
);
1475 list_for_each_entry(dest
, &svc
->destinations
, n_list
) {
1476 ip_vs_zero_stats(&dest
->stats
);
1478 ip_vs_zero_stats(&svc
->stats
);
1479 write_unlock_bh(&__ip_vs_svc_lock
);
1483 static int ip_vs_zero_all(struct net
*net
)
1486 struct ip_vs_service
*svc
;
1488 for(idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1489 list_for_each_entry(svc
, &ip_vs_svc_table
[idx
], s_list
) {
1490 if (net_eq(svc
->net
, net
))
1491 ip_vs_zero_service(svc
);
1495 for(idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1496 list_for_each_entry(svc
, &ip_vs_svc_fwm_table
[idx
], f_list
) {
1497 if (net_eq(svc
->net
, net
))
1498 ip_vs_zero_service(svc
);
1502 ip_vs_zero_stats(&ip_vs_stats
);
/*
 * sysctl handler for the defense-mode knobs (drop_entry, drop_packet,
 * secure_tcp).  Lets proc_dointvec do the read/write, then validates
 * the written value: valid modes are 0..3; out-of-range writes are
 * rejected (restore path partly missing from this extraction) and a
 * valid write triggers update_defense_level() for this netns.
 */
1508 proc_do_defense_mode(ctl_table
*table
, int write
,
1509 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
1511 struct net
*net
= current
->nsproxy
->net_ns
;
1512 int *valp
= table
->data
;
1516 rc
= proc_dointvec(table
, write
, buffer
, lenp
, ppos
);
/* Only act when a write actually changed the value. */
1517 if (write
&& (*valp
!= val
)) {
1518 if ((*valp
< 0) || (*valp
> 3)) {
1519 /* Restore the correct value */
/* Accepted new mode: recompute the defense level immediately. */
1522 update_defense_level(net_ipvs(net
));
/*
 * sysctl handler for the two-element sync_threshold vector.
 * Backs up the current pair, lets proc_dointvec handle the write,
 * and rolls back if the result is invalid (either value negative,
 * or threshold >= period).
 */
1530 proc_do_sync_threshold(ctl_table
*table
, int write
,
1531 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
1533 int *valp
= table
->data
;
1537 /* backup the value first */
1538 memcpy(val
, valp
, sizeof(val
));
1540 rc
= proc_dointvec(table
, write
, buffer
, lenp
, ppos
);
/* Sanity: valp[0] is the threshold, valp[1] the period; require 0 <= threshold < period. */
1541 if (write
&& (valp
[0] < 0 || valp
[1] < 0 || valp
[0] >= valp
[1])) {
1542 /* Restore the correct value */
1543 memcpy(valp
, val
, sizeof(val
));
/*
 * sysctl handler for sync_version.  Only 0 and 1 are valid protocol
 * versions; an out-of-range write is reverted (restore line missing
 * from this extraction), and a valid change is propagated to the
 * sync daemon via ip_vs_sync_switch_mode().
 */
1549 proc_do_sync_mode(ctl_table
*table
, int write
,
1550 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
1552 int *valp
= table
->data
;
1556 rc
= proc_dointvec(table
, write
, buffer
, lenp
, ppos
);
/* Only react when a write changed the stored value. */
1557 if (write
&& (*valp
!= val
)) {
1558 if ((*valp
< 0) || (*valp
> 1)) {
1559 /* Restore the correct value */
1562 struct net
*net
= current
->nsproxy
->net_ns
;
/* NOTE(review): 'val' here appears to be the accepted new mode — confirm in full file. */
1563 ip_vs_sync_switch_mode(net
, val
);
1570 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1573 static struct ctl_table vs_vars
[] = {
1575 .procname
= "amemthresh",
1576 .data
= &sysctl_ip_vs_amemthresh
,
1577 .maxlen
= sizeof(int),
1579 .proc_handler
= proc_dointvec
,
1581 #ifdef CONFIG_IP_VS_DEBUG
1583 .procname
= "debug_level",
1584 .data
= &sysctl_ip_vs_debug_level
,
1585 .maxlen
= sizeof(int),
1587 .proc_handler
= proc_dointvec
,
1591 .procname
= "am_droprate",
1592 .data
= &sysctl_ip_vs_am_droprate
,
1593 .maxlen
= sizeof(int),
1595 .proc_handler
= proc_dointvec
,
1598 .procname
= "drop_entry",
1599 .data
= &sysctl_ip_vs_drop_entry
,
1600 .maxlen
= sizeof(int),
1602 .proc_handler
= proc_do_defense_mode
,
1605 .procname
= "drop_packet",
1606 .data
= &sysctl_ip_vs_drop_packet
,
1607 .maxlen
= sizeof(int),
1609 .proc_handler
= proc_do_defense_mode
,
1611 #ifdef CONFIG_IP_VS_NFCT
1613 .procname
= "conntrack",
1614 .data
= &sysctl_ip_vs_conntrack
,
1615 .maxlen
= sizeof(int),
1617 .proc_handler
= &proc_dointvec
,
1621 .procname
= "secure_tcp",
1622 .data
= &sysctl_ip_vs_secure_tcp
,
1623 .maxlen
= sizeof(int),
1625 .proc_handler
= proc_do_defense_mode
,
1628 .procname
= "snat_reroute",
1629 .data
= &sysctl_ip_vs_snat_reroute
,
1630 .maxlen
= sizeof(int),
1632 .proc_handler
= &proc_dointvec
,
1635 .procname
= "sync_version",
1636 .data
= &sysctl_ip_vs_sync_ver
,
1637 .maxlen
= sizeof(int),
1639 .proc_handler
= &proc_do_sync_mode
,
1643 .procname
= "timeout_established",
1644 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_ESTABLISHED
],
1645 .maxlen
= sizeof(int),
1647 .proc_handler
= proc_dointvec_jiffies
,
1650 .procname
= "timeout_synsent",
1651 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_SYN_SENT
],
1652 .maxlen
= sizeof(int),
1654 .proc_handler
= proc_dointvec_jiffies
,
1657 .procname
= "timeout_synrecv",
1658 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_SYN_RECV
],
1659 .maxlen
= sizeof(int),
1661 .proc_handler
= proc_dointvec_jiffies
,
1664 .procname
= "timeout_finwait",
1665 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_FIN_WAIT
],
1666 .maxlen
= sizeof(int),
1668 .proc_handler
= proc_dointvec_jiffies
,
1671 .procname
= "timeout_timewait",
1672 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_TIME_WAIT
],
1673 .maxlen
= sizeof(int),
1675 .proc_handler
= proc_dointvec_jiffies
,
1678 .procname
= "timeout_close",
1679 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_CLOSE
],
1680 .maxlen
= sizeof(int),
1682 .proc_handler
= proc_dointvec_jiffies
,
1685 .procname
= "timeout_closewait",
1686 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_CLOSE_WAIT
],
1687 .maxlen
= sizeof(int),
1689 .proc_handler
= proc_dointvec_jiffies
,
1692 .procname
= "timeout_lastack",
1693 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_LAST_ACK
],
1694 .maxlen
= sizeof(int),
1696 .proc_handler
= proc_dointvec_jiffies
,
1699 .procname
= "timeout_listen",
1700 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_LISTEN
],
1701 .maxlen
= sizeof(int),
1703 .proc_handler
= proc_dointvec_jiffies
,
1706 .procname
= "timeout_synack",
1707 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_SYNACK
],
1708 .maxlen
= sizeof(int),
1710 .proc_handler
= proc_dointvec_jiffies
,
1713 .procname
= "timeout_udp",
1714 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_UDP
],
1715 .maxlen
= sizeof(int),
1717 .proc_handler
= proc_dointvec_jiffies
,
1720 .procname
= "timeout_icmp",
1721 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_ICMP
],
1722 .maxlen
= sizeof(int),
1724 .proc_handler
= proc_dointvec_jiffies
,
1728 .procname
= "cache_bypass",
1729 .data
= &sysctl_ip_vs_cache_bypass
,
1730 .maxlen
= sizeof(int),
1732 .proc_handler
= proc_dointvec
,
1735 .procname
= "expire_nodest_conn",
1736 .data
= &sysctl_ip_vs_expire_nodest_conn
,
1737 .maxlen
= sizeof(int),
1739 .proc_handler
= proc_dointvec
,
1742 .procname
= "expire_quiescent_template",
1743 .data
= &sysctl_ip_vs_expire_quiescent_template
,
1744 .maxlen
= sizeof(int),
1746 .proc_handler
= proc_dointvec
,
1749 .procname
= "sync_threshold",
1750 .data
= &sysctl_ip_vs_sync_threshold
,
1751 .maxlen
= sizeof(sysctl_ip_vs_sync_threshold
),
1753 .proc_handler
= proc_do_sync_threshold
,
1756 .procname
= "nat_icmp_send",
1757 .data
= &sysctl_ip_vs_nat_icmp_send
,
1758 .maxlen
= sizeof(int),
1760 .proc_handler
= proc_dointvec
,
/* sysctl path for the IPVS table: /proc/sys/net/ipv4/vs/ */
1765 const struct ctl_path net_vs_ctl_path
[] = {
1766 { .procname
= "net", },
1767 { .procname
= "ipv4", },
1768 { .procname
= "vs", },
/* Exported so other IPVS modules can register under the same path. */
1771 EXPORT_SYMBOL_GPL(net_vs_ctl_path
);
1773 static struct ctl_table_header
* sysctl_header
;
1775 #ifdef CONFIG_PROC_FS
1778 struct seq_net_private p
; /* Do not move this, netns depends upon it*/
1779 struct list_head
*table
;
1784 * Write the contents of the VS rule table to a PROCfs file.
1785 * (It is kept just for backward compatibility)
/*
 * Map a connection's forwarding-method bits (IP_VS_CONN_F_FWD_MASK)
 * to a short name for /proc output.  The returned string literals
 * are missing from this extraction (presumably "Local"/"Tunnel"/
 * "Route"/"Masq" — TODO confirm).
 */
1787 static inline const char *ip_vs_fwd_name(unsigned flags
)
1789 switch (flags
& IP_VS_CONN_F_FWD_MASK
) {
1790 case IP_VS_CONN_F_LOCALNODE
:
1792 case IP_VS_CONN_F_TUNNEL
:
1794 case IP_VS_CONN_F_DROUTE
:
1802 /* Get the Nth entry in the two lists */
1803 static struct ip_vs_service
*ip_vs_info_array(struct seq_file
*seq
, loff_t pos
)
1805 struct net
*net
= seq_file_net(seq
);
1806 struct ip_vs_iter
*iter
= seq
->private;
1808 struct ip_vs_service
*svc
;
1810 /* look in hash by protocol */
1811 for (idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1812 list_for_each_entry(svc
, &ip_vs_svc_table
[idx
], s_list
) {
1813 if (net_eq(svc
->net
, net
) && pos
-- == 0) {
1814 iter
->table
= ip_vs_svc_table
;
1821 /* keep looking in fwmark */
1822 for (idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1823 list_for_each_entry(svc
, &ip_vs_svc_fwm_table
[idx
], f_list
) {
1824 if (net_eq(svc
->net
, net
) && pos
-- == 0) {
1825 iter
->table
= ip_vs_svc_fwm_table
;
/*
 * seq_file start callback: take the service table read lock (held
 * until ip_vs_info_seq_stop releases it) and position at entry
 * *pos - 1, or return SEQ_START_TOKEN for the header row.
 */
1835 static void *ip_vs_info_seq_start(struct seq_file
*seq
, loff_t
*pos
)
1836 __acquires(__ip_vs_svc_lock
)
1839 read_lock_bh(&__ip_vs_svc_lock
);
1840 return *pos
? ip_vs_info_array(seq
, *pos
- 1) : SEQ_START_TOKEN
;
1844 static void *ip_vs_info_seq_next(struct seq_file
*seq
, void *v
, loff_t
*pos
)
1846 struct list_head
*e
;
1847 struct ip_vs_iter
*iter
;
1848 struct ip_vs_service
*svc
;
1851 if (v
== SEQ_START_TOKEN
)
1852 return ip_vs_info_array(seq
,0);
1855 iter
= seq
->private;
1857 if (iter
->table
== ip_vs_svc_table
) {
1858 /* next service in table hashed by protocol */
1859 if ((e
= svc
->s_list
.next
) != &ip_vs_svc_table
[iter
->bucket
])
1860 return list_entry(e
, struct ip_vs_service
, s_list
);
1863 while (++iter
->bucket
< IP_VS_SVC_TAB_SIZE
) {
1864 list_for_each_entry(svc
,&ip_vs_svc_table
[iter
->bucket
],
1870 iter
->table
= ip_vs_svc_fwm_table
;
1875 /* next service in hashed by fwmark */
1876 if ((e
= svc
->f_list
.next
) != &ip_vs_svc_fwm_table
[iter
->bucket
])
1877 return list_entry(e
, struct ip_vs_service
, f_list
);
1880 while (++iter
->bucket
< IP_VS_SVC_TAB_SIZE
) {
1881 list_for_each_entry(svc
, &ip_vs_svc_fwm_table
[iter
->bucket
],
/*
 * seq_file stop callback: drop the read lock acquired in
 * ip_vs_info_seq_start (sparse annotation __releases documents
 * the pairing).
 */
1889 static void ip_vs_info_seq_stop(struct seq_file
*seq
, void *v
)
1890 __releases(__ip_vs_svc_lock
)
1892 read_unlock_bh(&__ip_vs_svc_lock
);
1896 static int ip_vs_info_seq_show(struct seq_file
*seq
, void *v
)
1898 if (v
== SEQ_START_TOKEN
) {
1900 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1901 NVERSION(IP_VS_VERSION_CODE
), ip_vs_conn_tab_size
);
1903 "Prot LocalAddress:Port Scheduler Flags\n");
1905 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1907 const struct ip_vs_service
*svc
= v
;
1908 const struct ip_vs_iter
*iter
= seq
->private;
1909 const struct ip_vs_dest
*dest
;
1911 if (iter
->table
== ip_vs_svc_table
) {
1912 #ifdef CONFIG_IP_VS_IPV6
1913 if (svc
->af
== AF_INET6
)
1914 seq_printf(seq
, "%s [%pI6]:%04X %s ",
1915 ip_vs_proto_name(svc
->protocol
),
1918 svc
->scheduler
->name
);
1921 seq_printf(seq
, "%s %08X:%04X %s %s ",
1922 ip_vs_proto_name(svc
->protocol
),
1923 ntohl(svc
->addr
.ip
),
1925 svc
->scheduler
->name
,
1926 (svc
->flags
& IP_VS_SVC_F_ONEPACKET
)?"ops ":"");
1928 seq_printf(seq
, "FWM %08X %s %s",
1929 svc
->fwmark
, svc
->scheduler
->name
,
1930 (svc
->flags
& IP_VS_SVC_F_ONEPACKET
)?"ops ":"");
1933 if (svc
->flags
& IP_VS_SVC_F_PERSISTENT
)
1934 seq_printf(seq
, "persistent %d %08X\n",
1936 ntohl(svc
->netmask
));
1938 seq_putc(seq
, '\n');
1940 list_for_each_entry(dest
, &svc
->destinations
, n_list
) {
1941 #ifdef CONFIG_IP_VS_IPV6
1942 if (dest
->af
== AF_INET6
)
1945 " %-7s %-6d %-10d %-10d\n",
1948 ip_vs_fwd_name(atomic_read(&dest
->conn_flags
)),
1949 atomic_read(&dest
->weight
),
1950 atomic_read(&dest
->activeconns
),
1951 atomic_read(&dest
->inactconns
));
1956 "%-7s %-6d %-10d %-10d\n",
1957 ntohl(dest
->addr
.ip
),
1959 ip_vs_fwd_name(atomic_read(&dest
->conn_flags
)),
1960 atomic_read(&dest
->weight
),
1961 atomic_read(&dest
->activeconns
),
1962 atomic_read(&dest
->inactconns
));
/* seq_file iterator for /proc listing of virtual services. */
1969 static const struct seq_operations ip_vs_info_seq_ops
= {
1970 .start
= ip_vs_info_seq_start
,
1971 .next
= ip_vs_info_seq_next
,
1972 .stop
= ip_vs_info_seq_stop
,
1973 .show
= ip_vs_info_seq_show
,
/*
 * open() for the per-netns services proc file; allocates a
 * struct ip_vs_iter as the seq_file private cursor.
 */
1976 static int ip_vs_info_open(struct inode
*inode
, struct file
*file
)
1978 return seq_open_net(inode
, file
, &ip_vs_info_seq_ops
,
1979 sizeof(struct ip_vs_iter
));
1982 static const struct file_operations ip_vs_info_fops
= {
1983 .owner
= THIS_MODULE
,
1984 .open
= ip_vs_info_open
,
1986 .llseek
= seq_lseek
,
1987 .release
= seq_release_private
,
1992 struct ip_vs_stats ip_vs_stats
= {
1993 .lock
= __SPIN_LOCK_UNLOCKED(ip_vs_stats
.lock
),
1996 #ifdef CONFIG_PROC_FS
1997 static int ip_vs_stats_show(struct seq_file
*seq
, void *v
)
2000 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2002 " Total Incoming Outgoing Incoming Outgoing\n");
2004 " Conns Packets Packets Bytes Bytes\n");
2006 spin_lock_bh(&ip_vs_stats
.lock
);
2007 seq_printf(seq
, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats
.ustats
.conns
,
2008 ip_vs_stats
.ustats
.inpkts
, ip_vs_stats
.ustats
.outpkts
,
2009 (unsigned long long) ip_vs_stats
.ustats
.inbytes
,
2010 (unsigned long long) ip_vs_stats
.ustats
.outbytes
);
2012 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2014 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2015 seq_printf(seq
,"%8X %8X %8X %16X %16X\n",
2016 ip_vs_stats
.ustats
.cps
,
2017 ip_vs_stats
.ustats
.inpps
,
2018 ip_vs_stats
.ustats
.outpps
,
2019 ip_vs_stats
.ustats
.inbps
,
2020 ip_vs_stats
.ustats
.outbps
);
2021 spin_unlock_bh(&ip_vs_stats
.lock
);
/* open() for the single-shot stats proc file (netns-aware). */
2026 static int ip_vs_stats_seq_open(struct inode
*inode
, struct file
*file
)
2028 return single_open_net(inode
, file
, ip_vs_stats_show
)
;
2031 static const struct file_operations ip_vs_stats_fops
= {
2032 .owner
= THIS_MODULE
,
2033 .open
= ip_vs_stats_seq_open
,
2035 .llseek
= seq_lseek
,
2036 .release
= single_release
,
2042 * Set timeout values for tcp tcpfin udp in the timeout_table.
/*
 * Apply user-supplied protocol timeouts (seconds) to the per-netns
 * protocol data tables; a zero field means "leave unchanged".
 * Values are converted to jiffies with * HZ.
 */
2044 static int ip_vs_set_timeout(struct net
*net
, struct ip_vs_timeout_user
*u
)
2046 struct ip_vs_proto_data
*pd
;
2048 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2053 #ifdef CONFIG_IP_VS_PROTO_TCP
/* TCP ESTABLISHED timeout. */
2054 if (u
->tcp_timeout
) {
2055 pd
= ip_vs_proto_data_get(net
, IPPROTO_TCP
);
2056 pd
->timeout_table
[IP_VS_TCP_S_ESTABLISHED
]
2057 = u
->tcp_timeout
* HZ
;
/* TCP FIN_WAIT timeout. */
2060 if (u
->tcp_fin_timeout
) {
2061 pd
= ip_vs_proto_data_get(net
, IPPROTO_TCP
);
2062 pd
->timeout_table
[IP_VS_TCP_S_FIN_WAIT
]
2063 = u
->tcp_fin_timeout
* HZ
;
2067 #ifdef CONFIG_IP_VS_PROTO_UDP
/* UDP "normal" state timeout. */
2068 if (u
->udp_timeout
) {
2069 pd
= ip_vs_proto_data_get(net
, IPPROTO_UDP
);
2070 pd
->timeout_table
[IP_VS_UDP_S_NORMAL
]
2071 = u
->udp_timeout
* HZ
;
2078 #define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2079 #define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
2080 #define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
2081 sizeof(struct ip_vs_dest_user))
2082 #define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2083 #define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
2084 #define MAX_ARG_LEN SVCDEST_ARG_LEN
2086 static const unsigned char set_arglen
[SET_CMDID(IP_VS_SO_SET_MAX
)+1] = {
2087 [SET_CMDID(IP_VS_SO_SET_ADD
)] = SERVICE_ARG_LEN
,
2088 [SET_CMDID(IP_VS_SO_SET_EDIT
)] = SERVICE_ARG_LEN
,
2089 [SET_CMDID(IP_VS_SO_SET_DEL
)] = SERVICE_ARG_LEN
,
2090 [SET_CMDID(IP_VS_SO_SET_FLUSH
)] = 0,
2091 [SET_CMDID(IP_VS_SO_SET_ADDDEST
)] = SVCDEST_ARG_LEN
,
2092 [SET_CMDID(IP_VS_SO_SET_DELDEST
)] = SVCDEST_ARG_LEN
,
2093 [SET_CMDID(IP_VS_SO_SET_EDITDEST
)] = SVCDEST_ARG_LEN
,
2094 [SET_CMDID(IP_VS_SO_SET_TIMEOUT
)] = TIMEOUT_ARG_LEN
,
2095 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON
)] = DAEMON_ARG_LEN
,
2096 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON
)] = DAEMON_ARG_LEN
,
2097 [SET_CMDID(IP_VS_SO_SET_ZERO
)] = SERVICE_ARG_LEN
,
/*
 * Convert the legacy (IPv4-only) sockopt service struct into the
 * kernel-internal, address-family-aware representation.  Zeroes the
 * destination first so fields absent from the compat struct stay 0.
 */
2100 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern
*usvc
,
2101 struct ip_vs_service_user
*usvc_compat
)
2103 memset(usvc
, 0, sizeof(*usvc
));
/* Identity fields (compat addr is a bare IPv4 address). */
2106 usvc
->protocol
= usvc_compat
->protocol
;
2107 usvc
->addr
.ip
= usvc_compat
->addr
;
2108 usvc
->port
= usvc_compat
->port
;
2109 usvc
->fwmark
= usvc_compat
->fwmark
;
2111 /* Deep copy of sched_name is not needed here */
2112 usvc
->sched_name
= usvc_compat
->sched_name
;
/* Behavioural parameters. */
2114 usvc
->flags
= usvc_compat
->flags
;
2115 usvc
->timeout
= usvc_compat
->timeout
;
2116 usvc
->netmask
= usvc_compat
->netmask
;
/*
 * Convert the legacy (IPv4-only) sockopt destination struct into the
 * kernel-internal, address-family-aware representation.
 */
2119 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern
*udest
,
2120 struct ip_vs_dest_user
*udest_compat
)
2122 memset(udest
, 0, sizeof(*udest
));
/* Compat addr is a bare IPv4 address. */
2124 udest
->addr
.ip
= udest_compat
->addr
;
2125 udest
->port
= udest_compat
->port
;
2126 udest
->conn_flags
= udest_compat
->conn_flags
;
2127 udest
->weight
= udest_compat
->weight
;
/* Upper/lower connection thresholds for overload handling. */
2128 udest
->u_threshold
= udest_compat
->u_threshold
;
2129 udest
->l_threshold
= udest_compat
->l_threshold
;
2133 do_ip_vs_set_ctl(struct sock
*sk
, int cmd
, void __user
*user
, unsigned int len
)
2135 struct net
*net
= sock_net(sk
);
2137 unsigned char arg
[MAX_ARG_LEN
];
2138 struct ip_vs_service_user
*usvc_compat
;
2139 struct ip_vs_service_user_kern usvc
;
2140 struct ip_vs_service
*svc
;
2141 struct ip_vs_dest_user
*udest_compat
;
2142 struct ip_vs_dest_user_kern udest
;
2144 if (!capable(CAP_NET_ADMIN
))
2147 if (cmd
< IP_VS_BASE_CTL
|| cmd
> IP_VS_SO_SET_MAX
)
2149 if (len
< 0 || len
> MAX_ARG_LEN
)
2151 if (len
!= set_arglen
[SET_CMDID(cmd
)]) {
2152 pr_err("set_ctl: len %u != %u\n",
2153 len
, set_arglen
[SET_CMDID(cmd
)]);
2157 if (copy_from_user(arg
, user
, len
) != 0)
2160 /* increase the module use count */
2161 ip_vs_use_count_inc();
2163 if (mutex_lock_interruptible(&__ip_vs_mutex
)) {
2168 if (cmd
== IP_VS_SO_SET_FLUSH
) {
2169 /* Flush the virtual service */
2170 ret
= ip_vs_flush(net
);
2172 } else if (cmd
== IP_VS_SO_SET_TIMEOUT
) {
2173 /* Set timeout values for (tcp tcpfin udp) */
2174 ret
= ip_vs_set_timeout(net
, (struct ip_vs_timeout_user
*)arg
);
2176 } else if (cmd
== IP_VS_SO_SET_STARTDAEMON
) {
2177 struct ip_vs_daemon_user
*dm
= (struct ip_vs_daemon_user
*)arg
;
2178 ret
= start_sync_thread(net
, dm
->state
, dm
->mcast_ifn
,
2181 } else if (cmd
== IP_VS_SO_SET_STOPDAEMON
) {
2182 struct ip_vs_daemon_user
*dm
= (struct ip_vs_daemon_user
*)arg
;
2183 ret
= stop_sync_thread(net
, dm
->state
);
2187 usvc_compat
= (struct ip_vs_service_user
*)arg
;
2188 udest_compat
= (struct ip_vs_dest_user
*)(usvc_compat
+ 1);
2190 /* We only use the new structs internally, so copy userspace compat
2191 * structs to extended internal versions */
2192 ip_vs_copy_usvc_compat(&usvc
, usvc_compat
);
2193 ip_vs_copy_udest_compat(&udest
, udest_compat
);
2195 if (cmd
== IP_VS_SO_SET_ZERO
) {
2196 /* if no service address is set, zero counters in all */
2197 if (!usvc
.fwmark
&& !usvc
.addr
.ip
&& !usvc
.port
) {
2198 ret
= ip_vs_zero_all(net
);
2203 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2204 if (usvc
.protocol
!= IPPROTO_TCP
&& usvc
.protocol
!= IPPROTO_UDP
&&
2205 usvc
.protocol
!= IPPROTO_SCTP
) {
2206 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2207 usvc
.protocol
, &usvc
.addr
.ip
,
2208 ntohs(usvc
.port
), usvc
.sched_name
);
2213 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2214 if (usvc
.fwmark
== 0)
2215 svc
= __ip_vs_service_find(net
, usvc
.af
, usvc
.protocol
,
2216 &usvc
.addr
, usvc
.port
);
2218 svc
= __ip_vs_svc_fwm_find(net
, usvc
.af
, usvc
.fwmark
);
2220 if (cmd
!= IP_VS_SO_SET_ADD
2221 && (svc
== NULL
|| svc
->protocol
!= usvc
.protocol
)) {
2227 case IP_VS_SO_SET_ADD
:
2231 ret
= ip_vs_add_service(net
, &usvc
, &svc
);
2233 case IP_VS_SO_SET_EDIT
:
2234 ret
= ip_vs_edit_service(svc
, &usvc
);
2236 case IP_VS_SO_SET_DEL
:
2237 ret
= ip_vs_del_service(svc
);
2241 case IP_VS_SO_SET_ZERO
:
2242 ret
= ip_vs_zero_service(svc
);
2244 case IP_VS_SO_SET_ADDDEST
:
2245 ret
= ip_vs_add_dest(svc
, &udest
);
2247 case IP_VS_SO_SET_EDITDEST
:
2248 ret
= ip_vs_edit_dest(svc
, &udest
);
2250 case IP_VS_SO_SET_DELDEST
:
2251 ret
= ip_vs_del_dest(svc
, &udest
);
2258 mutex_unlock(&__ip_vs_mutex
);
2260 /* decrease the module use count */
2261 ip_vs_use_count_dec();
/*
 * Snapshot a stats block into the userspace-facing struct under the
 * stats spinlock, so the copied counters are mutually consistent.
 */
2268 ip_vs_copy_stats(struct ip_vs_stats_user
*dst
, struct ip_vs_stats
*src
)
2270 spin_lock_bh(&src
->lock
);
2271 memcpy(dst
, &src
->ustats
, sizeof(*dst
));
2272 spin_unlock_bh(&src
->lock
);
/*
 * Fill a userspace service entry from the in-kernel service.
 * Note: timeout is converted from jiffies back to seconds (/ HZ),
 * and only the IPv4 address (.ip) fits the legacy entry format.
 */
2276 ip_vs_copy_service(struct ip_vs_service_entry
*dst
, struct ip_vs_service
*src
)
2278 dst
->protocol
= src
->protocol
;
2279 dst
->addr
= src
->addr
.ip
;
2280 dst
->port
= src
->port
;
2281 dst
->fwmark
= src
->fwmark
;
/* Bounded copy of the scheduler name. */
2282 strlcpy(dst
->sched_name
, src
->scheduler
->name
, sizeof(dst
->sched_name
));
2283 dst
->flags
= src
->flags
;
2284 dst
->timeout
= src
->timeout
/ HZ
;
2285 dst
->netmask
= src
->netmask
;
2286 dst
->num_dests
= src
->num_dests
;
/* Consistent counter snapshot (takes src->stats.lock). */
2287 ip_vs_copy_stats(&dst
->stats
, &src
->stats
);
2291 __ip_vs_get_service_entries(struct net
*net
,
2292 const struct ip_vs_get_services
*get
,
2293 struct ip_vs_get_services __user
*uptr
)
2296 struct ip_vs_service
*svc
;
2297 struct ip_vs_service_entry entry
;
2300 for (idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
2301 list_for_each_entry(svc
, &ip_vs_svc_table
[idx
], s_list
) {
2302 /* Only expose IPv4 entries to old interface */
2303 if (svc
->af
!= AF_INET
|| !net_eq(svc
->net
, net
))
2306 if (count
>= get
->num_services
)
2308 memset(&entry
, 0, sizeof(entry
));
2309 ip_vs_copy_service(&entry
, svc
);
2310 if (copy_to_user(&uptr
->entrytable
[count
],
2311 &entry
, sizeof(entry
))) {
2319 for (idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
2320 list_for_each_entry(svc
, &ip_vs_svc_fwm_table
[idx
], f_list
) {
2321 /* Only expose IPv4 entries to old interface */
2322 if (svc
->af
!= AF_INET
|| !net_eq(svc
->net
, net
))
2325 if (count
>= get
->num_services
)
2327 memset(&entry
, 0, sizeof(entry
));
2328 ip_vs_copy_service(&entry
, svc
);
2329 if (copy_to_user(&uptr
->entrytable
[count
],
2330 &entry
, sizeof(entry
))) {
2342 __ip_vs_get_dest_entries(struct net
*net
, const struct ip_vs_get_dests
*get
,
2343 struct ip_vs_get_dests __user
*uptr
)
2345 struct ip_vs_service
*svc
;
2346 union nf_inet_addr addr
= { .ip
= get
->addr
};
2350 svc
= __ip_vs_svc_fwm_find(net
, AF_INET
, get
->fwmark
);
2352 svc
= __ip_vs_service_find(net
, AF_INET
, get
->protocol
, &addr
,
2357 struct ip_vs_dest
*dest
;
2358 struct ip_vs_dest_entry entry
;
2360 list_for_each_entry(dest
, &svc
->destinations
, n_list
) {
2361 if (count
>= get
->num_dests
)
2364 entry
.addr
= dest
->addr
.ip
;
2365 entry
.port
= dest
->port
;
2366 entry
.conn_flags
= atomic_read(&dest
->conn_flags
);
2367 entry
.weight
= atomic_read(&dest
->weight
);
2368 entry
.u_threshold
= dest
->u_threshold
;
2369 entry
.l_threshold
= dest
->l_threshold
;
2370 entry
.activeconns
= atomic_read(&dest
->activeconns
);
2371 entry
.inactconns
= atomic_read(&dest
->inactconns
);
2372 entry
.persistconns
= atomic_read(&dest
->persistconns
);
2373 ip_vs_copy_stats(&entry
.stats
, &dest
->stats
);
2374 if (copy_to_user(&uptr
->entrytable
[count
],
2375 &entry
, sizeof(entry
))) {
/*
 * Report the current per-netns protocol timeouts to userspace,
 * converting jiffies back to seconds (/ HZ).  Mirrors
 * ip_vs_set_timeout() above.
 */
2387 __ip_vs_get_timeouts(struct net
*net
, struct ip_vs_timeout_user
*u
)
2389 struct ip_vs_proto_data
*pd
;
2391 #ifdef CONFIG_IP_VS_PROTO_TCP
2392 pd
= ip_vs_proto_data_get(net
, IPPROTO_TCP
);
2393 u
->tcp_timeout
= pd
->timeout_table
[IP_VS_TCP_S_ESTABLISHED
] / HZ
;
2394 u
->tcp_fin_timeout
= pd
->timeout_table
[IP_VS_TCP_S_FIN_WAIT
] / HZ
;
2396 #ifdef CONFIG_IP_VS_PROTO_UDP
2397 pd
= ip_vs_proto_data_get(net
, IPPROTO_UDP
);
/* (lhs of this assignment — presumably u->udp_timeout — is missing from the extraction) */
2399 pd
->timeout_table
[IP_VS_UDP_S_NORMAL
] / HZ
;
2404 #define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2405 #define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2406 #define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2407 #define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2408 #define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2409 #define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2410 #define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2412 static const unsigned char get_arglen
[GET_CMDID(IP_VS_SO_GET_MAX
)+1] = {
2413 [GET_CMDID(IP_VS_SO_GET_VERSION
)] = 64,
2414 [GET_CMDID(IP_VS_SO_GET_INFO
)] = GET_INFO_ARG_LEN
,
2415 [GET_CMDID(IP_VS_SO_GET_SERVICES
)] = GET_SERVICES_ARG_LEN
,
2416 [GET_CMDID(IP_VS_SO_GET_SERVICE
)] = GET_SERVICE_ARG_LEN
,
2417 [GET_CMDID(IP_VS_SO_GET_DESTS
)] = GET_DESTS_ARG_LEN
,
2418 [GET_CMDID(IP_VS_SO_GET_TIMEOUT
)] = GET_TIMEOUT_ARG_LEN
,
2419 [GET_CMDID(IP_VS_SO_GET_DAEMON
)] = GET_DAEMON_ARG_LEN
,
2423 do_ip_vs_get_ctl(struct sock
*sk
, int cmd
, void __user
*user
, int *len
)
2425 unsigned char arg
[128];
2427 unsigned int copylen
;
2428 struct net
*net
= sock_net(sk
);
2429 struct netns_ipvs
*ipvs
= net_ipvs(net
);
2432 if (!capable(CAP_NET_ADMIN
))
2435 if (cmd
< IP_VS_BASE_CTL
|| cmd
> IP_VS_SO_GET_MAX
)
2438 if (*len
< get_arglen
[GET_CMDID(cmd
)]) {
2439 pr_err("get_ctl: len %u < %u\n",
2440 *len
, get_arglen
[GET_CMDID(cmd
)]);
2444 copylen
= get_arglen
[GET_CMDID(cmd
)];
2448 if (copy_from_user(arg
, user
, copylen
) != 0)
2451 if (mutex_lock_interruptible(&__ip_vs_mutex
))
2452 return -ERESTARTSYS
;
2455 case IP_VS_SO_GET_VERSION
:
2459 sprintf(buf
, "IP Virtual Server version %d.%d.%d (size=%d)",
2460 NVERSION(IP_VS_VERSION_CODE
), ip_vs_conn_tab_size
);
2461 if (copy_to_user(user
, buf
, strlen(buf
)+1) != 0) {
2465 *len
= strlen(buf
)+1;
2469 case IP_VS_SO_GET_INFO
:
2471 struct ip_vs_getinfo info
;
2472 info
.version
= IP_VS_VERSION_CODE
;
2473 info
.size
= ip_vs_conn_tab_size
;
2474 info
.num_services
= ip_vs_num_services
;
2475 if (copy_to_user(user
, &info
, sizeof(info
)) != 0)
2480 case IP_VS_SO_GET_SERVICES
:
2482 struct ip_vs_get_services
*get
;
2485 get
= (struct ip_vs_get_services
*)arg
;
2486 size
= sizeof(*get
) +
2487 sizeof(struct ip_vs_service_entry
) * get
->num_services
;
2489 pr_err("length: %u != %u\n", *len
, size
);
2493 ret
= __ip_vs_get_service_entries(net
, get
, user
);
2497 case IP_VS_SO_GET_SERVICE
:
2499 struct ip_vs_service_entry
*entry
;
2500 struct ip_vs_service
*svc
;
2501 union nf_inet_addr addr
;
2503 entry
= (struct ip_vs_service_entry
*)arg
;
2504 addr
.ip
= entry
->addr
;
2506 svc
= __ip_vs_svc_fwm_find(net
, AF_INET
, entry
->fwmark
);
2508 svc
= __ip_vs_service_find(net
, AF_INET
,
2509 entry
->protocol
, &addr
,
2512 ip_vs_copy_service(entry
, svc
);
2513 if (copy_to_user(user
, entry
, sizeof(*entry
)) != 0)
2520 case IP_VS_SO_GET_DESTS
:
2522 struct ip_vs_get_dests
*get
;
2525 get
= (struct ip_vs_get_dests
*)arg
;
2526 size
= sizeof(*get
) +
2527 sizeof(struct ip_vs_dest_entry
) * get
->num_dests
;
2529 pr_err("length: %u != %u\n", *len
, size
);
2533 ret
= __ip_vs_get_dest_entries(net
, get
, user
);
2537 case IP_VS_SO_GET_TIMEOUT
:
2539 struct ip_vs_timeout_user t
;
2541 __ip_vs_get_timeouts(net
, &t
);
2542 if (copy_to_user(user
, &t
, sizeof(t
)) != 0)
2547 case IP_VS_SO_GET_DAEMON
:
2549 struct ip_vs_daemon_user d
[2];
2551 memset(&d
, 0, sizeof(d
));
2552 if (ipvs
->sync_state
& IP_VS_STATE_MASTER
) {
2553 d
[0].state
= IP_VS_STATE_MASTER
;
2554 strlcpy(d
[0].mcast_ifn
, ipvs
->master_mcast_ifn
,
2555 sizeof(d
[0].mcast_ifn
));
2556 d
[0].syncid
= ipvs
->master_syncid
;
2558 if (ipvs
->sync_state
& IP_VS_STATE_BACKUP
) {
2559 d
[1].state
= IP_VS_STATE_BACKUP
;
2560 strlcpy(d
[1].mcast_ifn
, ipvs
->backup_mcast_ifn
,
2561 sizeof(d
[1].mcast_ifn
));
2562 d
[1].syncid
= ipvs
->backup_syncid
;
2564 if (copy_to_user(user
, &d
, sizeof(d
)) != 0)
2574 mutex_unlock(&__ip_vs_mutex
);
2579 static struct nf_sockopt_ops ip_vs_sockopts
= {
2581 .set_optmin
= IP_VS_BASE_CTL
,
2582 .set_optmax
= IP_VS_SO_SET_MAX
+1,
2583 .set
= do_ip_vs_set_ctl
,
2584 .get_optmin
= IP_VS_BASE_CTL
,
2585 .get_optmax
= IP_VS_SO_GET_MAX
+1,
2586 .get
= do_ip_vs_get_ctl
,
2587 .owner
= THIS_MODULE
,
2591 * Generic Netlink interface
2594 /* IPVS genetlink family */
2595 static struct genl_family ip_vs_genl_family
= {
2596 .id
= GENL_ID_GENERATE
,
2598 .name
= IPVS_GENL_NAME
,
2599 .version
= IPVS_GENL_VERSION
,
2600 .maxattr
= IPVS_CMD_MAX
,
2603 /* Policy used for first-level command attributes */
2604 static const struct nla_policy ip_vs_cmd_policy
[IPVS_CMD_ATTR_MAX
+ 1] = {
2605 [IPVS_CMD_ATTR_SERVICE
] = { .type
= NLA_NESTED
},
2606 [IPVS_CMD_ATTR_DEST
] = { .type
= NLA_NESTED
},
2607 [IPVS_CMD_ATTR_DAEMON
] = { .type
= NLA_NESTED
},
2608 [IPVS_CMD_ATTR_TIMEOUT_TCP
] = { .type
= NLA_U32
},
2609 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN
] = { .type
= NLA_U32
},
2610 [IPVS_CMD_ATTR_TIMEOUT_UDP
] = { .type
= NLA_U32
},
2613 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2614 static const struct nla_policy ip_vs_daemon_policy
[IPVS_DAEMON_ATTR_MAX
+ 1] = {
2615 [IPVS_DAEMON_ATTR_STATE
] = { .type
= NLA_U32
},
2616 [IPVS_DAEMON_ATTR_MCAST_IFN
] = { .type
= NLA_NUL_STRING
,
2617 .len
= IP_VS_IFNAME_MAXLEN
},
2618 [IPVS_DAEMON_ATTR_SYNC_ID
] = { .type
= NLA_U32
},
2621 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2622 static const struct nla_policy ip_vs_svc_policy
[IPVS_SVC_ATTR_MAX
+ 1] = {
2623 [IPVS_SVC_ATTR_AF
] = { .type
= NLA_U16
},
2624 [IPVS_SVC_ATTR_PROTOCOL
] = { .type
= NLA_U16
},
2625 [IPVS_SVC_ATTR_ADDR
] = { .type
= NLA_BINARY
,
2626 .len
= sizeof(union nf_inet_addr
) },
2627 [IPVS_SVC_ATTR_PORT
] = { .type
= NLA_U16
},
2628 [IPVS_SVC_ATTR_FWMARK
] = { .type
= NLA_U32
},
2629 [IPVS_SVC_ATTR_SCHED_NAME
] = { .type
= NLA_NUL_STRING
,
2630 .len
= IP_VS_SCHEDNAME_MAXLEN
},
2631 [IPVS_SVC_ATTR_PE_NAME
] = { .type
= NLA_NUL_STRING
,
2632 .len
= IP_VS_PENAME_MAXLEN
},
2633 [IPVS_SVC_ATTR_FLAGS
] = { .type
= NLA_BINARY
,
2634 .len
= sizeof(struct ip_vs_flags
) },
2635 [IPVS_SVC_ATTR_TIMEOUT
] = { .type
= NLA_U32
},
2636 [IPVS_SVC_ATTR_NETMASK
] = { .type
= NLA_U32
},
2637 [IPVS_SVC_ATTR_STATS
] = { .type
= NLA_NESTED
},
2640 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2641 static const struct nla_policy ip_vs_dest_policy
[IPVS_DEST_ATTR_MAX
+ 1] = {
2642 [IPVS_DEST_ATTR_ADDR
] = { .type
= NLA_BINARY
,
2643 .len
= sizeof(union nf_inet_addr
) },
2644 [IPVS_DEST_ATTR_PORT
] = { .type
= NLA_U16
},
2645 [IPVS_DEST_ATTR_FWD_METHOD
] = { .type
= NLA_U32
},
2646 [IPVS_DEST_ATTR_WEIGHT
] = { .type
= NLA_U32
},
2647 [IPVS_DEST_ATTR_U_THRESH
] = { .type
= NLA_U32
},
2648 [IPVS_DEST_ATTR_L_THRESH
] = { .type
= NLA_U32
},
2649 [IPVS_DEST_ATTR_ACTIVE_CONNS
] = { .type
= NLA_U32
},
2650 [IPVS_DEST_ATTR_INACT_CONNS
] = { .type
= NLA_U32
},
2651 [IPVS_DEST_ATTR_PERSIST_CONNS
] = { .type
= NLA_U32
},
2652 [IPVS_DEST_ATTR_STATS
] = { .type
= NLA_NESTED
},
2655 static int ip_vs_genl_fill_stats(struct sk_buff
*skb
, int container_type
,
2656 struct ip_vs_stats
*stats
)
2658 struct nlattr
*nl_stats
= nla_nest_start(skb
, container_type
);
2662 spin_lock_bh(&stats
->lock
);
2664 NLA_PUT_U32(skb
, IPVS_STATS_ATTR_CONNS
, stats
->ustats
.conns
);
2665 NLA_PUT_U32(skb
, IPVS_STATS_ATTR_INPKTS
, stats
->ustats
.inpkts
);
2666 NLA_PUT_U32(skb
, IPVS_STATS_ATTR_OUTPKTS
, stats
->ustats
.outpkts
);
2667 NLA_PUT_U64(skb
, IPVS_STATS_ATTR_INBYTES
, stats
->ustats
.inbytes
);
2668 NLA_PUT_U64(skb
, IPVS_STATS_ATTR_OUTBYTES
, stats
->ustats
.outbytes
);
2669 NLA_PUT_U32(skb
, IPVS_STATS_ATTR_CPS
, stats
->ustats
.cps
);
2670 NLA_PUT_U32(skb
, IPVS_STATS_ATTR_INPPS
, stats
->ustats
.inpps
);
2671 NLA_PUT_U32(skb
, IPVS_STATS_ATTR_OUTPPS
, stats
->ustats
.outpps
);
2672 NLA_PUT_U32(skb
, IPVS_STATS_ATTR_INBPS
, stats
->ustats
.inbps
);
2673 NLA_PUT_U32(skb
, IPVS_STATS_ATTR_OUTBPS
, stats
->ustats
.outbps
);
2675 spin_unlock_bh(&stats
->lock
);
2677 nla_nest_end(skb
, nl_stats
);
2682 spin_unlock_bh(&stats
->lock
);
2683 nla_nest_cancel(skb
, nl_stats
);
2687 static int ip_vs_genl_fill_service(struct sk_buff
*skb
,
2688 struct ip_vs_service
*svc
)
2690 struct nlattr
*nl_service
;
2691 struct ip_vs_flags flags
= { .flags
= svc
->flags
,
2694 nl_service
= nla_nest_start(skb
, IPVS_CMD_ATTR_SERVICE
);
2698 NLA_PUT_U16(skb
, IPVS_SVC_ATTR_AF
, svc
->af
);
2701 NLA_PUT_U32(skb
, IPVS_SVC_ATTR_FWMARK
, svc
->fwmark
);
2703 NLA_PUT_U16(skb
, IPVS_SVC_ATTR_PROTOCOL
, svc
->protocol
);
2704 NLA_PUT(skb
, IPVS_SVC_ATTR_ADDR
, sizeof(svc
->addr
), &svc
->addr
);
2705 NLA_PUT_U16(skb
, IPVS_SVC_ATTR_PORT
, svc
->port
);
2708 NLA_PUT_STRING(skb
, IPVS_SVC_ATTR_SCHED_NAME
, svc
->scheduler
->name
);
2710 NLA_PUT_STRING(skb
, IPVS_SVC_ATTR_PE_NAME
, svc
->pe
->name
);
2711 NLA_PUT(skb
, IPVS_SVC_ATTR_FLAGS
, sizeof(flags
), &flags
);
2712 NLA_PUT_U32(skb
, IPVS_SVC_ATTR_TIMEOUT
, svc
->timeout
/ HZ
);
2713 NLA_PUT_U32(skb
, IPVS_SVC_ATTR_NETMASK
, svc
->netmask
);
2715 if (ip_vs_genl_fill_stats(skb
, IPVS_SVC_ATTR_STATS
, &svc
->stats
))
2716 goto nla_put_failure
;
2718 nla_nest_end(skb
, nl_service
);
2723 nla_nest_cancel(skb
, nl_service
);
2727 static int ip_vs_genl_dump_service(struct sk_buff
*skb
,
2728 struct ip_vs_service
*svc
,
2729 struct netlink_callback
*cb
)
2733 hdr
= genlmsg_put(skb
, NETLINK_CB(cb
->skb
).pid
, cb
->nlh
->nlmsg_seq
,
2734 &ip_vs_genl_family
, NLM_F_MULTI
,
2735 IPVS_CMD_NEW_SERVICE
);
2739 if (ip_vs_genl_fill_service(skb
, svc
) < 0)
2740 goto nla_put_failure
;
2742 return genlmsg_end(skb
, hdr
);
2745 genlmsg_cancel(skb
, hdr
);
2749 static int ip_vs_genl_dump_services(struct sk_buff
*skb
,
2750 struct netlink_callback
*cb
)
2753 int start
= cb
->args
[0];
2754 struct ip_vs_service
*svc
;
2755 struct net
*net
= skb_sknet(skb
);
2757 mutex_lock(&__ip_vs_mutex
);
2758 for (i
= 0; i
< IP_VS_SVC_TAB_SIZE
; i
++) {
2759 list_for_each_entry(svc
, &ip_vs_svc_table
[i
], s_list
) {
2760 if (++idx
<= start
|| !net_eq(svc
->net
, net
))
2762 if (ip_vs_genl_dump_service(skb
, svc
, cb
) < 0) {
2764 goto nla_put_failure
;
2769 for (i
= 0; i
< IP_VS_SVC_TAB_SIZE
; i
++) {
2770 list_for_each_entry(svc
, &ip_vs_svc_fwm_table
[i
], f_list
) {
2771 if (++idx
<= start
|| !net_eq(svc
->net
, net
))
2773 if (ip_vs_genl_dump_service(skb
, svc
, cb
) < 0) {
2775 goto nla_put_failure
;
2781 mutex_unlock(&__ip_vs_mutex
);
2787 static int ip_vs_genl_parse_service(struct net
*net
,
2788 struct ip_vs_service_user_kern
*usvc
,
2789 struct nlattr
*nla
, int full_entry
,
2790 struct ip_vs_service
**ret_svc
)
2792 struct nlattr
*attrs
[IPVS_SVC_ATTR_MAX
+ 1];
2793 struct nlattr
*nla_af
, *nla_port
, *nla_fwmark
, *nla_protocol
, *nla_addr
;
2794 struct ip_vs_service
*svc
;
2796 /* Parse mandatory identifying service fields first */
2798 nla_parse_nested(attrs
, IPVS_SVC_ATTR_MAX
, nla
, ip_vs_svc_policy
))
2801 nla_af
= attrs
[IPVS_SVC_ATTR_AF
];
2802 nla_protocol
= attrs
[IPVS_SVC_ATTR_PROTOCOL
];
2803 nla_addr
= attrs
[IPVS_SVC_ATTR_ADDR
];
2804 nla_port
= attrs
[IPVS_SVC_ATTR_PORT
];
2805 nla_fwmark
= attrs
[IPVS_SVC_ATTR_FWMARK
];
2807 if (!(nla_af
&& (nla_fwmark
|| (nla_port
&& nla_protocol
&& nla_addr
))))
2810 memset(usvc
, 0, sizeof(*usvc
));
2812 usvc
->af
= nla_get_u16(nla_af
);
2813 #ifdef CONFIG_IP_VS_IPV6
2814 if (usvc
->af
!= AF_INET
&& usvc
->af
!= AF_INET6
)
2816 if (usvc
->af
!= AF_INET
)
2818 return -EAFNOSUPPORT
;
2821 usvc
->protocol
= IPPROTO_TCP
;
2822 usvc
->fwmark
= nla_get_u32(nla_fwmark
);
2824 usvc
->protocol
= nla_get_u16(nla_protocol
);
2825 nla_memcpy(&usvc
->addr
, nla_addr
, sizeof(usvc
->addr
));
2826 usvc
->port
= nla_get_u16(nla_port
);
2831 svc
= __ip_vs_svc_fwm_find(net
, usvc
->af
, usvc
->fwmark
);
2833 svc
= __ip_vs_service_find(net
, usvc
->af
, usvc
->protocol
,
2834 &usvc
->addr
, usvc
->port
);
2837 /* If a full entry was requested, check for the additional fields */
2839 struct nlattr
*nla_sched
, *nla_flags
, *nla_pe
, *nla_timeout
,
2841 struct ip_vs_flags flags
;
2843 nla_sched
= attrs
[IPVS_SVC_ATTR_SCHED_NAME
];
2844 nla_pe
= attrs
[IPVS_SVC_ATTR_PE_NAME
];
2845 nla_flags
= attrs
[IPVS_SVC_ATTR_FLAGS
];
2846 nla_timeout
= attrs
[IPVS_SVC_ATTR_TIMEOUT
];
2847 nla_netmask
= attrs
[IPVS_SVC_ATTR_NETMASK
];
2849 if (!(nla_sched
&& nla_flags
&& nla_timeout
&& nla_netmask
))
2852 nla_memcpy(&flags
, nla_flags
, sizeof(flags
));
2854 /* prefill flags from service if it already exists */
2856 usvc
->flags
= svc
->flags
;
2858 /* set new flags from userland */
2859 usvc
->flags
= (usvc
->flags
& ~flags
.mask
) |
2860 (flags
.flags
& flags
.mask
);
2861 usvc
->sched_name
= nla_data(nla_sched
);
2862 usvc
->pe_name
= nla_pe
? nla_data(nla_pe
) : NULL
;
2863 usvc
->timeout
= nla_get_u32(nla_timeout
);
2864 usvc
->netmask
= nla_get_u32(nla_netmask
);
2870 static struct ip_vs_service
*ip_vs_genl_find_service(struct net
*net
,
2873 struct ip_vs_service_user_kern usvc
;
2874 struct ip_vs_service
*svc
;
2877 ret
= ip_vs_genl_parse_service(net
, &usvc
, nla
, 0, &svc
);
2878 return ret
? ERR_PTR(ret
) : svc
;
2881 static int ip_vs_genl_fill_dest(struct sk_buff
*skb
, struct ip_vs_dest
*dest
)
2883 struct nlattr
*nl_dest
;
2885 nl_dest
= nla_nest_start(skb
, IPVS_CMD_ATTR_DEST
);
2889 NLA_PUT(skb
, IPVS_DEST_ATTR_ADDR
, sizeof(dest
->addr
), &dest
->addr
);
2890 NLA_PUT_U16(skb
, IPVS_DEST_ATTR_PORT
, dest
->port
);
2892 NLA_PUT_U32(skb
, IPVS_DEST_ATTR_FWD_METHOD
,
2893 atomic_read(&dest
->conn_flags
) & IP_VS_CONN_F_FWD_MASK
);
2894 NLA_PUT_U32(skb
, IPVS_DEST_ATTR_WEIGHT
, atomic_read(&dest
->weight
));
2895 NLA_PUT_U32(skb
, IPVS_DEST_ATTR_U_THRESH
, dest
->u_threshold
);
2896 NLA_PUT_U32(skb
, IPVS_DEST_ATTR_L_THRESH
, dest
->l_threshold
);
2897 NLA_PUT_U32(skb
, IPVS_DEST_ATTR_ACTIVE_CONNS
,
2898 atomic_read(&dest
->activeconns
));
2899 NLA_PUT_U32(skb
, IPVS_DEST_ATTR_INACT_CONNS
,
2900 atomic_read(&dest
->inactconns
));
2901 NLA_PUT_U32(skb
, IPVS_DEST_ATTR_PERSIST_CONNS
,
2902 atomic_read(&dest
->persistconns
));
2904 if (ip_vs_genl_fill_stats(skb
, IPVS_DEST_ATTR_STATS
, &dest
->stats
))
2905 goto nla_put_failure
;
2907 nla_nest_end(skb
, nl_dest
);
2912 nla_nest_cancel(skb
, nl_dest
);
2916 static int ip_vs_genl_dump_dest(struct sk_buff
*skb
, struct ip_vs_dest
*dest
,
2917 struct netlink_callback
*cb
)
2921 hdr
= genlmsg_put(skb
, NETLINK_CB(cb
->skb
).pid
, cb
->nlh
->nlmsg_seq
,
2922 &ip_vs_genl_family
, NLM_F_MULTI
,
2927 if (ip_vs_genl_fill_dest(skb
, dest
) < 0)
2928 goto nla_put_failure
;
2930 return genlmsg_end(skb
, hdr
);
2933 genlmsg_cancel(skb
, hdr
);
2937 static int ip_vs_genl_dump_dests(struct sk_buff
*skb
,
2938 struct netlink_callback
*cb
)
2941 int start
= cb
->args
[0];
2942 struct ip_vs_service
*svc
;
2943 struct ip_vs_dest
*dest
;
2944 struct nlattr
*attrs
[IPVS_CMD_ATTR_MAX
+ 1];
2947 mutex_lock(&__ip_vs_mutex
);
2949 /* Try to find the service for which to dump destinations */
2950 if (nlmsg_parse(cb
->nlh
, GENL_HDRLEN
, attrs
,
2951 IPVS_CMD_ATTR_MAX
, ip_vs_cmd_policy
))
2954 net
= skb_sknet(skb
);
2955 svc
= ip_vs_genl_find_service(net
, attrs
[IPVS_CMD_ATTR_SERVICE
]);
2956 if (IS_ERR(svc
) || svc
== NULL
)
2959 /* Dump the destinations */
2960 list_for_each_entry(dest
, &svc
->destinations
, n_list
) {
2963 if (ip_vs_genl_dump_dest(skb
, dest
, cb
) < 0) {
2965 goto nla_put_failure
;
2973 mutex_unlock(&__ip_vs_mutex
);
2978 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern
*udest
,
2979 struct nlattr
*nla
, int full_entry
)
2981 struct nlattr
*attrs
[IPVS_DEST_ATTR_MAX
+ 1];
2982 struct nlattr
*nla_addr
, *nla_port
;
2984 /* Parse mandatory identifying destination fields first */
2986 nla_parse_nested(attrs
, IPVS_DEST_ATTR_MAX
, nla
, ip_vs_dest_policy
))
2989 nla_addr
= attrs
[IPVS_DEST_ATTR_ADDR
];
2990 nla_port
= attrs
[IPVS_DEST_ATTR_PORT
];
2992 if (!(nla_addr
&& nla_port
))
2995 memset(udest
, 0, sizeof(*udest
));
2997 nla_memcpy(&udest
->addr
, nla_addr
, sizeof(udest
->addr
));
2998 udest
->port
= nla_get_u16(nla_port
);
3000 /* If a full entry was requested, check for the additional fields */
3002 struct nlattr
*nla_fwd
, *nla_weight
, *nla_u_thresh
,
3005 nla_fwd
= attrs
[IPVS_DEST_ATTR_FWD_METHOD
];
3006 nla_weight
= attrs
[IPVS_DEST_ATTR_WEIGHT
];
3007 nla_u_thresh
= attrs
[IPVS_DEST_ATTR_U_THRESH
];
3008 nla_l_thresh
= attrs
[IPVS_DEST_ATTR_L_THRESH
];
3010 if (!(nla_fwd
&& nla_weight
&& nla_u_thresh
&& nla_l_thresh
))
3013 udest
->conn_flags
= nla_get_u32(nla_fwd
)
3014 & IP_VS_CONN_F_FWD_MASK
;
3015 udest
->weight
= nla_get_u32(nla_weight
);
3016 udest
->u_threshold
= nla_get_u32(nla_u_thresh
);
3017 udest
->l_threshold
= nla_get_u32(nla_l_thresh
);
3023 static int ip_vs_genl_fill_daemon(struct sk_buff
*skb
, __be32 state
,
3024 const char *mcast_ifn
, __be32 syncid
)
3026 struct nlattr
*nl_daemon
;
3028 nl_daemon
= nla_nest_start(skb
, IPVS_CMD_ATTR_DAEMON
);
3032 NLA_PUT_U32(skb
, IPVS_DAEMON_ATTR_STATE
, state
);
3033 NLA_PUT_STRING(skb
, IPVS_DAEMON_ATTR_MCAST_IFN
, mcast_ifn
);
3034 NLA_PUT_U32(skb
, IPVS_DAEMON_ATTR_SYNC_ID
, syncid
);
3036 nla_nest_end(skb
, nl_daemon
);
3041 nla_nest_cancel(skb
, nl_daemon
);
3045 static int ip_vs_genl_dump_daemon(struct sk_buff
*skb
, __be32 state
,
3046 const char *mcast_ifn
, __be32 syncid
,
3047 struct netlink_callback
*cb
)
3050 hdr
= genlmsg_put(skb
, NETLINK_CB(cb
->skb
).pid
, cb
->nlh
->nlmsg_seq
,
3051 &ip_vs_genl_family
, NLM_F_MULTI
,
3052 IPVS_CMD_NEW_DAEMON
);
3056 if (ip_vs_genl_fill_daemon(skb
, state
, mcast_ifn
, syncid
))
3057 goto nla_put_failure
;
3059 return genlmsg_end(skb
, hdr
);
3062 genlmsg_cancel(skb
, hdr
);
3066 static int ip_vs_genl_dump_daemons(struct sk_buff
*skb
,
3067 struct netlink_callback
*cb
)
3069 struct net
*net
= skb_net(skb
);
3070 struct netns_ipvs
*ipvs
= net_ipvs(net
);
3072 mutex_lock(&__ip_vs_mutex
);
3073 if ((ipvs
->sync_state
& IP_VS_STATE_MASTER
) && !cb
->args
[0]) {
3074 if (ip_vs_genl_dump_daemon(skb
, IP_VS_STATE_MASTER
,
3075 ipvs
->master_mcast_ifn
,
3076 ipvs
->master_syncid
, cb
) < 0)
3077 goto nla_put_failure
;
3082 if ((ipvs
->sync_state
& IP_VS_STATE_BACKUP
) && !cb
->args
[1]) {
3083 if (ip_vs_genl_dump_daemon(skb
, IP_VS_STATE_BACKUP
,
3084 ipvs
->backup_mcast_ifn
,
3085 ipvs
->backup_syncid
, cb
) < 0)
3086 goto nla_put_failure
;
3092 mutex_unlock(&__ip_vs_mutex
);
3097 static int ip_vs_genl_new_daemon(struct net
*net
, struct nlattr
**attrs
)
3099 if (!(attrs
[IPVS_DAEMON_ATTR_STATE
] &&
3100 attrs
[IPVS_DAEMON_ATTR_MCAST_IFN
] &&
3101 attrs
[IPVS_DAEMON_ATTR_SYNC_ID
]))
3104 return start_sync_thread(net
,
3105 nla_get_u32(attrs
[IPVS_DAEMON_ATTR_STATE
]),
3106 nla_data(attrs
[IPVS_DAEMON_ATTR_MCAST_IFN
]),
3107 nla_get_u32(attrs
[IPVS_DAEMON_ATTR_SYNC_ID
]));
3110 static int ip_vs_genl_del_daemon(struct net
*net
, struct nlattr
**attrs
)
3112 if (!attrs
[IPVS_DAEMON_ATTR_STATE
])
3115 return stop_sync_thread(net
,
3116 nla_get_u32(attrs
[IPVS_DAEMON_ATTR_STATE
]));
3119 static int ip_vs_genl_set_config(struct net
*net
, struct nlattr
**attrs
)
3121 struct ip_vs_timeout_user t
;
3123 __ip_vs_get_timeouts(net
, &t
);
3125 if (attrs
[IPVS_CMD_ATTR_TIMEOUT_TCP
])
3126 t
.tcp_timeout
= nla_get_u32(attrs
[IPVS_CMD_ATTR_TIMEOUT_TCP
]);
3128 if (attrs
[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN
])
3130 nla_get_u32(attrs
[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN
]);
3132 if (attrs
[IPVS_CMD_ATTR_TIMEOUT_UDP
])
3133 t
.udp_timeout
= nla_get_u32(attrs
[IPVS_CMD_ATTR_TIMEOUT_UDP
]);
3135 return ip_vs_set_timeout(net
, &t
);
3138 static int ip_vs_genl_set_cmd(struct sk_buff
*skb
, struct genl_info
*info
)
3140 struct ip_vs_service
*svc
= NULL
;
3141 struct ip_vs_service_user_kern usvc
;
3142 struct ip_vs_dest_user_kern udest
;
3144 int need_full_svc
= 0, need_full_dest
= 0;
3147 net
= skb_sknet(skb
);
3148 cmd
= info
->genlhdr
->cmd
;
3150 mutex_lock(&__ip_vs_mutex
);
3152 if (cmd
== IPVS_CMD_FLUSH
) {
3153 ret
= ip_vs_flush(net
);
3155 } else if (cmd
== IPVS_CMD_SET_CONFIG
) {
3156 ret
= ip_vs_genl_set_config(net
, info
->attrs
);
3158 } else if (cmd
== IPVS_CMD_NEW_DAEMON
||
3159 cmd
== IPVS_CMD_DEL_DAEMON
) {
3161 struct nlattr
*daemon_attrs
[IPVS_DAEMON_ATTR_MAX
+ 1];
3163 if (!info
->attrs
[IPVS_CMD_ATTR_DAEMON
] ||
3164 nla_parse_nested(daemon_attrs
, IPVS_DAEMON_ATTR_MAX
,
3165 info
->attrs
[IPVS_CMD_ATTR_DAEMON
],
3166 ip_vs_daemon_policy
)) {
3171 if (cmd
== IPVS_CMD_NEW_DAEMON
)
3172 ret
= ip_vs_genl_new_daemon(net
, daemon_attrs
);
3174 ret
= ip_vs_genl_del_daemon(net
, daemon_attrs
);
3176 } else if (cmd
== IPVS_CMD_ZERO
&&
3177 !info
->attrs
[IPVS_CMD_ATTR_SERVICE
]) {
3178 ret
= ip_vs_zero_all(net
);
3182 /* All following commands require a service argument, so check if we
3183 * received a valid one. We need a full service specification when
3184 * adding / editing a service. Only identifying members otherwise. */
3185 if (cmd
== IPVS_CMD_NEW_SERVICE
|| cmd
== IPVS_CMD_SET_SERVICE
)
3188 ret
= ip_vs_genl_parse_service(net
, &usvc
,
3189 info
->attrs
[IPVS_CMD_ATTR_SERVICE
],
3190 need_full_svc
, &svc
);
3194 /* Unless we're adding a new service, the service must already exist */
3195 if ((cmd
!= IPVS_CMD_NEW_SERVICE
) && (svc
== NULL
)) {
3200 /* Destination commands require a valid destination argument. For
3201 * adding / editing a destination, we need a full destination
3203 if (cmd
== IPVS_CMD_NEW_DEST
|| cmd
== IPVS_CMD_SET_DEST
||
3204 cmd
== IPVS_CMD_DEL_DEST
) {
3205 if (cmd
!= IPVS_CMD_DEL_DEST
)
3208 ret
= ip_vs_genl_parse_dest(&udest
,
3209 info
->attrs
[IPVS_CMD_ATTR_DEST
],
3216 case IPVS_CMD_NEW_SERVICE
:
3218 ret
= ip_vs_add_service(net
, &usvc
, &svc
);
3222 case IPVS_CMD_SET_SERVICE
:
3223 ret
= ip_vs_edit_service(svc
, &usvc
);
3225 case IPVS_CMD_DEL_SERVICE
:
3226 ret
= ip_vs_del_service(svc
);
3227 /* do not use svc, it can be freed */
3229 case IPVS_CMD_NEW_DEST
:
3230 ret
= ip_vs_add_dest(svc
, &udest
);
3232 case IPVS_CMD_SET_DEST
:
3233 ret
= ip_vs_edit_dest(svc
, &udest
);
3235 case IPVS_CMD_DEL_DEST
:
3236 ret
= ip_vs_del_dest(svc
, &udest
);
3239 ret
= ip_vs_zero_service(svc
);
3246 mutex_unlock(&__ip_vs_mutex
);
3251 static int ip_vs_genl_get_cmd(struct sk_buff
*skb
, struct genl_info
*info
)
3253 struct sk_buff
*msg
;
3255 int ret
, cmd
, reply_cmd
;
3258 net
= skb_sknet(skb
);
3259 cmd
= info
->genlhdr
->cmd
;
3261 if (cmd
== IPVS_CMD_GET_SERVICE
)
3262 reply_cmd
= IPVS_CMD_NEW_SERVICE
;
3263 else if (cmd
== IPVS_CMD_GET_INFO
)
3264 reply_cmd
= IPVS_CMD_SET_INFO
;
3265 else if (cmd
== IPVS_CMD_GET_CONFIG
)
3266 reply_cmd
= IPVS_CMD_SET_CONFIG
;
3268 pr_err("unknown Generic Netlink command\n");
3272 msg
= nlmsg_new(NLMSG_DEFAULT_SIZE
, GFP_KERNEL
);
3276 mutex_lock(&__ip_vs_mutex
);
3278 reply
= genlmsg_put_reply(msg
, info
, &ip_vs_genl_family
, 0, reply_cmd
);
3280 goto nla_put_failure
;
3283 case IPVS_CMD_GET_SERVICE
:
3285 struct ip_vs_service
*svc
;
3287 svc
= ip_vs_genl_find_service(net
,
3288 info
->attrs
[IPVS_CMD_ATTR_SERVICE
]);
3293 ret
= ip_vs_genl_fill_service(msg
, svc
);
3295 goto nla_put_failure
;
3304 case IPVS_CMD_GET_CONFIG
:
3306 struct ip_vs_timeout_user t
;
3308 __ip_vs_get_timeouts(net
, &t
);
3309 #ifdef CONFIG_IP_VS_PROTO_TCP
3310 NLA_PUT_U32(msg
, IPVS_CMD_ATTR_TIMEOUT_TCP
, t
.tcp_timeout
);
3311 NLA_PUT_U32(msg
, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN
,
3314 #ifdef CONFIG_IP_VS_PROTO_UDP
3315 NLA_PUT_U32(msg
, IPVS_CMD_ATTR_TIMEOUT_UDP
, t
.udp_timeout
);
3321 case IPVS_CMD_GET_INFO
:
3322 NLA_PUT_U32(msg
, IPVS_INFO_ATTR_VERSION
, IP_VS_VERSION_CODE
);
3323 NLA_PUT_U32(msg
, IPVS_INFO_ATTR_CONN_TAB_SIZE
,
3324 ip_vs_conn_tab_size
);
3328 genlmsg_end(msg
, reply
);
3329 ret
= genlmsg_reply(msg
, info
);
3333 pr_err("not enough space in Netlink message\n");
3339 mutex_unlock(&__ip_vs_mutex
);
3345 static struct genl_ops ip_vs_genl_ops
[] __read_mostly
= {
3347 .cmd
= IPVS_CMD_NEW_SERVICE
,
3348 .flags
= GENL_ADMIN_PERM
,
3349 .policy
= ip_vs_cmd_policy
,
3350 .doit
= ip_vs_genl_set_cmd
,
3353 .cmd
= IPVS_CMD_SET_SERVICE
,
3354 .flags
= GENL_ADMIN_PERM
,
3355 .policy
= ip_vs_cmd_policy
,
3356 .doit
= ip_vs_genl_set_cmd
,
3359 .cmd
= IPVS_CMD_DEL_SERVICE
,
3360 .flags
= GENL_ADMIN_PERM
,
3361 .policy
= ip_vs_cmd_policy
,
3362 .doit
= ip_vs_genl_set_cmd
,
3365 .cmd
= IPVS_CMD_GET_SERVICE
,
3366 .flags
= GENL_ADMIN_PERM
,
3367 .doit
= ip_vs_genl_get_cmd
,
3368 .dumpit
= ip_vs_genl_dump_services
,
3369 .policy
= ip_vs_cmd_policy
,
3372 .cmd
= IPVS_CMD_NEW_DEST
,
3373 .flags
= GENL_ADMIN_PERM
,
3374 .policy
= ip_vs_cmd_policy
,
3375 .doit
= ip_vs_genl_set_cmd
,
3378 .cmd
= IPVS_CMD_SET_DEST
,
3379 .flags
= GENL_ADMIN_PERM
,
3380 .policy
= ip_vs_cmd_policy
,
3381 .doit
= ip_vs_genl_set_cmd
,
3384 .cmd
= IPVS_CMD_DEL_DEST
,
3385 .flags
= GENL_ADMIN_PERM
,
3386 .policy
= ip_vs_cmd_policy
,
3387 .doit
= ip_vs_genl_set_cmd
,
3390 .cmd
= IPVS_CMD_GET_DEST
,
3391 .flags
= GENL_ADMIN_PERM
,
3392 .policy
= ip_vs_cmd_policy
,
3393 .dumpit
= ip_vs_genl_dump_dests
,
3396 .cmd
= IPVS_CMD_NEW_DAEMON
,
3397 .flags
= GENL_ADMIN_PERM
,
3398 .policy
= ip_vs_cmd_policy
,
3399 .doit
= ip_vs_genl_set_cmd
,
3402 .cmd
= IPVS_CMD_DEL_DAEMON
,
3403 .flags
= GENL_ADMIN_PERM
,
3404 .policy
= ip_vs_cmd_policy
,
3405 .doit
= ip_vs_genl_set_cmd
,
3408 .cmd
= IPVS_CMD_GET_DAEMON
,
3409 .flags
= GENL_ADMIN_PERM
,
3410 .dumpit
= ip_vs_genl_dump_daemons
,
3413 .cmd
= IPVS_CMD_SET_CONFIG
,
3414 .flags
= GENL_ADMIN_PERM
,
3415 .policy
= ip_vs_cmd_policy
,
3416 .doit
= ip_vs_genl_set_cmd
,
3419 .cmd
= IPVS_CMD_GET_CONFIG
,
3420 .flags
= GENL_ADMIN_PERM
,
3421 .doit
= ip_vs_genl_get_cmd
,
3424 .cmd
= IPVS_CMD_GET_INFO
,
3425 .flags
= GENL_ADMIN_PERM
,
3426 .doit
= ip_vs_genl_get_cmd
,
3429 .cmd
= IPVS_CMD_ZERO
,
3430 .flags
= GENL_ADMIN_PERM
,
3431 .policy
= ip_vs_cmd_policy
,
3432 .doit
= ip_vs_genl_set_cmd
,
3435 .cmd
= IPVS_CMD_FLUSH
,
3436 .flags
= GENL_ADMIN_PERM
,
3437 .doit
= ip_vs_genl_set_cmd
,
3441 static int __init
ip_vs_genl_register(void)
3443 return genl_register_family_with_ops(&ip_vs_genl_family
,
3444 ip_vs_genl_ops
, ARRAY_SIZE(ip_vs_genl_ops
));
3447 static void ip_vs_genl_unregister(void)
3449 genl_unregister_family(&ip_vs_genl_family
);
3452 /* End of Generic Netlink interface definitions */
3455  * per netns init/exit func.
3457 int __net_init
__ip_vs_control_init(struct net
*net
)
3460 struct netns_ipvs
*ipvs
= net_ipvs(net
);
3462 if (!net_eq(net
, &init_net
)) /* netns not enabled yet */
3465 for (idx
= 0; idx
< IP_VS_RTAB_SIZE
; idx
++)
3466 INIT_LIST_HEAD(&ipvs
->rs_table
[idx
]);
3468 proc_net_fops_create(net
, "ip_vs", 0, &ip_vs_info_fops
);
3469 proc_net_fops_create(net
, "ip_vs_stats", 0, &ip_vs_stats_fops
);
3470 sysctl_header
= register_net_sysctl_table(net
, net_vs_ctl_path
,
3472 if (sysctl_header
== NULL
)
3474 ip_vs_new_estimator(net
, &ip_vs_stats
);
3481 static void __net_exit
__ip_vs_control_cleanup(struct net
*net
)
3483 if (!net_eq(net
, &init_net
)) /* netns not enabled yet */
3486 ip_vs_kill_estimator(net
, &ip_vs_stats
);
3487 unregister_net_sysctl_table(sysctl_header
);
3488 proc_net_remove(net
, "ip_vs_stats");
3489 proc_net_remove(net
, "ip_vs");
3492 static struct pernet_operations ipvs_control_ops
= {
3493 .init
= __ip_vs_control_init
,
3494 .exit
= __ip_vs_control_cleanup
,
3497 int __init
ip_vs_control_init(void)
3504 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
3505 for(idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
3506 INIT_LIST_HEAD(&ip_vs_svc_table
[idx
]);
3507 INIT_LIST_HEAD(&ip_vs_svc_fwm_table
[idx
]);
3510 ret
= register_pernet_subsys(&ipvs_control_ops
);
3512 pr_err("cannot register namespace.\n");
3516 smp_wmb(); /* Do we really need it now ? */
3518 ret
= nf_register_sockopt(&ip_vs_sockopts
);
3520 pr_err("cannot register sockopt.\n");
3524 ret
= ip_vs_genl_register();
3526 pr_err("cannot register Generic Netlink interface.\n");
3527 nf_unregister_sockopt(&ip_vs_sockopts
);
3531 /* Hook the defense timer */
3532 schedule_delayed_work(&defense_work
, DEFENSE_TIMER_PERIOD
);
3538 unregister_pernet_subsys(&ipvs_control_ops
);
3544 void ip_vs_control_cleanup(void)
3547 ip_vs_trash_cleanup();
3548 cancel_delayed_work_sync(&defense_work
);
3549 cancel_work_sync(&defense_work
.work
);
3550 unregister_pernet_subsys(&ipvs_control_ops
);
3551 ip_vs_genl_unregister();
3552 nf_unregister_sockopt(&ip_vs_sockopts
);