Linux-2.6.12-rc2
[linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git] / net / ipv4 / ipvs / ip_vs_proto_udp.c
blob8ae5f2e0aefa259ba17a5de3540aed4738ec0178
1 /*
2 * ip_vs_proto_udp.c: UDP load balancing support for IPVS
4 * Version: $Id: ip_vs_proto_udp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
6 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
7 * Julian Anastasov <ja@ssi.bg>
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
14 * Changes:
18 #include <linux/kernel.h>
19 #include <linux/netfilter_ipv4.h>
21 #include <net/ip_vs.h>
24 static struct ip_vs_conn *
25 udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
26 const struct iphdr *iph, unsigned int proto_off, int inverse)
28 struct ip_vs_conn *cp;
29 __u16 _ports[2], *pptr;
31 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
32 if (pptr == NULL)
33 return NULL;
35 if (likely(!inverse)) {
36 cp = ip_vs_conn_in_get(iph->protocol,
37 iph->saddr, pptr[0],
38 iph->daddr, pptr[1]);
39 } else {
40 cp = ip_vs_conn_in_get(iph->protocol,
41 iph->daddr, pptr[1],
42 iph->saddr, pptr[0]);
45 return cp;
49 static struct ip_vs_conn *
50 udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
51 const struct iphdr *iph, unsigned int proto_off, int inverse)
53 struct ip_vs_conn *cp;
54 __u16 _ports[2], *pptr;
56 pptr = skb_header_pointer(skb, skb->nh.iph->ihl*4,
57 sizeof(_ports), _ports);
58 if (pptr == NULL)
59 return NULL;
61 if (likely(!inverse)) {
62 cp = ip_vs_conn_out_get(iph->protocol,
63 iph->saddr, pptr[0],
64 iph->daddr, pptr[1]);
65 } else {
66 cp = ip_vs_conn_out_get(iph->protocol,
67 iph->daddr, pptr[1],
68 iph->saddr, pptr[0]);
71 return cp;
75 static int
76 udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
77 int *verdict, struct ip_vs_conn **cpp)
79 struct ip_vs_service *svc;
80 struct udphdr _udph, *uh;
82 uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
83 sizeof(_udph), &_udph);
84 if (uh == NULL) {
85 *verdict = NF_DROP;
86 return 0;
89 if ((svc = ip_vs_service_get(skb->nfmark, skb->nh.iph->protocol,
90 skb->nh.iph->daddr, uh->dest))) {
91 if (ip_vs_todrop()) {
93 * It seems that we are very loaded.
94 * We have to drop this packet :(
96 ip_vs_service_put(svc);
97 *verdict = NF_DROP;
98 return 0;
102 * Let the virtual server select a real server for the
103 * incoming connection, and create a connection entry.
105 *cpp = ip_vs_schedule(svc, skb);
106 if (!*cpp) {
107 *verdict = ip_vs_leave(svc, skb, pp);
108 return 0;
110 ip_vs_service_put(svc);
112 return 1;
116 static inline void
117 udp_fast_csum_update(struct udphdr *uhdr, u32 oldip, u32 newip,
118 u16 oldport, u16 newport)
120 uhdr->check =
121 ip_vs_check_diff(~oldip, newip,
122 ip_vs_check_diff(oldport ^ 0xFFFF,
123 newport, uhdr->check));
124 if (!uhdr->check)
125 uhdr->check = 0xFFFF;
128 static int
129 udp_snat_handler(struct sk_buff **pskb,
130 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
132 struct udphdr *udph;
133 unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;
135 /* csum_check requires unshared skb */
136 if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
137 return 0;
139 if (unlikely(cp->app != NULL)) {
140 /* Some checks before mangling */
141 if (pp->csum_check && !pp->csum_check(*pskb, pp))
142 return 0;
145 * Call application helper if needed
147 if (!ip_vs_app_pkt_out(cp, pskb))
148 return 0;
151 udph = (void *)(*pskb)->nh.iph + udphoff;
152 udph->source = cp->vport;
155 * Adjust UDP checksums
157 if (!cp->app && (udph->check != 0)) {
158 /* Only port and addr are changed, do fast csum update */
159 udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
160 cp->dport, cp->vport);
161 if ((*pskb)->ip_summed == CHECKSUM_HW)
162 (*pskb)->ip_summed = CHECKSUM_NONE;
163 } else {
164 /* full checksum calculation */
165 udph->check = 0;
166 (*pskb)->csum = skb_checksum(*pskb, udphoff,
167 (*pskb)->len - udphoff, 0);
168 udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
169 (*pskb)->len - udphoff,
170 cp->protocol,
171 (*pskb)->csum);
172 if (udph->check == 0)
173 udph->check = 0xFFFF;
174 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
175 pp->name, udph->check,
176 (char*)&(udph->check) - (char*)udph);
178 return 1;
182 static int
183 udp_dnat_handler(struct sk_buff **pskb,
184 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
186 struct udphdr *udph;
187 unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;
189 /* csum_check requires unshared skb */
190 if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
191 return 0;
193 if (unlikely(cp->app != NULL)) {
194 /* Some checks before mangling */
195 if (pp->csum_check && !pp->csum_check(*pskb, pp))
196 return 0;
199 * Attempt ip_vs_app call.
200 * It will fix ip_vs_conn
202 if (!ip_vs_app_pkt_in(cp, pskb))
203 return 0;
206 udph = (void *)(*pskb)->nh.iph + udphoff;
207 udph->dest = cp->dport;
210 * Adjust UDP checksums
212 if (!cp->app && (udph->check != 0)) {
213 /* Only port and addr are changed, do fast csum update */
214 udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
215 cp->vport, cp->dport);
216 if ((*pskb)->ip_summed == CHECKSUM_HW)
217 (*pskb)->ip_summed = CHECKSUM_NONE;
218 } else {
219 /* full checksum calculation */
220 udph->check = 0;
221 (*pskb)->csum = skb_checksum(*pskb, udphoff,
222 (*pskb)->len - udphoff, 0);
223 udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
224 (*pskb)->len - udphoff,
225 cp->protocol,
226 (*pskb)->csum);
227 if (udph->check == 0)
228 udph->check = 0xFFFF;
229 (*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
231 return 1;
235 static int
236 udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
238 struct udphdr _udph, *uh;
239 unsigned int udphoff = skb->nh.iph->ihl*4;
241 uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
242 if (uh == NULL)
243 return 0;
245 if (uh->check != 0) {
246 switch (skb->ip_summed) {
247 case CHECKSUM_NONE:
248 skb->csum = skb_checksum(skb, udphoff,
249 skb->len - udphoff, 0);
250 case CHECKSUM_HW:
251 if (csum_tcpudp_magic(skb->nh.iph->saddr,
252 skb->nh.iph->daddr,
253 skb->len - udphoff,
254 skb->nh.iph->protocol,
255 skb->csum)) {
256 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
257 "Failed checksum for");
258 return 0;
260 break;
261 default:
262 /* CHECKSUM_UNNECESSARY */
263 break;
266 return 1;
271 * Note: the caller guarantees that only one of register_app,
272 * unregister_app or app_conn_bind is called each time.
275 #define UDP_APP_TAB_BITS 4
276 #define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
277 #define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
279 static struct list_head udp_apps[UDP_APP_TAB_SIZE];
280 static DEFINE_SPINLOCK(udp_app_lock);
282 static inline __u16 udp_app_hashkey(__u16 port)
284 return ((port >> UDP_APP_TAB_BITS) ^ port) & UDP_APP_TAB_MASK;
288 static int udp_register_app(struct ip_vs_app *inc)
290 struct ip_vs_app *i;
291 __u16 hash, port = inc->port;
292 int ret = 0;
294 hash = udp_app_hashkey(port);
297 spin_lock_bh(&udp_app_lock);
298 list_for_each_entry(i, &udp_apps[hash], p_list) {
299 if (i->port == port) {
300 ret = -EEXIST;
301 goto out;
304 list_add(&inc->p_list, &udp_apps[hash]);
305 atomic_inc(&ip_vs_protocol_udp.appcnt);
307 out:
308 spin_unlock_bh(&udp_app_lock);
309 return ret;
313 static void
314 udp_unregister_app(struct ip_vs_app *inc)
316 spin_lock_bh(&udp_app_lock);
317 atomic_dec(&ip_vs_protocol_udp.appcnt);
318 list_del(&inc->p_list);
319 spin_unlock_bh(&udp_app_lock);
323 static int udp_app_conn_bind(struct ip_vs_conn *cp)
325 int hash;
326 struct ip_vs_app *inc;
327 int result = 0;
329 /* Default binding: bind app only for NAT */
330 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
331 return 0;
333 /* Lookup application incarnations and bind the right one */
334 hash = udp_app_hashkey(cp->vport);
336 spin_lock(&udp_app_lock);
337 list_for_each_entry(inc, &udp_apps[hash], p_list) {
338 if (inc->port == cp->vport) {
339 if (unlikely(!ip_vs_app_inc_get(inc)))
340 break;
341 spin_unlock(&udp_app_lock);
343 IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
344 "%u.%u.%u.%u:%u to app %s on port %u\n",
345 __FUNCTION__,
346 NIPQUAD(cp->caddr), ntohs(cp->cport),
347 NIPQUAD(cp->vaddr), ntohs(cp->vport),
348 inc->name, ntohs(inc->port));
349 cp->app = inc;
350 if (inc->init_conn)
351 result = inc->init_conn(inc, cp);
352 goto out;
355 spin_unlock(&udp_app_lock);
357 out:
358 return result;
362 static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
363 [IP_VS_UDP_S_NORMAL] = 5*60*HZ,
364 [IP_VS_UDP_S_LAST] = 2*HZ,
367 static char * udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
368 [IP_VS_UDP_S_NORMAL] = "UDP",
369 [IP_VS_UDP_S_LAST] = "BUG!",
373 static int
374 udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
376 return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
377 udp_state_name_table, sname, to);
380 static const char * udp_state_name(int state)
382 if (state >= IP_VS_UDP_S_LAST)
383 return "ERR!";
384 return udp_state_name_table[state] ? udp_state_name_table[state] : "?";
387 static int
388 udp_state_transition(struct ip_vs_conn *cp, int direction,
389 const struct sk_buff *skb,
390 struct ip_vs_protocol *pp)
392 cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
393 return 1;
396 static void udp_init(struct ip_vs_protocol *pp)
398 IP_VS_INIT_HASH_TABLE(udp_apps);
399 pp->timeout_table = udp_timeouts;
/*
 *	Protocol exit hook: intentionally empty.
 */
static void udp_exit(struct ip_vs_protocol *pp)
{
}
407 struct ip_vs_protocol ip_vs_protocol_udp = {
408 .name = "UDP",
409 .protocol = IPPROTO_UDP,
410 .dont_defrag = 0,
411 .init = udp_init,
412 .exit = udp_exit,
413 .conn_schedule = udp_conn_schedule,
414 .conn_in_get = udp_conn_in_get,
415 .conn_out_get = udp_conn_out_get,
416 .snat_handler = udp_snat_handler,
417 .dnat_handler = udp_dnat_handler,
418 .csum_check = udp_csum_check,
419 .state_transition = udp_state_transition,
420 .state_name = udp_state_name,
421 .register_app = udp_register_app,
422 .unregister_app = udp_unregister_app,
423 .app_conn_bind = udp_app_conn_bind,
424 .debug_packet = ip_vs_tcpudp_debug_packet,
425 .timeout_change = NULL,
426 .set_state_timeout = udp_set_state_timeout,