2 * This implements the ROUTE target, which enables you to set up unusual
3 * routes not supported by the standard kernel routing table.
5 * Copyright (C) 2002 Cedric de Launois <delaunois@info.ucl.ac.be>
9 * This software is distributed under GNU GPL v2, 1991
12 #include <linux/module.h>
13 #include <linux/skbuff.h>
14 #include <linux/version.h>
16 #include <linux/netfilter_ipv4/ip_tables.h>
18 #include <linux/netfilter_ipv4/ipt_ROUTE.h>
19 #include <linux/netdevice.h>
20 #include <linux/route.h>
21 #include <linux/version.h>
22 #include <linux/if_arp.h>
24 #include <net/route.h>
26 #include <net/checksum.h>
28 #include <net/netfilter/nf_conntrack.h>
30 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
31 #include <net/net_namespace.h>
34 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)
35 #define NF_INET_PRE_ROUTING NF_IP_PRE_ROUTING
36 #define NF_INET_LOCAL_IN NF_IP_LOCAL_IN
37 #define NF_INET_FORWARD NF_IP_FORWARD
38 #define NF_INET_LOCAL_OUT NF_IP_LOCAL_OUT
39 #define NF_INET_POST_ROUTING NF_IP_POST_ROUTING
45 #define DEBUGP(format, args...)
48 MODULE_LICENSE("GPL");
49 MODULE_AUTHOR("Cedric de Launois <delaunois@info.ucl.ac.be>");
50 MODULE_DESCRIPTION("iptables ROUTE target module");
52 /* Try to route the packet according to the routing keys specified in
53 * route_info. Keys are :
55 * 0 if no oif preferred,
56 * otherwise set to the index of the desired oif
58 * 0 if no gateway specified,
59 * otherwise set to the next host to which the pkt must be routed
60 * If success, skb->dev is the output device to which the packet must
61 * be sent and skb->dst is not NULL
63 * RETURN: -1 if an error occurred
64 * 1 if the packet was successfully routed to the
66 * 0 if the kernel routing table could not route the packet
67 * according to the keys specified
69 static int route(struct sk_buff
*skb
,
71 const struct ipt_route_target_info
*route_info
)
75 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
76 struct iphdr
*iph
= ip_hdr(skb
);
78 struct iphdr
*iph
= skb
->nh
.iph
;
86 .tos
= RT_TOS(iph
->tos
),
87 .scope
= RT_SCOPE_UNIVERSE
,
92 /* The destination address may be overloaded by the target */
94 fl
.fl4_dst
= route_info
->gw
;
96 /* Trying to route the packet using the standard routing table. */
97 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
98 if ((err
= ip_route_output_key(&init_net
, &rt
, &fl
))) {
100 if ((err
= ip_route_output_key(&rt
, &fl
))) {
103 DEBUGP("ipt_ROUTE: couldn't route pkt (err: %i)",err
);
107 /* Drop old route. */
108 dst_release(skb
->dst
);
111 /* Success if no oif specified or if the oif corresponds to the
113 if (!ifindex
|| rt
->u
.dst
.dev
->ifindex
== ifindex
) {
114 skb
->dst
= &rt
->u
.dst
;
115 skb
->dev
= skb
->dst
->dev
;
116 skb
->protocol
= htons(ETH_P_IP
);
120 /* The interface selected by the routing table is not the one
121 * specified by the user. This may happen because the dst address
122 * is one of our own addresses.
125 DEBUGP("ipt_ROUTE: failed to route as desired gw=%u.%u.%u.%u oif=%i (got oif=%i)\n",
126 NIPQUAD(route_info
->gw
), ifindex
, rt
->u
.dst
.dev
->ifindex
);
132 /* Stolen from ip_finish_output2
133 * PRE : skb->dev is set to the device we are leaving by
134 * skb->dst is not NULL
135 * POST: the packet is sent with the link layer header pushed
136 * the packet is destroyed
138 static void ip_direct_send(struct sk_buff
*skb
)
140 struct dst_entry
*dst
= skb
->dst
;
141 struct hh_cache
*hh
= dst
->hh
;
142 struct net_device
*dev
= dst
->dev
;
143 int hh_len
= LL_RESERVED_SPACE(dev
);
146 /* Be paranoid, rather than too clever. */
147 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
148 if (unlikely(skb_headroom(skb
) < hh_len
&& dev
->header_ops
)) {
150 if (unlikely(skb_headroom(skb
) < hh_len
&& dev
->hard_header
)) {
152 struct sk_buff
*skb2
;
154 skb2
= skb_realloc_headroom(skb
, LL_RESERVED_SPACE(dev
));
160 skb_set_owner_w(skb2
, skb
->sk
);
169 seq
= read_seqbegin(&hh
->hh_lock
);
170 hh_alen
= HH_DATA_ALIGN(hh
->hh_len
);
171 memcpy(skb
->data
- hh_alen
, hh
->hh_data
, hh_alen
);
172 } while (read_seqretry(&hh
->hh_lock
, seq
));
173 skb_push(skb
, hh
->hh_len
);
175 } else if (dst
->neighbour
)
176 dst
->neighbour
->output(skb
);
179 DEBUGP(KERN_DEBUG
"ipt_ROUTE: no hdr & no neighbour cache!\n");
185 /* PRE : skb->dev is set to the device we are leaving by
186 * POST: - the packet is directly sent to the skb->dev device, without
187 * pushing the link layer header.
188 * - the packet is destroyed
190 static inline int dev_direct_send(struct sk_buff
*skb
)
192 return dev_queue_xmit(skb
);
196 static unsigned int route_oif(const struct ipt_route_target_info
*route_info
,
199 unsigned int ifindex
= 0;
200 struct net_device
*dev_out
= NULL
;
202 /* The user set the interface name to use.
203 * Getting the current interface index.
205 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
206 if ((dev_out
= dev_get_by_name(&init_net
, route_info
->oif
))) {
208 if ((dev_out
= dev_get_by_name(route_info
->oif
))) {
210 ifindex
= dev_out
->ifindex
;
212 /* Unknown interface name : packet dropped */
214 DEBUGP("ipt_ROUTE: oif interface %s not found\n", route_info
->oif
);
218 /* Trying the standard way of routing packets */
219 switch (route(skb
, ifindex
, route_info
)) {
222 if (route_info
->flags
& IPT_ROUTE_CONTINUE
)
229 /* Failed to send to oif. Trying the hard way */
230 if (route_info
->flags
& IPT_ROUTE_CONTINUE
)
234 DEBUGP("ipt_ROUTE: forcing the use of %i\n",
237 /* We have to force the use of an interface.
238 * This interface must be a tunnel interface since
239 * otherwise we can't guess the hw address for
240 * the packet. For a tunnel interface, no hw address
243 if ((dev_out
->type
!= ARPHRD_TUNNEL
)
244 && (dev_out
->type
!= ARPHRD_IPGRE
)) {
246 DEBUGP("ipt_ROUTE: can't guess the hw addr !\n");
251 /* Send the packet. This will also free skb
252 * Do not go through the POST_ROUTING hook because
253 * skb->dst is not set and because it will probably
254 * get confused by the destination IP address.
257 dev_direct_send(skb
);
262 /* Unexpected error */
269 static unsigned int route_iif(const struct ipt_route_target_info
*route_info
,
272 struct net_device
*dev_in
= NULL
;
274 /* Getting the current interface index. */
275 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
276 if (!(dev_in
= dev_get_by_name(&init_net
, route_info
->iif
))) {
278 if (!(dev_in
= dev_get_by_name(route_info
->iif
))) {
281 DEBUGP("ipt_ROUTE: iif interface %s not found\n", route_info
->iif
);
286 dst_release(skb
->dst
);
295 static unsigned int route_gw(const struct ipt_route_target_info
*route_info
,
298 if (route(skb
, 0, route_info
)!=1)
301 if (route_info
->flags
& IPT_ROUTE_CONTINUE
)
309 /* To detect and deter routed packet loopback when using the --tee option,
310 * we take a page out of the raw.patch book: on the copied skb, we set up
311 * a fake ->nfct entry, pointing to the local &route_tee_track. We skip
312 * routing packets when we see they already have that ->nfct.
315 static struct nf_conn route_tee_track
;
318 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
319 target(struct sk_buff
**pskb
,
320 unsigned int hooknum
,
321 const struct net_device
*in
,
322 const struct net_device
*out
,
323 const void *targinfo
,
325 #elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17)
326 target(struct sk_buff
**pskb
,
327 const struct net_device
*in
,
328 const struct net_device
*out
,
329 unsigned int hooknum
,
330 const void *targinfo
,
332 #elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
333 target(struct sk_buff
**pskb
,
334 const struct net_device
*in
,
335 const struct net_device
*out
,
336 unsigned int hooknum
,
337 const struct xt_target
*target
,
338 const void *targinfo
,
340 #elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
341 target(struct sk_buff
**pskb
,
342 const struct net_device
*in
,
343 const struct net_device
*out
,
344 unsigned int hooknum
,
345 const struct xt_target
*target
,
346 const void *targinfo
)
347 #elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
348 target(struct sk_buff
*skb
,
349 const struct net_device
*in
,
350 const struct net_device
*out
,
351 unsigned int hooknum
,
352 const struct xt_target
*target
,
353 const void *targinfo
)
354 #else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28) */
355 target(struct sk_buff
*skb
,
356 const struct xt_target_param
*par
)
359 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
360 const struct ipt_route_target_info
*route_info
= targinfo
;
362 const struct ipt_route_target_info
*route_info
= par
->targinfo
;
363 unsigned int hooknum
= par
->hooknum
;
365 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
366 struct sk_buff
*skb
= *pskb
;
370 if (skb
->nfct
== &route_tee_track
.ct_general
) {
371 /* Loopback - a packet we already routed, is to be
372 * routed another time. Avoid that, now.
375 DEBUGP(KERN_DEBUG
"ipt_ROUTE: loopback - DROP!\n");
379 /* If we are at PREROUTING or INPUT hook
380 * the TTL isn't decreased by the IP stack
382 if (hooknum
== NF_INET_PRE_ROUTING
||
383 hooknum
== NF_INET_LOCAL_IN
) {
385 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
386 struct iphdr
*iph
= ip_hdr(skb
);
388 struct iphdr
*iph
= skb
->nh
.iph
;
398 .tos
= RT_TOS(iph
->tos
),
399 .scope
= ((iph
->tos
& RTO_ONLINK
) ?
406 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
407 if (ip_route_output_key(&init_net
, &rt
, &fl
)) {
409 if (ip_route_output_key(&rt
, &fl
)) {
414 if (skb
->dev
== rt
->u
.dst
.dev
) {
415 /* Drop old route. */
416 dst_release(skb
->dst
);
417 skb
->dst
= &rt
->u
.dst
;
419 /* this will traverse normal stack, and
420 * thus call conntrack on the icmp packet */
421 icmp_send(skb
, ICMP_TIME_EXCEEDED
,
429 * If we are at INPUT the checksum must be recalculated since
430 * the length could change as the result of a defragmentation.
432 if(hooknum
== NF_INET_LOCAL_IN
) {
433 iph
->ttl
= iph
->ttl
- 1;
435 iph
->check
= ip_fast_csum((unsigned char *)iph
, iph
->ihl
);
437 ip_decrease_ttl(iph
);
441 if ((route_info
->flags
& IPT_ROUTE_TEE
)) {
443 * Copy the skb, and route the copy. Will later return
444 * IPT_CONTINUE for the original skb, which should continue
445 * on its way as if nothing happened. The copy should be
446 * independently delivered to the ROUTE --gw.
448 skb
= skb_copy(skb
, GFP_ATOMIC
);
451 DEBUGP(KERN_DEBUG
"ipt_ROUTE: copy failed!\n");
456 /* Tell conntrack to forget this packet since it may get confused
457 * when a packet is leaving with dst address == our address.
458 * Good idea ? Dunno. Need advice.
460 * NEW: mark the skb with our &route_tee_track, so we avoid looping
461 * on any already routed packet.
463 if (!(route_info
->flags
& IPT_ROUTE_CONTINUE
)) {
464 nf_conntrack_put(skb
->nfct
);
465 skb
->nfct
= &route_tee_track
.ct_general
;
466 skb
->nfctinfo
= IP_CT_NEW
;
467 nf_conntrack_get(skb
->nfct
);
470 if (route_info
->oif
[0] != '\0') {
471 res
= route_oif(route_info
, skb
);
472 } else if (route_info
->iif
[0] != '\0') {
473 res
= route_iif(route_info
, skb
);
474 } else if (route_info
->gw
) {
475 res
= route_gw(route_info
, skb
);
478 DEBUGP(KERN_DEBUG
"ipt_ROUTE: no parameter !\n");
482 if ((route_info
->flags
& IPT_ROUTE_TEE
))
488 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
490 checkentry(const char *tablename
,
491 const struct ipt_entry
*e
,
493 unsigned int targinfosize
,
494 unsigned int hook_mask
)
495 #elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17)
497 checkentry(const char *tablename
,
500 unsigned int targinfosize
,
501 unsigned int hook_mask
)
502 #elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
504 checkentry(const char *tablename
,
506 const struct xt_target
*target
,
508 unsigned int targinfosize
,
509 unsigned int hook_mask
)
510 #elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
512 checkentry(const char *tablename
,
514 const struct xt_target
*target
,
516 unsigned int hook_mask
)
517 #elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
519 checkentry(const char *tablename
,
521 const struct xt_target
*target
,
523 unsigned int hook_mask
)
524 #else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28) */
526 checkentry(const struct xt_tgchk_param
*par
)
530 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28)
531 const char *tablename
= par
->table
;
532 unsigned int hook_mask
= par
->hook_mask
;
535 if (strcmp(tablename
, "mangle") != 0) {
536 printk("ipt_ROUTE: bad table `%s', use the `mangle' table.\n",
541 if (hook_mask
& ~( (1 << NF_INET_PRE_ROUTING
)
542 | (1 << NF_INET_LOCAL_IN
)
543 | (1 << NF_INET_FORWARD
)
544 | (1 << NF_INET_LOCAL_OUT
)
545 | (1 << NF_INET_POST_ROUTING
))) {
546 printk("ipt_ROUTE: bad hook\n");
550 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
551 if (targinfosize
!= IPT_ALIGN(sizeof(struct ipt_route_target_info
))) {
552 printk(KERN_WARNING
"ipt_ROUTE: targinfosize %u != %Zu\n",
554 IPT_ALIGN(sizeof(struct ipt_route_target_info
)));
562 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,21)
563 static struct ipt_target xt_route_reg
= {
565 static struct ipt_target ipt_route_reg
= {
568 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,21)
572 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,17)
573 .targetsize
= sizeof(struct ipt_route_target_info
),
575 .checkentry
= checkentry
,
579 static int __init
init(void)
581 /* Set up fake conntrack (stolen from raw.patch):
582 - to never be deleted, not in any hashes */
583 atomic_set(&route_tee_track
.ct_general
.use
, 1);
584 /* - and make it look like a confirmed connection */
585 set_bit(IPS_CONFIRMED_BIT
, &route_tee_track
.status
);
586 /* Initialize fake conntrack so that NAT will skip it */
587 route_tee_track
.status
|= IPS_NAT_DONE_MASK
;
589 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,21)
590 return xt_register_target(&xt_route_reg
);
592 return ipt_register_target(&ipt_route_reg
);
597 static void __exit
fini(void)
599 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,21)
600 xt_unregister_target(&xt_route_reg
);
602 ipt_unregister_target(&ipt_route_reg
);