Import 2.3.99pre4-2
[davej-history.git] / net / ipv4 / fib_frontend.c
blob0ae574a64e1aff982ade1ec345df50df36630b1a
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IPv4 Forwarding Information Base: FIB frontend.
 *
 * Version:	$Id: fib_frontend.c,v 1.21 1999/12/15 22:39:07 davem Exp $
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
18 #include <linux/config.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <asm/bitops.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/sched.h>
25 #include <linux/mm.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
30 #include <linux/in.h>
31 #include <linux/inet.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/netlink.h>
37 #include <linux/init.h>
39 #include <net/ip.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
42 #include <net/tcp.h>
43 #include <net/sock.h>
44 #include <net/icmp.h>
45 #include <net/arp.h>
46 #include <net/ip_fib.h>
/* Shorthand for printk() at KERN_DEBUG level. */
#define FFprint(a...) printk(KERN_DEBUG a)

#ifndef CONFIG_IP_MULTIPLE_TABLES

/* First table id visited by inet_dump_fib() in this configuration. */
#define RT_TABLE_MIN RT_TABLE_MAIN

/* Without multiple-table support only these two tables are kept. */
struct fib_table *local_table;
struct fib_table *main_table;

#else

/* First table id visited by inet_dump_fib() in this configuration. */
#define RT_TABLE_MIN 1

/* One slot per table id, 0..RT_TABLE_MAX. */
struct fib_table *fib_tables[RT_TABLE_MAX+1];

/*
 * Create table `id` with fib_hash_init() and store it in fib_tables[id].
 *
 * Returns the new table, or NULL if fib_hash_init() failed, in which
 * case fib_tables[id] is left unchanged.
 */
struct fib_table *__fib_new_table(int id)
{
	struct fib_table *table = fib_hash_init(id);

	if (table != NULL)
		fib_tables[id] = table;
	return table;
}

#endif /* CONFIG_IP_MULTIPLE_TABLES */
78 void fib_flush(void)
80 int flushed = 0;
81 #ifdef CONFIG_IP_MULTIPLE_TABLES
82 struct fib_table *tb;
83 int id;
85 for (id = RT_TABLE_MAX; id>0; id--) {
86 if ((tb = fib_get_table(id))==NULL)
87 continue;
88 flushed += tb->tb_flush(tb);
90 #else /* CONFIG_IP_MULTIPLE_TABLES */
91 flushed += main_table->tb_flush(main_table);
92 flushed += local_table->tb_flush(local_table);
93 #endif /* CONFIG_IP_MULTIPLE_TABLES */
95 if (flushed)
96 rt_cache_flush(-1);
100 #ifdef CONFIG_PROC_FS
103 * Called from the PROCfs module. This outputs /proc/net/route.
105 * It always works in backward compatibility mode.
106 * The format of the file is not supposed to be changed.
109 static int
110 fib_get_procinfo(char *buffer, char **start, off_t offset, int length)
112 int first = offset/128;
113 char *ptr = buffer;
114 int count = (length+127)/128;
115 int len;
117 *start = buffer + offset%128;
119 if (--first < 0) {
120 sprintf(buffer, "%-127s\n", "Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT");
121 --count;
122 ptr += 128;
123 first = 0;
126 if (main_table && count > 0) {
127 int n = main_table->tb_get_info(main_table, ptr, first, count);
128 count -= n;
129 ptr += n*128;
131 len = ptr - *start;
132 if (len >= length)
133 return length;
134 if (len >= 0)
135 return len;
136 return 0;
139 #endif /* CONFIG_PROC_FS */
142 * Find the first device with a given source address.
145 struct net_device * ip_dev_find(u32 addr)
147 struct rt_key key;
148 struct fib_result res;
149 struct net_device *dev = NULL;
151 memset(&key, 0, sizeof(key));
152 key.dst = addr;
153 #ifdef CONFIG_IP_MULTIPLE_TABLES
154 res.r = NULL;
155 #endif
157 if (!local_table || local_table->tb_lookup(local_table, &key, &res)) {
158 return NULL;
160 if (res.type != RTN_LOCAL)
161 goto out;
162 dev = FIB_RES_DEV(res);
163 if (dev)
164 atomic_inc(&dev->refcnt);
166 out:
167 fib_res_put(&res);
168 return dev;
171 unsigned inet_addr_type(u32 addr)
173 struct rt_key key;
174 struct fib_result res;
175 unsigned ret = RTN_BROADCAST;
177 if (ZERONET(addr) || BADCLASS(addr))
178 return RTN_BROADCAST;
179 if (MULTICAST(addr))
180 return RTN_MULTICAST;
182 memset(&key, 0, sizeof(key));
183 key.dst = addr;
184 #ifdef CONFIG_IP_MULTIPLE_TABLES
185 res.r = NULL;
186 #endif
188 if (local_table) {
189 ret = RTN_UNICAST;
190 if (local_table->tb_lookup(local_table, &key, &res) == 0) {
191 ret = res.type;
192 fib_res_put(&res);
195 return ret;
198 /* Given (packet source, input interface) and optional (dst, oif, tos):
199 - (main) check, that source is valid i.e. not broadcast or our local
200 address.
201 - figure out what "logical" interface this packet arrived
202 and calculate "specific destination" address.
203 - check, that packet arrived from expected physical interface.
206 int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
207 struct net_device *dev, u32 *spec_dst, u32 *itag)
209 struct in_device *in_dev;
210 struct rt_key key;
211 struct fib_result res;
212 int no_addr, rpf;
213 int ret;
215 key.dst = src;
216 key.src = dst;
217 key.tos = tos;
218 key.oif = 0;
219 key.iif = oif;
220 key.scope = RT_SCOPE_UNIVERSE;
222 no_addr = rpf = 0;
223 read_lock(&inetdev_lock);
224 in_dev = __in_dev_get(dev);
225 if (in_dev) {
226 no_addr = in_dev->ifa_list == NULL;
227 rpf = IN_DEV_RPFILTER(in_dev);
229 read_unlock(&inetdev_lock);
231 if (in_dev == NULL)
232 goto e_inval;
234 if (fib_lookup(&key, &res))
235 goto last_resort;
236 if (res.type != RTN_UNICAST)
237 goto e_inval_res;
238 *spec_dst = FIB_RES_PREFSRC(res);
239 if (itag)
240 fib_combine_itag(itag, &res);
241 #ifdef CONFIG_IP_ROUTE_MULTIPATH
242 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
243 #else
244 if (FIB_RES_DEV(res) == dev)
245 #endif
247 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
248 fib_res_put(&res);
249 return ret;
251 fib_res_put(&res);
252 if (no_addr)
253 goto last_resort;
254 if (rpf)
255 goto e_inval;
256 key.oif = dev->ifindex;
258 ret = 0;
259 if (fib_lookup(&key, &res) == 0) {
260 if (res.type == RTN_UNICAST) {
261 *spec_dst = FIB_RES_PREFSRC(res);
262 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
264 fib_res_put(&res);
266 return ret;
268 last_resort:
269 if (rpf)
270 goto e_inval;
271 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
272 *itag = 0;
273 return 0;
275 e_inval_res:
276 fib_res_put(&res);
277 e_inval:
278 return -EINVAL;
281 #ifndef CONFIG_IP_NOSIOCRT
284 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
287 int ip_rt_ioctl(unsigned int cmd, void *arg)
289 int err;
290 struct kern_rta rta;
291 struct rtentry r;
292 struct {
293 struct nlmsghdr nlh;
294 struct rtmsg rtm;
295 } req;
297 switch (cmd) {
298 case SIOCADDRT: /* Add a route */
299 case SIOCDELRT: /* Delete a route */
300 if (!capable(CAP_NET_ADMIN))
301 return -EPERM;
302 if (copy_from_user(&r, arg, sizeof(struct rtentry)))
303 return -EFAULT;
304 rtnl_lock();
305 err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r);
306 if (err == 0) {
307 if (cmd == SIOCDELRT) {
308 struct fib_table *tb = fib_get_table(req.rtm.rtm_table);
309 err = -ESRCH;
310 if (tb)
311 err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
312 } else {
313 struct fib_table *tb = fib_new_table(req.rtm.rtm_table);
314 err = -ENOBUFS;
315 if (tb)
316 err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
318 if (rta.rta_mx)
319 kfree(rta.rta_mx);
321 rtnl_unlock();
322 return err;
324 return -EINVAL;
327 #else
329 int ip_rt_ioctl(unsigned int cmd, void *arg)
331 return -EINVAL;
334 #endif
336 #ifdef CONFIG_RTNETLINK
338 static int inet_check_attr(struct rtmsg *r, struct rtattr **rta)
340 int i;
342 for (i=1; i<=RTA_MAX; i++) {
343 struct rtattr *attr = rta[i-1];
344 if (attr) {
345 if (RTA_PAYLOAD(attr) < 4)
346 return -EINVAL;
347 if (i != RTA_MULTIPATH && i != RTA_METRICS)
348 rta[i-1] = (struct rtattr*)RTA_DATA(attr);
351 return 0;
354 int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
356 struct fib_table * tb;
357 struct rtattr **rta = arg;
358 struct rtmsg *r = NLMSG_DATA(nlh);
360 if (inet_check_attr(r, rta))
361 return -EINVAL;
363 tb = fib_get_table(r->rtm_table);
364 if (tb)
365 return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
366 return -ESRCH;
369 int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
371 struct fib_table * tb;
372 struct rtattr **rta = arg;
373 struct rtmsg *r = NLMSG_DATA(nlh);
375 if (inet_check_attr(r, rta))
376 return -EINVAL;
378 tb = fib_new_table(r->rtm_table);
379 if (tb)
380 return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
381 return -ENOBUFS;
384 int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
386 int t;
387 int s_t;
388 struct fib_table *tb;
390 if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
391 ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
392 return ip_rt_dump(skb, cb);
394 s_t = cb->args[0];
395 if (s_t == 0)
396 s_t = cb->args[0] = RT_TABLE_MIN;
398 for (t=s_t; t<=RT_TABLE_MAX; t++) {
399 if (t < s_t) continue;
400 if (t > s_t)
401 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
402 if ((tb = fib_get_table(t))==NULL)
403 continue;
404 if (tb->tb_dump(tb, skb, cb) < 0)
405 break;
408 cb->args[0] = t;
410 return skb->len;
413 #endif
415 /* Prepare and feed intra-kernel routing request.
416 Really, it should be netlink message, but :-( netlink
417 can be not configured, so that we feed it directly
418 to fib engine. It is legal, because all events occur
419 only when netlink is already locked.
422 static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa)
424 struct fib_table * tb;
425 struct {
426 struct nlmsghdr nlh;
427 struct rtmsg rtm;
428 } req;
429 struct kern_rta rta;
431 memset(&req.rtm, 0, sizeof(req.rtm));
432 memset(&rta, 0, sizeof(rta));
434 if (type == RTN_UNICAST)
435 tb = fib_new_table(RT_TABLE_MAIN);
436 else
437 tb = fib_new_table(RT_TABLE_LOCAL);
439 if (tb == NULL)
440 return;
442 req.nlh.nlmsg_len = sizeof(req);
443 req.nlh.nlmsg_type = cmd;
444 req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
445 req.nlh.nlmsg_pid = 0;
446 req.nlh.nlmsg_seq = 0;
448 req.rtm.rtm_dst_len = dst_len;
449 req.rtm.rtm_table = tb->tb_id;
450 req.rtm.rtm_protocol = RTPROT_KERNEL;
451 req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
452 req.rtm.rtm_type = type;
454 rta.rta_dst = &dst;
455 rta.rta_prefsrc = &ifa->ifa_local;
456 rta.rta_oif = &ifa->ifa_dev->dev->ifindex;
458 if (cmd == RTM_NEWROUTE)
459 tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
460 else
461 tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
464 static void fib_add_ifaddr(struct in_ifaddr *ifa)
466 struct in_device *in_dev = ifa->ifa_dev;
467 struct net_device *dev = in_dev->dev;
468 struct in_ifaddr *prim = ifa;
469 u32 mask = ifa->ifa_mask;
470 u32 addr = ifa->ifa_local;
471 u32 prefix = ifa->ifa_address&mask;
473 if (ifa->ifa_flags&IFA_F_SECONDARY) {
474 prim = inet_ifa_byprefix(in_dev, prefix, mask);
475 if (prim == NULL) {
476 printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
477 return;
481 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
483 if (!(dev->flags&IFF_UP))
484 return;
486 /* Add broadcast address, if it is explicitly assigned. */
487 if (ifa->ifa_broadcast && ifa->ifa_broadcast != 0xFFFFFFFF)
488 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
490 if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
491 (prefix != addr || ifa->ifa_prefixlen < 32)) {
492 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
493 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
495 /* Add network specific broadcasts, when it takes a sense */
496 if (ifa->ifa_prefixlen < 31) {
497 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
498 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
503 static void fib_del_ifaddr(struct in_ifaddr *ifa)
505 struct in_device *in_dev = ifa->ifa_dev;
506 struct net_device *dev = in_dev->dev;
507 struct in_ifaddr *ifa1;
508 struct in_ifaddr *prim = ifa;
509 u32 brd = ifa->ifa_address|~ifa->ifa_mask;
510 u32 any = ifa->ifa_address&ifa->ifa_mask;
511 #define LOCAL_OK 1
512 #define BRD_OK 2
513 #define BRD0_OK 4
514 #define BRD1_OK 8
515 unsigned ok = 0;
517 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
518 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
519 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
520 else {
521 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
522 if (prim == NULL) {
523 printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
524 return;
528 /* Deletion is more complicated than add.
529 We should take care of not to delete too much :-)
531 Scan address list to be sure that addresses are really gone.
534 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
535 if (ifa->ifa_local == ifa1->ifa_local)
536 ok |= LOCAL_OK;
537 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
538 ok |= BRD_OK;
539 if (brd == ifa1->ifa_broadcast)
540 ok |= BRD1_OK;
541 if (any == ifa1->ifa_broadcast)
542 ok |= BRD0_OK;
545 if (!(ok&BRD_OK))
546 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
547 if (!(ok&BRD1_OK))
548 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
549 if (!(ok&BRD0_OK))
550 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
551 if (!(ok&LOCAL_OK)) {
552 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
554 /* Check, that this local address finally disappeared. */
555 if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
556 /* And the last, but not the least thing.
557 We must flush stray FIB entries.
559 First of all, we scan fib_info list searching
560 for stray nexthop entries, then ignite fib_flush.
562 if (fib_sync_down(ifa->ifa_local, NULL, 0))
563 fib_flush();
566 #undef LOCAL_OK
567 #undef BRD_OK
568 #undef BRD0_OK
569 #undef BRD1_OK
/*
 * Tear down the IP routing state attached to `dev`: run
 * fib_sync_down(0, dev, force) and flush the FIB if it reports anything,
 * then flush the routing cache with rt_cache_flush(0) and call
 * arp_ifdown() for the device.
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int stale = fib_sync_down(0, dev, force);

	if (stale)
		fib_flush();

	rt_cache_flush(0);
	arp_ifdown(dev);
}
580 static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
582 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
584 switch (event) {
585 case NETDEV_UP:
586 fib_add_ifaddr(ifa);
587 rt_cache_flush(-1);
588 break;
589 case NETDEV_DOWN:
590 if (ifa->ifa_dev && ifa->ifa_dev->ifa_list == NULL) {
591 /* Last address was deleted from this interface.
592 Disable IP.
594 fib_disable_ip(ifa->ifa_dev->dev, 1);
595 } else {
596 fib_del_ifaddr(ifa);
597 rt_cache_flush(-1);
599 break;
601 return NOTIFY_DONE;
604 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
606 struct net_device *dev = ptr;
607 struct in_device *in_dev = __in_dev_get(dev);
609 if (!in_dev)
610 return NOTIFY_DONE;
612 switch (event) {
613 case NETDEV_UP:
614 for_ifa(in_dev) {
615 fib_add_ifaddr(ifa);
616 } endfor_ifa(in_dev);
617 #ifdef CONFIG_IP_ROUTE_MULTIPATH
618 fib_sync_up(dev);
619 #endif
620 rt_cache_flush(-1);
621 break;
622 case NETDEV_DOWN:
623 fib_disable_ip(dev, 0);
624 break;
625 case NETDEV_UNREGISTER:
626 fib_disable_ip(dev, 1);
627 break;
628 case NETDEV_CHANGEMTU:
629 case NETDEV_CHANGE:
630 rt_cache_flush(0);
631 break;
633 return NOTIFY_DONE;
636 struct notifier_block fib_inetaddr_notifier = {
637 fib_inetaddr_event,
638 NULL,
642 struct notifier_block fib_netdev_notifier = {
643 fib_netdev_event,
644 NULL,
648 void __init ip_fib_init(void)
650 #ifdef CONFIG_PROC_FS
651 proc_net_create("route",0,fib_get_procinfo);
652 #endif /* CONFIG_PROC_FS */
654 #ifndef CONFIG_IP_MULTIPLE_TABLES
655 local_table = fib_hash_init(RT_TABLE_LOCAL);
656 main_table = fib_hash_init(RT_TABLE_MAIN);
657 #else
658 fib_rules_init();
659 #endif
661 register_netdevice_notifier(&fib_netdev_notifier);
662 register_inetaddr_notifier(&fib_inetaddr_notifier);