Import 2.3.13
[davej-history.git] / net / ipv4 / fib_frontend.c
blobd57d4daa970bd072335a9f2c5303fca0f82a5195
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IPv4 Forwarding Information Base: FIB frontend.
 *
 * Version:	$Id: fib_frontend.c,v 1.16 1999/06/09 10:10:42 davem Exp $
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
18 #include <linux/config.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <asm/bitops.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/sched.h>
25 #include <linux/mm.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
30 #include <linux/in.h>
31 #include <linux/inet.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/netlink.h>
37 #include <linux/init.h>
39 #include <net/ip.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
42 #include <net/tcp.h>
43 #include <net/sock.h>
44 #include <net/icmp.h>
45 #include <net/arp.h>
46 #include <net/ip_fib.h>
/* Debug helper: passes its arguments to printk() at KERN_DEBUG level. */
#define FFprint(a...) printk(KERN_DEBUG a)

#ifndef CONFIG_IP_MULTIPLE_TABLES

/* Lowest table id visited by inet_dump_fib() in this configuration. */
#define RT_TABLE_MIN RT_TABLE_MAIN

/* The two routing tables; both are created in ip_fib_init(). */
struct fib_table *local_table;
struct fib_table *main_table;

#else

/* Lowest table id visited by inet_dump_fib() in this configuration. */
#define RT_TABLE_MIN 1

/* Routing tables indexed by table id; slots are filled by __fib_new_table(). */
struct fib_table *fib_tables[RT_TABLE_MAX+1];
63 struct fib_table *__fib_new_table(int id)
65 struct fib_table *tb;
67 tb = fib_hash_init(id);
68 if (!tb)
69 return NULL;
70 fib_tables[id] = tb;
71 return tb;
75 #endif /* CONFIG_IP_MULTIPLE_TABLES */
78 void fib_flush(void)
80 int flushed = 0;
81 #ifdef CONFIG_IP_MULTIPLE_TABLES
82 struct fib_table *tb;
83 int id;
85 for (id = RT_TABLE_MAX; id>0; id--) {
86 if ((tb = fib_get_table(id))==NULL)
87 continue;
88 flushed += tb->tb_flush(tb);
90 #else /* CONFIG_IP_MULTIPLE_TABLES */
91 flushed += main_table->tb_flush(main_table);
92 flushed += local_table->tb_flush(local_table);
93 #endif /* CONFIG_IP_MULTIPLE_TABLES */
95 if (flushed)
96 rt_cache_flush(-1);
100 #ifdef CONFIG_PROC_FS
/*
 *	Called from the procfs module. This outputs /proc/net/route.
 *
 *	It always works in backward compatibility mode.
 *	The format of the file must not be changed.
 */
109 static int
110 fib_get_procinfo(char *buffer, char **start, off_t offset, int length, int dummy)
112 int first = offset/128;
113 char *ptr = buffer;
114 int count = (length+127)/128;
115 int len;
117 *start = buffer + offset%128;
119 if (--first < 0) {
120 sprintf(buffer, "%-127s\n", "Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT");
121 --count;
122 ptr += 128;
123 first = 0;
126 if (main_table && count > 0) {
127 int n = main_table->tb_get_info(main_table, ptr, first, count);
128 count -= n;
129 ptr += n*128;
131 len = ptr - *start;
132 if (len >= length)
133 return length;
134 if (len >= 0)
135 return len;
136 return 0;
139 #endif /* CONFIG_PROC_FS */
/*
 *	Find the first device with a given source address.
 */
145 struct device * ip_dev_find(u32 addr)
147 struct rt_key key;
148 struct fib_result res;
150 memset(&key, 0, sizeof(key));
151 key.dst = addr;
153 if (!local_table || local_table->tb_lookup(local_table, &key, &res)
154 || res.type != RTN_LOCAL)
155 return NULL;
157 return FIB_RES_DEV(res);
160 unsigned inet_addr_type(u32 addr)
162 struct rt_key key;
163 struct fib_result res;
165 if (ZERONET(addr) || BADCLASS(addr))
166 return RTN_BROADCAST;
167 if (MULTICAST(addr))
168 return RTN_MULTICAST;
170 memset(&key, 0, sizeof(key));
171 key.dst = addr;
173 if (local_table) {
174 if (local_table->tb_lookup(local_table, &key, &res) == 0)
175 return res.type;
176 return RTN_UNICAST;
178 return RTN_BROADCAST;
/* Given (packet source, input interface) and optional (dst, oif, tos):
   - (main) check that the source is valid, i.e. not a broadcast or one
     of our local addresses.
   - figure out on which "logical" interface this packet arrived
     and calculate the "specific destination" address.
   - check that the packet arrived from the expected physical interface.
 */
189 int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
190 struct device *dev, u32 *spec_dst, u32 *itag)
192 struct in_device *in_dev = dev->ip_ptr;
193 struct rt_key key;
194 struct fib_result res;
196 key.dst = src;
197 key.src = dst;
198 key.tos = tos;
199 key.oif = 0;
200 key.iif = oif;
201 key.scope = RT_SCOPE_UNIVERSE;
203 if (in_dev == NULL)
204 return -EINVAL;
205 if (fib_lookup(&key, &res))
206 goto last_resort;
207 if (res.type != RTN_UNICAST)
208 return -EINVAL;
209 *spec_dst = FIB_RES_PREFSRC(res);
210 if (itag)
211 fib_combine_itag(itag, &res);
212 #ifdef CONFIG_IP_ROUTE_MULTIPATH
213 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
214 #else
215 if (FIB_RES_DEV(res) == dev)
216 #endif
217 return FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
219 if (in_dev->ifa_list == NULL)
220 goto last_resort;
221 if (IN_DEV_RPFILTER(in_dev))
222 return -EINVAL;
223 key.oif = dev->ifindex;
224 if (fib_lookup(&key, &res) == 0 && res.type == RTN_UNICAST) {
225 *spec_dst = FIB_RES_PREFSRC(res);
226 return FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
228 return 0;
230 last_resort:
231 if (IN_DEV_RPFILTER(in_dev))
232 return -EINVAL;
233 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
234 *itag = 0;
235 return 0;
238 #ifndef CONFIG_IP_NOSIOCRT
/*
 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables.
 */
244 int ip_rt_ioctl(unsigned int cmd, void *arg)
246 int err;
247 struct kern_rta rta;
248 struct rtentry r;
249 struct {
250 struct nlmsghdr nlh;
251 struct rtmsg rtm;
252 } req;
254 switch (cmd) {
255 case SIOCADDRT: /* Add a route */
256 case SIOCDELRT: /* Delete a route */
257 if (!capable(CAP_NET_ADMIN))
258 return -EPERM;
259 if (copy_from_user(&r, arg, sizeof(struct rtentry)))
260 return -EFAULT;
261 rtnl_lock();
262 err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r);
263 if (err == 0) {
264 if (cmd == SIOCDELRT) {
265 struct fib_table *tb = fib_get_table(req.rtm.rtm_table);
266 err = -ESRCH;
267 if (tb)
268 err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
269 } else {
270 struct fib_table *tb = fib_new_table(req.rtm.rtm_table);
271 err = -ENOBUFS;
272 if (tb)
273 err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
275 if (rta.rta_mx)
276 kfree(rta.rta_mx);
278 rtnl_unlock();
279 return err;
281 return -EINVAL;
284 #else
/*
 * Stub used when CONFIG_IP_NOSIOCRT is defined: every routing ioctl is
 * rejected with -EINVAL.
 */
int ip_rt_ioctl(unsigned int cmd, void *arg)
{
	(void)cmd;
	(void)arg;

	return -EINVAL;
}
291 #endif
293 #ifdef CONFIG_RTNETLINK
295 static int inet_check_attr(struct rtmsg *r, struct rtattr **rta)
297 int i;
299 for (i=1; i<=RTA_MAX; i++) {
300 struct rtattr *attr = rta[i-1];
301 if (attr) {
302 if (RTA_PAYLOAD(attr) < 4)
303 return -EINVAL;
304 if (i != RTA_MULTIPATH && i != RTA_METRICS)
305 rta[i-1] = (struct rtattr*)RTA_DATA(attr);
308 return 0;
311 int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
313 struct fib_table * tb;
314 struct rtattr **rta = arg;
315 struct rtmsg *r = NLMSG_DATA(nlh);
317 if (inet_check_attr(r, rta))
318 return -EINVAL;
320 tb = fib_get_table(r->rtm_table);
321 if (tb)
322 return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
323 return -ESRCH;
326 int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
328 struct fib_table * tb;
329 struct rtattr **rta = arg;
330 struct rtmsg *r = NLMSG_DATA(nlh);
332 if (inet_check_attr(r, rta))
333 return -EINVAL;
335 tb = fib_new_table(r->rtm_table);
336 if (tb)
337 return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
338 return -ENOBUFS;
341 int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
343 int t;
344 int s_t;
345 struct fib_table *tb;
347 if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
348 ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
349 return ip_rt_dump(skb, cb);
351 s_t = cb->args[0];
352 if (s_t == 0)
353 s_t = cb->args[0] = RT_TABLE_MIN;
355 for (t=s_t; t<=RT_TABLE_MAX; t++) {
356 if (t < s_t) continue;
357 if (t > s_t)
358 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
359 if ((tb = fib_get_table(t))==NULL)
360 continue;
361 if (tb->tb_dump(tb, skb, cb) < 0)
362 break;
365 cb->args[0] = t;
367 return skb->len;
370 #endif
/* Prepare and feed an intra-kernel routing request.
   Really, it should be a netlink message, but :-( netlink
   may not be configured, so we feed the request directly
   to the fib engine. This is legal, because all events occur
   only when netlink is already locked.
 */
379 static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa)
381 struct fib_table * tb;
382 struct {
383 struct nlmsghdr nlh;
384 struct rtmsg rtm;
385 } req;
386 struct kern_rta rta;
388 memset(&req.rtm, 0, sizeof(req.rtm));
389 memset(&rta, 0, sizeof(rta));
391 if (type == RTN_UNICAST)
392 tb = fib_new_table(RT_TABLE_MAIN);
393 else
394 tb = fib_new_table(RT_TABLE_LOCAL);
396 if (tb == NULL)
397 return;
399 req.nlh.nlmsg_len = sizeof(req);
400 req.nlh.nlmsg_type = cmd;
401 req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
402 req.nlh.nlmsg_pid = 0;
403 req.nlh.nlmsg_seq = 0;
405 req.rtm.rtm_dst_len = dst_len;
406 req.rtm.rtm_table = tb->tb_id;
407 req.rtm.rtm_protocol = RTPROT_KERNEL;
408 req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
409 req.rtm.rtm_type = type;
411 rta.rta_dst = &dst;
412 rta.rta_prefsrc = &ifa->ifa_local;
413 rta.rta_oif = &ifa->ifa_dev->dev->ifindex;
415 if (cmd == RTM_NEWROUTE)
416 tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
417 else
418 tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
421 static void fib_add_ifaddr(struct in_ifaddr *ifa)
423 struct in_device *in_dev = ifa->ifa_dev;
424 struct device *dev = in_dev->dev;
425 struct in_ifaddr *prim = ifa;
426 u32 mask = ifa->ifa_mask;
427 u32 addr = ifa->ifa_local;
428 u32 prefix = ifa->ifa_address&mask;
430 if (ifa->ifa_flags&IFA_F_SECONDARY) {
431 prim = inet_ifa_byprefix(in_dev, prefix, mask);
432 if (prim == NULL) {
433 printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
434 return;
438 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
440 if (!(dev->flags&IFF_UP))
441 return;
443 /* Add broadcast address, if it is explicitly assigned. */
444 if (ifa->ifa_broadcast && ifa->ifa_broadcast != 0xFFFFFFFF)
445 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
447 if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
448 (prefix != addr || ifa->ifa_prefixlen < 32)) {
449 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
450 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
452 /* Add network specific broadcasts, when it takes a sense */
453 if (ifa->ifa_prefixlen < 31) {
454 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
455 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
460 static void fib_del_ifaddr(struct in_ifaddr *ifa)
462 struct in_device *in_dev = ifa->ifa_dev;
463 struct device *dev = in_dev->dev;
464 struct in_ifaddr *ifa1;
465 struct in_ifaddr *prim = ifa;
466 u32 brd = ifa->ifa_address|~ifa->ifa_mask;
467 u32 any = ifa->ifa_address&ifa->ifa_mask;
468 #define LOCAL_OK 1
469 #define BRD_OK 2
470 #define BRD0_OK 4
471 #define BRD1_OK 8
472 unsigned ok = 0;
474 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
475 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
476 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
477 else {
478 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
479 if (prim == NULL) {
480 printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
481 return;
485 /* Deletion is more complicated than add.
486 We should take care of not to delete too much :-)
488 Scan address list to be sure that addresses are really gone.
491 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
492 if (ifa->ifa_local == ifa1->ifa_local)
493 ok |= LOCAL_OK;
494 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
495 ok |= BRD_OK;
496 if (brd == ifa1->ifa_broadcast)
497 ok |= BRD1_OK;
498 if (any == ifa1->ifa_broadcast)
499 ok |= BRD0_OK;
502 if (!(ok&BRD_OK))
503 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
504 if (!(ok&BRD1_OK))
505 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
506 if (!(ok&BRD0_OK))
507 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
508 if (!(ok&LOCAL_OK)) {
509 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
511 /* Check, that this local address finally disappeared. */
512 if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
513 /* And the last, but not the least thing.
514 We must flush stray FIB entries.
516 First of all, we scan fib_info list searching
517 for stray nexthop entries, then ignite fib_flush.
519 if (fib_sync_down(ifa->ifa_local, NULL, 0))
520 fib_flush();
523 #undef LOCAL_OK
524 #undef BRD_OK
525 #undef BRD0_OK
526 #undef BRD1_OK
/*
 * Stop IP routing through a device: fib_sync_down() is run for the
 * device (force is passed through), the FIB is flushed when it returns
 * non-zero, then the routing cache is flushed and arp_ifdown() is
 * called for the device.
 */
static void fib_disable_ip(struct device *dev, int force)
{
	int stale = fib_sync_down(0, dev, force);

	if (stale)
		fib_flush();
	rt_cache_flush(0);
	arp_ifdown(dev);
}
537 static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
539 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
541 switch (event) {
542 case NETDEV_UP:
543 fib_add_ifaddr(ifa);
544 rt_cache_flush(-1);
545 break;
546 case NETDEV_DOWN:
547 if (ifa->ifa_dev && ifa->ifa_dev->ifa_list == NULL) {
548 /* Last address was deleted from this interface.
549 Disable IP.
551 fib_disable_ip(ifa->ifa_dev->dev, 1);
552 } else {
553 fib_del_ifaddr(ifa);
554 rt_cache_flush(-1);
556 break;
558 return NOTIFY_DONE;
561 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
563 struct device *dev = ptr;
564 struct in_device *in_dev = dev->ip_ptr;
566 if (!in_dev)
567 return NOTIFY_DONE;
569 switch (event) {
570 case NETDEV_UP:
571 for_ifa(in_dev) {
572 fib_add_ifaddr(ifa);
573 } endfor_ifa(in_dev);
574 #ifdef CONFIG_IP_ROUTE_MULTIPATH
575 fib_sync_up(dev);
576 #endif
577 rt_cache_flush(-1);
578 break;
579 case NETDEV_DOWN:
580 fib_disable_ip(dev, 0);
581 break;
582 case NETDEV_UNREGISTER:
583 fib_disable_ip(dev, 1);
584 break;
585 case NETDEV_CHANGEMTU:
586 case NETDEV_CHANGE:
587 rt_cache_flush(0);
588 break;
590 return NOTIFY_DONE;
593 struct notifier_block fib_inetaddr_notifier = {
594 fib_inetaddr_event,
595 NULL,
599 struct notifier_block fib_netdev_notifier = {
600 fib_netdev_event,
601 NULL,
605 __initfunc(void ip_fib_init(void))
607 #ifdef CONFIG_PROC_FS
608 proc_net_register(&(struct proc_dir_entry) {
609 PROC_NET_ROUTE, 5, "route",
610 S_IFREG | S_IRUGO, 1, 0, 0,
611 0, &proc_net_inode_operations,
612 fib_get_procinfo
614 #endif /* CONFIG_PROC_FS */
616 #ifndef CONFIG_IP_MULTIPLE_TABLES
617 local_table = fib_hash_init(RT_TABLE_LOCAL);
618 main_table = fib_hash_init(RT_TABLE_MAIN);
619 #else
620 fib_rules_init();
621 #endif
623 register_netdevice_notifier(&fib_netdev_notifier);
624 register_inetaddr_notifier(&fib_inetaddr_notifier);