Ok. I didn't make 2.4.0 in 2000. Tough. I tried, but we had some
[davej-history.git] / net / ipv4 / fib_semantics.c
blobbc8de34968369d30f3e16392eb3ac26ef9b54418
1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * IPv4 Forwarding Information Base: semantics.
8 * Version: $Id: fib_semantics.c,v 1.17 2000/08/19 23:22:56 davem Exp $
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
18 #include <linux/config.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <asm/bitops.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/sched.h>
25 #include <linux/mm.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
30 #include <linux/in.h>
31 #include <linux/inet.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/netlink.h>
37 #include <linux/init.h>
39 #include <net/ip.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
42 #include <net/tcp.h>
43 #include <net/sock.h>
44 #include <net/ip_fib.h>
/* Debug-trace hook; it expands to nothing, so any FSprintk() call compiles away. */
46 #define FSprintk(a...)
/* Head of the list of fib_info records, chained through fib_next/fib_prev. */
48 static struct fib_info *fib_info_list;
/* Reader/writer lock that fib_release_info(), fib_create_info() and
   ip_fib_check_default() take around their accesses to fib_info_list. */
49 static rwlock_t fib_info_lock = RW_LOCK_UNLOCKED;
/* Allocation counter: incremented in fib_create_info(), decremented in
   free_fib_info(). */
50 int fib_info_cnt;
/* for_fib_info() ... endfor_fib_info() walks fib_info_list from its head.
   The opening macro starts a brace scope and declares the cursor `fi';
   the closing macro supplies the matching brace, so the two must be paired. */
52 #define for_fib_info() { struct fib_info *fi; \
53 for (fi = fib_info_list; fi; fi = fi->fib_next)
55 #define endfor_fib_info() }
57 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/* for_nexthops(fi) visits the fib_nhs nexthops of `fi' through a const
   cursor `nh'; change_nexthops(fi) is the same loop with a writable `nh'.
   Both open a brace scope declaring `nhsel' (index) and `nh' (cursor);
   endfor_nexthops() closes that scope. */
59 #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
60 for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
62 #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
63 for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
65 #else /* CONFIG_IP_ROUTE_MULTIPATH */
67 /* Hope, that gcc will optimize it to get rid of dummy loop */
/* Without multipath the loop body runs exactly once, on the first nexthop;
   `nh' is not advanced. */
69 #define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
70 for (nhsel=0; nhsel < 1; nhsel++)
72 #define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
73 for (nhsel=0; nhsel < 1; nhsel++)
75 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
/* Closes the brace scope opened by for_nexthops()/change_nexthops(). */
77 #define endfor_nexthops(fi) }
80 static struct
82 int error;
83 u8 scope;
84 } fib_props[RTA_MAX+1] = {
85 { 0, RT_SCOPE_NOWHERE}, /* RTN_UNSPEC */
86 { 0, RT_SCOPE_UNIVERSE}, /* RTN_UNICAST */
87 { 0, RT_SCOPE_HOST}, /* RTN_LOCAL */
88 { 0, RT_SCOPE_LINK}, /* RTN_BROADCAST */
89 { 0, RT_SCOPE_LINK}, /* RTN_ANYCAST */
90 { 0, RT_SCOPE_UNIVERSE}, /* RTN_MULTICAST */
91 { -EINVAL, RT_SCOPE_UNIVERSE}, /* RTN_BLACKHOLE */
92 { -EHOSTUNREACH, RT_SCOPE_UNIVERSE},/* RTN_UNREACHABLE */
93 { -EACCES, RT_SCOPE_UNIVERSE}, /* RTN_PROHIBIT */
94 { -EAGAIN, RT_SCOPE_UNIVERSE}, /* RTN_THROW */
95 #ifdef CONFIG_IP_ROUTE_NAT
96 { 0, RT_SCOPE_HOST}, /* RTN_NAT */
97 #else
98 { -EINVAL, RT_SCOPE_NOWHERE}, /* RTN_NAT */
99 #endif
100 { -EINVAL, RT_SCOPE_NOWHERE} /* RTN_XRESOLVE */
104 /* Release a nexthop info record */
106 void free_fib_info(struct fib_info *fi)
108 if (fi->fib_dead == 0) {
109 printk("Freeing alive fib_info %p\n", fi);
110 return;
112 change_nexthops(fi) {
113 if (nh->nh_dev)
114 dev_put(nh->nh_dev);
115 nh->nh_dev = NULL;
116 } endfor_nexthops(fi);
117 fib_info_cnt--;
118 kfree(fi);
121 void fib_release_info(struct fib_info *fi)
123 write_lock(&fib_info_lock);
124 if (fi && --fi->fib_treeref == 0) {
125 if (fi->fib_next)
126 fi->fib_next->fib_prev = fi->fib_prev;
127 if (fi->fib_prev)
128 fi->fib_prev->fib_next = fi->fib_next;
129 if (fi == fib_info_list)
130 fib_info_list = fi->fib_next;
131 fi->fib_dead = 1;
132 fib_info_put(fi);
134 write_unlock(&fib_info_lock);
137 extern __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
139 const struct fib_nh *onh = ofi->fib_nh;
141 for_nexthops(fi) {
142 if (nh->nh_oif != onh->nh_oif ||
143 nh->nh_gw != onh->nh_gw ||
144 nh->nh_scope != onh->nh_scope ||
145 #ifdef CONFIG_IP_ROUTE_MULTIPATH
146 nh->nh_weight != onh->nh_weight ||
147 #endif
148 #ifdef CONFIG_NET_CLS_ROUTE
149 nh->nh_tclassid != onh->nh_tclassid ||
150 #endif
151 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
152 return -1;
153 onh++;
154 } endfor_nexthops(fi);
155 return 0;
158 extern __inline__ struct fib_info * fib_find_info(const struct fib_info *nfi)
160 for_fib_info() {
161 if (fi->fib_nhs != nfi->fib_nhs)
162 continue;
163 if (nfi->fib_protocol == fi->fib_protocol &&
164 nfi->fib_prefsrc == fi->fib_prefsrc &&
165 nfi->fib_priority == fi->fib_priority &&
166 memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(fi->fib_metrics)) == 0 &&
167 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
168 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
169 return fi;
170 } endfor_fib_info();
171 return NULL;
174 /* Check, that the gateway is already configured.
175 Used only by redirect accept routine.
178 int ip_fib_check_default(u32 gw, struct net_device *dev)
180 read_lock(&fib_info_lock);
181 for_fib_info() {
182 if (fi->fib_flags & RTNH_F_DEAD)
183 continue;
184 for_nexthops(fi) {
185 if (nh->nh_dev == dev && nh->nh_gw == gw &&
186 !(nh->nh_flags&RTNH_F_DEAD)) {
187 read_unlock(&fib_info_lock);
188 return 0;
190 } endfor_nexthops(fi);
191 } endfor_fib_info();
192 read_unlock(&fib_info_lock);
193 return -1;
196 #ifdef CONFIG_IP_ROUTE_MULTIPATH
198 static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
200 while (RTA_OK(attr,attrlen)) {
201 if (attr->rta_type == type)
202 return *(u32*)RTA_DATA(attr);
203 attr = RTA_NEXT(attr, attrlen);
205 return 0;
208 static int
209 fib_count_nexthops(struct rtattr *rta)
211 int nhs = 0;
212 struct rtnexthop *nhp = RTA_DATA(rta);
213 int nhlen = RTA_PAYLOAD(rta);
215 while (nhlen >= (int)sizeof(struct rtnexthop)) {
216 if ((nhlen -= nhp->rtnh_len) < 0)
217 return 0;
218 nhs++;
219 nhp = RTNH_NEXT(nhp);
221 return nhs;
224 static int
225 fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
227 struct rtnexthop *nhp = RTA_DATA(rta);
228 int nhlen = RTA_PAYLOAD(rta);
230 change_nexthops(fi) {
231 int attrlen = nhlen - sizeof(struct rtnexthop);
232 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
233 return -EINVAL;
234 nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
235 nh->nh_oif = nhp->rtnh_ifindex;
236 nh->nh_weight = nhp->rtnh_hops + 1;
237 if (attrlen) {
238 nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
239 #ifdef CONFIG_NET_CLS_ROUTE
240 nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
241 #endif
243 nhp = RTNH_NEXT(nhp);
244 } endfor_nexthops(fi);
245 return 0;
248 #endif
250 int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
251 struct fib_info *fi)
253 #ifdef CONFIG_IP_ROUTE_MULTIPATH
254 struct rtnexthop *nhp;
255 int nhlen;
256 #endif
258 if (rta->rta_priority &&
259 *rta->rta_priority != fi->fib_priority)
260 return 1;
262 if (rta->rta_oif || rta->rta_gw) {
263 if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
264 (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
265 return 0;
266 return 1;
269 #ifdef CONFIG_IP_ROUTE_MULTIPATH
270 if (rta->rta_mp == NULL)
271 return 0;
272 nhp = RTA_DATA(rta->rta_mp);
273 nhlen = RTA_PAYLOAD(rta->rta_mp);
275 for_nexthops(fi) {
276 int attrlen = nhlen - sizeof(struct rtnexthop);
277 u32 gw;
279 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
280 return -EINVAL;
281 if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
282 return 1;
283 if (attrlen) {
284 gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
285 if (gw && gw != nh->nh_gw)
286 return 1;
287 #ifdef CONFIG_NET_CLS_ROUTE
288 gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
289 if (gw && gw != nh->nh_tclassid)
290 return 1;
291 #endif
293 nhp = RTNH_NEXT(nhp);
294 } endfor_nexthops(fi);
295 #endif
296 return 0;
301 Picture
302 -------
304 Semantics of nexthop are very messy for historical reasons.
305 We have to take into account, that:
306 a) gateway can be actually local interface address,
307 so that gatewayed route is direct.
308 b) gateway must be on-link address, possibly
309 described not by an ifaddr, but also by a direct route.
310 c) If both gateway and interface are specified, they should not
311 contradict.
312 d) If we use tunnel routes, gateway could be not on-link.
314 Attempt to reconcile all of these (alas, self-contradictory) conditions
315 results in pretty ugly and hairy code with obscure logic.
317 I chose to generalize it instead, so that the size
318 of code does not increase practically, but it becomes
319 much more general.
320 Every prefix is assigned a "scope" value: "host" is local address,
321 "link" is direct route,
322 [ ... "site" ... "interior" ... ]
323 and "universe" is true gateway route with global meaning.
325 Every prefix refers to a set of "nexthop"s (gw, oif),
326 where gw must have narrower scope. This recursion stops
327 when gw has LOCAL scope or if "nexthop" is declared ONLINK,
328 which means that gw is forced to be on link.
330 Code is still hairy, but now it is apparently logically
331 consistent and very flexible. E.g., as a by-product, it allows
332 independent exterior and interior routing processes
333 to co-exist in peace.
335 Normally it looks as following.
337 {universe prefix} -> (gw, oif) [scope link]
339 |-> {link prefix} -> (gw, oif) [scope local]
341 |-> {local prefix} (terminal node)
344 static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
346 int err;
348 if (nh->nh_gw) {
349 struct rt_key key;
350 struct fib_result res;
352 #ifdef CONFIG_IP_ROUTE_PERVASIVE
353 if (nh->nh_flags&RTNH_F_PERVASIVE)
354 return 0;
355 #endif
356 if (nh->nh_flags&RTNH_F_ONLINK) {
357 struct net_device *dev;
359 if (r->rtm_scope >= RT_SCOPE_LINK)
360 return -EINVAL;
361 if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
362 return -EINVAL;
363 if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
364 return -ENODEV;
365 if (!(dev->flags&IFF_UP))
366 return -ENETDOWN;
367 nh->nh_dev = dev;
368 atomic_inc(&dev->refcnt);
369 nh->nh_scope = RT_SCOPE_LINK;
370 return 0;
372 memset(&key, 0, sizeof(key));
373 key.dst = nh->nh_gw;
374 key.oif = nh->nh_oif;
375 key.scope = r->rtm_scope + 1;
377 /* It is not necessary, but requires a bit of thinking */
378 if (key.scope < RT_SCOPE_LINK)
379 key.scope = RT_SCOPE_LINK;
381 if ((err = fib_lookup(&key, &res)) != 0)
382 return err;
383 nh->nh_scope = res.scope;
384 nh->nh_oif = FIB_RES_OIF(res);
385 nh->nh_dev = FIB_RES_DEV(res);
386 if (nh->nh_dev)
387 atomic_inc(&nh->nh_dev->refcnt);
388 fib_res_put(&res);
389 } else {
390 struct in_device *in_dev;
392 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
393 return -EINVAL;
395 in_dev = inetdev_by_index(nh->nh_oif);
396 if (in_dev == NULL)
397 return -ENODEV;
398 if (!(in_dev->dev->flags&IFF_UP)) {
399 in_dev_put(in_dev);
400 return -ENETDOWN;
402 nh->nh_dev = in_dev->dev;
403 atomic_inc(&nh->nh_dev->refcnt);
404 nh->nh_scope = RT_SCOPE_HOST;
405 in_dev_put(in_dev);
407 return 0;
410 struct fib_info *
411 fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
412 const struct nlmsghdr *nlh, int *errp)
414 int err;
415 struct fib_info *fi = NULL;
416 struct fib_info *ofi;
417 #ifdef CONFIG_IP_ROUTE_MULTIPATH
418 int nhs = 1;
419 #else
420 const int nhs = 1;
421 #endif
423 /* Fast check to catch the most weird cases */
424 if (fib_props[r->rtm_type].scope > r->rtm_scope)
425 goto err_inval;
427 #ifdef CONFIG_IP_ROUTE_MULTIPATH
428 if (rta->rta_mp) {
429 nhs = fib_count_nexthops(rta->rta_mp);
430 if (nhs == 0)
431 goto err_inval;
433 #endif
435 fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
436 err = -ENOBUFS;
437 if (fi == NULL)
438 goto failure;
439 fib_info_cnt++;
440 memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));
442 fi->fib_protocol = r->rtm_protocol;
443 fi->fib_nhs = nhs;
444 fi->fib_flags = r->rtm_flags;
445 if (rta->rta_priority)
446 fi->fib_priority = *rta->rta_priority;
447 if (rta->rta_mx) {
448 int attrlen = RTA_PAYLOAD(rta->rta_mx);
449 struct rtattr *attr = RTA_DATA(rta->rta_mx);
451 while (RTA_OK(attr, attrlen)) {
452 unsigned flavor = attr->rta_type;
453 if (flavor) {
454 if (flavor > RTAX_MAX)
455 goto err_inval;
456 fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
458 attr = RTA_NEXT(attr, attrlen);
461 if (rta->rta_prefsrc)
462 memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
464 if (rta->rta_mp) {
465 #ifdef CONFIG_IP_ROUTE_MULTIPATH
466 if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
467 goto failure;
468 if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
469 goto err_inval;
470 if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
471 goto err_inval;
472 #ifdef CONFIG_NET_CLS_ROUTE
473 if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
474 goto err_inval;
475 #endif
476 #else
477 goto err_inval;
478 #endif
479 } else {
480 struct fib_nh *nh = fi->fib_nh;
481 if (rta->rta_oif)
482 nh->nh_oif = *rta->rta_oif;
483 if (rta->rta_gw)
484 memcpy(&nh->nh_gw, rta->rta_gw, 4);
485 #ifdef CONFIG_NET_CLS_ROUTE
486 if (rta->rta_flow)
487 memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
488 #endif
489 nh->nh_flags = r->rtm_flags;
490 #ifdef CONFIG_IP_ROUTE_MULTIPATH
491 nh->nh_weight = 1;
492 #endif
495 #ifdef CONFIG_IP_ROUTE_NAT
496 if (r->rtm_type == RTN_NAT) {
497 if (rta->rta_gw == NULL || nhs != 1 || rta->rta_oif)
498 goto err_inval;
499 memcpy(&fi->fib_nh->nh_gw, rta->rta_gw, 4);
500 goto link_it;
502 #endif
504 if (fib_props[r->rtm_type].error) {
505 if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
506 goto err_inval;
507 goto link_it;
510 if (r->rtm_scope > RT_SCOPE_HOST)
511 goto err_inval;
513 if (r->rtm_scope == RT_SCOPE_HOST) {
514 struct fib_nh *nh = fi->fib_nh;
516 /* Local address is added. */
517 if (nhs != 1 || nh->nh_gw)
518 goto err_inval;
519 nh->nh_scope = RT_SCOPE_NOWHERE;
520 nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
521 err = -ENODEV;
522 if (nh->nh_dev == NULL)
523 goto failure;
524 } else {
525 change_nexthops(fi) {
526 if ((err = fib_check_nh(r, fi, nh)) != 0)
527 goto failure;
528 } endfor_nexthops(fi)
531 if (fi->fib_prefsrc) {
532 if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
533 memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
534 if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
535 goto err_inval;
538 link_it:
539 if ((ofi = fib_find_info(fi)) != NULL) {
540 fi->fib_dead = 1;
541 free_fib_info(fi);
542 ofi->fib_treeref++;
543 return ofi;
546 fi->fib_treeref++;
547 atomic_inc(&fi->fib_clntref);
548 write_lock(&fib_info_lock);
549 fi->fib_next = fib_info_list;
550 fi->fib_prev = NULL;
551 if (fib_info_list)
552 fib_info_list->fib_prev = fi;
553 fib_info_list = fi;
554 write_unlock(&fib_info_lock);
555 return fi;
557 err_inval:
558 err = -EINVAL;
560 failure:
561 *errp = err;
562 if (fi) {
563 fi->fib_dead = 1;
564 free_fib_info(fi);
566 return NULL;
569 int
570 fib_semantic_match(int type, struct fib_info *fi, const struct rt_key *key, struct fib_result *res)
572 int err = fib_props[type].error;
574 if (err == 0) {
575 if (fi->fib_flags&RTNH_F_DEAD)
576 return 1;
578 res->fi = fi;
580 switch (type) {
581 #ifdef CONFIG_IP_ROUTE_NAT
582 case RTN_NAT:
583 FIB_RES_RESET(*res);
584 atomic_inc(&fi->fib_clntref);
585 return 0;
586 #endif
587 case RTN_UNICAST:
588 case RTN_LOCAL:
589 case RTN_BROADCAST:
590 case RTN_ANYCAST:
591 case RTN_MULTICAST:
592 for_nexthops(fi) {
593 if (nh->nh_flags&RTNH_F_DEAD)
594 continue;
595 if (!key->oif || key->oif == nh->nh_oif)
596 break;
598 #ifdef CONFIG_IP_ROUTE_MULTIPATH
599 if (nhsel < fi->fib_nhs) {
600 res->nh_sel = nhsel;
601 atomic_inc(&fi->fib_clntref);
602 return 0;
604 #else
605 if (nhsel < 1) {
606 atomic_inc(&fi->fib_clntref);
607 return 0;
609 #endif
610 endfor_nexthops(fi);
611 res->fi = NULL;
612 return 1;
613 default:
614 res->fi = NULL;
615 printk(KERN_DEBUG "impossible 102\n");
616 return -EINVAL;
619 return err;
622 /* Find appropriate source address to this destination */
624 u32 __fib_res_prefsrc(struct fib_result *res)
626 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
629 #ifdef CONFIG_RTNETLINK
632 fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
633 u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
634 struct fib_info *fi)
636 struct rtmsg *rtm;
637 struct nlmsghdr *nlh;
638 unsigned char *b = skb->tail;
640 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm));
641 rtm = NLMSG_DATA(nlh);
642 rtm->rtm_family = AF_INET;
643 rtm->rtm_dst_len = dst_len;
644 rtm->rtm_src_len = 0;
645 rtm->rtm_tos = tos;
646 rtm->rtm_table = tb_id;
647 rtm->rtm_type = type;
648 rtm->rtm_flags = fi->fib_flags;
649 rtm->rtm_scope = scope;
650 if (rtm->rtm_dst_len)
651 RTA_PUT(skb, RTA_DST, 4, dst);
652 rtm->rtm_protocol = fi->fib_protocol;
653 if (fi->fib_priority)
654 RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
655 #ifdef CONFIG_NET_CLS_ROUTE
656 if (fi->fib_nh[0].nh_tclassid)
657 RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
658 #endif
659 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
660 goto rtattr_failure;
661 if (fi->fib_prefsrc)
662 RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
663 if (fi->fib_nhs == 1) {
664 if (fi->fib_nh->nh_gw)
665 RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
666 if (fi->fib_nh->nh_oif)
667 RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
669 #ifdef CONFIG_IP_ROUTE_MULTIPATH
670 if (fi->fib_nhs > 1) {
671 struct rtnexthop *nhp;
672 struct rtattr *mp_head;
673 if (skb_tailroom(skb) <= RTA_SPACE(0))
674 goto rtattr_failure;
675 mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));
677 for_nexthops(fi) {
678 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
679 goto rtattr_failure;
680 nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
681 nhp->rtnh_flags = nh->nh_flags & 0xFF;
682 nhp->rtnh_hops = nh->nh_weight-1;
683 nhp->rtnh_ifindex = nh->nh_oif;
684 if (nh->nh_gw)
685 RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
686 nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
687 } endfor_nexthops(fi);
688 mp_head->rta_type = RTA_MULTIPATH;
689 mp_head->rta_len = skb->tail - (u8*)mp_head;
691 #endif
692 nlh->nlmsg_len = skb->tail - b;
693 return skb->len;
695 nlmsg_failure:
696 rtattr_failure:
697 skb_trim(skb, b - skb->data);
698 return -1;
701 #endif /* CONFIG_RTNETLINK */
703 #ifndef CONFIG_IP_NOSIOCRT
706 fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
707 struct kern_rta *rta, struct rtentry *r)
709 int plen;
710 u32 *ptr;
712 memset(rtm, 0, sizeof(*rtm));
713 memset(rta, 0, sizeof(*rta));
715 if (r->rt_dst.sa_family != AF_INET)
716 return -EAFNOSUPPORT;
718 /* Check mask for validity:
719 a) it must be contiguous.
720 b) destination must have all host bits clear.
721 c) if application forgot to set correct family (AF_INET),
722 reject request unless it is absolutely clear i.e.
723 both family and mask are zero.
725 plen = 32;
726 ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
727 if (!(r->rt_flags&RTF_HOST)) {
728 u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
729 if (r->rt_genmask.sa_family != AF_INET) {
730 if (mask || r->rt_genmask.sa_family)
731 return -EAFNOSUPPORT;
733 if (bad_mask(mask, *ptr))
734 return -EINVAL;
735 plen = inet_mask_len(mask);
738 nl->nlmsg_flags = NLM_F_REQUEST;
739 nl->nlmsg_pid = 0;
740 nl->nlmsg_seq = 0;
741 nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
742 if (cmd == SIOCDELRT) {
743 nl->nlmsg_type = RTM_DELROUTE;
744 nl->nlmsg_flags = 0;
745 } else {
746 nl->nlmsg_type = RTM_NEWROUTE;
747 nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
748 rtm->rtm_protocol = RTPROT_BOOT;
751 rtm->rtm_dst_len = plen;
752 rta->rta_dst = ptr;
754 if (r->rt_metric) {
755 *(u32*)&r->rt_pad3 = r->rt_metric - 1;
756 rta->rta_priority = (u32*)&r->rt_pad3;
758 if (r->rt_flags&RTF_REJECT) {
759 rtm->rtm_scope = RT_SCOPE_HOST;
760 rtm->rtm_type = RTN_UNREACHABLE;
761 return 0;
763 rtm->rtm_scope = RT_SCOPE_NOWHERE;
764 rtm->rtm_type = RTN_UNICAST;
766 if (r->rt_dev) {
767 char *colon;
768 struct net_device *dev;
769 char devname[IFNAMSIZ];
771 if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
772 return -EFAULT;
773 devname[IFNAMSIZ-1] = 0;
774 colon = strchr(devname, ':');
775 if (colon)
776 *colon = 0;
777 dev = __dev_get_by_name(devname);
778 if (!dev)
779 return -ENODEV;
780 rta->rta_oif = &dev->ifindex;
781 if (colon) {
782 struct in_ifaddr *ifa;
783 struct in_device *in_dev = __in_dev_get(dev);
784 if (!in_dev)
785 return -ENODEV;
786 *colon = ':';
787 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
788 if (strcmp(ifa->ifa_label, devname) == 0)
789 break;
790 if (ifa == NULL)
791 return -ENODEV;
792 rta->rta_prefsrc = &ifa->ifa_local;
796 ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
797 if (r->rt_gateway.sa_family == AF_INET && *ptr) {
798 rta->rta_gw = ptr;
799 if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
800 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
803 if (cmd == SIOCDELRT)
804 return 0;
806 if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
807 return -EINVAL;
809 if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
810 rtm->rtm_scope = RT_SCOPE_LINK;
812 if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
813 struct rtattr *rec;
814 struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
815 if (mx == NULL)
816 return -ENOMEM;
817 rta->rta_mx = mx;
818 mx->rta_type = RTA_METRICS;
819 mx->rta_len = RTA_LENGTH(0);
820 if (r->rt_flags&RTF_MTU) {
821 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
822 rec->rta_type = RTAX_ADVMSS;
823 rec->rta_len = RTA_LENGTH(4);
824 mx->rta_len += RTA_LENGTH(4);
825 *(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
827 if (r->rt_flags&RTF_WINDOW) {
828 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
829 rec->rta_type = RTAX_WINDOW;
830 rec->rta_len = RTA_LENGTH(4);
831 mx->rta_len += RTA_LENGTH(4);
832 *(u32*)RTA_DATA(rec) = r->rt_window;
834 if (r->rt_flags&RTF_IRTT) {
835 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
836 rec->rta_type = RTAX_RTT;
837 rec->rta_len = RTA_LENGTH(4);
838 mx->rta_len += RTA_LENGTH(4);
839 *(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
842 return 0;
845 #endif
848 Update FIB if:
849 - local address disappeared -> we must delete all the entries
850 referring to it.
851 - device went down -> we must shutdown all nexthops going via it.
854 int fib_sync_down(u32 local, struct net_device *dev, int force)
856 int ret = 0;
857 int scope = RT_SCOPE_NOWHERE;
859 if (force)
860 scope = -1;
862 for_fib_info() {
863 if (local && fi->fib_prefsrc == local) {
864 fi->fib_flags |= RTNH_F_DEAD;
865 ret++;
866 } else if (dev && fi->fib_nhs) {
867 int dead = 0;
869 change_nexthops(fi) {
870 if (nh->nh_flags&RTNH_F_DEAD)
871 dead++;
872 else if (nh->nh_dev == dev &&
873 nh->nh_scope != scope) {
874 nh->nh_flags |= RTNH_F_DEAD;
875 #ifdef CONFIG_IP_ROUTE_MULTIPATH
876 fi->fib_power -= nh->nh_power;
877 nh->nh_power = 0;
878 #endif
879 dead++;
881 } endfor_nexthops(fi)
882 if (dead == fi->fib_nhs) {
883 fi->fib_flags |= RTNH_F_DEAD;
884 ret++;
887 } endfor_fib_info();
888 return ret;
891 #ifdef CONFIG_IP_ROUTE_MULTIPATH
894 Dead device goes up. We wake up dead nexthops.
895 It takes sense only on multipath routes.
898 int fib_sync_up(struct net_device *dev)
900 int ret = 0;
902 if (!(dev->flags&IFF_UP))
903 return 0;
905 for_fib_info() {
906 int alive = 0;
908 change_nexthops(fi) {
909 if (!(nh->nh_flags&RTNH_F_DEAD)) {
910 alive++;
911 continue;
913 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
914 continue;
915 if (nh->nh_dev != dev || __in_dev_get(dev) == NULL)
916 continue;
917 alive++;
918 nh->nh_power = 0;
919 nh->nh_flags &= ~RTNH_F_DEAD;
920 } endfor_nexthops(fi)
922 if (alive > 0) {
923 fi->fib_flags &= ~RTNH_F_DEAD;
924 ret++;
926 } endfor_fib_info();
927 return ret;
931 The algorithm is suboptimal, but it provides really
932 fair weighted route distribution.
935 void fib_select_multipath(const struct rt_key *key, struct fib_result *res)
937 struct fib_info *fi = res->fi;
938 int w;
940 if (fi->fib_power <= 0) {
941 int power = 0;
942 change_nexthops(fi) {
943 if (!(nh->nh_flags&RTNH_F_DEAD)) {
944 power += nh->nh_weight;
945 nh->nh_power = nh->nh_weight;
947 } endfor_nexthops(fi);
948 fi->fib_power = power;
949 #if 1
950 if (power <= 0) {
951 printk(KERN_CRIT "impossible 777\n");
952 return;
954 #endif
958 /* w should be random number [0..fi->fib_power-1],
959 it is pretty bad approximation.
962 w = jiffies % fi->fib_power;
964 change_nexthops(fi) {
965 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
966 if ((w -= nh->nh_power) <= 0) {
967 nh->nh_power--;
968 fi->fib_power--;
969 res->nh_sel = nhsel;
970 return;
973 } endfor_nexthops(fi);
975 #if 1
976 printk(KERN_CRIT "impossible 888\n");
977 #endif
978 return;
980 #endif
983 #ifdef CONFIG_PROC_FS
985 static unsigned fib_flag_trans(int type, int dead, u32 mask, struct fib_info *fi)
987 static unsigned type2flags[RTN_MAX+1] = {
988 0, 0, 0, 0, 0, 0, 0, RTF_REJECT, RTF_REJECT, 0, 0, 0
990 unsigned flags = type2flags[type];
992 if (fi && fi->fib_nh->nh_gw)
993 flags |= RTF_GATEWAY;
994 if (mask == 0xFFFFFFFF)
995 flags |= RTF_HOST;
996 if (!dead)
997 flags |= RTF_UP;
998 return flags;
1001 void fib_node_get_info(int type, int dead, struct fib_info *fi, u32 prefix, u32 mask, char *buffer)
1003 int len;
1004 unsigned flags = fib_flag_trans(type, dead, mask, fi);
1006 if (fi) {
1007 len = sprintf(buffer, "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
1008 fi->fib_dev ? fi->fib_dev->name : "*", prefix,
1009 fi->fib_nh->nh_gw, flags, 0, 0, fi->fib_priority,
1010 mask, fi->fib_advmss+40, fi->fib_window, fi->fib_rtt>>3);
1011 } else {
1012 len = sprintf(buffer, "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
1013 prefix, 0,
1014 flags, 0, 0, 0,
1015 mask, 0, 0, 0);
1017 memset(buffer+len, ' ', 127-len);
1018 buffer[127] = '\n';
1021 #endif