Linux 2.2.0
[davej-history.git] / net / ipv4 / fib_semantics.c
blob7bff3609537d5302faab5d3731570a1b9fa3d516
1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * IPv4 Forwarding Information Base: semantics.
8 * Version: $Id: fib_semantics.c,v 1.11 1998/10/03 09:37:12 davem Exp $
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
18 #include <linux/config.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <asm/bitops.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/sched.h>
25 #include <linux/mm.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
30 #include <linux/in.h>
31 #include <linux/inet.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/netlink.h>
37 #include <linux/init.h>
39 #include <net/ip.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
42 #include <net/tcp.h>
43 #include <net/sock.h>
44 #include <net/ip_fib.h>
/* Debug trace hook; expands to nothing. */
#define FSprintk(a...)

/* Head of the doubly linked (fib_next/fib_prev) list of fib_info records. */
static struct fib_info *fib_info_list;

/*
 * Walk every record on fib_info_list.
 *
 * for_fib_info() opens a scope declaring the cursor `fi' and starts the
 * loop; the loop body follows, and endfor_fib_info() closes the scope.
 */
#define for_fib_info()							\
	{								\
		struct fib_info *fi;					\
		for (fi = fib_info_list; fi; fi = fi->fib_next)

#define endfor_fib_info()	}
/*
 * Nexthop iterators.
 *
 * Both open a scope declaring `nhsel' (index) and `nh' (cursor) and start
 * a loop over (fi)->fib_nh; the loop body follows and endfor_nexthops()
 * closes the scope.  for_nexthops() yields a const cursor,
 * change_nexthops() a writable one.
 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH

#define for_nexthops(fi)						\
	{								\
		int nhsel;						\
		const struct fib_nh * nh;				\
		for (nhsel=0, nh = (fi)->fib_nh;			\
		     nhsel < (fi)->fib_nhs;				\
		     nh++, nhsel++)

#define change_nexthops(fi)						\
	{								\
		int nhsel;						\
		struct fib_nh * nh;					\
		for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh);	\
		     nhsel < (fi)->fib_nhs;				\
		     nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/* Hope, that gcc will optimize it to get rid of dummy loop */

#define for_nexthops(fi)						\
	{								\
		int nhsel=0;						\
		const struct fib_nh * nh = (fi)->fib_nh;		\
		for (nhsel=0; nhsel < 1; nhsel++)

#define change_nexthops(fi)						\
	{								\
		int nhsel=0;						\
		struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh);	\
		for (nhsel=0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

#define endfor_nexthops(fi)	}
78 static struct
80 int error;
81 u8 scope;
82 } fib_props[RTA_MAX+1] = {
83 { 0, RT_SCOPE_NOWHERE}, /* RTN_UNSPEC */
84 { 0, RT_SCOPE_UNIVERSE}, /* RTN_UNICAST */
85 { 0, RT_SCOPE_HOST}, /* RTN_LOCAL */
86 { 0, RT_SCOPE_LINK}, /* RTN_BROADCAST */
87 { 0, RT_SCOPE_LINK}, /* RTN_ANYCAST */
88 { 0, RT_SCOPE_UNIVERSE}, /* RTN_MULTICAST */
89 { -EINVAL, RT_SCOPE_UNIVERSE}, /* RTN_BLACKHOLE */
90 { -EHOSTUNREACH, RT_SCOPE_UNIVERSE},/* RTN_UNREACHABLE */
91 { -EACCES, RT_SCOPE_UNIVERSE}, /* RTN_PROHIBIT */
92 { 1, RT_SCOPE_UNIVERSE}, /* RTN_THROW */
93 #ifdef CONFIG_IP_ROUTE_NAT
94 { 0, RT_SCOPE_HOST}, /* RTN_NAT */
95 #else
96 { -EINVAL, RT_SCOPE_NOWHERE}, /* RTN_NAT */
97 #endif
98 { -EINVAL, RT_SCOPE_NOWHERE} /* RTN_XRESOLVE */
101 /* Release a nexthop info record */
103 void fib_release_info(struct fib_info *fi)
105 if (fi && !--fi->fib_refcnt) {
106 if (fi->fib_next)
107 fi->fib_next->fib_prev = fi->fib_prev;
108 if (fi->fib_prev)
109 fi->fib_prev->fib_next = fi->fib_next;
110 if (fi == fib_info_list)
111 fib_info_list = fi->fib_next;
112 kfree(fi);
116 extern __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
118 const struct fib_nh *onh = ofi->fib_nh;
120 for_nexthops(fi) {
121 if (nh->nh_oif != onh->nh_oif ||
122 nh->nh_gw != onh->nh_gw ||
123 nh->nh_scope != onh->nh_scope ||
124 #ifdef CONFIG_IP_ROUTE_MULTIPATH
125 nh->nh_weight != onh->nh_weight ||
126 #endif
127 #ifdef CONFIG_NET_CLS_ROUTE
128 nh->nh_tclassid != onh->nh_tclassid ||
129 #endif
130 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
131 return -1;
132 onh++;
133 } endfor_nexthops(fi);
134 return 0;
137 extern __inline__ struct fib_info * fib_find_info(const struct fib_info *nfi)
139 for_fib_info() {
140 if (fi->fib_nhs != nfi->fib_nhs)
141 continue;
142 if (nfi->fib_protocol == fi->fib_protocol &&
143 nfi->fib_prefsrc == fi->fib_prefsrc &&
144 nfi->fib_priority == fi->fib_priority &&
145 nfi->fib_mtu == fi->fib_mtu &&
146 nfi->fib_rtt == fi->fib_rtt &&
147 nfi->fib_window == fi->fib_window &&
148 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
149 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
150 return fi;
151 } endfor_fib_info();
152 return NULL;
155 /* Check, that the gateway is already configured.
156 Used only by redirect accept routine.
159 int ip_fib_check_default(u32 gw, struct device *dev)
161 for_fib_info() {
162 if (fi->fib_flags & RTNH_F_DEAD)
163 continue;
164 for_nexthops(fi) {
165 if (nh->nh_dev == dev && nh->nh_gw == gw &&
166 !(nh->nh_flags&RTNH_F_DEAD))
167 return 0;
168 } endfor_nexthops(fi);
169 } endfor_fib_info();
170 return -1;
173 #ifdef CONFIG_IP_ROUTE_MULTIPATH
175 static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
177 while (RTA_OK(attr,attrlen)) {
178 if (attr->rta_type == type)
179 return *(u32*)RTA_DATA(attr);
180 attr = RTA_NEXT(attr, attrlen);
182 return 0;
185 static int
186 fib_count_nexthops(struct rtattr *rta)
188 int nhs = 0;
189 struct rtnexthop *nhp = RTA_DATA(rta);
190 int nhlen = RTA_PAYLOAD(rta);
192 while (nhlen >= (int)sizeof(struct rtnexthop)) {
193 if ((nhlen -= nhp->rtnh_len) < 0)
194 return 0;
195 nhs++;
196 nhp = RTNH_NEXT(nhp);
198 return nhs;
201 static int
202 fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
204 struct rtnexthop *nhp = RTA_DATA(rta);
205 int nhlen = RTA_PAYLOAD(rta);
207 change_nexthops(fi) {
208 int attrlen = nhlen - sizeof(struct rtnexthop);
209 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
210 return -EINVAL;
211 nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
212 nh->nh_oif = nhp->rtnh_ifindex;
213 nh->nh_weight = nhp->rtnh_hops + 1;
214 if (attrlen) {
215 nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
216 #ifdef CONFIG_NET_CLS_ROUTE
217 nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
218 #endif
220 nhp = RTNH_NEXT(nhp);
221 } endfor_nexthops(fi);
222 return 0;
225 #endif
227 int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
228 struct fib_info *fi)
230 #ifdef CONFIG_IP_ROUTE_MULTIPATH
231 struct rtnexthop *nhp;
232 int nhlen;
233 #endif
235 if (rta->rta_priority &&
236 *rta->rta_priority != fi->fib_priority)
237 return 1;
239 if (rta->rta_oif || rta->rta_gw) {
240 if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
241 (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
242 return 0;
243 return 1;
246 #ifdef CONFIG_IP_ROUTE_MULTIPATH
247 if (rta->rta_mp == NULL)
248 return 0;
249 nhp = RTA_DATA(rta->rta_mp);
250 nhlen = RTA_PAYLOAD(rta->rta_mp);
252 for_nexthops(fi) {
253 int attrlen = nhlen - sizeof(struct rtnexthop);
254 u32 gw;
256 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
257 return -EINVAL;
258 if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
259 return 1;
260 if (attrlen) {
261 gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
262 if (gw && gw != nh->nh_gw)
263 return 1;
264 #ifdef CONFIG_NET_CLS_ROUTE
265 gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
266 if (gw && gw != nh->nh_tclassid)
267 return 1;
268 #endif
270 nhp = RTNH_NEXT(nhp);
271 } endfor_nexthops(fi);
272 #endif
273 return 0;
278 Picture
279 -------
281 Semantics of nexthop are very messy for historical reasons.
282 We have to take into account that:
283 a) gateway can be actually local interface address,
284 so that gatewayed route is direct.
285 b) gateway must be on-link address, possibly
286 described not by an ifaddr, but also by a direct route.
287 c) If both gateway and interface are specified, they should not
288 contradict.
289 d) If we use tunnel routes, gateway could be not on-link.
291 Attempt to reconcile all of these (alas, self-contradictory) conditions
292 results in pretty ugly and hairy code with obscure logic.
294 I chose to generalize it instead, so that the size
295 of the code practically does not increase, but it becomes
296 much more general.
297 Every prefix is assigned a "scope" value: "host" is local address,
298 "link" is direct route,
299 [ ... "site" ... "interior" ... ]
300 and "universe" is true gateway route with global meaning.
302 Every prefix refers to a set of "nexthop"s (gw, oif),
303 where gw must have narrower scope. This recursion stops
304 when gw has LOCAL scope or if "nexthop" is declared ONLINK,
305 which means that gw is forced to be on link.
307 Code is still hairy, but now it is apparently logically
308 consistent and very flexible. E.g., as a by-product it allows
309 independent exterior and interior routing processes
310 to co-exist in peace.
312 Normally it looks as follows.
314 {universe prefix} -> (gw, oif) [scope link]
316 |-> {link prefix} -> (gw, oif) [scope local]
318 |-> {local prefix} (terminal node)
321 static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
323 int err;
325 if (nh->nh_gw) {
326 struct rt_key key;
327 struct fib_result res;
329 #ifdef CONFIG_IP_ROUTE_PERVASIVE
330 if (nh->nh_flags&RTNH_F_PERVASIVE)
331 return 0;
332 #endif
333 if (nh->nh_flags&RTNH_F_ONLINK) {
334 struct device *dev;
336 if (r->rtm_scope >= RT_SCOPE_LINK)
337 return -EINVAL;
338 if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
339 return -EINVAL;
340 if ((dev = dev_get_by_index(nh->nh_oif)) == NULL)
341 return -ENODEV;
342 if (!(dev->flags&IFF_UP))
343 return -ENETDOWN;
344 nh->nh_dev = dev;
345 nh->nh_scope = RT_SCOPE_LINK;
346 return 0;
348 memset(&key, 0, sizeof(key));
349 key.dst = nh->nh_gw;
350 key.oif = nh->nh_oif;
351 key.scope = r->rtm_scope + 1;
353 /* It is not necessary, but requires a bit of thinking */
354 if (key.scope < RT_SCOPE_LINK)
355 key.scope = RT_SCOPE_LINK;
357 if ((err = fib_lookup(&key, &res)) != 0)
358 return err;
359 nh->nh_scope = res.scope;
360 nh->nh_oif = FIB_RES_OIF(res);
361 nh->nh_dev = FIB_RES_DEV(res);
362 } else {
363 struct in_device *in_dev;
365 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
366 return -EINVAL;
368 in_dev = inetdev_by_index(nh->nh_oif);
369 if (in_dev == NULL)
370 return -ENODEV;
371 if (!(in_dev->dev->flags&IFF_UP))
372 return -ENETDOWN;
373 nh->nh_dev = in_dev->dev;
374 nh->nh_scope = RT_SCOPE_HOST;
376 return 0;
379 struct fib_info *
380 fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
381 const struct nlmsghdr *nlh, int *errp)
383 int err;
384 struct fib_info *fi = NULL;
385 struct fib_info *ofi;
386 #ifdef CONFIG_IP_ROUTE_MULTIPATH
387 int nhs = 1;
388 #else
389 const int nhs = 1;
390 #endif
392 /* Fast check to catch the most weird cases */
393 if (fib_props[r->rtm_type].scope > r->rtm_scope)
394 goto err_inval;
396 #ifdef CONFIG_IP_ROUTE_MULTIPATH
397 if (rta->rta_mp) {
398 nhs = fib_count_nexthops(rta->rta_mp);
399 if (nhs == 0)
400 goto err_inval;
402 #endif
404 fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
405 err = -ENOBUFS;
406 if (fi == NULL)
407 goto failure;
408 memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));
410 fi->fib_protocol = r->rtm_protocol;
411 fi->fib_nhs = nhs;
412 fi->fib_flags = r->rtm_flags;
413 if (rta->rta_priority)
414 fi->fib_priority = *rta->rta_priority;
415 if (rta->rta_mx) {
416 int attrlen = RTA_PAYLOAD(rta->rta_mx);
417 struct rtattr *attr = RTA_DATA(rta->rta_mx);
419 while (RTA_OK(attr, attrlen)) {
420 unsigned flavor = attr->rta_type;
421 if (flavor) {
422 if (flavor > FIB_MAX_METRICS)
423 goto failure;
424 fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
426 attr = RTA_NEXT(attr, attrlen);
429 if (rta->rta_prefsrc)
430 memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
432 if (rta->rta_mp) {
433 #ifdef CONFIG_IP_ROUTE_MULTIPATH
434 if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
435 goto failure;
436 if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
437 goto err_inval;
438 if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
439 goto err_inval;
440 #ifdef CONFIG_NET_CLS_ROUTE
441 if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
442 goto err_inval;
443 #endif
444 #else
445 goto err_inval;
446 #endif
447 } else {
448 struct fib_nh *nh = fi->fib_nh;
449 if (rta->rta_oif)
450 nh->nh_oif = *rta->rta_oif;
451 if (rta->rta_gw)
452 memcpy(&nh->nh_gw, rta->rta_gw, 4);
453 #ifdef CONFIG_NET_CLS_ROUTE
454 if (rta->rta_flow)
455 memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
456 #endif
457 nh->nh_flags = r->rtm_flags;
458 #ifdef CONFIG_IP_ROUTE_MULTIPATH
459 nh->nh_weight = 1;
460 #endif
463 #ifdef CONFIG_IP_ROUTE_NAT
464 if (r->rtm_type == RTN_NAT) {
465 if (rta->rta_gw == NULL || nhs != 1 || rta->rta_oif)
466 goto err_inval;
467 memcpy(&fi->fib_nh->nh_gw, rta->rta_gw, 4);
468 goto link_it;
470 #endif
472 if (fib_props[r->rtm_type].error) {
473 if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
474 goto err_inval;
475 goto link_it;
478 if (r->rtm_scope > RT_SCOPE_HOST)
479 goto err_inval;
481 if (r->rtm_scope == RT_SCOPE_HOST) {
482 struct fib_nh *nh = fi->fib_nh;
484 /* Local address is added. */
485 if (nhs != 1 || nh->nh_gw)
486 goto err_inval;
487 nh->nh_scope = RT_SCOPE_NOWHERE;
488 nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
489 err = -ENODEV;
490 if (nh->nh_dev == NULL)
491 goto failure;
492 } else {
493 change_nexthops(fi) {
494 if ((err = fib_check_nh(r, fi, nh)) != 0)
495 goto failure;
496 } endfor_nexthops(fi)
499 if (fi->fib_prefsrc) {
500 if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
501 memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
502 if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
503 goto err_inval;
506 link_it:
507 if ((ofi = fib_find_info(fi)) != NULL) {
508 kfree(fi);
509 ofi->fib_refcnt++;
510 return ofi;
513 fi->fib_refcnt++;
514 fi->fib_next = fib_info_list;
515 fi->fib_prev = NULL;
516 if (fib_info_list)
517 fib_info_list->fib_prev = fi;
518 fib_info_list = fi;
519 return fi;
521 err_inval:
522 err = -EINVAL;
524 failure:
525 *errp = err;
526 if (fi)
527 kfree(fi);
528 return NULL;
531 int
532 fib_semantic_match(int type, struct fib_info *fi, const struct rt_key *key, struct fib_result *res)
534 int err = fib_props[type].error;
536 if (err == 0) {
537 if (fi->fib_flags&RTNH_F_DEAD)
538 return 1;
540 res->fi = fi;
542 switch (type) {
543 #ifdef CONFIG_IP_ROUTE_NAT
544 case RTN_NAT:
545 FIB_RES_RESET(*res);
546 return 0;
547 #endif
548 case RTN_UNICAST:
549 case RTN_LOCAL:
550 case RTN_BROADCAST:
551 case RTN_ANYCAST:
552 case RTN_MULTICAST:
553 for_nexthops(fi) {
554 if (nh->nh_flags&RTNH_F_DEAD)
555 continue;
556 if (!key->oif || key->oif == nh->nh_oif)
557 break;
559 #ifdef CONFIG_IP_ROUTE_MULTIPATH
560 if (nhsel < fi->fib_nhs) {
561 res->nh_sel = nhsel;
562 return 0;
564 #else
565 if (nhsel < 1)
566 return 0;
567 #endif
568 endfor_nexthops(fi);
569 return 1;
570 default:
571 printk(KERN_DEBUG "impossible 102\n");
572 return -EINVAL;
575 return err;
578 /* Find appropriate source address to this destination */
580 u32 __fib_res_prefsrc(struct fib_result *res)
582 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
585 #ifdef CONFIG_RTNETLINK
588 fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
589 u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
590 struct fib_info *fi)
592 struct rtmsg *rtm;
593 struct nlmsghdr *nlh;
594 unsigned char *b = skb->tail;
596 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm));
597 rtm = NLMSG_DATA(nlh);
598 rtm->rtm_family = AF_INET;
599 rtm->rtm_dst_len = dst_len;
600 rtm->rtm_src_len = 0;
601 rtm->rtm_tos = tos;
602 rtm->rtm_table = tb_id;
603 rtm->rtm_type = type;
604 rtm->rtm_flags = fi->fib_flags;
605 rtm->rtm_scope = scope;
606 if (rtm->rtm_dst_len)
607 RTA_PUT(skb, RTA_DST, 4, dst);
608 rtm->rtm_protocol = fi->fib_protocol;
609 if (fi->fib_priority)
610 RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
611 #ifdef CONFIG_NET_CLS_ROUTE
612 if (fi->fib_nh[0].nh_tclassid)
613 RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
614 #endif
615 if (fi->fib_mtu || fi->fib_window || fi->fib_rtt) {
616 int i;
617 struct rtattr *mx = (struct rtattr *)skb->tail;
618 RTA_PUT(skb, RTA_METRICS, 0, NULL);
619 for (i=0; i<FIB_MAX_METRICS; i++) {
620 if (fi->fib_metrics[i])
621 RTA_PUT(skb, i+1, sizeof(unsigned), fi->fib_metrics + i);
623 mx->rta_len = skb->tail - (u8*)mx;
625 if (fi->fib_prefsrc)
626 RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
627 if (fi->fib_nhs == 1) {
628 if (fi->fib_nh->nh_gw)
629 RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
630 if (fi->fib_nh->nh_oif)
631 RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
633 #ifdef CONFIG_IP_ROUTE_MULTIPATH
634 if (fi->fib_nhs > 1) {
635 struct rtnexthop *nhp;
636 struct rtattr *mp_head;
637 if (skb_tailroom(skb) <= RTA_SPACE(0))
638 goto rtattr_failure;
639 mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));
641 for_nexthops(fi) {
642 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
643 goto rtattr_failure;
644 nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
645 nhp->rtnh_flags = nh->nh_flags & 0xFF;
646 nhp->rtnh_hops = nh->nh_weight-1;
647 nhp->rtnh_ifindex = nh->nh_oif;
648 if (nh->nh_gw)
649 RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
650 nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
651 } endfor_nexthops(fi);
652 mp_head->rta_type = RTA_MULTIPATH;
653 mp_head->rta_len = skb->tail - (u8*)mp_head;
655 #endif
656 nlh->nlmsg_len = skb->tail - b;
657 return skb->len;
659 nlmsg_failure:
660 rtattr_failure:
661 skb_trim(skb, b - skb->data);
662 return -1;
665 #endif /* CONFIG_RTNETLINK */
667 #ifndef CONFIG_IP_NOSIOCRT
670 fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
671 struct kern_rta *rta, struct rtentry *r)
673 int plen;
674 u32 *ptr;
676 memset(rtm, 0, sizeof(*rtm));
677 memset(rta, 0, sizeof(*rta));
679 if (r->rt_dst.sa_family != AF_INET)
680 return -EAFNOSUPPORT;
682 /* Check mask for validity:
683 a) it must be contiguous.
684 b) destination must have all host bits clear.
685 c) if application forgot to set correct family (AF_INET),
686 reject request unless it is absolutely clear i.e.
687 both family and mask are zero.
689 plen = 32;
690 ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
691 if (!(r->rt_flags&RTF_HOST)) {
692 u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
693 if (r->rt_genmask.sa_family != AF_INET) {
694 if (mask || r->rt_genmask.sa_family)
695 return -EAFNOSUPPORT;
697 if (bad_mask(mask, *ptr))
698 return -EINVAL;
699 plen = inet_mask_len(mask);
702 nl->nlmsg_flags = NLM_F_REQUEST;
703 nl->nlmsg_pid = 0;
704 nl->nlmsg_seq = 0;
705 nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
706 if (cmd == SIOCDELRT) {
707 nl->nlmsg_type = RTM_DELROUTE;
708 nl->nlmsg_flags = 0;
709 } else {
710 nl->nlmsg_type = RTM_NEWROUTE;
711 nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
712 rtm->rtm_protocol = RTPROT_BOOT;
715 rtm->rtm_dst_len = plen;
716 rta->rta_dst = ptr;
718 if (r->rt_metric) {
719 *(u32*)&r->rt_pad3 = r->rt_metric - 1;
720 rta->rta_priority = (u32*)&r->rt_pad3;
722 if (r->rt_flags&RTF_REJECT) {
723 rtm->rtm_scope = RT_SCOPE_HOST;
724 rtm->rtm_type = RTN_UNREACHABLE;
725 return 0;
727 rtm->rtm_scope = RT_SCOPE_NOWHERE;
728 rtm->rtm_type = RTN_UNICAST;
730 if (r->rt_dev) {
731 #ifdef CONFIG_IP_ALIAS
732 char *colon;
733 #endif
734 struct device *dev;
735 char devname[IFNAMSIZ];
737 if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
738 return -EFAULT;
739 devname[IFNAMSIZ-1] = 0;
740 #ifdef CONFIG_IP_ALIAS
741 colon = strchr(devname, ':');
742 if (colon)
743 *colon = 0;
744 #endif
745 dev = dev_get(devname);
746 if (!dev)
747 return -ENODEV;
748 rta->rta_oif = &dev->ifindex;
749 #ifdef CONFIG_IP_ALIAS
750 if (colon) {
751 struct in_ifaddr *ifa;
752 struct in_device *in_dev = dev->ip_ptr;
753 if (!in_dev)
754 return -ENODEV;
755 *colon = ':';
756 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
757 if (strcmp(ifa->ifa_label, devname) == 0)
758 break;
759 if (ifa == NULL)
760 return -ENODEV;
761 rta->rta_prefsrc = &ifa->ifa_local;
763 #endif
766 ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
767 if (r->rt_gateway.sa_family == AF_INET && *ptr) {
768 rta->rta_gw = ptr;
769 if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
770 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
773 if (cmd == SIOCDELRT)
774 return 0;
776 if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
777 return -EINVAL;
779 if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
780 rtm->rtm_scope = RT_SCOPE_LINK;
782 if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
783 struct rtattr *rec;
784 struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
785 if (mx == NULL)
786 return -ENOMEM;
787 rta->rta_mx = mx;
788 mx->rta_type = RTA_METRICS;
789 mx->rta_len = RTA_LENGTH(0);
790 if (r->rt_flags&RTF_MTU) {
791 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
792 rec->rta_type = RTAX_MTU;
793 rec->rta_len = RTA_LENGTH(4);
794 mx->rta_len += RTA_LENGTH(4);
795 *(u32*)RTA_DATA(rec) = r->rt_mtu;
797 if (r->rt_flags&RTF_WINDOW) {
798 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
799 rec->rta_type = RTAX_WINDOW;
800 rec->rta_len = RTA_LENGTH(4);
801 mx->rta_len += RTA_LENGTH(4);
802 *(u32*)RTA_DATA(rec) = r->rt_window;
804 if (r->rt_flags&RTF_IRTT) {
805 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
806 rec->rta_type = RTAX_RTT;
807 rec->rta_len = RTA_LENGTH(4);
808 mx->rta_len += RTA_LENGTH(4);
809 *(u32*)RTA_DATA(rec) = r->rt_irtt;
812 return 0;
815 #endif
818 Update FIB if:
819 - local address disappeared -> we must delete all the entries
820 referring to it.
821 - device went down -> we must shutdown all nexthops going via it.
824 int fib_sync_down(u32 local, struct device *dev, int force)
826 int ret = 0;
827 int scope = RT_SCOPE_NOWHERE;
829 if (force)
830 scope = -1;
832 for_fib_info() {
833 if (local && fi->fib_prefsrc == local) {
834 fi->fib_flags |= RTNH_F_DEAD;
835 ret++;
836 } else if (dev && fi->fib_nhs) {
837 int dead = 0;
839 change_nexthops(fi) {
840 if (nh->nh_flags&RTNH_F_DEAD)
841 dead++;
842 else if (nh->nh_dev == dev &&
843 nh->nh_scope != scope) {
844 nh->nh_flags |= RTNH_F_DEAD;
845 #ifdef CONFIG_IP_ROUTE_MULTIPATH
846 fi->fib_power -= nh->nh_power;
847 nh->nh_power = 0;
848 #endif
849 dead++;
851 } endfor_nexthops(fi)
852 if (dead == fi->fib_nhs) {
853 fi->fib_flags |= RTNH_F_DEAD;
854 ret++;
857 } endfor_fib_info();
858 return ret;
861 #ifdef CONFIG_IP_ROUTE_MULTIPATH
864 Dead device goes up. We wake up dead nexthops.
865 It takes sense only on multipath routes.
868 int fib_sync_up(struct device *dev)
870 int ret = 0;
872 if (!(dev->flags&IFF_UP))
873 return 0;
875 for_fib_info() {
876 int alive = 0;
878 change_nexthops(fi) {
879 if (!(nh->nh_flags&RTNH_F_DEAD)) {
880 alive++;
881 continue;
883 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
884 continue;
885 if (nh->nh_dev != dev || dev->ip_ptr == NULL)
886 continue;
887 alive++;
888 nh->nh_power = 0;
889 nh->nh_flags &= ~RTNH_F_DEAD;
890 } endfor_nexthops(fi)
892 if (alive == fi->fib_nhs) {
893 fi->fib_flags &= ~RTNH_F_DEAD;
894 ret++;
896 } endfor_fib_info();
897 return ret;
901 The algorithm is suboptimal, but it provides really
902 fair weighted route distribution.
905 void fib_select_multipath(const struct rt_key *key, struct fib_result *res)
907 struct fib_info *fi = res->fi;
908 int w;
910 if (fi->fib_power <= 0) {
911 int power = 0;
912 change_nexthops(fi) {
913 if (!(nh->nh_flags&RTNH_F_DEAD)) {
914 power += nh->nh_weight;
915 nh->nh_power = nh->nh_weight;
917 } endfor_nexthops(fi);
918 fi->fib_power = power;
919 #if 1
920 if (power <= 0) {
921 printk(KERN_CRIT "impossible 777\n");
922 return;
924 #endif
928 /* w should be random number [0..fi->fib_power-1],
929 it is pretty bad approximation.
932 w = jiffies % fi->fib_power;
934 change_nexthops(fi) {
935 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
936 if ((w -= nh->nh_power) <= 0) {
937 nh->nh_power--;
938 fi->fib_power--;
939 res->nh_sel = nhsel;
940 return;
943 } endfor_nexthops(fi);
945 #if 1
946 printk(KERN_CRIT "impossible 888\n");
947 #endif
948 return;
950 #endif
953 #ifdef CONFIG_PROC_FS
955 static unsigned fib_flag_trans(int type, int dead, u32 mask, struct fib_info *fi)
957 static unsigned type2flags[RTN_MAX+1] = {
958 0, 0, 0, 0, 0, 0, 0, RTF_REJECT, RTF_REJECT, 0, 0, 0
960 unsigned flags = type2flags[type];
962 if (fi && fi->fib_nh->nh_gw)
963 flags |= RTF_GATEWAY;
964 if (mask == 0xFFFFFFFF)
965 flags |= RTF_HOST;
966 if (!dead)
967 flags |= RTF_UP;
968 return flags;
971 void fib_node_get_info(int type, int dead, struct fib_info *fi, u32 prefix, u32 mask, char *buffer)
973 int len;
974 unsigned flags = fib_flag_trans(type, dead, mask, fi);
976 if (fi) {
977 len = sprintf(buffer, "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
978 fi->fib_dev ? fi->fib_dev->name : "*", prefix,
979 fi->fib_nh->nh_gw, flags, 0, 0, fi->fib_priority,
980 mask, fi->fib_mtu, fi->fib_window, fi->fib_rtt);
981 } else {
982 len = sprintf(buffer, "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
983 prefix, 0,
984 flags, 0, 0, 0,
985 mask, 0, 0, 0);
987 memset(buffer+len, ' ', 127-len);
988 buffer[127] = '\n';
991 #endif