NFS - Rewrite the RTT code and the request flags & low level state machine
[dragonfly.git] / usr.sbin / traceroute / traceroute.c
blob13eff5c445391e63cb0452792b23805428714948
1 /* $OpenBSD: traceroute.c,v 1.61 2004/01/26 18:23:51 deraadt Exp $ */
2 /* $NetBSD: traceroute.c,v 1.10 1995/05/21 15:50:45 mycroft Exp $ */
4 /*-
5 * Copyright (c) 1990, 1993
6 * The Regents of the University of California. All rights reserved.
8 * This code is derived from software contributed to Berkeley by
9 * Van Jacobson.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
35 * $DragonFly: src/usr.sbin/traceroute/traceroute.c,v 1.9 2007/10/25 08:12:42 hasso Exp $
36 * @(#)traceroute.c 8.1 (Berkeley) 6/6/93
40 * traceroute host - trace the route ip packets follow going to "host".
42 * Attempt to trace the route an ip packet would follow to some
43 * internet host. We find out intermediate hops by launching probe
44 * packets with a small ttl (time to live) then listening for an
45 * icmp "time exceeded" reply from a gateway. We start our probes
46 * with a ttl of one and increase by one until we get an icmp "port
47 * unreachable" (which means we got to "host") or hit a max (which
48 * defaults to 64 hops & can be changed with the -m flag). Three
49 * probes (change with -q flag) are sent at each ttl setting and a
50 * line is printed showing the ttl, address of the gateway and
51 * round trip time of each probe. If the probe answers come from
52 * different gateways, the address of each responding system will
53 * be printed. If there is no response within a 5 sec. timeout
54 * interval (changed with the -w flag), a "*" is printed for that
55 * probe.
57 * Probe packets are UDP format. We don't want the destination
58 * host to process them so the destination port is set to an
59 * unlikely value (if some clod on the destination is using that
60 * value, it can be changed with the -p flag).
62 * A sample use might be:
64 * [yak 71]% traceroute nis.nsf.net.
65 * traceroute to nis.nsf.net (35.1.1.48), 64 hops max, 56 byte packet
66 * 1 helios.ee.lbl.gov (128.3.112.1) 19 ms 19 ms 0 ms
67 * 2 lilac-dmc.Berkeley.EDU (128.32.216.1) 39 ms 39 ms 19 ms
68 * 3 lilac-dmc.Berkeley.EDU (128.32.216.1) 39 ms 39 ms 19 ms
69 * 4 ccngw-ner-cc.Berkeley.EDU (128.32.136.23) 39 ms 40 ms 39 ms
70 * 5 ccn-nerif22.Berkeley.EDU (128.32.168.22) 39 ms 39 ms 39 ms
71 * 6 128.32.197.4 (128.32.197.4) 40 ms 59 ms 59 ms
72 * 7 131.119.2.5 (131.119.2.5) 59 ms 59 ms 59 ms
73 * 8 129.140.70.13 (129.140.70.13) 99 ms 99 ms 80 ms
74 * 9 129.140.71.6 (129.140.71.6) 139 ms 239 ms 319 ms
75 * 10 129.140.81.7 (129.140.81.7) 220 ms 199 ms 199 ms
76 * 11 nic.merit.edu (35.1.1.48) 239 ms 239 ms 239 ms
78 * Note that lines 2 & 3 are the same. This is due to a buggy
79 * kernel on the 2nd hop system -- lbl-csam.arpa -- that forwards
80 * packets with a zero ttl.
82 * A more interesting example is:
84 * [yak 72]% traceroute allspice.lcs.mit.edu.
85 * traceroute to allspice.lcs.mit.edu (18.26.0.115), 64 hops max
86 * 1 helios.ee.lbl.gov (128.3.112.1) 0 ms 0 ms 0 ms
87 * 2 lilac-dmc.Berkeley.EDU (128.32.216.1) 19 ms 19 ms 19 ms
88 * 3 lilac-dmc.Berkeley.EDU (128.32.216.1) 39 ms 19 ms 19 ms
89 * 4 ccngw-ner-cc.Berkeley.EDU (128.32.136.23) 19 ms 39 ms 39 ms
90 * 5 ccn-nerif22.Berkeley.EDU (128.32.168.22) 20 ms 39 ms 39 ms
91 * 6 128.32.197.4 (128.32.197.4) 59 ms 119 ms 39 ms
92 * 7 131.119.2.5 (131.119.2.5) 59 ms 59 ms 39 ms
93 * 8 129.140.70.13 (129.140.70.13) 80 ms 79 ms 99 ms
94 * 9 129.140.71.6 (129.140.71.6) 139 ms 139 ms 159 ms
95 * 10 129.140.81.7 (129.140.81.7) 199 ms 180 ms 300 ms
96 * 11 129.140.72.17 (129.140.72.17) 300 ms 239 ms 239 ms
97 * 12 * * *
98 * 13 128.121.54.72 (128.121.54.72) 259 ms 499 ms 279 ms
99 * 14 * * *
100 * 15 * * *
101 * 16 * * *
102 * 17 * * *
103 * 18 ALLSPICE.LCS.MIT.EDU (18.26.0.115) 339 ms 279 ms 279 ms
105 * (I start to see why I'm having so much trouble with mail to
106 * MIT.) Note that the gateways 12, 14, 15, 16 & 17 hops away
107 * either don't send ICMP "time exceeded" messages or send them
108 * with a ttl too small to reach us. 14 - 17 are running the
109 * MIT C Gateway code that doesn't send "time exceeded"s. God
110 * only knows what's going on with 12.
112 * The silent gateway 12 in the above may be the result of a bug in
113 * the 4.[23]BSD network code (and its derivatives): 4.x (x <= 3)
114 * sends an unreachable message using whatever ttl remains in the
115 * original datagram. Since, for gateways, the remaining ttl is
116 * zero, the icmp "time exceeded" is guaranteed to not make it back
117 * to us. The behavior of this bug is slightly more interesting
118 * when it appears on the destination system:
120 * 1 helios.ee.lbl.gov (128.3.112.1) 0 ms 0 ms 0 ms
121 * 2 lilac-dmc.Berkeley.EDU (128.32.216.1) 39 ms 19 ms 39 ms
122 * 3 lilac-dmc.Berkeley.EDU (128.32.216.1) 19 ms 39 ms 19 ms
123 * 4 ccngw-ner-cc.Berkeley.EDU (128.32.136.23) 39 ms 40 ms 19 ms
124 * 5 ccn-nerif35.Berkeley.EDU (128.32.168.35) 39 ms 39 ms 39 ms
125 * 6 csgw.Berkeley.EDU (128.32.133.254) 39 ms 59 ms 39 ms
126 * 7 * * *
127 * 8 * * *
128 * 9 * * *
129 * 10 * * *
130 * 11 * * *
131 * 12 * * *
132 * 13 rip.Berkeley.EDU (128.32.131.22) 59 ms ! 39 ms ! 39 ms !
134 * Notice that there are 12 "gateways" (13 is the final
135 * destination) and exactly the last half of them are "missing".
136 * What's really happening is that rip (a Sun-3 running Sun OS3.5)
137 * is using the ttl from our arriving datagram as the ttl in its
138 * icmp reply. So, the reply will time out on the return path
139 * (with no notice sent to anyone since icmp's aren't sent for
140 * icmp's) until we probe with a ttl that's at least twice the path
141 * length. I.e., rip is really only 7 hops away. A reply that
142 * returns with a ttl of 1 is a clue this problem exists.
143 * Traceroute prints a "!" after the time if the ttl is <= 1.
144 * Since vendors ship a lot of obsolete (DEC's Ultrix, Sun 3.x) or
145 * non-standard (HPUX) software, expect to see this problem
146 * frequently and/or take care picking the target host of your
147 * probes.
149 * Other possible annotations after the time are !H, !N, !P (got a host,
150 * network or protocol unreachable, respectively), !S or !F (source
151 * route failed or fragmentation needed -- neither of these should
152 * ever occur and the associated gateway is busted if you see one). If
153 * almost all the probes result in some kind of unreachable, traceroute
154 * will give up and exit.
156 * Notes
157 * -----
158 * This program must be run by root or be setuid. (I suggest that
159 * you *don't* make it setuid -- casual use could result in a lot
160 * of unnecessary traffic on our poor, congested nets.)
162 * This program requires a kernel mod that does not appear in any
163 * system available from Berkeley: A raw ip socket using proto
164 * IPPROTO_RAW must interpret the data sent as an ip datagram (as
165 * opposed to data to be wrapped in a ip datagram). See the README
166 * file that came with the source to this program for a description
167 * of the mods I made to /sys/netinet/raw_ip.c. Your mileage may
168 * vary. But, again, ANY 4.x (x < 4) BSD KERNEL WILL HAVE TO BE
169 * MODIFIED TO RUN THIS PROGRAM.
171 * The udp port usage may appear bizarre (well, ok, it is bizarre).
172 * The problem is that an icmp message only contains 8 bytes of
173 * data from the original datagram. 8 bytes is the size of a udp
174 * header so, if we want to associate replies with the original
175 * datagram, the necessary information must be encoded into the
176 * udp header (the ip id could be used but there's no way to
177 * interlock with the kernel's assignment of ip id's and, anyway,
178 * it would have taken a lot more kernel hacking to allow this
179 * code to set the ip id). So, to allow two or more users to
180 * use traceroute simultaneously, we use this task's pid as the
181 * source port (the high bit is set to move the port number out
182 * of the "likely" range). To keep track of which probe is being
183 * replied to (so times and/or hop counts don't get confused by a
184 * reply that was delayed in transit), we increment the destination
185 * port number before each probe.
187 * Don't use this as a coding example. I was trying to find a
188 * routing problem and this code sort-of popped out after 48 hours
189 * without sleep. I was amazed it ever compiled, much less ran.
191 * I stole the idea for this program from Steve Deering. Since
192 * the first release, I've learned that had I attended the right
193 * IETF working group meetings, I also could have stolen it from Guy
194 * Almes or Matt Mathis. I don't know (or care) who came up with
195 * the idea first. I envy the originators' perspicacity and I'm
196 * glad they didn't keep the idea a secret.
198 * Tim Seaver, Ken Adelman and C. Philip Wood provided bug fixes and/or
199 * enhancements to the original distribution.
201 * I've hacked up a round-trip-route version of this that works by
202 * sending a loose-source-routed udp datagram through the destination
203 * back to yourself. Unfortunately, SO many gateways botch source
204 * routing, the thing is almost worthless. Maybe one day...
206 * -- Van Jacobson (van@helios.ee.lbl.gov)
207 * Tue Dec 20 03:50:13 PST 1988
210 #include <sys/param.h>
211 #include <sys/time.h>
212 #include <sys/socket.h>
213 #include <sys/file.h>
214 #include <sys/ioctl.h>
215 #include <sys/sysctl.h>
217 #include <netinet/in_systm.h>
218 #include <netinet/in.h>
219 #include <netinet/ip.h>
220 #include <netinet/ip_icmp.h>
221 #include <netinet/ip_var.h>
222 #include <netinet/udp.h>
224 #include <arpa/inet.h>
226 #include <ctype.h>
227 #include <err.h>
228 #include <errno.h>
229 #include <netdb.h>
230 #include <stdio.h>
231 #include <stdlib.h>
232 #include <string.h>
233 #include <unistd.h>
235 #define MAX_LSRR ((MAX_IPOPTLEN - 4) / 4)
238 * Format of the data in a (udp) probe packet.
240 struct packetdata {
241 u_char seq; /* sequence number of this packet */
242 u_int8_t ttl; /* ttl packet left with */
243 u_int32_t sec; /* time packet left */
244 u_int32_t usec;
248 * Support for ICMP extensions - RFC4950.
250 #define ICMP_EXT_OFFSET 8 + 128 /* ICMP type, code, checksum (unused)
251 * + original datagram */
252 #define ICMP_EXT_VERSION 2
254 /* ICMP Extension Header according to RFC4884. */
255 #define EXT_VERSION(x) (((x) & 0xf0000000) >> 28)
256 #define EXT_CHECKSUM(x) ((x) & 0x0000ffff)
259 * ICMP extensions, object header
261 struct icmp_ext_obj_hdr {
262 u_short length;
263 u_char class_num;
264 #define MPLS_STACK_ENTRY_CLASS 1
265 u_char c_type;
266 #define MPLS_STACK_ENTRY_C_TYPE 1
269 /* MPLS Label Stack Object. */
270 #define MPLS_LABEL(x) (((x) & 0xfffff000) >> 12)
271 #define MPLS_EXP(x) (((x) & 0x00000e00) >> 9)
272 #define MPLS_STACK(x) (((x) & 0x00000100) >> 8)
273 #define MPLS_TTL(x) ((x) & 0x000000ff)
275 struct in_addr gateway[MAX_LSRR + 1];
276 int lsrrlen = 0;
277 int32_t sec_perturb;
278 int32_t usec_perturb;
280 u_char packet[512], *outpacket; /* last inbound (icmp) packet */
282 void decode_extensions(unsigned char *, int);
283 void dump_packet(void);
284 int wait_for_reply(int, struct sockaddr_in *, struct timeval *);
285 void send_probe(int, u_int8_t, int, struct sockaddr_in *);
286 int packet_ok(u_char *, int, struct sockaddr_in *, int, int);
287 const char *pr_type(u_int8_t);
288 void print(u_char *, int, struct sockaddr_in *);
289 char *inetname(struct in_addr);
290 u_short in_cksum(u_short *, int);
291 void usage(void);
293 int s; /* receive (icmp) socket file descriptor */
294 int sndsock; /* send (udp) socket file descriptor */
296 int datalen; /* How much data */
297 int headerlen; /* How long packet's header is */
299 char *source = 0;
300 char *hostname;
302 int nprobes = 3;
303 u_int8_t max_ttl = IPDEFTTL;
304 u_int8_t first_ttl = 1;
305 u_short ident;
306 u_short port = 32768+666; /* start udp dest port # for probe packets */
307 u_char proto = IPPROTO_UDP;
308 u_int8_t icmp_type = ICMP_ECHO; /* default ICMP code/type */
309 u_char icmp_code = 0;
310 int options; /* socket options */
311 int verbose;
312 int waittime = 5; /* time to wait for response (in seconds) */
313 int nflag; /* print addresses numerically */
314 int dump;
315 int Mflag; /* show MPLS labels if any */
318 main(int argc, char *argv[])
320 int mib[4] = { CTL_NET, PF_INET, IPPROTO_IP, IPCTL_DEFTTL };
321 int ttl_flag = 0, incflag = 1, protoset = 0, sump = 0;
322 int ch, i, lsrr = 0, on = 1, probe, seq = 0, tos = 0;
323 size_t size = sizeof(max_ttl);
324 struct sockaddr_in from, to;
325 struct hostent *hp;
326 u_int32_t tmprnd;
327 struct ip *ip;
328 u_int8_t ttl;
329 char *ep;
330 long l;
332 if ((s = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP)) < 0)
333 err(5, "icmp socket");
334 if ((sndsock = socket(AF_INET, SOCK_RAW, IPPROTO_RAW)) < 0)
335 err(5, "raw socket");
337 /* revoke privs */
338 seteuid(getuid());
339 setuid(getuid());
341 sysctl(mib, sizeof(mib)/sizeof(mib[0]), &max_ttl, &size, NULL, 0);
343 while ((ch = getopt(argc, argv, "SDIdg:f:m:np:q:rs:t:w:vlP:cM")) != -1)
344 switch (ch) {
345 case 'S':
346 sump = 1;
347 break;
348 case 'f':
349 errno = 0;
350 ep = NULL;
351 l = strtol(optarg, &ep, 10);
352 if (errno || !*optarg || *ep || l < 1 || l > max_ttl)
353 errx(1, "min ttl must be 1 to %u.", max_ttl);
354 first_ttl = (u_int8_t)l;
355 break;
356 case 'c':
357 incflag = 0;
358 break;
359 case 'd':
360 options |= SO_DEBUG;
361 break;
362 case 'D':
363 dump = 1;
364 break;
365 case 'g':
366 if (lsrr >= MAX_LSRR)
367 errx(1, "too many gateways; max %d", MAX_LSRR);
368 if (inet_aton(optarg, &gateway[lsrr]) == 0) {
369 hp = gethostbyname(optarg);
370 if (hp == 0)
371 errx(1, "unknown host %s", optarg);
372 memcpy(&gateway[lsrr], hp->h_addr, hp->h_length);
374 if (++lsrr == 1)
375 lsrrlen = 4;
376 lsrrlen += 4;
377 break;
378 case 'I':
379 if (protoset)
380 errx(1, "protocol already set with -P");
381 protoset = 1;
382 proto = IPPROTO_ICMP;
383 break;
384 case 'l':
385 ttl_flag++;
386 break;
387 case 'm':
388 errno = 0;
389 ep = NULL;
390 l = strtol(optarg, &ep, 10);
391 if (errno || !*optarg || *ep || l < first_ttl ||
392 l > MAXTTL)
393 errx(1, "max ttl must be %u to %u.", first_ttl,
394 MAXTTL);
395 max_ttl = (u_int8_t)l;
396 break;
397 case 'M':
398 Mflag = 1;
399 break;
400 case 'n':
401 nflag++;
402 break;
403 case 'p':
404 errno = 0;
405 ep = NULL;
406 l = strtol(optarg, &ep, 10);
407 if (errno || !*optarg || *ep || l <= 0 || l >= 65536)
408 errx(1, "port must be >0, <65536.");
409 port = (int)l;
410 break;
411 case 'P':
412 if (protoset)
413 errx(1, "protocol already set with -I");
414 protoset = 1;
415 errno = 0;
416 ep = NULL;
417 l = strtol(optarg, &ep, 10);
418 if (errno || !*optarg || *ep || l < 1 ||
419 l >= IPPROTO_MAX) {
420 struct protoent *pent;
422 pent = getprotobyname(optarg);
423 if (pent)
424 proto = pent->p_proto;
425 else
426 errx(1, "proto must be >=1, or a name.");
427 } else
428 proto = (int)l;
429 break;
430 case 'q':
431 errno = 0;
432 ep = NULL;
433 l = strtol(optarg, &ep, 10);
434 if (errno || !*optarg || *ep || l < 1 || l > INT_MAX)
435 errx(1, "nprobes must be >0.");
436 nprobes = (int)l;
437 break;
438 case 'r':
439 options |= SO_DONTROUTE;
440 break;
441 case 's':
443 * set the ip source address of the outbound
444 * probe (e.g., on a multi-homed host).
446 source = optarg;
447 break;
448 case 't':
449 errno = 0;
450 ep = NULL;
451 l = strtol(optarg, &ep, 10);
452 if (errno || !*optarg || *ep || l < 0 || l > 255)
453 errx(1, "tos must be 0 to 255.");
454 tos = (int)l;
455 break;
456 case 'v':
457 verbose++;
458 break;
459 case 'w':
460 errno = 0;
461 ep = NULL;
462 l = strtol(optarg, &ep, 10);
463 if (errno || !*optarg || *ep || l <= 1 || l > INT_MAX)
464 errx(1, "wait must be >1 sec.");
465 waittime = (int)l;
466 break;
467 default:
468 usage();
470 argc -= optind;
471 argv += optind;
473 if (argc < 1)
474 usage();
476 setlinebuf (stdout);
478 memset(&to, 0, sizeof(struct sockaddr));
479 to.sin_family = AF_INET;
480 if (inet_aton(*argv, &to.sin_addr) != 0)
481 hostname = *argv;
482 else {
483 hp = gethostbyname(*argv);
484 if (hp == 0)
485 errx(1, "unknown host %s", *argv);
486 to.sin_family = hp->h_addrtype;
487 memcpy(&to.sin_addr, hp->h_addr, hp->h_length);
488 if ((hostname = strdup(hp->h_name)) == NULL)
489 err(1, "malloc");
490 if (hp->h_addr_list[1] != NULL)
491 warnx("Warning: %s has multiple addresses; using %s",
492 hostname, inet_ntoa(to.sin_addr));
494 if (*++argv) {
495 errno = 0;
496 ep = NULL;
497 l = strtol(*argv, &ep, 10);
498 if (errno || !*argv || *ep || l < 0 || l > INT_MAX)
499 errx(1, "datalen out of range");
500 datalen = (int)l;
503 switch (proto) {
504 case IPPROTO_UDP:
505 headerlen = (sizeof(struct ip) + lsrrlen +
506 sizeof(struct udphdr) + sizeof(struct packetdata));
507 break;
508 case IPPROTO_ICMP:
509 headerlen = (sizeof(struct ip) + lsrrlen +
510 sizeof(struct icmp) + sizeof(struct packetdata));
511 break;
512 default:
513 headerlen = (sizeof(struct ip) + lsrrlen +
514 sizeof(struct packetdata));
517 if (datalen < 0 || datalen > IP_MAXPACKET - headerlen)
518 errx(1, "packet size must be 0 to %d.",
519 IP_MAXPACKET - headerlen);
521 datalen += headerlen;
523 outpacket = (u_char *)malloc(datalen);
524 if (outpacket == NULL)
525 err(1, "malloc");
526 memset(outpacket, 0, datalen);
528 ip = (struct ip *)outpacket;
529 if (lsrr != 0) {
530 u_char *p = (u_char *)(ip + 1);
532 *p++ = IPOPT_NOP;
533 *p++ = IPOPT_LSRR;
534 *p++ = lsrrlen - 1;
535 *p++ = IPOPT_MINOFF;
536 gateway[lsrr] = to.sin_addr;
537 for (i = 1; i <= lsrr; i++) {
538 memcpy(p, &gateway[i], sizeof(struct in_addr));
539 p += sizeof(struct in_addr);
541 ip->ip_dst = gateway[0];
542 } else
543 ip->ip_dst = to.sin_addr;
544 ip->ip_off = htons(0);
545 ip->ip_hl = (sizeof(struct ip) + lsrrlen) >> 2;
546 ip->ip_p = proto;
547 ip->ip_v = IPVERSION;
548 ip->ip_tos = tos;
550 ident = (getpid() & 0xffff) | 0x8000;
551 tmprnd = arc4random();
552 sec_perturb = (tmprnd & 0x80000000) ? -(tmprnd & 0x7ff) :
553 (tmprnd & 0x7ff);
554 usec_perturb = arc4random();
556 if (options & SO_DEBUG)
557 setsockopt(s, SOL_SOCKET, SO_DEBUG, (char *)&on, sizeof(on));
558 #ifdef SO_SNDBUF
559 if (setsockopt(sndsock, SOL_SOCKET, SO_SNDBUF, (char *)&datalen,
560 sizeof(datalen)) < 0)
561 err(6, "SO_SNDBUF");
562 #endif /* SO_SNDBUF */
563 #ifdef IP_HDRINCL
564 if (setsockopt(sndsock, IPPROTO_IP, IP_HDRINCL, (char *)&on,
565 sizeof(on)) < 0)
566 err(6, "IP_HDRINCL");
567 #endif /* IP_HDRINCL */
568 if (options & SO_DEBUG)
569 setsockopt(sndsock, SOL_SOCKET, SO_DEBUG,
570 (char *)&on, sizeof(on));
571 if (options & SO_DONTROUTE)
572 setsockopt(sndsock, SOL_SOCKET, SO_DONTROUTE,
573 (char *)&on, sizeof(on));
575 if (source) {
576 memset(&from, 0, sizeof(struct sockaddr));
577 from.sin_family = AF_INET;
578 if (inet_aton(source, &from.sin_addr) == 0)
579 errx(1, "unknown host %s", source);
580 ip->ip_src = from.sin_addr;
581 if (getuid() != 0 &&
582 (ntohl(from.sin_addr.s_addr) & 0xff000000U) == 0x7f000000U &&
583 (ntohl(to.sin_addr.s_addr) & 0xff000000U) != 0x7f000000U)
584 errx(1, "source is on 127/8, destination is not");
586 if (getuid() &&
587 bind(sndsock, (struct sockaddr *)&from, sizeof(from)) < 0)
588 err(1, "bind");
591 fprintf(stderr, "traceroute to %s (%s)", hostname,
592 inet_ntoa(to.sin_addr));
593 if (source)
594 fprintf(stderr, " from %s", source);
595 fprintf(stderr, ", %u hops max, %d byte packets\n", max_ttl, datalen);
596 fflush(stderr);
598 if (first_ttl > 1)
599 printf("Skipping %u intermediate hops\n", first_ttl - 1);
601 for (ttl = first_ttl; ttl <= max_ttl; ++ttl) {
602 int got_there = 0, unreachable = 0, timeout = 0, loss;
603 int gotlastaddr = 0;
604 in_addr_t lastaddr = 0;
605 quad_t dt;
607 printf("%2u ", ttl);
608 for (probe = 0, loss = 0; probe < nprobes; ++probe) {
609 int cc;
610 struct timeval t1, t2;
611 int code;
613 gettimeofday(&t1, NULL);
614 send_probe(++seq, ttl, incflag, &to);
615 while ((cc = wait_for_reply(s, &from, &t1))) {
616 gettimeofday(&t2, NULL);
617 if (t2.tv_sec - t1.tv_sec > waittime) {
618 cc = 0;
619 break;
621 i = packet_ok(packet, cc, &from, seq, incflag);
622 /* Skip short packet */
623 if (i == 0)
624 continue;
625 if (!gotlastaddr ||
626 from.sin_addr.s_addr != lastaddr) {
627 if (gotlastaddr)
628 printf("\n ");
629 print(packet, cc, &from);
630 lastaddr = from.sin_addr.s_addr;
631 ++gotlastaddr;
633 dt = (quad_t)(t2.tv_sec - t1.tv_sec) * 1000000 +
634 (quad_t)(t2.tv_usec - t1.tv_usec);
635 printf(" %u", (u_int)(dt / 1000));
636 if (dt % 1000)
637 printf(".%u", (u_int)(dt % 1000));
638 printf(" ms");
639 ip = (struct ip *)packet;
640 if (ttl_flag)
641 printf(" (%u)", ip->ip_ttl);
642 if (i == -2) {
643 #ifndef ARCHAIC
644 ip = (struct ip *)packet;
645 if (ip->ip_ttl <= 1)
646 printf(" !");
647 #endif
648 ++got_there;
649 break;
651 /* time exceeded in transit */
652 if (i == -1)
653 break;
654 code = i - 1;
655 switch (code) {
656 case ICMP_UNREACH_PORT:
657 #ifndef ARCHAIC
658 ip = (struct ip *)packet;
659 if (ip->ip_ttl <= 1)
660 printf(" !");
661 #endif /* ARCHAIC */
662 ++got_there;
663 break;
664 case ICMP_UNREACH_NET:
665 ++unreachable;
666 printf(" !N");
667 break;
668 case ICMP_UNREACH_HOST:
669 ++unreachable;
670 printf(" !H");
671 break;
672 case ICMP_UNREACH_PROTOCOL:
673 ++got_there;
674 printf(" !P");
675 break;
676 case ICMP_UNREACH_NEEDFRAG:
677 ++unreachable;
678 printf(" !F");
679 break;
680 case ICMP_UNREACH_SRCFAIL:
681 ++unreachable;
682 printf(" !S");
683 break;
684 case ICMP_UNREACH_FILTER_PROHIB:
685 ++unreachable;
686 printf(" !X");
687 break;
688 case ICMP_UNREACH_NET_PROHIB: /*misuse*/
689 ++unreachable;
690 printf(" !A");
691 break;
692 case ICMP_UNREACH_HOST_PROHIB:
693 ++unreachable;
694 printf(" !C");
695 break;
696 case ICMP_UNREACH_NET_UNKNOWN:
697 case ICMP_UNREACH_HOST_UNKNOWN:
698 ++unreachable;
699 printf(" !U");
700 break;
701 case ICMP_UNREACH_ISOLATED:
702 ++unreachable;
703 printf(" !I");
704 break;
705 case ICMP_UNREACH_TOSNET:
706 case ICMP_UNREACH_TOSHOST:
707 ++unreachable;
708 printf(" !T");
709 break;
710 default:
711 ++unreachable;
712 printf(" !<%d>", i - 1);
713 break;
715 break;
717 if (cc == 0) {
718 printf(" *");
719 timeout++;
720 loss++;
722 else if (cc && probe == nprobes - 1 && Mflag)
723 decode_extensions(packet, cc);
724 fflush(stdout);
726 if (sump)
727 printf(" (%d%% loss)", (loss * 100) / nprobes);
728 putchar('\n');
729 if (got_there || (unreachable && (unreachable + timeout) >= nprobes))
730 break;
732 exit(0);
736 wait_for_reply(int sock, struct sockaddr_in *from, struct timeval *sent)
738 socklen_t fromlen = sizeof (*from);
739 struct timeval now, wait;
740 int cc = 0, fdsn;
741 fd_set *fdsp;
743 fdsn = howmany(sock+1, NFDBITS) * sizeof(fd_mask);
744 if ((fdsp = (fd_set *)malloc(fdsn)) == NULL)
745 err(1, "malloc");
746 memset(fdsp, 0, fdsn);
747 FD_SET(sock, fdsp);
748 gettimeofday(&now, NULL);
749 wait.tv_sec = (sent->tv_sec + waittime) - now.tv_sec;
750 wait.tv_usec = sent->tv_usec - now.tv_usec;
751 if (wait.tv_usec < 0) {
752 wait.tv_usec += 1000000;
753 wait.tv_sec--;
755 if (wait.tv_sec < 0)
756 wait.tv_sec = wait.tv_usec = 0;
758 if (select(sock+1, fdsp, NULL, NULL, &wait) > 0)
759 cc = recvfrom(s, (char *)packet, sizeof(packet), 0,
760 (struct sockaddr *)from, &fromlen);
762 free(fdsp);
763 return (cc);
766 void
767 decode_extensions(unsigned char *buf, int ip_len)
769 uint32_t *cmn_hdr;
770 struct icmp_ext_obj_hdr *obj_hdr;
771 uint32_t mpls_hdr;
772 int data_len, obj_len;
773 struct ip *ip;
775 ip = (struct ip *)buf;
777 if (ip_len <= (int)(sizeof(struct ip) + ICMP_EXT_OFFSET)) {
779 * No support for ICMP extensions on this host
781 return;
785 * Move forward to the start of the ICMP extensions, if present
787 buf += (ip->ip_hl << 2) + ICMP_EXT_OFFSET;
788 cmn_hdr = (uint32_t *)buf;
790 if (EXT_VERSION(ntohl(*cmn_hdr)) != ICMP_EXT_VERSION) {
792 * Unknown version
794 return;
797 data_len = ip_len - ((u_char *)cmn_hdr - (u_char *)ip);
800 * Check the checksum, cmn_hdr->checksum == 0 means no checksum'ing
801 * done by sender.
803 * If the checksum is ok, we'll get 0, as the checksum is calculated
804 * with the checksum field being 0'd.
806 if (EXT_CHECKSUM(ntohl(*cmn_hdr)) &&
807 in_cksum((u_short *)cmn_hdr, data_len)) {
808 return;
811 buf += sizeof(*cmn_hdr);
812 data_len -= sizeof(*cmn_hdr);
814 while (data_len >= (int)sizeof(struct icmp_ext_obj_hdr)) {
815 unsigned char *nextbuf;
817 obj_hdr = (struct icmp_ext_obj_hdr *)buf;
818 obj_len = ntohs(obj_hdr->length);
821 * Sanity check the length field
823 if (obj_len < (int)sizeof(*obj_hdr) || obj_len > data_len)
824 return;
826 /* Object has to be 4-byte aligned. */
827 if (obj_len & 3)
828 return;
830 nextbuf = buf + obj_len;
831 data_len -= obj_len;
834 * Move past the object header
836 buf += sizeof(struct icmp_ext_obj_hdr);
837 obj_len -= sizeof(struct icmp_ext_obj_hdr);
839 switch (obj_hdr->class_num) {
840 case MPLS_STACK_ENTRY_CLASS:
841 switch (obj_hdr->c_type) {
842 case MPLS_STACK_ENTRY_C_TYPE:
843 while (obj_len >= (int)sizeof(uint32_t)) {
844 mpls_hdr = ntohl(*(uint32_t *)buf);
846 buf += sizeof(uint32_t);
847 obj_len -= sizeof(uint32_t);
848 printf(" [MPLS: Label %d Exp %d]",
849 MPLS_LABEL(mpls_hdr),
850 MPLS_EXP(mpls_hdr));
852 if (obj_len > 0) {
854 * Something went wrong, and we're at
855 * a unknown offset into the packet,
856 * ditch the rest of it.
858 return;
860 break;
861 default:
863 * Unknown object, skip past it
865 buf = nextbuf;
866 break;
868 break;
870 default:
872 * Unknown object, skip past it
874 buf = nextbuf;
875 break;
880 void
881 dump_packet(void)
883 u_char *p;
884 int i;
886 fprintf(stderr, "packet data:");
887 for (p = outpacket, i = 0; i < datalen; i++) {
888 if ((i % 24) == 0)
889 fprintf(stderr, "\n ");
890 fprintf(stderr, " %02x", *p++);
892 fprintf(stderr, "\n");
895 void
896 send_probe(int seq, u_int8_t ttl, int iflag, struct sockaddr_in *to)
898 struct ip *ip = (struct ip *)outpacket;
899 u_char *p = (u_char *)(ip + 1);
900 struct udphdr *up = (struct udphdr *)(p + lsrrlen);
901 struct icmp *icmpp = (struct icmp *)(p + lsrrlen);
902 struct packetdata *op;
903 struct timeval tv;
904 int i;
906 ip->ip_len = datalen;
907 ip->ip_ttl = ttl;
908 ip->ip_id = htons(ident+seq);
910 switch (proto) {
911 case IPPROTO_ICMP:
912 icmpp->icmp_type = icmp_type;
913 icmpp->icmp_code = icmp_code;
914 icmpp->icmp_seq = htons(seq);
915 icmpp->icmp_id = htons(ident);
916 op = (struct packetdata *)(icmpp + 1);
917 break;
918 case IPPROTO_UDP:
919 up->uh_sport = htons(ident);
920 if (iflag)
921 up->uh_dport = htons(port+seq);
922 else
923 up->uh_dport = htons(port);
924 up->uh_ulen = htons((u_short)(datalen - sizeof(struct ip) -
925 lsrrlen));
926 up->uh_sum = 0;
927 op = (struct packetdata *)(up + 1);
928 break;
929 default:
930 op = (struct packetdata *)(ip + 1);
931 break;
933 op->seq = seq;
934 op->ttl = ttl;
937 * We don't want hostiles snooping the net to get any useful
938 * information about us. Send the timestamp in network byte order,
939 * and perturb the timestamp enough that they won't know our
940 * real clock ticker. We don't want to perturb the time by too
941 * much: being off by a suspiciously large amount might indicate
942 * OpenBSD.
944 * The timestamps in the packet are currently unused. If future
945 * work wants to use them they will have to subtract out the
946 * perturbation first.
948 gettimeofday(&tv, NULL);
949 op->sec = htonl(tv.tv_sec + sec_perturb);
950 op->usec = htonl((tv.tv_usec + usec_perturb) % 1000000);
952 if (proto == IPPROTO_ICMP && icmp_type == ICMP_ECHO) {
953 icmpp->icmp_cksum = 0;
954 icmpp->icmp_cksum = in_cksum((u_short *)icmpp,
955 datalen - sizeof(struct ip) - lsrrlen);
956 if (icmpp->icmp_cksum == 0)
957 icmpp->icmp_cksum = 0xffff;
960 if (dump)
961 dump_packet();
963 i = sendto(sndsock, outpacket, datalen, 0, (struct sockaddr *)to,
964 sizeof(struct sockaddr_in));
965 if (i < 0 || i != datalen) {
966 if (i < 0)
967 perror("sendto");
968 printf("traceroute: wrote %s %d chars, ret=%d\n", hostname,
969 datalen, i);
970 fflush(stdout);
974 static const char *ttab[] = {
975 "Echo Reply",
976 "ICMP 1",
977 "ICMP 2",
978 "Dest Unreachable",
979 "Source Quench",
980 "Redirect",
981 "ICMP 6",
982 "ICMP 7",
983 "Echo",
984 "Router Advert",
985 "Router Solicit",
986 "Time Exceeded",
987 "Param Problem",
988 "Timestamp",
989 "Timestamp Reply",
990 "Info Request",
991 "Info Reply",
992 "Mask Request",
993 "Mask Reply"
997 * Convert an ICMP "type" field to a printable string.
999 const char *
1000 pr_type(u_int8_t t)
1002 if (t > 18)
1003 return ("OUT-OF-RANGE");
1004 return (ttab[t]);
1008 packet_ok(u_char *buf, int cc, struct sockaddr_in *from, int seq, int iflag)
1010 struct icmp *icp;
1011 u_char code;
1012 u_int8_t type;
1013 int hlen;
1014 #ifndef ARCHAIC
1015 struct ip *ip;
1017 ip = (struct ip *) buf;
1018 hlen = ip->ip_hl << 2;
1019 if (cc < hlen + ICMP_MINLEN) {
1020 if (verbose)
1021 printf("packet too short (%d bytes) from %s\n", cc,
1022 inet_ntoa(from->sin_addr));
1023 return (0);
1025 cc -= hlen;
1026 icp = (struct icmp *)(buf + hlen);
1027 #else
1028 icp = (struct icmp *)buf;
1029 #endif /* ARCHAIC */
1030 type = icp->icmp_type;
1031 code = icp->icmp_code;
1032 if ((type == ICMP_TIMXCEED && code == ICMP_TIMXCEED_INTRANS) ||
1033 type == ICMP_UNREACH || type == ICMP_ECHOREPLY) {
1034 struct ip *hip;
1035 struct udphdr *up;
1036 struct icmp *icmpp;
1038 hip = &icp->icmp_ip;
1039 hlen = hip->ip_hl << 2;
1041 switch (proto) {
1042 case IPPROTO_ICMP:
1043 if (icmp_type == ICMP_ECHO &&
1044 type == ICMP_ECHOREPLY &&
1045 icp->icmp_id == htons(ident) &&
1046 icp->icmp_seq == htons(seq))
1047 return (-2); /* we got there */
1049 icmpp = (struct icmp *)((u_char *)hip + hlen);
1050 if (hlen + 8 <= cc && hip->ip_p == IPPROTO_ICMP &&
1051 icmpp->icmp_id == htons(ident) &&
1052 icmpp->icmp_seq == htons(seq))
1053 return (type == ICMP_TIMXCEED? -1 : code + 1);
1054 break;
1056 case IPPROTO_UDP:
1057 up = (struct udphdr *)((u_char *)hip + hlen);
1058 if (hlen + 12 <= cc && hip->ip_p == proto &&
1059 up->uh_sport == htons(ident) &&
1060 ((iflag && up->uh_dport == htons(port + seq)) ||
1061 (!iflag && up->uh_dport == htons(port))))
1062 return (type == ICMP_TIMXCEED? -1 : code + 1);
1063 break;
1064 default:
1065 /* this is some odd, user specified proto,
1066 * how do we check it?
1068 if (hip->ip_p == proto)
1069 return (type == ICMP_TIMXCEED? -1 : code + 1);
1072 #ifndef ARCHAIC
1073 if (verbose) {
1074 int i;
1075 in_addr_t *lp = (in_addr_t *)&icp->icmp_ip;
1077 printf("\n%d bytes from %s", cc, inet_ntoa(from->sin_addr));
1078 printf(" to %s", inet_ntoa(ip->ip_dst));
1079 printf(": icmp type %u (%s) code %d\n", type, pr_type(type),
1080 icp->icmp_code);
1081 for (i = 4; i < cc ; i += sizeof(in_addr_t))
1082 printf("%2d: x%8.8lx\n", i, (unsigned long)*lp++);
1084 #endif /* ARCHAIC */
1085 return (0);
1088 void
1089 print(u_char *buf, int cc, struct sockaddr_in *from)
1091 struct ip *ip;
1092 int hlen;
1094 ip = (struct ip *) buf;
1095 hlen = ip->ip_hl << 2;
1096 cc -= hlen;
1098 if (nflag)
1099 printf(" %s", inet_ntoa(from->sin_addr));
1100 else
1101 printf(" %s (%s)", inetname(from->sin_addr),
1102 inet_ntoa(from->sin_addr));
1104 if (verbose)
1105 printf(" %d bytes to %s", cc, inet_ntoa (ip->ip_dst));
1110 * Checksum routine for Internet Protocol family headers (C Version)
1112 u_short
1113 in_cksum(u_short *addr, int len)
1115 u_short *w = addr, answer;
1116 int nleft = len, sum = 0;
1119 * Our algorithm is simple, using a 32 bit accumulator (sum),
1120 * we add sequential 16 bit words to it, and at the end, fold
1121 * back all the carry bits from the top 16 bits into the lower
1122 * 16 bits.
1124 while (nleft > 1) {
1125 sum += *w++;
1126 nleft -= 2;
1129 /* mop up an odd byte, if necessary */
1130 if (nleft == 1)
1131 sum += *(u_char *)w;
1134 * add back carry outs from top 16 bits to low 16 bits
1136 sum = (sum >> 16) + (sum & 0xffff); /* add hi 16 to low 16 */
1137 sum += (sum >> 16); /* add carry */
1138 answer = ~sum; /* truncate to 16 bits */
1139 return (answer);
1143 * Construct an Internet address representation.
1144 * If the nflag has been supplied, give
1145 * numeric value, otherwise try for symbolic name.
1147 char *
1148 inetname(struct in_addr in)
1150 static char domain[MAXHOSTNAMELEN], line[MAXHOSTNAMELEN];
1151 static int first = 1;
1152 struct hostent *hp;
1153 char *cp;
1155 if (first && !nflag) {
1156 first = 0;
1157 if (gethostname(domain, sizeof domain) == 0 &&
1158 (cp = strchr(domain, '.')) != NULL) {
1159 strlcpy(domain, cp + 1, sizeof(domain));
1162 if (!nflag && in.s_addr != INADDR_ANY) {
1163 hp = gethostbyaddr(&in, sizeof(in), AF_INET);
1164 if (hp != NULL) {
1165 if ((cp = strchr(hp->h_name, '.')) != NULL &&
1166 strcmp(cp + 1, domain) == 0)
1167 *cp = '\0';
1168 strlcpy(line, hp->h_name, sizeof(line));
1169 return (line);
1172 return (inet_ntoa(in));
1175 void
1176 usage(void)
1178 fprintf(stderr,
1179 "usage: %s [-cdDIlMnrSv] [-f first_ttl] [-g gateway_addr] [-m max_ttl]\n"
1180 "\t[-p port] [-P proto] [-q nqueries] [-s src_addr] [-t tos]\n"
1181 "\t[-w waittime] host [packetsize]\n", getprogname());
1182 exit(1);