ctdb-common: Fix aliasing issue in IPv6 checksum
[Samba.git] / ctdb / common / system_socket.c
blob75286795d775c940418487bd3556dcaf7b093d10
/*
   ctdb system specific code to manage raw sockets on linux

   Copyright (C) Ronnie Sahlberg  2007
   Copyright (C) Andrew Tridgell  2007

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
21 #include "replace.h"
/*
 * Use BSD struct tcphdr field names for portability.  Modern glibc
 * makes them available by default via <netinet/tcp.h> but older glibc
 * requires __FAVOR_BSD to be defined.
 *
 * __FAVOR_BSD is normally defined in <features.h> if _DEFAULT_SOURCE
 * (new) or _BSD_SOURCE (now deprecated) is set and _GNU_SOURCE is not
 * set.  Including "replace.h" above causes <features.h> to be
 * indirectly included and this will not set __FAVOR_BSD because
 * _GNU_SOURCE is set in Samba's "config.h" (which is included by
 * "replace.h").
 *
 * Therefore, set __FAVOR_BSD by hand below.
 */
37 #define __FAVOR_BSD 1
38 #include "system/network.h"
40 #ifdef HAVE_NETINET_IF_ETHER_H
41 #include <netinet/if_ether.h>
42 #endif
43 #ifdef HAVE_NETINET_IP6_H
44 #include <netinet/ip6.h>
45 #endif
46 #ifdef HAVE_NETINET_ICMP6_H
47 #include <netinet/icmp6.h>
48 #endif
49 #ifdef HAVE_LINUX_IF_PACKET_H
50 #include <linux/if_packet.h>
51 #endif
53 #ifndef ETHERTYPE_IP6
54 #define ETHERTYPE_IP6 0x86dd
55 #endif
57 #include "lib/util/debug.h"
58 #include "lib/util/blocking.h"
60 #include "protocol/protocol.h"
62 #include "common/logging.h"
63 #include "common/system_socket.h"
/*
 * uint16 checksum for n bytes
 *
 * Sums the n bytes at data as a sequence of 16-bit words in network
 * (big-endian) byte order and returns the un-folded 32-bit total in
 * host byte order.  If n is odd, the final byte is treated as the
 * high-order byte of a last word padded with a zero byte.
 *
 * The buffer is read one byte at a time, so the result does not depend
 * on host endianness and the function makes no alignment or aliasing
 * assumptions about what data really points at.
 */
static uint32_t uint16_checksum(uint16_t *data, size_t n)
{
	const uint8_t *p = (const uint8_t *)data;
	uint32_t sum = 0;

	while (n >= 2) {
		/* Network order: first byte is the most significant */
		sum += ((uint32_t)p[0] << 8) | (uint32_t)p[1];
		p += 2;
		n -= 2;
	}

	if (n == 1) {
		/* Odd trailing byte: pad with a zero low-order byte */
		sum += (uint32_t)p[0] << 8;
	}

	return sum;
}
83 * See if the given IP is currently on an interface
85 bool ctdb_sys_have_ip(ctdb_sock_addr *_addr)
87 int s;
88 int ret;
89 ctdb_sock_addr __addr = *_addr;
90 ctdb_sock_addr *addr = &__addr;
91 socklen_t addrlen = 0;
93 switch (addr->sa.sa_family) {
94 case AF_INET:
95 addr->ip.sin_port = 0;
96 addrlen = sizeof(struct sockaddr_in);
97 break;
98 case AF_INET6:
99 addr->ip6.sin6_port = 0;
100 addrlen = sizeof(struct sockaddr_in6);
101 break;
104 s = socket(addr->sa.sa_family, SOCK_STREAM, IPPROTO_TCP);
105 if (s == -1) {
106 return false;
109 ret = bind(s, (struct sockaddr *)addr, addrlen);
111 close(s);
112 return ret == 0;
/*
 * Fill in *sin as an AF_INET socket address from the textual IPv4
 * address s and the port (given in host byte order).
 *
 * Returns true on success.  Returns false, after logging an error, if
 * inet_pton() does not accept s.
 */
static bool parse_ipv4(const char *s, unsigned port, struct sockaddr_in *sin)
{
	int rc;

	sin->sin_family = AF_INET;
	sin->sin_port = htons(port);

	rc = inet_pton(AF_INET, s, &sin->sin_addr);
	if (rc != 1) {
		DBG_ERR("Failed to translate %s into sin_addr\n", s);
		return false;
	}

#ifdef HAVE_SOCK_SIN_LEN
	sin->sin_len = sizeof(*sin);
#endif

	return true;
}
131 static bool parse_ipv6(const char *s,
132 const char *ifaces,
133 unsigned port,
134 ctdb_sock_addr *saddr)
136 saddr->ip6.sin6_family = AF_INET6;
137 saddr->ip6.sin6_port = htons(port);
138 saddr->ip6.sin6_flowinfo = 0;
139 saddr->ip6.sin6_scope_id = 0;
141 if (inet_pton(AF_INET6, s, &saddr->ip6.sin6_addr) != 1) {
142 DBG_ERR("Failed to translate %s into sin6_addr\n", s);
143 return false;
146 if (ifaces && IN6_IS_ADDR_LINKLOCAL(&saddr->ip6.sin6_addr)) {
147 if (strchr(ifaces, ',')) {
148 DBG_ERR("Link local address %s "
149 "is specified for multiple ifaces %s\n",
150 s, ifaces);
151 return false;
153 saddr->ip6.sin6_scope_id = if_nametoindex(ifaces);
156 #ifdef HAVE_SOCK_SIN6_LEN
157 saddr->ip6.sin6_len = sizeof(*saddr);
158 #endif
159 return true;
162 static bool parse_ip(const char *addr,
163 const char *ifaces,
164 unsigned port,
165 ctdb_sock_addr *saddr)
167 char *p;
168 bool ret;
170 ZERO_STRUCTP(saddr); /* valgrind :-) */
173 * IPv4 or IPv6 address?
175 * Use rindex() because we need the right-most ':' below for
176 * IPv4-mapped IPv6 addresses anyway...
178 p = rindex(addr, ':');
179 if (p == NULL) {
180 ret = parse_ipv4(addr, port, &saddr->ip);
181 } else {
182 uint8_t ipv4_mapped_prefix[12] = {
183 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff
186 ret = parse_ipv6(addr, ifaces, port, saddr);
187 if (! ret) {
188 return ret;
192 * Check for IPv4-mapped IPv6 address
193 * (e.g. ::ffff:192.0.2.128) - reparse as IPv4 if
194 * necessary
196 if (memcmp(&saddr->ip6.sin6_addr.s6_addr[0],
197 ipv4_mapped_prefix,
198 sizeof(ipv4_mapped_prefix)) == 0) {
199 /* Reparse as IPv4 */
200 ret = parse_ipv4(p+1, port, &saddr->ip);
204 return ret;
208 * Parse an ip/mask pair
210 bool parse_ip_mask(const char *str,
211 const char *ifaces,
212 ctdb_sock_addr *addr,
213 unsigned *mask)
215 char *p;
216 char s[64]; /* Much longer than INET6_ADDRSTRLEN */
217 char *endp = NULL;
218 ssize_t len;
219 bool ret;
221 ZERO_STRUCT(*addr);
223 len = strlcpy(s, str, sizeof(s));
224 if (len >= sizeof(s)) {
225 DBG_ERR("Address %s is unreasonably long\n", str);
226 return false;
229 p = rindex(s, '/');
230 if (p == NULL) {
231 DBG_ERR("Address %s does not contain a mask\n", s);
232 return false;
235 *mask = strtoul(p+1, &endp, 10);
236 if (endp == NULL || *endp != 0) {
237 /* trailing garbage */
238 DBG_ERR("Trailing garbage after the mask in %s\n", s);
239 return false;
241 *p = 0;
244 /* now is this a ipv4 or ipv6 address ?*/
245 ret = parse_ip(s, ifaces, 0, addr);
247 return ret;
251 * simple TCP checksum - assumes data is multiple of 2 bytes long
253 static uint16_t ip_checksum(uint16_t *data, size_t n, struct ip *ip)
255 uint32_t sum = uint16_checksum(data, n);
256 uint16_t sum2;
258 sum += uint16_checksum((uint16_t *)&ip->ip_src, sizeof(ip->ip_src));
259 sum += uint16_checksum((uint16_t *)&ip->ip_dst, sizeof(ip->ip_dst));
260 sum += ip->ip_p + n;
261 sum = (sum & 0xFFFF) + (sum >> 16);
262 sum = (sum & 0xFFFF) + (sum >> 16);
263 sum2 = htons(sum);
264 sum2 = ~sum2;
265 if (sum2 == 0) {
266 return 0xFFFF;
268 return sum2;
/*
 * Checksum over the n bytes at data plus the IPv6 pseudo-header values
 * taken from *ip6: source address, destination address, length n and
 * next-header value.  The result is returned in network byte order; a
 * computed value of 0 is returned as 0xFFFF.
 */
static uint16_t ip6_checksum(uint16_t *data, size_t n, struct ip6_hdr *ip6)
{
	uint16_t pseudo[3];
	uint32_t total;
	uint32_t len_be;
	uint16_t result;

	/* Source and destination addresses, 16 bytes each */
	total = uint16_checksum((uint16_t *)(void *)&ip6->ip6_src, 16);
	total += uint16_checksum((uint16_t *)(void *)&ip6->ip6_dst, 16);

	/* Length as two 16-bit halves, followed by the next header */
	len_be = htonl(n);
	pseudo[0] = len_be & UINT16_MAX;
	pseudo[1] = (len_be >> 16) & UINT16_MAX;
	/* ip6_nxt is only 8 bits, so fits comfortably into a uint16_t */
	pseudo[2] = htons(ip6->ip6_nxt);
	total += uint16_checksum(pseudo, sizeof(pseudo));

	/* The payload itself */
	total += uint16_checksum(data, n);

	/* Fold the carries back in, twice in case the first fold carries */
	total = (total & 0xFFFF) + (total >> 16);
	total = (total & 0xFFFF) + (total >> 16);

	result = htons(total);
	result = ~result;

	return (result == 0) ? 0xFFFF : result;
}
301 * Send gratuitous ARP request/reply or IPv6 neighbor advertisement
304 #ifdef HAVE_PACKETSOCKET
306 int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
308 int s, ret;
309 struct sockaddr_ll sall;
310 struct ether_header *eh;
311 struct arphdr *ah;
312 struct ip6_hdr *ip6;
313 struct nd_neighbor_advert *nd_na;
314 struct nd_opt_hdr *nd_oh;
315 struct ifreq if_hwaddr;
316 /* Size of IPv6 neighbor advertisement (with option) */
317 unsigned char buffer[sizeof(struct ether_header) +
318 sizeof(struct ip6_hdr) +
319 sizeof(struct nd_neighbor_advert) +
320 sizeof(struct nd_opt_hdr) + ETH_ALEN];
321 char *ptr;
322 char bdcast[] = {0xff,0xff,0xff,0xff,0xff,0xff};
323 struct ifreq ifr;
325 ZERO_STRUCT(sall);
326 ZERO_STRUCT(ifr);
327 ZERO_STRUCT(if_hwaddr);
329 switch (addr->ip.sin_family) {
330 case AF_INET:
331 s = socket(AF_PACKET, SOCK_RAW, 0);
332 if (s == -1){
333 DBG_ERR("Failed to open raw socket\n");
334 return -1;
337 DBG_DEBUG("Created SOCKET FD:%d for sending arp\n", s);
338 strlcpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
339 if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
340 DBG_ERR("Interface '%s' not found\n", iface);
341 close(s);
342 return -1;
345 /* get the mac address */
346 strlcpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name));
347 ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
348 if ( ret < 0 ) {
349 close(s);
350 DBG_ERR("ioctl failed\n");
351 return -1;
353 if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) {
354 D_DEBUG("Ignoring loopback arp request\n");
355 close(s);
356 return 0;
358 if (if_hwaddr.ifr_hwaddr.sa_family != ARPHRD_ETHER) {
359 close(s);
360 errno = EINVAL;
361 DBG_ERR("Not an ethernet address family (0x%x)\n",
362 if_hwaddr.ifr_hwaddr.sa_family);
363 return -1;
367 memset(buffer, 0 , 64);
368 eh = (struct ether_header *)buffer;
369 memset(eh->ether_dhost, 0xff, ETH_ALEN);
370 memcpy(eh->ether_shost, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
371 eh->ether_type = htons(ETHERTYPE_ARP);
373 ah = (struct arphdr *)&buffer[sizeof(struct ether_header)];
374 ah->ar_hrd = htons(ARPHRD_ETHER);
375 ah->ar_pro = htons(ETH_P_IP);
376 ah->ar_hln = ETH_ALEN;
377 ah->ar_pln = 4;
379 /* send a gratious arp */
380 ah->ar_op = htons(ARPOP_REQUEST);
381 ptr = (char *)&ah[1];
382 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
383 ptr+=ETH_ALEN;
384 memcpy(ptr, &addr->ip.sin_addr, 4);
385 ptr+=4;
386 memset(ptr, 0, ETH_ALEN);
387 ptr+=ETH_ALEN;
388 memcpy(ptr, &addr->ip.sin_addr, 4);
389 ptr+=4;
391 sall.sll_family = AF_PACKET;
392 sall.sll_halen = 6;
393 memcpy(&sall.sll_addr[0], bdcast, sall.sll_halen);
394 sall.sll_protocol = htons(ETH_P_ALL);
395 sall.sll_ifindex = ifr.ifr_ifindex;
396 ret = sendto(s,buffer, 64, 0,
397 (struct sockaddr *)&sall, sizeof(sall));
398 if (ret < 0 ){
399 close(s);
400 DBG_ERR("Failed sendto\n");
401 return -1;
404 /* send unsolicited arp reply broadcast */
405 ah->ar_op = htons(ARPOP_REPLY);
406 ptr = (char *)&ah[1];
407 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
408 ptr+=ETH_ALEN;
409 memcpy(ptr, &addr->ip.sin_addr, 4);
410 ptr+=4;
411 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
412 ptr+=ETH_ALEN;
413 memcpy(ptr, &addr->ip.sin_addr, 4);
414 ptr+=4;
416 ret = sendto(s, buffer, 64, 0,
417 (struct sockaddr *)&sall, sizeof(sall));
418 if (ret < 0 ){
419 DBG_ERR("Failed sendto\n");
420 close(s);
421 return -1;
424 close(s);
425 break;
426 case AF_INET6:
427 s = socket(AF_PACKET, SOCK_RAW, 0);
428 if (s == -1){
429 DBG_ERR("Failed to open raw socket\n");
430 return -1;
433 DBG_DEBUG("Created SOCKET FD:%d for sending arp\n", s);
434 strlcpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
435 if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
436 DBG_ERR("Interface '%s' not found\n", iface);
437 close(s);
438 return -1;
441 /* get the mac address */
442 strlcpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name));
443 ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
444 if ( ret < 0 ) {
445 close(s);
446 DBG_ERR("ioctl failed\n");
447 return -1;
449 if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) {
450 DBG_DEBUG("Ignoring loopback arp request\n");
451 close(s);
452 return 0;
454 if (if_hwaddr.ifr_hwaddr.sa_family != ARPHRD_ETHER) {
455 close(s);
456 errno = EINVAL;
457 DBG_ERR("Not an ethernet address family (0x%x)\n",
458 if_hwaddr.ifr_hwaddr.sa_family);
459 return -1;
462 memset(buffer, 0 , sizeof(buffer));
463 eh = (struct ether_header *)buffer;
465 * Ethernet multicast: 33:33:00:00:00:01 (see RFC2464,
466 * section 7) - note zeroes above!
468 eh->ether_dhost[0] = eh->ether_dhost[1] = 0x33;
469 eh->ether_dhost[5] = 0x01;
470 memcpy(eh->ether_shost, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
471 eh->ether_type = htons(ETHERTYPE_IP6);
473 ip6 = (struct ip6_hdr *)(eh+1);
474 ip6->ip6_vfc = 0x60;
475 ip6->ip6_plen = htons(sizeof(*nd_na) +
476 sizeof(struct nd_opt_hdr) +
477 ETH_ALEN);
478 ip6->ip6_nxt = IPPROTO_ICMPV6;
479 ip6->ip6_hlim = 255;
480 ip6->ip6_src = addr->ip6.sin6_addr;
481 /* all-nodes multicast */
483 ret = inet_pton(AF_INET6, "ff02::1", &ip6->ip6_dst);
484 if (ret != 1) {
485 close(s);
486 DBG_ERR("Failed inet_pton\n");
487 return -1;
490 nd_na = (struct nd_neighbor_advert *)(ip6+1);
491 nd_na->nd_na_type = ND_NEIGHBOR_ADVERT;
492 nd_na->nd_na_code = 0;
493 nd_na->nd_na_flags_reserved = ND_NA_FLAG_OVERRIDE;
494 nd_na->nd_na_target = addr->ip6.sin6_addr;
495 /* Option: Target link-layer address */
496 nd_oh = (struct nd_opt_hdr *)(nd_na+1);
497 nd_oh->nd_opt_type = ND_OPT_TARGET_LINKADDR;
498 nd_oh->nd_opt_len = 1;
499 memcpy(&(nd_oh+1)[0], if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
501 nd_na->nd_na_cksum = ip6_checksum((uint16_t *)nd_na,
502 ntohs(ip6->ip6_plen), ip6);
504 sall.sll_family = AF_PACKET;
505 sall.sll_halen = 6;
506 memcpy(&sall.sll_addr[0], &eh->ether_dhost[0], sall.sll_halen);
507 sall.sll_protocol = htons(ETH_P_ALL);
508 sall.sll_ifindex = ifr.ifr_ifindex;
509 ret = sendto(s, buffer, sizeof(buffer),
510 0, (struct sockaddr *)&sall, sizeof(sall));
511 if (ret < 0 ){
512 close(s);
513 DBG_ERR("Failed sendto\n");
514 return -1;
517 close(s);
518 break;
519 default:
520 DBG_ERR("Not an ipv4/ipv6 address (family is %u)\n",
521 addr->ip.sin_family);
522 return -1;
525 return 0;
528 #else /* HAVE_PACKETSOCKET */
530 int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
532 /* Not implemented */
533 errno = ENOSYS;
534 return -1;
537 #endif /* HAVE_PACKETSOCKET */
540 * Send tcp segment from the specified IP/port to the specified
541 * destination IP/port.
543 * This is used to trigger the receiving host into sending its own ACK,
544 * which should trigger early detection of TCP reset by the client
545 * after IP takeover
547 * This can also be used to send RST segments (if rst is true) and also
548 * if correct seq and ack numbers are provided.
550 int ctdb_sys_send_tcp(const ctdb_sock_addr *dest,
551 const ctdb_sock_addr *src,
552 uint32_t seq,
553 uint32_t ack,
554 int rst)
556 int s;
557 int ret;
558 uint32_t one = 1;
559 uint16_t tmpport;
560 ctdb_sock_addr *tmpdest;
561 struct {
562 struct ip ip;
563 struct tcphdr tcp;
564 } ip4pkt;
565 struct {
566 struct ip6_hdr ip6;
567 struct tcphdr tcp;
568 } ip6pkt;
569 int saved_errno;
571 switch (src->ip.sin_family) {
572 case AF_INET:
573 ZERO_STRUCT(ip4pkt);
574 ip4pkt.ip.ip_v = 4;
575 ip4pkt.ip.ip_hl = sizeof(ip4pkt.ip)/4;
576 ip4pkt.ip.ip_len = htons(sizeof(ip4pkt));
577 ip4pkt.ip.ip_ttl = 255;
578 ip4pkt.ip.ip_p = IPPROTO_TCP;
579 ip4pkt.ip.ip_src.s_addr = src->ip.sin_addr.s_addr;
580 ip4pkt.ip.ip_dst.s_addr = dest->ip.sin_addr.s_addr;
581 ip4pkt.ip.ip_sum = 0;
583 ip4pkt.tcp.th_sport = src->ip.sin_port;
584 ip4pkt.tcp.th_dport = dest->ip.sin_port;
585 ip4pkt.tcp.th_seq = seq;
586 ip4pkt.tcp.th_ack = ack;
587 ip4pkt.tcp.th_flags = 0;
588 ip4pkt.tcp.th_flags |= TH_ACK;
589 if (rst) {
590 ip4pkt.tcp.th_flags |= TH_RST;
592 ip4pkt.tcp.th_off = sizeof(ip4pkt.tcp)/4;
593 /* this makes it easier to spot in a sniffer */
594 ip4pkt.tcp.th_win = htons(1234);
595 ip4pkt.tcp.th_sum = ip_checksum((uint16_t *)&ip4pkt.tcp,
596 sizeof(ip4pkt.tcp),
597 &ip4pkt.ip);
599 /* open a raw socket to send this segment from */
600 s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
601 if (s == -1) {
602 DBG_ERR("Failed to open raw socket (%s)\n",
603 strerror(errno));
604 return -1;
607 ret = setsockopt(s, IPPROTO_IP, IP_HDRINCL, &one, sizeof(one));
608 if (ret != 0) {
609 DBG_ERR("Failed to setup IP headers (%s)\n",
610 strerror(errno));
611 close(s);
612 return -1;
615 ret = sendto(s, &ip4pkt, sizeof(ip4pkt), 0,
616 (const struct sockaddr *)&dest->ip,
617 sizeof(dest->ip));
618 saved_errno = errno;
619 close(s);
620 if (ret != sizeof(ip4pkt)) {
621 D_ERR("Failed sendto (%s)\n", strerror(saved_errno));
622 return -1;
624 break;
625 case AF_INET6:
626 ZERO_STRUCT(ip6pkt);
627 ip6pkt.ip6.ip6_vfc = 0x60;
628 ip6pkt.ip6.ip6_plen = htons(20);
629 ip6pkt.ip6.ip6_nxt = IPPROTO_TCP;
630 ip6pkt.ip6.ip6_hlim = 64;
631 ip6pkt.ip6.ip6_src = src->ip6.sin6_addr;
632 ip6pkt.ip6.ip6_dst = dest->ip6.sin6_addr;
634 ip6pkt.tcp.th_sport = src->ip6.sin6_port;
635 ip6pkt.tcp.th_dport = dest->ip6.sin6_port;
636 ip6pkt.tcp.th_seq = seq;
637 ip6pkt.tcp.th_ack = ack;
638 ip6pkt.tcp.th_flags = 0;
639 ip6pkt.tcp.th_flags |= TH_RST;
640 if (rst) {
641 ip6pkt.tcp.th_flags |= TH_RST;
643 ip6pkt.tcp.th_off = sizeof(ip6pkt.tcp)/4;
644 /* this makes it easier to spot in a sniffer */
645 ip6pkt.tcp.th_win = htons(1234);
646 ip6pkt.tcp.th_sum = ip6_checksum((uint16_t *)&ip6pkt.tcp,
647 sizeof(ip6pkt.tcp),
648 &ip6pkt.ip6);
650 s = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW);
651 if (s == -1) {
652 DBG_ERR("Failed to open sending socket\n");
653 return -1;
656 /* sendto() don't like if the port is set and the socket is
657 in raw mode.
659 tmpdest = discard_const(dest);
660 tmpport = tmpdest->ip6.sin6_port;
662 tmpdest->ip6.sin6_port = 0;
663 ret = sendto(s, &ip6pkt, sizeof(ip6pkt), 0,
664 (const struct sockaddr *)&dest->ip6,
665 sizeof(dest->ip6));
666 saved_errno = errno;
667 tmpdest->ip6.sin6_port = tmpport;
668 close(s);
670 if (ret != sizeof(ip6pkt)) {
671 D_ERR("Failed sendto (%s)\n", strerror(saved_errno));
672 return -1;
674 break;
676 default:
677 DBG_ERR("Not an ipv4/v6 address\n");
678 return -1;
681 return 0;
685 * Packet capture
687 * If AF_PACKET is available then use a raw socket otherwise use pcap.
688 * wscript has checked to make sure that pcap is available if needed.
691 #ifdef HAVE_AF_PACKET
/*
 * This function is used to open a raw socket to capture from
 *
 * The socket is an AF_PACKET raw socket for all protocols, switched to
 * non-blocking mode and marked close-on-exec.  iface and private_data
 * are not used by this implementation.
 *
 * Returns the socket file descriptor, or -1 on failure.
 */
int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
{
	int fd;
	int rc;

	/* Open a socket to capture all traffic */
	fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	if (fd == -1) {
		DBG_ERR("Failed to open raw socket\n");
		return -1;
	}

	DBG_DEBUG("Created RAW SOCKET FD:%d for tcp tickle\n", fd);

	rc = set_blocking(fd, false);
	if (rc != 0) {
		DBG_ERR("Failed to set socket non-blocking (%s)\n",
			strerror(errno));
		close(fd);
		return -1;
	}

	set_close_on_exec(fd);

	return fd;
}
/*
 * This function is used to do any additional cleanup required when closing
 * a capture socket.
 * Note that the socket itself is closed automatically in the caller.
 *
 * Nothing needs doing for a raw socket, so this always returns 0.
 */
int ctdb_sys_close_capture_socket(void *private_data)
{
	(void)private_data;

	return 0;
}
734 * called when the raw socket becomes readable
736 int ctdb_sys_read_tcp_packet(int s, void *private_data,
737 ctdb_sock_addr *src,
738 ctdb_sock_addr *dst,
739 uint32_t *ack_seq,
740 uint32_t *seq,
741 int *rst,
742 uint16_t *window)
744 int ret;
745 #define RCVPKTSIZE 100
746 char pkt[RCVPKTSIZE];
747 struct ether_header *eth;
748 struct iphdr *ip;
749 struct ip6_hdr *ip6;
750 struct tcphdr *tcp;
752 ret = recv(s, pkt, RCVPKTSIZE, MSG_TRUNC);
753 if (ret < sizeof(*eth)+sizeof(*ip)) {
754 return -1;
757 ZERO_STRUCTP(src);
758 ZERO_STRUCTP(dst);
760 /* Ethernet */
761 eth = (struct ether_header *)pkt;
763 /* we want either IPv4 or IPv6 */
764 if (ntohs(eth->ether_type) == ETHERTYPE_IP) {
765 /* IP */
766 ip = (struct iphdr *)(eth+1);
768 /* We only want IPv4 packets */
769 if (ip->version != 4) {
770 return -1;
772 /* Dont look at fragments */
773 if ((ntohs(ip->frag_off)&0x1fff) != 0) {
774 return -1;
776 /* we only want TCP */
777 if (ip->protocol != IPPROTO_TCP) {
778 return -1;
781 /* make sure its not a short packet */
782 if (offsetof(struct tcphdr, th_ack) + 4 +
783 (ip->ihl*4) + sizeof(*eth) > ret) {
784 return -1;
786 /* TCP */
787 tcp = (struct tcphdr *)((ip->ihl*4) + (char *)ip);
789 /* tell the caller which one we've found */
790 src->ip.sin_family = AF_INET;
791 src->ip.sin_addr.s_addr = ip->saddr;
792 src->ip.sin_port = tcp->th_sport;
793 dst->ip.sin_family = AF_INET;
794 dst->ip.sin_addr.s_addr = ip->daddr;
795 dst->ip.sin_port = tcp->th_dport;
796 *ack_seq = tcp->th_ack;
797 *seq = tcp->th_seq;
798 if (window != NULL) {
799 *window = tcp->th_win;
801 if (rst != NULL) {
802 *rst = tcp->th_flags & TH_RST;
805 return 0;
806 } else if (ntohs(eth->ether_type) == ETHERTYPE_IP6) {
807 /* IP6 */
808 ip6 = (struct ip6_hdr *)(eth+1);
810 /* we only want TCP */
811 if (ip6->ip6_nxt != IPPROTO_TCP) {
812 return -1;
815 /* TCP */
816 tcp = (struct tcphdr *)(ip6+1);
818 /* tell the caller which one we've found */
819 src->ip6.sin6_family = AF_INET6;
820 src->ip6.sin6_port = tcp->th_sport;
821 src->ip6.sin6_addr = ip6->ip6_src;
823 dst->ip6.sin6_family = AF_INET6;
824 dst->ip6.sin6_port = tcp->th_dport;
825 dst->ip6.sin6_addr = ip6->ip6_dst;
827 *ack_seq = tcp->th_ack;
828 *seq = tcp->th_seq;
829 if (window != NULL) {
830 *window = tcp->th_win;
832 if (rst != NULL) {
833 *rst = tcp->th_flags & TH_RST;
836 return 0;
839 return -1;
842 #else /* HAVE_AF_PACKET */
844 #include <pcap.h>
846 int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
848 pcap_t *pt;
850 pt=pcap_open_live(iface, 100, 0, 0, NULL);
851 if (pt == NULL) {
852 DBG_ERR("Failed to open capture device %s\n", iface);
853 return -1;
855 *((pcap_t **)private_data) = pt;
857 return pcap_fileno(pt);
860 int ctdb_sys_close_capture_socket(void *private_data)
862 pcap_t *pt = (pcap_t *)private_data;
863 pcap_close(pt);
864 return 0;
867 int ctdb_sys_read_tcp_packet(int s,
868 void *private_data,
869 ctdb_sock_addr *src,
870 ctdb_sock_addr *dst,
871 uint32_t *ack_seq,
872 uint32_t *seq,
873 int *rst,
874 uint16_t *window)
876 int ret;
877 struct ether_header *eth;
878 struct ip *ip;
879 struct ip6_hdr *ip6;
880 struct tcphdr *tcp;
881 struct ctdb_killtcp_connection *conn;
882 struct pcap_pkthdr pkthdr;
883 const u_char *buffer;
884 pcap_t *pt = (pcap_t *)private_data;
886 buffer=pcap_next(pt, &pkthdr);
887 if (buffer==NULL) {
888 return -1;
891 ZERO_STRUCTP(src);
892 ZERO_STRUCTP(dst);
894 /* Ethernet */
895 eth = (struct ether_header *)buffer;
897 /* we want either IPv4 or IPv6 */
898 if (eth->ether_type == htons(ETHERTYPE_IP)) {
899 /* IP */
900 ip = (struct ip *)(eth+1);
902 /* We only want IPv4 packets */
903 if (ip->ip_v != 4) {
904 return -1;
906 /* Dont look at fragments */
907 if ((ntohs(ip->ip_off)&0x1fff) != 0) {
908 return -1;
910 /* we only want TCP */
911 if (ip->ip_p != IPPROTO_TCP) {
912 return -1;
915 /* make sure its not a short packet */
916 if (offsetof(struct tcphdr, th_ack) + 4 +
917 (ip->ip_hl*4) > pkthdr.len) {
918 return -1;
920 /* TCP */
921 tcp = (struct tcphdr *)((ip->ip_hl*4) + (char *)ip);
923 /* tell the caller which one we've found */
924 src->ip.sin_family = AF_INET;
925 src->ip.sin_addr.s_addr = ip->ip_src.s_addr;
926 src->ip.sin_port = tcp->th_sport;
927 dst->ip.sin_family = AF_INET;
928 dst->ip.sin_addr.s_addr = ip->ip_dst.s_addr;
929 dst->ip.sin_port = tcp->th_dport;
930 *ack_seq = tcp->th_ack;
931 *seq = tcp->th_seq;
932 if (window != NULL) {
933 *window = tcp->th_win;
935 if (rst != NULL) {
936 *rst = tcp->th_flags & TH_RST;
939 return 0;
940 } else if (eth->ether_type == htons(ETHERTYPE_IP6)) {
941 /* IP6 */
942 ip6 = (struct ip6_hdr *)(eth+1);
944 /* we only want TCP */
945 if (ip6->ip6_nxt != IPPROTO_TCP) {
946 return -1;
949 /* TCP */
950 tcp = (struct tcphdr *)(ip6+1);
952 /* tell the caller which one we've found */
953 src->ip6.sin6_family = AF_INET6;
954 src->ip6.sin6_port = tcp->th_sport;
955 src->ip6.sin6_addr = ip6->ip6_src;
957 dst->ip6.sin6_family = AF_INET6;
958 dst->ip6.sin6_port = tcp->th_dport;
959 dst->ip6.sin6_addr = ip6->ip6_dst;
961 *ack_seq = tcp->th_ack;
962 *seq = tcp->th_seq;
963 if (window != NULL) {
964 *window = tcp->th_win;
966 if (rst != NULL) {
967 *rst = tcp->th_flags & TH_RST;
970 return 0;
973 return -1;
976 #endif /* HAVE_AF_PACKET */