VERSION: Disable GIT_SNAPSHOT for the 4.8.12 release.
[Samba.git] / ctdb / common / system_linux.c
blobfa77a45460f854c1ca84d69d9bca1b47cfbf7d63
1 /*
2 ctdb system specific code to manage raw sockets on linux
4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "replace.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/wait.h"
26 #include "lib/util/debug.h"
27 #include "lib/util/blocking.h"
29 #include "protocol/protocol.h"
31 #include <netinet/if_ether.h>
32 #include <netinet/ip6.h>
33 #include <netinet/icmp6.h>
34 #include <net/if_arp.h>
35 #include <netpacket/packet.h>
36 #include <sys/prctl.h>
38 #include "common/logging.h"
39 #include "common/system.h"
41 #ifndef ETHERTYPE_IP6
42 #define ETHERTYPE_IP6 0x86dd
43 #endif
/*
  Calculate the checksum for an upper-layer payload (TCP, and also the
  ICMPv6 neighbor advertisement built in this file) carried over IPv6.

  The value covers the IPv6 source and destination addresses, a
  pseudo-header tail holding the payload length and the next-header
  value, and finally the n bytes at data.

  The per-word summing is delegated to uint16_checksum(), which is
  defined outside this block.

  A result of 0 is never returned; 0xFFFF is returned in its place.
 */
static uint16_t tcp_checksum6(uint16_t *data, size_t n, struct ip6_hdr *ip6)
{
	/* payload length and next header, both as 32 bit network order */
	uint32_t pseudo_tail[2] = { htonl(n), htonl(ip6->ip6_nxt) };
	uint32_t acc;
	uint16_t folded;
	int pass;

	acc = uint16_checksum((uint16_t *)(void *)&ip6->ip6_src, 16);
	acc += uint16_checksum((uint16_t *)(void *)&ip6->ip6_dst, 16);
	acc += uint16_checksum((uint16_t *)pseudo_tail, 8);
	acc += uint16_checksum(data, n);

	/* fold the carries back into the low 16 bits, twice */
	for (pass = 0; pass < 2; pass++) {
		acc = (acc & 0xFFFF) + (acc >> 16);
	}

	folded = htons(acc);
	folded = ~folded;

	return (folded == 0) ? 0xFFFF : folded;
}
74 send gratuitous arp reply after we have taken over an ip address
76 saddr is the address we are trying to claim
77 iface is the interface name we will be using to claim the address
79 int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
81 int s, ret;
82 struct sockaddr_ll sall;
83 struct ether_header *eh;
84 struct arphdr *ah;
85 struct ip6_hdr *ip6;
86 struct nd_neighbor_advert *nd_na;
87 struct nd_opt_hdr *nd_oh;
88 struct ifreq if_hwaddr;
89 /* Size of IPv6 neighbor advertisement (with option) */
90 unsigned char buffer[sizeof(struct ether_header) +
91 sizeof(struct ip6_hdr) +
92 sizeof(struct nd_neighbor_advert) +
93 sizeof(struct nd_opt_hdr) + ETH_ALEN];
94 char *ptr;
95 char bdcast[] = {0xff,0xff,0xff,0xff,0xff,0xff};
96 struct ifreq ifr;
98 ZERO_STRUCT(sall);
99 ZERO_STRUCT(ifr);
100 ZERO_STRUCT(if_hwaddr);
102 switch (addr->ip.sin_family) {
103 case AF_INET:
104 s = socket(AF_PACKET, SOCK_RAW, 0);
105 if (s == -1){
106 DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket\n"));
107 return -1;
110 DEBUG(DEBUG_DEBUG, (__location__ " Created SOCKET FD:%d for sending arp\n", s));
111 strlcpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
112 if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
113 DEBUG(DEBUG_CRIT,(__location__ " interface '%s' not found\n", iface));
114 close(s);
115 return -1;
118 /* get the mac address */
119 strlcpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name));
120 ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
121 if ( ret < 0 ) {
122 close(s);
123 DEBUG(DEBUG_CRIT,(__location__ " ioctl failed\n"));
124 return -1;
126 if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) {
127 DEBUG(DEBUG_DEBUG,("Ignoring loopback arp request\n"));
128 close(s);
129 return 0;
131 if (if_hwaddr.ifr_hwaddr.sa_family != ARPHRD_ETHER) {
132 close(s);
133 errno = EINVAL;
134 DEBUG(DEBUG_CRIT,(__location__ " not an ethernet address family (0x%x)\n",
135 if_hwaddr.ifr_hwaddr.sa_family));
136 return -1;
140 memset(buffer, 0 , 64);
141 eh = (struct ether_header *)buffer;
142 memset(eh->ether_dhost, 0xff, ETH_ALEN);
143 memcpy(eh->ether_shost, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
144 eh->ether_type = htons(ETHERTYPE_ARP);
146 ah = (struct arphdr *)&buffer[sizeof(struct ether_header)];
147 ah->ar_hrd = htons(ARPHRD_ETHER);
148 ah->ar_pro = htons(ETH_P_IP);
149 ah->ar_hln = ETH_ALEN;
150 ah->ar_pln = 4;
152 /* send a gratious arp */
153 ah->ar_op = htons(ARPOP_REQUEST);
154 ptr = (char *)&ah[1];
155 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
156 ptr+=ETH_ALEN;
157 memcpy(ptr, &addr->ip.sin_addr, 4);
158 ptr+=4;
159 memset(ptr, 0, ETH_ALEN);
160 ptr+=ETH_ALEN;
161 memcpy(ptr, &addr->ip.sin_addr, 4);
162 ptr+=4;
164 sall.sll_family = AF_PACKET;
165 sall.sll_halen = 6;
166 memcpy(&sall.sll_addr[0], bdcast, sall.sll_halen);
167 sall.sll_protocol = htons(ETH_P_ALL);
168 sall.sll_ifindex = ifr.ifr_ifindex;
169 ret = sendto(s, buffer, 64, 0, (struct sockaddr *)&sall, sizeof(sall));
170 if (ret < 0 ){
171 close(s);
172 DEBUG(DEBUG_CRIT,(__location__ " failed sendto\n"));
173 return -1;
176 /* send unsolicited arp reply broadcast */
177 ah->ar_op = htons(ARPOP_REPLY);
178 ptr = (char *)&ah[1];
179 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
180 ptr+=ETH_ALEN;
181 memcpy(ptr, &addr->ip.sin_addr, 4);
182 ptr+=4;
183 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
184 ptr+=ETH_ALEN;
185 memcpy(ptr, &addr->ip.sin_addr, 4);
186 ptr+=4;
188 ret = sendto(s, buffer, 64, 0, (struct sockaddr *)&sall, sizeof(sall));
189 if (ret < 0 ){
190 DEBUG(DEBUG_CRIT,(__location__ " failed sendto\n"));
191 close(s);
192 return -1;
195 close(s);
196 break;
197 case AF_INET6:
198 s = socket(AF_PACKET, SOCK_RAW, 0);
199 if (s == -1){
200 DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket\n"));
201 return -1;
204 DEBUG(DEBUG_DEBUG, (__location__ " Created SOCKET FD:%d for sending arp\n", s));
205 strlcpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
206 if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
207 DEBUG(DEBUG_CRIT,(__location__ " interface '%s' not found\n", iface));
208 close(s);
209 return -1;
212 /* get the mac address */
213 strlcpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name));
214 ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
215 if ( ret < 0 ) {
216 close(s);
217 DEBUG(DEBUG_CRIT,(__location__ " ioctl failed\n"));
218 return -1;
220 if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) {
221 DEBUG(DEBUG_DEBUG,("Ignoring loopback arp request\n"));
222 close(s);
223 return 0;
225 if (if_hwaddr.ifr_hwaddr.sa_family != ARPHRD_ETHER) {
226 close(s);
227 errno = EINVAL;
228 DEBUG(DEBUG_CRIT,(__location__ " not an ethernet address family (0x%x)\n",
229 if_hwaddr.ifr_hwaddr.sa_family));
230 return -1;
233 memset(buffer, 0 , sizeof(buffer));
234 eh = (struct ether_header *)buffer;
235 /* Ethernet multicast: 33:33:00:00:00:01 (see RFC2464,
236 * section 7) - note zeroes above! */
237 eh->ether_dhost[0] = eh->ether_dhost[1] = 0x33;
238 eh->ether_dhost[5] = 0x01;
239 memcpy(eh->ether_shost, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
240 eh->ether_type = htons(ETHERTYPE_IP6);
242 ip6 = (struct ip6_hdr *)(eh+1);
243 ip6->ip6_vfc = 0x60;
244 ip6->ip6_plen = htons(sizeof(*nd_na) +
245 sizeof(struct nd_opt_hdr) +
246 ETH_ALEN);
247 ip6->ip6_nxt = IPPROTO_ICMPV6;
248 ip6->ip6_hlim = 255;
249 ip6->ip6_src = addr->ip6.sin6_addr;
250 /* all-nodes multicast */
252 ret = inet_pton(AF_INET6, "ff02::1", &ip6->ip6_dst);
253 if (ret != 1) {
254 close(s);
255 DEBUG(DEBUG_CRIT,(__location__ " failed inet_pton\n"));
256 return -1;
259 nd_na = (struct nd_neighbor_advert *)(ip6+1);
260 nd_na->nd_na_type = ND_NEIGHBOR_ADVERT;
261 nd_na->nd_na_code = 0;
262 nd_na->nd_na_flags_reserved = ND_NA_FLAG_OVERRIDE;
263 nd_na->nd_na_target = addr->ip6.sin6_addr;
264 /* Option: Target link-layer address */
265 nd_oh = (struct nd_opt_hdr *)(nd_na+1);
266 nd_oh->nd_opt_type = ND_OPT_TARGET_LINKADDR;
267 nd_oh->nd_opt_len = 1;
268 memcpy(&(nd_oh+1)[0], if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
270 nd_na->nd_na_cksum = tcp_checksum6((uint16_t *)nd_na,
271 ntohs(ip6->ip6_plen), ip6);
273 sall.sll_family = AF_PACKET;
274 sall.sll_halen = 6;
275 memcpy(&sall.sll_addr[0], &eh->ether_dhost[0], sall.sll_halen);
276 sall.sll_protocol = htons(ETH_P_ALL);
277 sall.sll_ifindex = ifr.ifr_ifindex;
278 ret = sendto(s, buffer, sizeof(buffer),
279 0, (struct sockaddr *)&sall, sizeof(sall));
280 if (ret < 0 ){
281 close(s);
282 DEBUG(DEBUG_CRIT,(__location__ " failed sendto\n"));
283 return -1;
286 close(s);
287 break;
288 default:
289 DEBUG(DEBUG_CRIT,(__location__ " not an ipv4/ipv6 address (family is %u)\n", addr->ip.sin_family));
290 return -1;
293 return 0;
/*
  Simple TCP-over-IPv4 checksum - assumes data is a multiple of 2 bytes
  long.

  Sums the n bytes at data, the IPv4 source and destination addresses
  from ip, the protocol number and the length n.  The per-word summing
  is delegated to uint16_checksum(), which is defined outside this
  block.

  A result of 0 is never returned; 0xFFFF is returned in its place.
 */
static uint16_t tcp_checksum(uint16_t *data, size_t n, struct iphdr *ip)
{
	uint32_t acc;
	uint16_t folded;
	int pass;

	acc = uint16_checksum(data, n);
	acc += uint16_checksum((uint16_t *)(void *)&ip->saddr,
			       sizeof(ip->saddr));
	acc += uint16_checksum((uint16_t *)(void *)&ip->daddr,
			       sizeof(ip->daddr));
	acc += ip->protocol + n;

	/* fold the carries back into the low 16 bits, twice */
	for (pass = 0; pass < 2; pass++) {
		acc = (acc & 0xFFFF) + (acc >> 16);
	}

	folded = htons(acc);
	folded = ~folded;

	return (folded == 0) ? 0xFFFF : folded;
}
320 Send tcp segment from the specified IP/port to the specified
321 destination IP/port.
323 This is used to trigger the receiving host into sending its own ACK,
324 which should trigger early detection of TCP reset by the client
325 after IP takeover
327 This can also be used to send RST segments (if rst is true) and also
328 if correct seq and ack numbers are provided.
330 int ctdb_sys_send_tcp(const ctdb_sock_addr *dest,
331 const ctdb_sock_addr *src,
332 uint32_t seq, uint32_t ack, int rst)
334 int s;
335 int ret;
336 uint32_t one = 1;
337 uint16_t tmpport;
338 ctdb_sock_addr *tmpdest;
339 struct {
340 struct iphdr ip;
341 struct tcphdr tcp;
342 } ip4pkt;
343 struct {
344 struct ip6_hdr ip6;
345 struct tcphdr tcp;
346 } ip6pkt;
347 int saved_errno;
349 switch (src->ip.sin_family) {
350 case AF_INET:
351 ZERO_STRUCT(ip4pkt);
352 ip4pkt.ip.version = 4;
353 ip4pkt.ip.ihl = sizeof(ip4pkt.ip)/4;
354 ip4pkt.ip.tot_len = htons(sizeof(ip4pkt));
355 ip4pkt.ip.ttl = 255;
356 ip4pkt.ip.protocol = IPPROTO_TCP;
357 ip4pkt.ip.saddr = src->ip.sin_addr.s_addr;
358 ip4pkt.ip.daddr = dest->ip.sin_addr.s_addr;
359 ip4pkt.ip.check = 0;
361 ip4pkt.tcp.source = src->ip.sin_port;
362 ip4pkt.tcp.dest = dest->ip.sin_port;
363 ip4pkt.tcp.seq = seq;
364 ip4pkt.tcp.ack_seq = ack;
365 ip4pkt.tcp.ack = 1;
366 if (rst) {
367 ip4pkt.tcp.rst = 1;
369 ip4pkt.tcp.doff = sizeof(ip4pkt.tcp)/4;
370 /* this makes it easier to spot in a sniffer */
371 ip4pkt.tcp.window = htons(1234);
372 ip4pkt.tcp.check = tcp_checksum((uint16_t *)&ip4pkt.tcp, sizeof(ip4pkt.tcp), &ip4pkt.ip);
374 /* open a raw socket to send this segment from */
375 s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
376 if (s == -1) {
377 DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket (%s)\n",
378 strerror(errno)));
379 return -1;
382 ret = setsockopt(s, SOL_IP, IP_HDRINCL, &one, sizeof(one));
383 if (ret != 0) {
384 DEBUG(DEBUG_CRIT,(__location__ " failed to setup IP headers (%s)\n",
385 strerror(errno)));
386 close(s);
387 return -1;
390 ret = sendto(s, &ip4pkt, sizeof(ip4pkt), 0,
391 (const struct sockaddr *)&dest->ip,
392 sizeof(dest->ip));
393 saved_errno = errno;
394 close(s);
395 if (ret != sizeof(ip4pkt)) {
396 DEBUG(DEBUG_ERR,
397 ("Failed sendto (%s)\n", strerror(saved_errno)));
398 return -1;
400 break;
401 case AF_INET6:
402 ZERO_STRUCT(ip6pkt);
403 ip6pkt.ip6.ip6_vfc = 0x60;
404 ip6pkt.ip6.ip6_plen = htons(20);
405 ip6pkt.ip6.ip6_nxt = IPPROTO_TCP;
406 ip6pkt.ip6.ip6_hlim = 64;
407 ip6pkt.ip6.ip6_src = src->ip6.sin6_addr;
408 ip6pkt.ip6.ip6_dst = dest->ip6.sin6_addr;
410 ip6pkt.tcp.source = src->ip6.sin6_port;
411 ip6pkt.tcp.dest = dest->ip6.sin6_port;
412 ip6pkt.tcp.seq = seq;
413 ip6pkt.tcp.ack_seq = ack;
414 ip6pkt.tcp.ack = 1;
415 if (rst) {
416 ip6pkt.tcp.rst = 1;
418 ip6pkt.tcp.doff = sizeof(ip6pkt.tcp)/4;
419 /* this makes it easier to spot in a sniffer */
420 ip6pkt.tcp.window = htons(1234);
421 ip6pkt.tcp.check = tcp_checksum6((uint16_t *)&ip6pkt.tcp, sizeof(ip6pkt.tcp), &ip6pkt.ip6);
423 s = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW);
424 if (s == -1) {
425 DEBUG(DEBUG_CRIT, (__location__ " Failed to open sending socket\n"));
426 return -1;
429 /* sendto() don't like if the port is set and the socket is
430 in raw mode.
432 tmpdest = discard_const(dest);
433 tmpport = tmpdest->ip6.sin6_port;
435 tmpdest->ip6.sin6_port = 0;
436 ret = sendto(s, &ip6pkt, sizeof(ip6pkt), 0,
437 (const struct sockaddr *)&dest->ip6,
438 sizeof(dest->ip6));
439 saved_errno = errno;
440 tmpdest->ip6.sin6_port = tmpport;
441 close(s);
443 if (ret != sizeof(ip6pkt)) {
444 DEBUG(DEBUG_ERR,
445 ("Failed sendto (%s)\n", strerror(saved_errno)));
446 return -1;
448 break;
450 default:
451 DEBUG(DEBUG_CRIT,(__location__ " not an ipv4/v6 address\n"));
452 return -1;
455 return 0;
459 This function is used to open a raw socket to capture from
461 int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
463 int s, ret;
465 /* Open a socket to capture all traffic */
466 s = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
467 if (s == -1) {
468 DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket\n"));
469 return -1;
472 DEBUG(DEBUG_DEBUG, (__location__ " Created RAW SOCKET FD:%d for tcp tickle\n", s));
474 ret = set_blocking(s, false);
475 if (ret != 0) {
476 DEBUG(DEBUG_ERR,
477 (__location__
478 " failed to set socket non-blocking (%s)\n",
479 strerror(errno)));
480 close(s);
481 return -1;
484 set_close_on_exec(s);
486 return s;
/*
  This function is used to do any additional cleanup required when closing
  a capture socket.
  Note that the socket itself is closed automatically in the caller.

  There is nothing to clean up in this implementation: private_data is
  ignored and 0 is always returned.
 */
int ctdb_sys_close_capture_socket(void *private_data)
{
	(void)private_data;

	return 0;
}
501 called when the raw socket becomes readable
503 int ctdb_sys_read_tcp_packet(int s, void *private_data,
504 ctdb_sock_addr *src, ctdb_sock_addr *dst,
505 uint32_t *ack_seq, uint32_t *seq,
506 int *rst, uint16_t *window)
508 int ret;
509 #define RCVPKTSIZE 100
510 char pkt[RCVPKTSIZE];
511 struct ether_header *eth;
512 struct iphdr *ip;
513 struct ip6_hdr *ip6;
514 struct tcphdr *tcp;
516 ret = recv(s, pkt, RCVPKTSIZE, MSG_TRUNC);
517 if (ret < sizeof(*eth)+sizeof(*ip)) {
518 return -1;
521 ZERO_STRUCTP(src);
522 ZERO_STRUCTP(dst);
524 /* Ethernet */
525 eth = (struct ether_header *)pkt;
527 /* we want either IPv4 or IPv6 */
528 if (ntohs(eth->ether_type) == ETHERTYPE_IP) {
529 /* IP */
530 ip = (struct iphdr *)(eth+1);
532 /* We only want IPv4 packets */
533 if (ip->version != 4) {
534 return -1;
536 /* Dont look at fragments */
537 if ((ntohs(ip->frag_off)&0x1fff) != 0) {
538 return -1;
540 /* we only want TCP */
541 if (ip->protocol != IPPROTO_TCP) {
542 return -1;
545 /* make sure its not a short packet */
546 if (offsetof(struct tcphdr, ack_seq) + 4 +
547 (ip->ihl*4) + sizeof(*eth) > ret) {
548 return -1;
550 /* TCP */
551 tcp = (struct tcphdr *)((ip->ihl*4) + (char *)ip);
553 /* tell the caller which one we've found */
554 src->ip.sin_family = AF_INET;
555 src->ip.sin_addr.s_addr = ip->saddr;
556 src->ip.sin_port = tcp->source;
557 dst->ip.sin_family = AF_INET;
558 dst->ip.sin_addr.s_addr = ip->daddr;
559 dst->ip.sin_port = tcp->dest;
560 *ack_seq = tcp->ack_seq;
561 *seq = tcp->seq;
562 if (window != NULL) {
563 *window = tcp->window;
565 if (rst != NULL) {
566 *rst = tcp->rst;
569 return 0;
570 } else if (ntohs(eth->ether_type) == ETHERTYPE_IP6) {
571 /* IP6 */
572 ip6 = (struct ip6_hdr *)(eth+1);
574 /* we only want TCP */
575 if (ip6->ip6_nxt != IPPROTO_TCP) {
576 return -1;
579 /* TCP */
580 tcp = (struct tcphdr *)(ip6+1);
582 /* tell the caller which one we've found */
583 src->ip6.sin6_family = AF_INET6;
584 src->ip6.sin6_port = tcp->source;
585 src->ip6.sin6_addr = ip6->ip6_src;
587 dst->ip6.sin6_family = AF_INET6;
588 dst->ip6.sin6_port = tcp->dest;
589 dst->ip6.sin6_addr = ip6->ip6_dst;
591 *ack_seq = tcp->ack_seq;
592 *seq = tcp->seq;
593 if (window != NULL) {
594 *window = tcp->window;
596 if (rst != NULL) {
597 *rst = tcp->rst;
600 return 0;
603 return -1;
607 bool ctdb_sys_check_iface_exists(const char *iface)
609 int s;
610 struct ifreq ifr;
612 s = socket(AF_PACKET, SOCK_RAW, 0);
613 if (s == -1){
614 /* We don't know if the interface exists, so assume yes */
615 DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket\n"));
616 return true;
619 strlcpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
620 if (ioctl(s, SIOCGIFINDEX, &ifr) < 0 && errno == ENODEV) {
621 DEBUG(DEBUG_CRIT,(__location__ " interface '%s' not found\n", iface));
622 close(s);
623 return false;
625 close(s);
627 return true;
/*
  Look up the process id of the peer connected to socket fd, using the
  SO_PEERCRED socket option.

  On success 0 is returned and the pid is stored in *peer_pid.  On
  failure the return value of getsockopt() is passed back and
  *peer_pid is left untouched.
 */
int ctdb_get_peer_pid(const int fd, pid_t *peer_pid)
{
	struct ucred cred;
	socklen_t cred_len = sizeof(cred);
	int rc;

	rc = getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &cred, &cred_len);
	if (rc != 0) {
		return rc;
	}

	*peer_pid = cred.pid;
	return 0;
}