Project.pm: tighten up is_empty check
[girocco/readme.git] / src / peek_packet.c
blob1316ea058662a7cface75d45fa309d4a22607b49
1 /*
3 peek_packet.c -- peek_packet utility to peek at incoming git-daemon request
4 Copyright (C) 2015,2020 Kyle J. McKay. All rights reserved.
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License
8 as published by the Free Software Foundation; either version 2
9 of the License, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 This utility is intended to be used by a script front end to git daemon
24 running in inetd mode. The first thing the script does is call this utility
25 which attempts to peek the first incoming Git packet off the connection
26 and then output contents after the initial 4-character hex length up to but
27 excluding the first \0 character.
29 At that point the script can validate the incoming request and if it chooses
30 to allow it then exec git daemon to process it. Since the packet was peeked
31 it's still there to be read by git daemon.
33 Note that there is a hard-coded timeout of 30 seconds and a hard-coded limit
34 of PATH_MAX (or 4096 if PATH_MAX is undefined or smaller than that) for the
34 length of the initial packet.
36 On failure a non-zero exit code is returned. On success a 0 exit code is
37 returned and peeked text is output to stdout.
39 The connection to be peeked must be fd 0 and will have SO_KEEPALIVE set on it.
41 This utility does not take any arguments and ignores any that are given.
43 The output of a successful peek should be one of these:
45 git-upload-pack /<...>
46 git-upload-archive /<...>
47 git-receive-pack /<...>
49 where "<...>" is replaced with the repository path so, for example, doing
50 "git ls-remote git://example.com/foo.git" would result in this output:
52 git-upload-pack /foo.git
54 Note that the first character could be a ~ instead of a /, but it's
55 probably best to reject those.
57 The output is guaranteed to not contain any bytes with a value less than
58 %x20 except for possibly tab (%x09) characters. Any request that does
59 will produce no output and return a non-zero exit code.
61 If the extra optional "host=" parameter is present, then an additional
62 second line is output in the format:
64 host=<hostname>
66 where <hostname> is the host name only from the extra "host=" parameter
67 that's been lowercased and had a trailing "." removed (but only if that
68 doesn't create the empty string) and had any surrounding '[' and ']' removed
69 from a literal IPv6 address. The <hostname> is guaranteed to only contain
70 bytes in the range %x21-%xFF.
72 If the extra optional "host=" parameter contains a ":" <portnum> suffix
73 then an additional third line will be output of the format:
75 port=<portnum>
77 If just the ":" was present <portnum> will be the empty string otherwise
78 it's guaranteed to be a decimal number with no leading zeros in the range
79 1..65535.
81 For example, this git command:
83 git ls-remote git://example.com/repo.git
85 Will result in peek_packet producing these two lines (unless a very, very
86 old version of Git was used in which case only the first line):
88 git-upload-pack /repo.git
89 host=example.com
91 This git command:
93 git ls-remote git://[::1]:8765/repo.git
95 Will result in peek_packet producing these three lines (unless a very, very
96 old version of Git was used in which case only the first line):
98 git-upload-pack /repo.git
99 host=::1
100 port=8765
102 If the incoming packet looks invalid (or a timeout occurs) then no output
103 is produced, but a non-zero exit code is set.
105 If the remote address is available (getpeername) then two lines of the form
107 remote_addr=<address>
108 remote_port=<port>
110 will be output where <address> is either a dotted IPv4 or an IPv6
111 (without brackets) and <port> is a decimal number with no leading zeros
112 in the range 1..65535.
114 If the local address is available (getsockname) then two lines of the form
116 server_addr=<address>
117 server_port=<port>
119 will be output where <address> is in the same format as remote_addr and
120 <port> is in the same format as remote_port.
126 ;; The Git packet protocol is defined as follows in RFC 5234+7405 syntax
129 BYTE = %x00-FF
131 DIGIT = "0" / "1" / "2" / "3" / "4" / "5" / "6" / "7" / "8" / "9"
133 ; Note that hexdigits are case insensitive
134 HEX-DIGIT = DIGIT / "a" / "b" / "c" / "d" / "e" / "f"
136 PKT-LENGTH = 4HEX-DIGIT ; represents a hexadecimal big-endian non-negative
137 ; value. a length of "0002" or "0003" is invalid.
138 ; lengths "0000" and "0001" have special meaning.
140 PKT-DATA = *BYTE ; first 4 <BYTE>s MUST NOT be %s"ERR "
142 PKT = FLUSH-PKT / DELIM-PKT / ERR-PKT / PROTOCOL-PKT
144 FLUSH-PKT = "0000"
146 DELIM-PKT = "0001"
148 PROTOCOL-PKT = PKT-LENGTH PKT-DATA ; PKT-DATA must contain exactly
149 ; PKT-LENGTH - 4 bytes
151 ERR-PKT = PKT-LENGTH ERR-DATA; ERR-DATA must contain exactly PKT-LENGTH - 4 bytes
153 ERR-DATA = %s"ERR " *BYTE ; Note that "ERR " *IS* case sensitive
156 ;; The first packet sent by a client connecting to a "Git Transport" server
157 ;; has the <GIT-REQUEST-PKT> format
160 GIT-REQUEST-PKT = PKT-LENGTH GIT-REQUEST-DATA ; GIT-REQUEST-DATA must contain
161 ; exactly PKT-LENGTH - 4 bytes
163 ; Normally if %x0A is present it's the final byte (very old Git versions)
164 ; Normally if %x00 is present then %x0A is not (modern Git versions)
165 ; But the current Git versions do parse the %x0A.00 form correctly
166 GIT-REQUEST-DATA = GIT-COMMAND [EXTRA-ARGS]
168 GIT-COMMAND = REQUEST-COMMAND %x20 PATHNAME [%x0A]
170 ; these are all case sensitive
171 REQUEST-COMMAND = %s"git-upload-pack" /
172 %s"git-receive-pack" /
173 %s"git-upload-archive"
175 PATHNAME = NON-NULL-BYTES
177 EXTRA-ARGS = %x00 HOST-ARG-TRUNCATED /
178 %x00 [HOST-ARG] [%x00 EXTRA-PARAMS]
180 HOST-ARG-TRUNCATED = HOST-PARAM HOST-NAME [ ":" [PORTNUM] ]
182 HOST-ARG = HOST-ARG-TRUNCATED %x00
184 ; "host=" is case insensitive
185 HOST-PARAM = "host="
187 HOST-NAME = NON-NULL-BYTES ; should be a valid DNS name
188 ; or IPv4 literal
189 ; or "[" IPv6 literal "]"
190 ; a ":" is only allowed between
191 ; the "[" and "]" of an IPv6 literal
193 ; PORTNUM matches 1..65535 with no leading zeros allowed
194 PORTNUM = ( "1" / "2" / "3" / "4" / "5" / "6" / "7" / "8" / "9" ) *3DIGIT / ; 1..9999
195 ( "1" / "2" / "3" / "4" / "5" ) 4DIGIT / ; 10000..59999
196 "6" ( "0" / "1" / "2" / "3" / "4" ) 3DIGIT / ; 60000..64999
197 "65" ( "0" / "1" / "2" / "3" / "4" ) 2DIGIT / ; 65000..65499
198 "655" ( "0" / "1" / "2" ) 1DIGIT / ; 65500..65529
199 "6553" ( "0" / "1" / "2" / "3" / "4" / "5" ) ; 65530..65535
203 EXTRA-PARAMS = *EXTRA-PARAM [EXTRA-PARAM-TRUNCATED]
205 EXTRA-PARAM-TRUNCATED = NON-NULL-BYTES
207 EXTRA-PARAM = EXTRA-PARAM-TRUNCATED %x00
209 NON-NULL-BYTES = *NON-NULL-BYTE
211 NON-NULL-BYTE = %x01-FF
215 #define HAVE_STDINT_H 1
216 #define HAVE_IPV6 1
217 #define HAVE_INET_NTOP 1
218 #define HAVE_SNPRINTF 1
219 #ifdef CONFIG_H
220 #include CONFIG_H
221 #endif
223 #include <stddef.h>
224 #ifdef HAVE_STDINT_H
225 #include <stdint.h>
226 #endif
227 #include <stdio.h>
228 #include <string.h>
229 #include <stdlib.h>
230 #include <unistd.h>
231 #include <limits.h>
232 #include <time.h>
233 #include <signal.h>
234 #include <sys/types.h>
235 #include <sys/socket.h>
236 #include <netinet/in.h>
237 #include <arpa/inet.h>
238 #ifdef HAVE_IPV6
239 #include <net/if.h>
240 #endif
241 #ifndef HAVE_STDINT_H
242 typedef unsigned int uint32_t;
243 typedef unsigned short uint16_t;
244 #endif
/* Size of a buffer large enough for any string iptoa() produces.
 * With IPv6 support this leaves room for a textual address plus a
 * "%<scope>" suffix; otherwise only a dotted-quad IPv4 address
 * ("255.255.255.255" plus the terminating NUL) must fit. */
#ifdef HAVE_IPV6
#define IPTOASIZE (INET6_ADDRSTRLEN+IF_NAMESIZE+INET6_ADDRSTRLEN+IF_NAMESIZE+1)
#else
#define IPTOASIZE (15+1) /* IPv4 only */
#endif

/* Format the address part of ip into outstr (capacity s); returns outstr */
static const char *iptoa(const struct sockaddr *ip, char *outstr, size_t s);

/* Return the port of ip in host byte order, or 0 if the family is unknown */
static uint16_t xsockport(const struct sockaddr *ip);
/* Note that mod_reqtimeout has a default configuration of 20 seconds
 * maximum to wait for the first byte of the initial request line and
 * then no more than 40 seconds total, but after the first byte is
 * received the rest must arrive at 500 bytes/sec or faster.  That
 * means 10000 bytes minimum in 40 seconds.  We do not allow the
 * initial Git packet to be longer than PATH_MAX (which is typically
 * either 1024 or 4096).  And since 20 + 1024/500 = 22.048 and
 * 20 + 4096/500 = 28.192 using 30 seconds for a total timeout is
 * quite reasonable in comparison to mod_reqtimeout's default conf.
 */

#define TIMEOUT_SECS 30 /* no more than 30 seconds for initial packet */

#define POLL_QUANTUM 100000U /* how often to poll in microseconds */

/* The packet buffer is PATH_MAX bytes, but never smaller than 4096 */
#if !defined(PATH_MAX) || PATH_MAX+0 < 4096
#define BUFF_MAX 4096
#else
#define BUFF_MAX PATH_MAX
#endif
/* avoid requiring C99 library
 *
 * Bounded string length: returns the number of bytes in s before the
 * first NUL, but never more than maxlen.  A NULL s yields 0. */
static size_t xstrnlen(const char *s, size_t maxlen)
{
	size_t l = 0;

	if (!s)
		return l;
	while (l < maxlen && s[l])
		++l;
	return l;
}
284 /* avoid requiring C99 library */
285 #ifdef HAVE_IPV6
/* Append at most n bytes of s2 to the NUL-terminated string s1 and
 * always NUL-terminate the result.  The caller must guarantee that s1
 * has room for strlen(s1) + n + 1 bytes.  Returns s1; if s1 or s2 is
 * NULL or n is 0 nothing is modified. */
static char *xstrncat(char *s1, const char *s2, size_t n)
{
	char *p;

	if (!s1 || !s2 || !n)
		return s1;
	p = s1 + strlen(s1);
	while (n-- && *s2)
		*p++ = *s2++;
	*p = '\0';
	return s1;
}
295 #endif /* HAVE_IPV6 */
/* ASCII-only lowercase; note that the argument is evaluated more than once */
#define LC(c) (((c)<'A'||(c)>'Z')?(c):((c)+('a'-'A')))

/* returns >0 if m1 and m2 are NOT equal comparing first len bytes
** returns 0 if m1 and m2 ARE equal but ignoring case (POSIX locale)
** essentially the same as the non-existent memcasecmp except that only
** a 0 or >0 result is possible and a >0 result only means not-equal
** (the value is the count of bytes left starting at the first mismatch) */
static size_t xmemcaseneql(const char *m1, const char *m2, size_t len)
{
	for (; len; --len, ++m1, ++m2) {
		char c1 = *m1;
		char c2 = *m2;

		c1 = LC(c1);
		c2 = LC(c2);
		if (c1 != c2)
			break;
	}
	return len;
}
/* Convert one hexadecimal digit character (either case) to its value
 * 0..15; returns -1 if c is not a hexadecimal digit. */
static int xdig(char c)
{
	if ('0' <= c && c <= '9')
		return c - '0';
	if ('a' <= c && c <= 'f')
		return c - 'a' + 10;
	if ('A' <= c && c <= 'F')
		return c - 'A' + 10;
	return -1;
}
326 static char buffer[BUFF_MAX];
327 static time_t expiry;
329 /* Ideally we could just use MSG_PEEK + MSG_WAITALL, and that works nicely
330 * on BSD-type distros. Unfortunately very bad things happen on Linux with
331 * that combination -- a CPU core runs at 100% until all the data arrives.
332 * So instead we omit the MSG_WAITALL and poll every POLL_QUANTUM interval
333 * to see if we've satisfied the requested amount yet.
335 static int recv_peekall(int fd, void *buff, size_t len)
337 int ans;
338 while ((ans = recv(fd, buff, len, MSG_PEEK)) > 0 && (size_t)ans < len) {
339 if (time(NULL) > expiry)
340 exit(2);
341 usleep(POLL_QUANTUM);
343 return ans < 0 ? -1 : (int)len;
/* SIGALRM handler: the overall timeout fired, so terminate immediately
 * with status 2 (using _exit so no atexit handlers or stdio flushing
 * run from within the signal handler). */
static void handle_sigalrm(int s)
{
	(void)s;
	_exit(2);
}
/* atexit handler: cancel any pending alarm timer */
static void clear_alarm(void)
{
	alarm(0);
}
/* Split a "host=" value into host and optional port; returns 0 on failure */
static int parse_host_and_port(char *ptr, size_t zlen, char **host,
	size_t *hlen, const char **port, size_t *plen);

/* Non-zero if the first zlen bytes hold a control byte other than tab */
static size_t has_controls(const void *_ptr, size_t zlen);
/* Storage suitable for receiving any socket address this program
 * handles from getpeername/getsockname; the padding member makes the
 * union at least 128 bytes regardless of the members' sizes. */
typedef union {
	struct sockaddr sa;
	struct sockaddr_in in;
#ifdef HAVE_IPV6
	struct sockaddr_in6 in6;
#endif
	char padding[128];
} sockaddr_univ_t;
371 int main(int argc, char *argv[])
373 int len;
374 int xvals[4];
375 char hexlen[4];
376 size_t pktlen, zlen, gitlen, hlen=0, plen=0;
377 char *ptr, *gitcmd, *host=NULL;
378 const char *pktend, *port=NULL;
379 int optval;
380 sockaddr_univ_t sockname;
381 socklen_t socknamelen;
382 char ipstr[IPTOASIZE];
384 (void)argc;
385 (void)argv;
387 /* Ideally calling recv with MSG_PEEK would never, ever hang. However
388 * even with MSG_PEEK, recv still waits for at least the first message
389 * to arrive on the socket (unless it's non-blocking). For this reason
390 * we set an alarm timer at TIMEOUT_SECS + 2 to make sure we don't
391 * remain stuck in the recv call waiting for the first message.
393 signal(SIGALRM, handle_sigalrm);
394 alarm(TIMEOUT_SECS + 2); /* Some slop as this shouldn't be needed */
395 atexit(clear_alarm); /* Probably not necessary, but do it anyway */
397 expiry = time(NULL) + TIMEOUT_SECS;
399 optval = 1;
400 if (setsockopt(0, SOL_SOCKET, SO_KEEPALIVE, &optval, sizeof(optval)))
401 return 1;
403 len = recv_peekall(0, hexlen, 4);
404 if (len != 4)
405 return 1;
407 if ((xvals[0]=xdig(hexlen[0])) < 0 ||
408 (xvals[1]=xdig(hexlen[1])) < 0 ||
409 (xvals[2]=xdig(hexlen[2])) < 0 ||
410 (xvals[3]=xdig(hexlen[3])) < 0)
411 return 1;
412 pktlen = ((unsigned)xvals[0] << 12) |
413 ((unsigned)xvals[1] << 8) |
414 ((unsigned)xvals[2] << 4) |
415 (unsigned)xvals[3];
416 if (pktlen < 22 || pktlen > sizeof(buffer))
417 return 1;
419 len = recv_peekall(0, buffer, pktlen);
420 if (len != (int)pktlen)
421 return 1;
423 /* skip over 4-byte <PKT-LENGTH> */
424 pktend = buffer + pktlen;
425 ptr = buffer + 4;
427 /* thanks to check above, pktend - ptr always >= 18 */
428 if (memcmp(ptr, "git-", 4)) /* quick sanity check */
429 return 1;
431 /* validate the entire packet format now */
433 /* find length of <GIT-COMMAND> */
434 gitlen = xstrnlen(ptr, pktend - ptr);
435 /* thanks to the quick sanity check, gitlen always >= 4 */
436 gitcmd = ptr;
437 /* skip over <GIT-COMMAND> */
438 ptr += gitlen + 1; /* not a problem if ptr > pktend */
439 if (gitcmd[gitlen-1] == '\n') {
440 /* strip trailing \n from <GIT-COMMAND> */
441 gitcmd[--gitlen] = '\0';
443 if (has_controls(gitcmd, gitlen))
444 return 1; /* bad bytes in command */
446 /* now comes the optional <HOST-ARG> */
447 if (ptr < pktend && (pktend - ptr) >= 5 &&
448 !xmemcaseneql(ptr, "host=", 5)) {
449 /* skip over <HOST-PARAM> part */
450 ptr += 5;
451 zlen = xstrnlen(ptr, pktend - ptr);
452 if (!parse_host_and_port(ptr, zlen, &host, &hlen, &port, &plen))
453 /* failed to parse rest of <HOST-ARG-TRUNCATED> */
454 return 1;
455 /* skip over rest of <HOST-ARG>, okay if ptr ends up > pktend */
456 ptr += zlen + 1;
459 if (ptr < pktend && *ptr)
460 return 1; /* invalid, missing required %x00 before <EXTRA-PARMS> */
461 ++ptr; /* skip over %x00 */
463 /* now skip over the rest of the extra args with minimal validation */
464 while (ptr < pktend) {
465 zlen = xstrnlen(ptr, pktend - ptr);
466 /* if (zlen) process_arg(ptr, zlen); */
467 ptr += zlen + 1; /* okay if ptr ends up > pktend */
470 if (ptr < pktend)
471 return 1; /* not a valid <GIT-REQUEST-PKT> */
473 printf("%.*s\n", (int)gitlen, gitcmd);
474 if (host != NULL)
475 printf("host=%.*s\n", (int)hlen, host);
476 if (port != NULL)
477 printf("port=%.*s\n", (int)plen, port);
479 socknamelen = (socklen_t)sizeof(sockname);
480 if (!getpeername(0, &sockname.sa, &socknamelen) &&
481 iptoa(&sockname.sa, ipstr, sizeof(ipstr)) && ipstr[0]) {
482 uint16_t p = xsockport(&sockname.sa);
483 printf("remote_addr=%s\n", ipstr);
484 if (p)
485 printf("remote_port=%u\n", (unsigned)p);
487 socknamelen = (socklen_t)sizeof(sockname);
488 if (!getsockname(0, &sockname.sa, &socknamelen) &&
489 iptoa(&sockname.sa, ipstr, sizeof(ipstr)) && ipstr[0]) {
490 uint16_t p = xsockport(&sockname.sa);
491 printf("server_addr=%s\n", ipstr);
492 if (p)
493 printf("server_port=%u\n", (unsigned)p);
496 return 0;
static size_t has_controls_or_spaces(const void *ptr, size_t zlen);

/* Parse the value of a "host=" parameter: the zlen bytes at ptr.
 *
 * On success returns 1 and sets:
 *   *host, *hlen  the host name inside the ptr buffer, lowercased in
 *                 place; the surrounding '[' ']' of an IPv6 literal are
 *                 excluded, as is one trailing '.' of a non-bracketed
 *                 name (unless that would leave it empty)
 *   *port, *plen  the port digits after ':' with leading zeros skipped,
 *                 an empty string (*plen == 0) if only ":" was present,
 *                 or NULL/0 if there was no ':' at all
 *
 * Returns 0 if any pointer argument is NULL, if the value contains
 * control or space bytes, if a '[' has no matching ']' or is followed
 * by anything other than ':', or if the port is not a decimal number
 * in the range 1..65535 of at most 5 characters.  The output arguments
 * may have been partially written when 0 is returned. */
static int parse_host_and_port(char *ptr, size_t zlen, char **host,
	size_t *hlen, const char **port, size_t *plen)
{
	const char *colon = NULL;

	if (!ptr || !host || !hlen || !port || !plen)
		return 0; /* bogus argument */
	if (has_controls_or_spaces(ptr, zlen))
		return 0; /* bogus host= value */

	if (zlen >= 1 && *ptr == '[') {
		/* IPv6 literal */
		const char *ebrkt = (const char *)memchr(ptr, ']', zlen);

		if (!ebrkt)
			return 0; /* missing closing ']' */
		*host = ptr + 1;
		*hlen = ebrkt - ptr - 1; /* yes, could be 0 */
		if ((size_t)(++ebrkt - ptr) < zlen) {
			/* only a ":" <portnum> suffix may follow the ']' */
			if (*ebrkt != ':')
				return 0; /* missing ':' after ']' */
			colon = ebrkt;
		}
	} else {
		colon = (const char *)memchr(ptr, ':', zlen);
		*host = ptr;
		*hlen = colon ? ((size_t)(colon - ptr)) : zlen;
		/* drop one trailing '.' unless it is the whole name */
		if (*hlen > 1 && ptr[*hlen - 1] == '.')
			--*hlen;
	}

	if (colon) {
		/* from here on zlen counts the bytes following the ':' */
		zlen = (ptr + zlen) - ++colon;
		if (zlen > 5)
			return 0; /* invalid port number */
		if (zlen == 0) {
			/* empty port */
			*port = colon;
			*plen = 0;
		} else {
			unsigned pval = 0;
			const char *pptr;
			size_t pl;

			/* skip leading zeros but always keep one character */
			while (zlen > 1 && *colon == '0') {
				++colon;
				--zlen;
			}
			pptr = colon;
			pl = zlen;
			for (; zlen; --zlen) {
				if (*colon < '0' || *colon > '9')
					return 0; /* invalid port number */
				pval = pval * 10 + (unsigned)(*colon++ - '0');
			}
			if (!pval || pval > 65535)
				return 0; /* invalid port number */
			*port = pptr;
			*plen = pl;
		}
	} else {
		*port = NULL;
		*plen = 0;
	}

	/* lowercase the host name in place */
	ptr = *host;
	for (zlen = *hlen; zlen; --zlen) {
		char c = *ptr;

		c = LC(c);
		*ptr++ = c;
	}
	return 1;
}
/* the tab character %x09 is not considered a control here
 *
 * Scans the first zlen bytes at _ptr for a byte below %x20 other than
 * tab.  Returns 0 if there is none (or _ptr is NULL), otherwise the
 * non-zero count of bytes remaining starting at the first such byte. */
static size_t has_controls(const void *_ptr, size_t zlen)
{
	const unsigned char *ptr = (const unsigned char *)_ptr;

	if (!ptr)
		return 0;
	while (zlen && (*ptr >= ' ' || *ptr == '\t')) {
		++ptr;
		--zlen;
	}
	return zlen;
}
/* Scans the first zlen bytes at _ptr for a byte that is %x20 (space)
 * or lower, tab included.  Returns 0 if there is none (or _ptr is
 * NULL), otherwise the non-zero count of bytes remaining starting at
 * the first such byte. */
static size_t has_controls_or_spaces(const void *_ptr, size_t zlen)
{
	const unsigned char *ptr = (const unsigned char *)_ptr;

	if (!ptr)
		return 0;
	while (zlen && *ptr > ' ') {
		++ptr;
		--zlen;
	}
	return zlen;
}
#if defined(HAVE_IPV6) && defined(HAVE_INET_NTOP)
/* The scope buffer must hold an interface name (IF_NAMESIZE bytes with
 * its NUL) and also the numeric fallback: 0xFFFFFFFF is 4294967295 which
 * needs 10 digits, so with the leading '%' and trailing NUL at least
 * 12 bytes (XIF_NAMESIZE+1) are required. */
#if IF_NAMESIZE < 11
#define XIF_NAMESIZE 11
#else
#define XIF_NAMESIZE IF_NAMESIZE
#endif
#endif

/* Format the address of ip as text into outstr (capacity s bytes).
 *
 * AF_INET addresses are written in dotted form and, when IPv6 support
 * is compiled in, AF_INET6 addresses in IPv6 text form (no brackets)
 * followed by "%<scope>" if the scope id is non-zero, where <scope> is
 * the interface name or, failing that, the decimal scope id.  The
 * scope suffix is truncated if it does not fit.
 *
 * Returns outstr.  The result is the empty string if ip is NULL, the
 * address family is not supported or the address does not fit.
 * Nothing is written if outstr is NULL or s is 0. */
static const char *iptoa(const struct sockaddr *ip, char *outstr, size_t s)
{
	if (outstr && s)
		*outstr = '\0';
	if (ip && outstr && s) {
		if (ip->sa_family == AF_INET) {
			const struct sockaddr_in *sin = (const struct sockaddr_in *)ip;

			if (!inet_ntop(AF_INET, &sin->sin_addr, outstr, (socklen_t)s))
				*outstr = '\0'; /* do not trust buffer after failure */
		}
#if defined(HAVE_IPV6) && defined(HAVE_INET_NTOP)
		else if (ip->sa_family == AF_INET6) {
			const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)ip;

			if (!inet_ntop(AF_INET6, sin6->sin6_addr.s6_addr, outstr, (socklen_t)s)) {
				*outstr = '\0'; /* do not trust buffer after failure */
			} else if (sin6->sin6_scope_id) {
				size_t outlen = strlen(outstr);
				char scope[XIF_NAMESIZE+1];
				char *ifname = if_indextoname(sin6->sin6_scope_id, scope+1);

				if (ifname) {
					/* name was stored at scope+1, just prefix it */
					scope[0] = '%';
				} else {
					/* This can happen on odd systems */
#ifdef HAVE_SNPRINTF
					snprintf(scope, sizeof(scope), "%%%u",
						(unsigned)(sin6->sin6_scope_id & 0xFFFFFFFF));
#else
					/* 0xFFFFFFFF requires 10 digits and XIF_NAMESIZE is >= 11
					 * therefore it's guaranteed to fit and not overflow */
					sprintf(scope, "%%%u", (unsigned)(sin6->sin6_scope_id & 0xFFFFFFFF));
#endif
				}
				scope[sizeof(scope)-1] = 0;
				if (outlen+1 < s)
					xstrncat(outstr, scope, s-outlen-1);
			}
		}
#endif /* HAVE_IPV6 */
	}
	return outstr;
}
/* Return the port number stored in the socket address ip converted to
 * host byte order.  Returns 0 if ip is NULL or its address family is
 * neither AF_INET nor (when IPv6 support is compiled in) AF_INET6. */
static uint16_t xsockport(const struct sockaddr *ip)
{
	if (!ip)
		return 0;
	if (ip->sa_family == AF_INET) {
		const struct sockaddr_in *sin = (const struct sockaddr_in *)ip;

		return (uint16_t)ntohs(sin->sin_port);
	}
#ifdef HAVE_IPV6
	if (ip->sa_family == AF_INET6) {
		const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)ip;

		return (uint16_t)ntohs(sin6->sin6_port);
	}
#endif
	return 0;
}