2 * PgBouncer - Lightweight connection pooler for PostgreSQL.
4 * Copyright (c) 2007-2009 Marko Kreen, Skype Technologies OÜ
6 * Permission to use, copy, modify, and/or distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20 * Random small utility functions
31 static int syslog_started
= 0;
32 static int log_fd
= 0;
/* One entry of the syslog facility lookup table: a facility name and the LOG_* code it stands for. */
34 struct FacName
{ const char *name
; int code
; };
35 static struct FacName facility_names
[] = {
38 { "authpriv", LOG_AUTHPRIV
},
40 { "daemon", LOG_DAEMON
},
42 { "local0", LOG_LOCAL0
},
43 { "local1", LOG_LOCAL1
},
44 { "local2", LOG_LOCAL2
},
45 { "local3", LOG_LOCAL3
},
46 { "local4", LOG_LOCAL4
},
47 { "local5", LOG_LOCAL5
},
48 { "local6", LOG_LOCAL6
},
49 { "local7", LOG_LOCAL7
},
54 void *zmalloc(size_t len
)
56 void *p
= malloc(len
);
66 static void render_time(char *buf
, int max
)
71 gettimeofday(&tv
, NULL
);
74 snprintf(buf
, max
, "%04d-%02d-%02d %02d:%02d:%02d.%03d",
75 tm
->tm_year
+ 1900, tm
->tm_mon
+ 1, tm
->tm_mday
,
76 tm
->tm_hour
, tm
->tm_min
, tm
->tm_sec
,
77 (int)(tv
.tv_usec
/ 1000));
80 static void close_syslog(void)
88 static void init_syslog(void)
91 int facility
= LOG_DAEMON
;
93 for (fn
= facility_names
; fn
->name
; fn
++)
94 if (strcmp(cf_syslog_facility
, fn
->name
) == 0) {
99 openlog(cf_jobname
, LOG_PID
, facility
);
103 static void write_syslog(const char *pfx
, const char *msg
)
105 int prio
= LOG_WARNING
;
111 case 'F': prio
= LOG_CRIT
; break;
112 case 'E': prio
= LOG_ERR
; break;
113 case 'W': prio
= LOG_WARNING
; break;
114 case 'I': prio
= LOG_INFO
; break;
115 case 'L': prio
= LOG_INFO
; break;
116 case 'D': prio
= LOG_DEBUG
; break;
117 case 'N': prio
= LOG_DEBUG
; break;
120 syslog(prio
, "%s", msg
);
123 void close_logfile(void)
132 static void write_logfile(const char *buf
, int len
)
136 int fd
= open(cf_logfile
, O_CREAT
| O_APPEND
| O_WRONLY
, 0644);
141 res
= safe_write(log_fd
, buf
, len
);
143 /* nothing to do here */
147 static void _log_write(const char *pfx
, const char *msg
)
152 int old_errno
= errno
;
154 render_time(tbuf
, sizeof(tbuf
));
155 len
= snprintf(buf
, sizeof(buf
), "%s %u %s %s\n",
156 tbuf
, (unsigned)getpid(), pfx
, msg
);
159 write_logfile(buf
, len
);
162 write_syslog(pfx
, msg
);
165 fprintf(stderr
, "%s", buf
);
167 if (old_errno
!= errno
)
171 static void _log(const char *pfx
, const char *fmt
, va_list ap
)
174 vsnprintf(buf
, sizeof(buf
), fmt
, ap
);
175 _log_write(pfx
, buf
);
178 void _fatal(const char *file
, int line
, const char *func
,
179 bool do_exit
, const char *fmt
, ...)
184 snprintf(buf
, sizeof(buf
),
185 "@%s:%d in function %s(): %s",
186 file
, line
, func
, fmt
);
189 _log("FATAL", buf
, ap
);
195 void _fatal_perror(const char *file
, int line
, const char *func
,
196 const char *fmt
, ...)
201 vsnprintf(buf
, sizeof(buf
), fmt
, ap
);
203 _fatal(file
, line
, func
, true, "%s: %s", buf
, strerror(errno
));
209 void log_level(const char *pfx
, const char *fmt
, ...)
218 * Logging about specific PgSocket
221 void slog_level(const char *pfx
, const PgSocket
*sock
, const char *fmt
, ...)
225 char *user
, *db
, *host
;
229 db
= sock
->pool
? sock
->pool
->db
->name
: "(nodb)";
230 user
= sock
->auth_user
? sock
->auth_user
->name
: "(nouser)";
231 if (sock
->remote_addr
.is_unix
) {
234 host
= inet_ntoa(sock
->remote_addr
.ip_addr
);
236 port
= sock
->remote_addr
.port
;
239 vsnprintf(buf1
, sizeof(buf1
), fmt
, ap
);
242 snprintf(buf2
, sizeof(buf2
), "%c-%p: %s/%s@%s:%d %s",
243 is_server_socket(sock
) ? 'S' : 'C',
244 sock
, db
, user
, host
, port
, buf1
);
246 _log_write(pfx
, buf2
);
251 * Wrappers for read/write/recv/send that survive interruptions.
254 int safe_read(int fd
, void *buf
, int len
)
258 res
= read(fd
, buf
, len
);
259 if (res
< 0 && errno
== EINTR
)
264 int safe_write(int fd
, const void *buf
, int len
)
268 res
= write(fd
, buf
, len
);
269 if (res
< 0 && errno
== EINTR
)
274 int safe_recv(int fd
, void *buf
, int len
, int flags
)
278 res
= recv(fd
, buf
, len
, flags
);
279 if (res
< 0 && errno
== EINTR
)
282 log_noise("safe_recv(%d, %d) = %s", fd
, len
, strerror(errno
));
283 else if (cf_verbose
> 2)
284 log_noise("safe_recv(%d, %d) = %d", fd
, len
, res
);
288 int safe_send(int fd
, const void *buf
, int len
, int flags
)
292 res
= send(fd
, buf
, len
, flags
);
293 if (res
< 0 && errno
== EINTR
)
296 log_noise("safe_send(%d, %d) = %s", fd
, len
, strerror(errno
));
297 else if (cf_verbose
> 2)
298 log_noise("safe_send(%d, %d) = %d", fd
, len
, res
);
302 int safe_close(int fd
)
306 /* according to the manpage, close() could be interruptible,
307 although it seems that at least on Linux this cannot happen */
311 /* Pending(this is necessary to wait for FIN of a client.) */
312 log_debug("closesocket(%d)",fd
);
313 res
= closesocket(fd
);
315 if (res
< 0 && errno
== EINTR
)
320 int safe_recvmsg(int fd
, struct msghdr
*msg
, int flags
)
324 res
= recvmsg(fd
, msg
, flags
);
325 if (res
< 0 && errno
== EINTR
)
328 log_warning("safe_recvmsg(%d, msg, %d) = %s", fd
, flags
, strerror(errno
));
329 else if (cf_verbose
> 2)
330 log_noise("safe_recvmsg(%d, msg, %d) = %d", fd
, flags
, res
);
334 int safe_sendmsg(int fd
, const struct msghdr
*msg
, int flags
)
337 int msgerr_count
= 0;
339 res
= sendmsg(fd
, msg
, flags
);
340 if (res
< 0 && errno
== EINTR
)
344 log_warning("safe_sendmsg(%d, msg[%d,%d], %d) = %s", fd
,
345 (int)msg
->msg_iov
[0].iov_len
,
346 (int)msg
->msg_controllen
,
347 flags
, strerror(errno
));
349 /* with ancillary data on blocking socket OSX returns
350 * EMSGSIZE instead of blocking. try to solve it by waiting */
351 if (errno
== EMSGSIZE
&& msgerr_count
< 20) {
352 struct timeval tv
= {1, 0};
353 log_warning("trying to sleep a bit");
354 select(0, NULL
, NULL
, NULL
, &tv
);
358 } else if (cf_verbose
> 2)
359 log_noise("safe_sendmsg(%d, msg, %d) = %d", fd
, flags
, res
);
/*
 * Render a socket address as a human-readable string for log messages.
 *
 * AF_INET addresses are formatted as "a.b.c.d:port", AF_UNIX addresses as
 * "unix:<path>"; any other address family yields "sa2str: unknown proto".
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call, so the result must be consumed (or copied) before calling again.
 */
static const char *sa2str(const struct sockaddr *sa)
{
	static char buf[256];

	/*
	 * The branches must be mutually exclusive: when the AF_UNIX test was
	 * a separate "if", an AF_INET address was formatted first and then
	 * overwritten by the "unknown proto" text.
	 */
	if (sa->sa_family == AF_INET) {
		const struct sockaddr_in *in = (const struct sockaddr_in *)sa;

		snprintf(buf, sizeof(buf), "%s:%d", inet_ntoa(in->sin_addr), ntohs(in->sin_port));
	} else if (sa->sa_family == AF_UNIX) {
		const struct sockaddr_un *un = (const struct sockaddr_un *)sa;

		/* sun_path is not guaranteed to be NUL-terminated, so bound the read */
		snprintf(buf, sizeof(buf), "unix:%.*s", (int)sizeof(un->sun_path), un->sun_path);
	} else {
		snprintf(buf, sizeof(buf), "sa2str: unknown proto");
	}
	return buf;
}
379 int safe_connect(int fd
, const struct sockaddr
*sa
, socklen_t sa_len
)
383 res
= connect(fd
, sa
, sa_len
);
384 if (res
< 0 && errno
== EINTR
)
386 if (res
< 0 && (errno
!= EINPROGRESS
|| cf_verbose
> 2))
387 log_noise("connect(%d, %s) = %s", fd
, sa2str(sa
), strerror(errno
));
388 else if (cf_verbose
> 2)
389 log_noise("connect(%d, %s) = %d", fd
, sa2str(sa
), res
);
393 int safe_accept(int fd
, struct sockaddr
*sa
, socklen_t
*sa_len_p
)
397 res
= accept(fd
, sa
, sa_len_p
);
398 if (res
< 0 && errno
== EINTR
)
401 log_noise("safe_accept(%d) = %s", fd
, strerror(errno
));
402 else if (cf_verbose
> 2)
403 log_noise("safe_accept(%d) = %d (%s)", fd
, res
, sa2str(sa
));
408 * Load a file into malloc()-ed C string.
411 char *load_file(const char *fn
)
419 log_error("%s: %s", fn
, strerror(errno
));
423 buf
= malloc(st
.st_size
+ 1);
425 log_error("%s: no mem", fn
);
429 if ((fd
= open(fn
, O_RDONLY
)) < 0) {
430 log_error("%s: %s", fn
, strerror(errno
));
434 if ((res
= safe_read(fd
, buf
, st
.st_size
)) < 0) {
435 log_error("%s: %s", fn
, strerror(errno
));
451 * PostgreSQL MD5 hashing.
454 static void hash2hex(const uint8_t *hash
, char *dst
)
457 static const char hextbl
[] = "0123456789abcdef";
458 for (i
= 0; i
< MD5_DIGEST_LENGTH
; i
++) {
459 *dst
++ = hextbl
[hash
[i
] >> 4];
460 *dst
++ = hextbl
[hash
[i
] & 15];
465 void pg_md5_encrypt(const char *part1
,
466 const char *part2
, size_t part2len
,
470 uint8_t hash
[MD5_DIGEST_LENGTH
];
473 MD5_Update(&ctx
, part1
, strlen(part1
));
474 MD5_Update(&ctx
, part2
, part2len
);
475 MD5_Final(hash
, &ctx
);
477 memcpy(dest
, "md5", 3);
478 hash2hex(hash
, dest
+ 3);
481 /* wrapper for getting random bytes */
482 void get_random_bytes(uint8_t *dest
, int len
)
485 for (i
= 0; i
< len
; i
++)
486 dest
[i
] = random() & 255;
490 * high-precision time
493 static usec_t
get_time_usec(void)
496 gettimeofday(&tv
, NULL
);
497 return (usec_t
)tv
.tv_sec
* USEC
+ tv
.tv_usec
;
501 * cache time, as we don't need sub-second precision
503 static usec_t time_cache
= 0;
505 usec_t
get_cached_time(void)
508 time_cache
= get_time_usec();
512 void reset_time_cache(void)
517 void socket_set_nonblocking(int fd
, int val
)
522 flags
= fcntl(fd
, F_GETFL
, 0);
524 fatal_perror("fcntl(F_GETFL)");
526 /* flip O_NONBLOCK */
530 flags
&= ~O_NONBLOCK
;
533 res
= fcntl(fd
, F_SETFL
, flags
);
535 fatal_perror("fcntl(F_SETFL)");
538 /* set needed socket options */
539 void tune_socket(int sock
, bool is_unix
)
544 /* close fd on exec */
545 res
= fcntl(sock
, F_SETFD
, FD_CLOEXEC
);
547 fatal_perror("fcntl FD_CLOEXEC");
549 /* when no data is available, return EAGAIN instead of blocking */
550 socket_set_nonblocking(sock
, 1);
553 /* disallow SIGPIPE, if possible */
555 res
= setsockopt(sock
, SOL_SOCKET
, SO_NOSIGPIPE
, &val
, sizeof(val
));
557 fatal_perror("setsockopt SO_NOSIGPIPE");
561 * Following options are for network sockets
566 /* the keepalive stuff needs some poking before enabling */
567 if (cf_tcp_keepalive
) {
568 /* turn on socket keepalive */
570 res
= setsockopt(sock
, SOL_SOCKET
, SO_KEEPALIVE
, &val
, sizeof(val
));
572 fatal_perror("setsockopt SO_KEEPALIVE");
574 /* set count of keepalive packets */
575 if (cf_tcp_keepcnt
> 0) {
576 val
= cf_tcp_keepcnt
;
577 res
= setsockopt(sock
, IPPROTO_TCP
, TCP_KEEPCNT
, &val
, sizeof(val
));
579 fatal_perror("setsockopt TCP_KEEPCNT");
581 /* how long the connection can stay idle before sending keepalive pkts */
582 if (cf_tcp_keepidle
) {
583 val
= cf_tcp_keepidle
;
584 res
= setsockopt(sock
, IPPROTO_TCP
, TCP_KEEPIDLE
, &val
, sizeof(val
));
586 fatal_perror("setsockopt TCP_KEEPIDLE");
588 /* time between packets */
589 if (cf_tcp_keepintvl
) {
590 val
= cf_tcp_keepintvl
;
591 res
= setsockopt(sock
, IPPROTO_TCP
, TCP_KEEPINTVL
, &val
, sizeof(val
));
593 fatal_perror("setsockopt TCP_KEEPINTVL");
597 if (cf_tcp_keepidle
) {
598 val
= cf_tcp_keepidle
;
599 res
= setsockopt(sock
, IPPROTO_TCP
, TCP_KEEPALIVE
, &val
, sizeof(val
));
601 fatal_perror("setsockopt TCP_KEEPALIVE");
607 /* set in-kernel socket buffer size */
608 if (cf_tcp_socket_buffer
) {
609 val
= cf_tcp_socket_buffer
;
610 res
= setsockopt(sock
, SOL_SOCKET
, SO_SNDBUF
, &val
, sizeof(val
));
612 fatal_perror("setsockopt SO_SNDBUF");
613 val
= cf_tcp_socket_buffer
;
614 res
= setsockopt(sock
, SOL_SOCKET
, SO_RCVBUF
, &val
, sizeof(val
));
616 fatal_perror("setsockopt SO_RCVBUF");
620 * Turn off kernel buffering, each send() will be one packet.
623 res
= setsockopt(sock
, IPPROTO_TCP
, TCP_NODELAY
, &val
, sizeof(val
));
625 fatal_perror("setsockopt TCP_NODELAY");
629 * Find a string in comma-separated list.
631 * It does not support space inside tokens.
633 bool strlist_contains(const char *liststr
, const char *str
)
635 int c
, len
= strlen(str
);
636 const char *p
, *listpos
= liststr
;
639 /* find string fragment, later check if actual token */
640 p
= strstr(listpos
, str
);
644 /* move listpos further */
646 /* survive len=0 and avoid unnecessary compare */
650 /* check previous symbol */
653 if (!isspace(c
) && c
!= ',')
657 /* check following symbol */
659 if (c
!= 0 && !isspace(c
) && c
!= ',')
665 const char *format_date(usec_t uval
)
667 static char buf
[128];
668 time_t tval
= uval
/ USEC
;
669 strftime(buf
, sizeof(buf
), "%Y-%m-%d %H:%M:%S", localtime(&tval
));
673 void fill_remote_addr(PgSocket
*sk
, int fd
, bool is_unix
)
675 PgAddr
*dst
= &sk
->remote_addr
;
676 struct sockaddr_in adr
;
677 socklen_t len
= sizeof(adr
);
680 dst
->ip_addr
.s_addr
= INADDR_ANY
;
682 dst
->is_unix
= is_unix
;
684 dst
->port
= cf_listen_port
;
686 err
= getpeername(fd
, (struct sockaddr
*)&adr
, &len
);
688 log_error("fill_remote_addr: getpeername(%d) = %s",
689 fd
, strerror(errno
));
691 dst
->ip_addr
= adr
.sin_addr
;
692 dst
->port
= ntohs(adr
.sin_port
);
697 void fill_local_addr(PgSocket
*sk
, int fd
, bool is_unix
)
699 PgAddr
*dst
= &sk
->local_addr
;
700 struct sockaddr_in adr
;
701 socklen_t len
= sizeof(adr
);
704 dst
->ip_addr
.s_addr
= INADDR_ANY
;
706 dst
->is_unix
= is_unix
;
708 dst
->port
= cf_listen_port
;
710 err
= getsockname(fd
, (struct sockaddr
*)&adr
, &len
);
712 log_error("fill_local_addr: getsockname(%d) = %s",
713 fd
, strerror(errno
));
715 dst
->ip_addr
= adr
.sin_addr
;
716 dst
->port
= ntohs(adr
.sin_port
);
722 * Error handling around evtimer_add() is nasty as the code
723 * may not be called again. As there is a fixed number of timers
724 * in pgbouncer, provide safe_evtimer_add() that stores args of
725 * failed calls in a static array and retries them later.
727 #define TIMER_BACKUP_SLOTS 10
733 static struct timer_slot timer_backup_list
[TIMER_BACKUP_SLOTS
];
734 static int timer_backup_used
= 0;
736 void safe_evtimer_add(struct event
*ev
, struct timeval
*tv
)
739 struct timer_slot
*ts
;
741 res
= evtimer_add(ev
, tv
);
745 if (timer_backup_used
>= TIMER_BACKUP_SLOTS
)
746 fatal_perror("TIMER_BACKUP_SLOTS full");
748 ts
= &timer_backup_list
[timer_backup_used
++];
753 void rescue_timers(void)
755 struct timer_slot
*ts
;
756 while (timer_backup_used
) {
757 ts
= &timer_backup_list
[timer_backup_used
- 1];
758 if (evtimer_add(ts
->ev
, &ts
->tv
) < 0)