2 * tcp_diag.c Module for monitoring TCP sockets.
4 * Version: $Id: tcp_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $
6 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
14 #include <linux/config.h>
15 #include <linux/module.h>
16 #include <linux/types.h>
17 #include <linux/fcntl.h>
18 #include <linux/random.h>
19 #include <linux/cache.h>
20 #include <linux/init.h>
21 #include <linux/time.h>
26 #include <net/inet_common.h>
28 #include <linux/inet.h>
29 #include <linux/stddef.h>
31 #include <linux/tcp_diag.h>
/*
 * Kernel-side netlink socket of this module. It is assigned in
 * tcpdiag_init() and passed to netlink_unicast()/netlink_dump_start().
 */
43 static struct sock
*tcpnl
;
/*
 * Append an attribute of type attrtype with attrlen payload bytes to skb
 * through __RTA_PUT and evaluate to a pointer to its payload (RTA_DATA).
 */
45 #define TCPDIAG_PUT(skb, attrtype, attrlen) \
46 RTA_DATA(__RTA_PUT(skb, attrtype, attrlen))
/*
 * tcpdiag_fill - append one TCPDIAG_GETSOCK netlink message describing sk.
 * @skb:         buffer the message is appended to
 * @sk:          socket to describe; in state TCP_TIME_WAIT it is accessed as
 *               a struct tcp_tw_bucket instead
 * @ext:         bitmask of requested extension attributes; attribute X is
 *               selected by bit (X - 1)
 * @pid, @seq:   forwarded to NLMSG_PUT for the message header
 * @nlmsg_flags: stored verbatim in the message header
 *
 * NOTE(review): this listing omits a number of original lines (the embedded
 * line numbers skip), among them braces, the declarations of r and nlh, the
 * return statements and the failure labels. The comments below describe only
 * what is visible.
 */
48 static int tcpdiag_fill(struct sk_buff
*skb
, struct sock
*sk
,
49 int ext
, u32 pid
, u32 seq
, u16 nlmsg_flags
)
51 struct inet_sock
*inet
= inet_sk(sk
);
52 struct tcp_sock
*tp
= tcp_sk(sk
);
55 struct tcp_info
*info
= NULL
;
56 struct tcpdiag_meminfo
*minfo
= NULL
;
/* Remember the current tail so the message length can be computed later. */
57 unsigned char *b
= skb
->tail
;
59 nlh
= NLMSG_PUT(skb
, pid
, seq
, TCPDIAG_GETSOCK
, sizeof(*r
));
60 nlh
->nlmsg_flags
= nlmsg_flags
;
/* Extension attributes are reserved only for sockets not in TIME_WAIT. */
62 if (sk
->sk_state
!= TCP_TIME_WAIT
) {
63 if (ext
& (1<<(TCPDIAG_MEMINFO
-1)))
64 minfo
= TCPDIAG_PUT(skb
, TCPDIAG_MEMINFO
, sizeof(*minfo
));
65 if (ext
& (1<<(TCPDIAG_INFO
-1)))
66 info
= TCPDIAG_PUT(skb
, TCPDIAG_INFO
, sizeof(*info
));
/* Congestion-control name: attribute sized for the string plus its NUL. */
68 if (ext
& (1<<(TCPDIAG_CONG
-1))) {
69 size_t len
= strlen(tp
->ca_ops
->name
);
70 strcpy(TCPDIAG_PUT(skb
, TCPDIAG_CONG
, len
+1),
74 r
->tcpdiag_family
= sk
->sk_family
;
75 r
->tcpdiag_state
= sk
->sk_state
;
77 r
->tcpdiag_retrans
= 0;
79 r
->id
.tcpdiag_if
= sk
->sk_bound_dev_if
;
/*
 * The cookie is the socket pointer split into two 32-bit words. The upper
 * word uses ">> 31 >> 1" rather than ">> 32" so the shift count stays below
 * the width of unsigned long when that type is 32 bits wide.
 */
80 r
->id
.tcpdiag_cookie
[0] = (u32
)(unsigned long)sk
;
81 r
->id
.tcpdiag_cookie
[1] = (u32
)(((unsigned long)sk
>> 31) >> 1);
/* TIME_WAIT: identity and timeout come from the tcp_tw_bucket. */
83 if (r
->tcpdiag_state
== TCP_TIME_WAIT
) {
84 struct tcp_tw_bucket
*tw
= (struct tcp_tw_bucket
*)sk
;
85 long tmo
= tw
->tw_ttd
- jiffies
;
89 r
->id
.tcpdiag_sport
= tw
->tw_sport
;
90 r
->id
.tcpdiag_dport
= tw
->tw_dport
;
91 r
->id
.tcpdiag_src
[0] = tw
->tw_rcv_saddr
;
92 r
->id
.tcpdiag_dst
[0] = tw
->tw_daddr
;
93 r
->tcpdiag_state
= tw
->tw_substate
;
/* Remaining time converted from jiffies to milliseconds, rounded up. */
95 r
->tcpdiag_expires
= (tmo
*1000+HZ
-1)/HZ
;
96 r
->tcpdiag_rqueue
= 0;
97 r
->tcpdiag_wqueue
= 0;
100 #ifdef CONFIG_IP_TCPDIAG_IPV6
101 if (r
->tcpdiag_family
== AF_INET6
) {
102 ipv6_addr_copy((struct in6_addr
*)r
->id
.tcpdiag_src
,
103 &tw
->tw_v6_rcv_saddr
);
104 ipv6_addr_copy((struct in6_addr
*)r
->id
.tcpdiag_dst
,
108 nlh
->nlmsg_len
= skb
->tail
- b
;
/* Non-TIME_WAIT sockets: identity comes from the inet_sock. */
112 r
->id
.tcpdiag_sport
= inet
->sport
;
113 r
->id
.tcpdiag_dport
= inet
->dport
;
114 r
->id
.tcpdiag_src
[0] = inet
->rcv_saddr
;
115 r
->id
.tcpdiag_dst
[0] = inet
->daddr
;
117 #ifdef CONFIG_IP_TCPDIAG_IPV6
118 if (r
->tcpdiag_family
== AF_INET6
) {
119 struct ipv6_pinfo
*np
= inet6_sk(sk
);
121 ipv6_addr_copy((struct in6_addr
*)r
->id
.tcpdiag_src
,
123 ipv6_addr_copy((struct in6_addr
*)r
->id
.tcpdiag_dst
,
/*
 * Milliseconds from now until the absolute jiffies value tmo, rounded up.
 * The argument and the whole expansion are parenthesised so the macro
 * evaluates as one unit whatever expression it is used in.
 */
128 #define EXPIRES_IN_MS(tmo) ((((tmo)-jiffies)*1000+HZ-1)/HZ)
/*
 * tcpdiag_timer as set below: 1 when the retransmit timer is pending,
 * 4 for the zero-window probe timer, 2 when sk_timer is pending, 0 otherwise.
 */
130 if (tp
->pending
== TCP_TIME_RETRANS
) {
131 r
->tcpdiag_timer
= 1;
132 r
->tcpdiag_retrans
= tp
->retransmits
;
133 r
->tcpdiag_expires
= EXPIRES_IN_MS(tp
->timeout
);
134 } else if (tp
->pending
== TCP_TIME_PROBE0
) {
135 r
->tcpdiag_timer
= 4;
136 r
->tcpdiag_retrans
= tp
->probes_out
;
137 r
->tcpdiag_expires
= EXPIRES_IN_MS(tp
->timeout
);
138 } else if (timer_pending(&sk
->sk_timer
)) {
139 r
->tcpdiag_timer
= 2;
140 r
->tcpdiag_retrans
= tp
->probes_out
;
141 r
->tcpdiag_expires
= EXPIRES_IN_MS(sk
->sk_timer
.expires
);
143 r
->tcpdiag_timer
= 0;
144 r
->tcpdiag_expires
= 0;
/* Queue sizes are expressed as sequence-number differences. */
148 r
->tcpdiag_rqueue
= tp
->rcv_nxt
- tp
->copied_seq
;
149 r
->tcpdiag_wqueue
= tp
->write_seq
- tp
->snd_una
;
150 r
->tcpdiag_uid
= sock_i_uid(sk
);
151 r
->tcpdiag_inode
= sock_i_ino(sk
);
/* NOTE(review): presumably guarded by "if (minfo)" on a line not shown. */
154 minfo
->tcpdiag_rmem
= atomic_read(&sk
->sk_rmem_alloc
);
155 minfo
->tcpdiag_wmem
= sk
->sk_wmem_queued
;
156 minfo
->tcpdiag_fmem
= sk
->sk_forward_alloc
;
157 minfo
->tcpdiag_tmem
= atomic_read(&sk
->sk_wmem_alloc
);
/* NOTE(review): presumably guarded by "if (info)" on a line not shown. */
161 tcp_get_info(sk
, info
);
/* Let the congestion-control module append its own data if it has a hook. */
163 if (sk
->sk_state
< TCP_TIME_WAIT
&& tp
->ca_ops
->get_info
)
164 tp
->ca_ops
->get_info(tp
, ext
, skb
);
166 nlh
->nlmsg_len
= skb
->tail
- b
;
/*
 * Trim skb back to the length it had on entry. NOTE(review): presumably the
 * failure path reached from NLMSG_PUT/__RTA_PUT; the labels are not shown.
 */
171 skb_trim(skb
, b
- skb
->data
);
/*
 * Socket lookup helpers used by tcpdiag_get_exact().
 * NOTE(review): the prototypes are truncated in this listing (trailing
 * parameters not shown). tcp_v6_lookup appears twice: as an extern under
 * CONFIG_IP_TCPDIAG_IPV6 and as a static inline whose body is not shown,
 * presumably the fallback for builds without that option.
 */
175 extern struct sock
*tcp_v4_lookup(u32 saddr
, u16 sport
, u32 daddr
, u16 dport
,
177 #ifdef CONFIG_IP_TCPDIAG_IPV6
178 extern struct sock
*tcp_v6_lookup(struct in6_addr
*saddr
, u16 sport
,
179 struct in6_addr
*daddr
, u16 dport
,
182 static inline struct sock
*tcp_v6_lookup(struct in6_addr
*saddr
, u16 sport
,
183 struct in6_addr
*daddr
, u16 dport
,
/*
 * tcpdiag_get_exact - answer a request for one specific socket.
 * @in_skb: the request skb; its NETLINK_CB pid is the reply destination
 * @nlh:    the request header; its payload is a struct tcpdiagreq
 *
 * Looks the socket up by the address/port pair in req->id, verifies the
 * optional cookie, builds a reply with tcpdiag_fill() and unicasts it back.
 * NOTE(review): declarations of sk/rep/err, the error returns and the exit
 * labels are on lines missing from this listing.
 */
190 static int tcpdiag_get_exact(struct sk_buff
*in_skb
, const struct nlmsghdr
*nlh
)
194 struct tcpdiagreq
*req
= NLMSG_DATA(nlh
);
/*
 * The request's dst/dport are passed in the lookup's saddr/sport positions
 * and its src/sport in the daddr/dport positions.
 */
197 if (req
->tcpdiag_family
== AF_INET
) {
198 sk
= tcp_v4_lookup(req
->id
.tcpdiag_dst
[0], req
->id
.tcpdiag_dport
,
199 req
->id
.tcpdiag_src
[0], req
->id
.tcpdiag_sport
,
202 #ifdef CONFIG_IP_TCPDIAG_IPV6
203 else if (req
->tcpdiag_family
== AF_INET6
) {
204 sk
= tcp_v6_lookup((struct in6_addr
*)req
->id
.tcpdiag_dst
, req
->id
.tcpdiag_dport
,
205 (struct in6_addr
*)req
->id
.tcpdiag_src
, req
->id
.tcpdiag_sport
,
/*
 * Cookie check: skipped when both request words are TCPDIAG_NOCOOKIE;
 * otherwise both words must match the socket pointer split the same way
 * tcpdiag_fill() encodes it. The action on mismatch is not shown.
 */
217 if ((req
->id
.tcpdiag_cookie
[0] != TCPDIAG_NOCOOKIE
||
218 req
->id
.tcpdiag_cookie
[1] != TCPDIAG_NOCOOKIE
) &&
219 ((u32
)(unsigned long)sk
!= req
->id
.tcpdiag_cookie
[0] ||
220 (u32
)((((unsigned long)sk
) >> 31) >> 1) != req
->id
.tcpdiag_cookie
[1]))
/*
 * Reply buffer sized for the message, both fixed-size extensions and
 * 64 spare bytes. NOTE(review): the 64 presumably covers the
 * congestion-control name attribute - confirm.
 */
224 rep
= alloc_skb(NLMSG_SPACE(sizeof(struct tcpdiagmsg
)+
225 sizeof(struct tcpdiag_meminfo
)+
226 sizeof(struct tcp_info
)+64), GFP_KERNEL
);
/* A result <= 0 from tcpdiag_fill() is handled on a line not shown. */
230 if (tcpdiag_fill(rep
, sk
, req
->tcpdiag_ext
,
231 NETLINK_CB(in_skb
).pid
,
232 nlh
->nlmsg_seq
, 0) <= 0)
235 err
= netlink_unicast(tcpnl
, rep
, NETLINK_CB(in_skb
).pid
, MSG_DONTWAIT
);
/*
 * TIME_WAIT entries are released with tcp_tw_put(); the release of other
 * sockets is on a line not shown.
 */
241 if (sk
->sk_state
== TCP_TIME_WAIT
)
242 tcp_tw_put((struct tcp_tw_bucket
*)sk
);
/*
 * bitstring_match - compare the leading bits of two 32-bit word arrays.
 * @a1, @a2: arrays to compare
 * @bits:    number of leading bits that take part in the comparison
 *
 * Whole 32-bit words are compared with memcmp(); a trailing partial word is
 * compared under a mask converted with htonl().
 * NOTE(review): the return statements and the loads of w1/w2 are on lines
 * missing from this listing.
 */
249 static int bitstring_match(const u32
*a1
, const u32
*a2
, int bits
)
/* Number of complete 32-bit words covered by the prefix. */
251 int words
= bits
>> 5;
/* words << 2 is the byte count of those complete words. */
256 if (memcmp(a1
, a2
, words
<< 2))
/*
 * Mask selecting the top bits of the partial word. NOTE(review): bits is
 * presumably reduced to the remainder (1..31) on a line not shown; with the
 * full value the shift count could be out of range.
 */
266 mask
= htonl((0xffffffff) << (32 - bits
));
268 if ((w1
^ w2
) & mask
)
/*
 * tcpdiag_bc_run - evaluate filter bytecode against one socket description.
 * @bc:    start of the bytecode, an array of struct tcpdiag_bc_op
 * @len:   bytecode length in bytes
 * @entry: addresses, ports, family and userlocks of the socket under test
 *
 * Each op computes a boolean "yes". NOTE(review): the loop that then advances
 * by op->yes or op->no, and the return statement, are on lines missing from
 * this listing.
 */
276 static int tcpdiag_bc_run(const void *bc
, int len
,
277 const struct tcpdiag_entry
*entry
)
281 const struct tcpdiag_bc_op
*op
= bc
;
/* For the port comparisons the operand is stored in the following op's no. */
289 case TCPDIAG_BC_S_GE
:
290 yes
= entry
->sport
>= op
[1].no
;
/*
 * S_LE is a source-port test like S_GE above, so it must compare sport.
 * It previously compared dport, duplicating D_LE.
 */
292 case TCPDIAG_BC_S_LE
:
293 yes
= entry
->sport
<= op
[1].no
;
295 case TCPDIAG_BC_D_GE
:
296 yes
= entry
->dport
>= op
[1].no
;
298 case TCPDIAG_BC_D_LE
:
299 yes
= entry
->dport
<= op
[1].no
;
/* AUTO matches sockets whose userlocks lack SOCK_BINDPORT_LOCK. */
301 case TCPDIAG_BC_AUTO
:
302 yes
= !(entry
->userlocks
& SOCK_BINDPORT_LOCK
);
/* Host conditions: a struct tcpdiag_hostcond directly follows the op. */
304 case TCPDIAG_BC_S_COND
:
305 case TCPDIAG_BC_D_COND
:
307 struct tcpdiag_hostcond
*cond
= (struct tcpdiag_hostcond
*)(op
+1);
/* A port of -1 in the condition disables the port comparison. */
310 if (cond
->port
!= -1 &&
311 cond
->port
!= (op
->code
== TCPDIAG_BC_S_COND
?
312 entry
->sport
: entry
->dport
)) {
/* The handling of a zero-length prefix is on a line not shown. */
317 if (cond
->prefix_len
== 0)
320 if (op
->code
== TCPDIAG_BC_S_COND
)
325 if (bitstring_match(addr
, cond
->addr
, cond
->prefix_len
))
/*
 * IPv6 socket against an IPv4 condition: the address must have the form
 * 0:0:0xffff:<v4 address>, and its last word is then compared to the prefix.
 */
327 if (entry
->family
== AF_INET6
&&
328 cond
->family
== AF_INET
) {
329 if (addr
[0] == 0 && addr
[1] == 0 &&
330 addr
[2] == htonl(0xffff) &&
331 bitstring_match(addr
+3, cond
->addr
, cond
->prefix_len
))
/*
 * valid_cc - helper of tcpdiag_bc_audit(), which calls it with the whole
 * bytecode, its length and cc = len - op->no.
 * NOTE(review): the body is not part of this listing; only the signature and
 * the initial op pointer are visible, so the exact check cannot be described.
 */
350 static int valid_cc(const void *bc
, int len
, int cc
)
353 const struct tcpdiag_bc_op
*op
= bc
;
/*
 * tcpdiag_bc_audit - sanity-check filter bytecode before it is run.
 * @bytecode:     start of the bytecode
 * @bytecode_len: its length in bytes
 *
 * Returns 0 when the walk ends with no bytes left over and -EINVAL
 * otherwise (see the final return).
 * NOTE(review): the loop header, the switch statement, the failure returns
 * and the advance of bc/len are on lines missing from this listing.
 */
367 static int tcpdiag_bc_audit(const void *bytecode
, int bytecode_len
)
369 const unsigned char *bc
= bytecode
;
370 int len
= bytecode_len
;
373 struct tcpdiag_bc_op
*op
= (struct tcpdiag_bc_op
*)bc
;
375 //printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len);
/* Conditional ops: both the yes and the no offset are range-checked. */
377 case TCPDIAG_BC_AUTO
:
378 case TCPDIAG_BC_S_COND
:
379 case TCPDIAG_BC_D_COND
:
380 case TCPDIAG_BC_S_GE
:
381 case TCPDIAG_BC_S_LE
:
382 case TCPDIAG_BC_D_GE
:
383 case TCPDIAG_BC_D_LE
:
/* The offset must lie in [4, len + 4]. */
384 if (op
->yes
< 4 || op
->yes
> len
+4)
387 if (op
->no
< 4 || op
->no
> len
+4)
/* Second half of a condition whose first half is not shown. */
390 !valid_cc(bytecode
, bytecode_len
, len
-op
->no
))
/* NOTE(review): presumably a different case label precedes this check. */
394 if (op
->yes
< 4 || op
->yes
> len
+4)
403 return len
== 0 ? 0 : -EINVAL
;
/*
 * tcpdiag_dump_sock - emit one socket during a dump, honouring the filter.
 * @skb: dump buffer being filled
 * @sk:  socket to report
 * @cb:  dump state; cb->nlh is the original request
 *
 * When the request carries data beyond the struct tcpdiagreq, that data is
 * treated as an rtattr holding filter bytecode and run against the socket.
 * The socket is then reported with tcpdiag_fill() and NLM_F_MULTI.
 * NOTE(review): the return taken when the filter rejects the socket is on a
 * line missing from this listing.
 */
406 static int tcpdiag_dump_sock(struct sk_buff
*skb
, struct sock
*sk
,
407 struct netlink_callback
*cb
)
409 struct tcpdiagreq
*r
= NLMSG_DATA(cb
->nlh
);
/* More than 4 bytes past the aligned request means an attribute follows. */
411 if (cb
->nlh
->nlmsg_len
> 4 + NLMSG_SPACE(sizeof(*r
))) {
412 struct tcpdiag_entry entry
;
413 struct rtattr
*bc
= (struct rtattr
*)(r
+ 1);
414 struct inet_sock
*inet
= inet_sk(sk
);
416 entry
.family
= sk
->sk_family
;
417 #ifdef CONFIG_IP_TCPDIAG_IPV6
418 if (entry
.family
== AF_INET6
) {
419 struct ipv6_pinfo
*np
= inet6_sk(sk
);
421 entry
.saddr
= np
->rcv_saddr
.s6_addr32
;
422 entry
.daddr
= np
->daddr
.s6_addr32
;
426 entry
.saddr
= &inet
->rcv_saddr
;
427 entry
.daddr
= &inet
->daddr
;
/* sport is taken from inet->num; dport is converted with ntohs(). */
429 entry
.sport
= inet
->num
;
430 entry
.dport
= ntohs(inet
->dport
);
431 entry
.userlocks
= sk
->sk_userlocks
;
433 if (!tcpdiag_bc_run(RTA_DATA(bc
), RTA_PAYLOAD(bc
), &entry
))
437 return tcpdiag_fill(skb
, sk
, r
->tcpdiag_ext
, NETLINK_CB(cb
->skb
).pid
,
438 cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
);
/*
 * tcpdiag_fill_req - append a message describing a pending connection request.
 * @skb: buffer the message is appended to
 * @sk:  the listening socket that owns the request
 * @req: the request_sock to describe
 * (further parameters are on a line missing from this listing; pid and seq
 * are used below)
 *
 * The entry is always reported in state TCP_SYN_RECV with NLM_F_MULTI set.
 * NOTE(review): braces, the declaration of tmo, the assignment of r, the
 * return statements and the failure label are not shown.
 */
441 static int tcpdiag_fill_req(struct sk_buff
*skb
, struct sock
*sk
,
442 struct request_sock
*req
,
445 const struct inet_request_sock
*ireq
= inet_rsk(req
);
446 struct inet_sock
*inet
= inet_sk(sk
);
/* Remember the current tail so the message length can be computed later. */
447 unsigned char *b
= skb
->tail
;
448 struct tcpdiagmsg
*r
;
449 struct nlmsghdr
*nlh
;
452 nlh
= NLMSG_PUT(skb
, pid
, seq
, TCPDIAG_GETSOCK
, sizeof(*r
));
453 nlh
->nlmsg_flags
= NLM_F_MULTI
;
456 r
->tcpdiag_family
= sk
->sk_family
;
457 r
->tcpdiag_state
= TCP_SYN_RECV
;
458 r
->tcpdiag_timer
= 1;
459 r
->tcpdiag_retrans
= req
->retrans
;
461 r
->id
.tcpdiag_if
= sk
->sk_bound_dev_if
;
/* The cookie is the request pointer split into two 32-bit words. */
462 r
->id
.tcpdiag_cookie
[0] = (u32
)(unsigned long)req
;
463 r
->id
.tcpdiag_cookie
[1] = (u32
)(((unsigned long)req
>> 31) >> 1);
/* Time left until the request expires, in jiffies. */
465 tmo
= req
->expires
- jiffies
;
/* Local port comes from the listener, the remaining identity from the request. */
469 r
->id
.tcpdiag_sport
= inet
->sport
;
470 r
->id
.tcpdiag_dport
= ireq
->rmt_port
;
471 r
->id
.tcpdiag_src
[0] = ireq
->loc_addr
;
472 r
->id
.tcpdiag_dst
[0] = ireq
->rmt_addr
;
/*
 * Terminated with ';' (previously ','), so this assignment is its own
 * statement instead of being chained to the next one by the comma operator.
 */
473 r
->tcpdiag_expires
= jiffies_to_msecs(tmo
);
474 r
->tcpdiag_rqueue
= 0;
475 r
->tcpdiag_wqueue
= 0;
476 r
->tcpdiag_uid
= sock_i_uid(sk
);
477 r
->tcpdiag_inode
= 0;
478 #ifdef CONFIG_IP_TCPDIAG_IPV6
479 if (r
->tcpdiag_family
== AF_INET6
) {
480 ipv6_addr_copy((struct in6_addr
*)r
->id
.tcpdiag_src
,
481 &tcp6_rsk(req
)->loc_addr
);
482 ipv6_addr_copy((struct in6_addr
*)r
->id
.tcpdiag_dst
,
483 &tcp6_rsk(req
)->rmt_addr
);
486 nlh
->nlmsg_len
= skb
->tail
- b
;
/*
 * Trim skb back to the length it had on entry. NOTE(review): presumably the
 * failure path reached from NLMSG_PUT; the label is not shown.
 */
491 skb_trim(skb
, b
- skb
->data
);
/*
 * tcpdiag_dump_reqs - dump the pending connection requests of one listener.
 * @skb: dump buffer being filled
 * @sk:  listening socket whose SYN table is walked
 * @cb:  dump state; cb->args[4] holds the request index to resume from
 *
 * The SYN table is walked with syn_wait_lock read-held. Each request is
 * matched against the requested dport and the optional bytecode, then
 * emitted with tcpdiag_fill_req().
 * NOTE(review): declarations of j, s_j and err, the continue/goto statements
 * and the return are on lines missing from this listing; s_j is presumably
 * restored from another cb->args slot.
 */
495 static int tcpdiag_dump_reqs(struct sk_buff
*skb
, struct sock
*sk
,
496 struct netlink_callback
*cb
)
498 struct tcpdiag_entry entry
;
499 struct tcpdiagreq
*r
= NLMSG_DATA(cb
->nlh
);
500 struct tcp_sock
*tp
= tcp_sk(sk
);
501 struct listen_sock
*lopt
;
502 struct rtattr
*bc
= NULL
;
503 struct inet_sock
*inet
= inet_sk(sk
);
505 int reqnum
, s_reqnum
;
/* Resume point within the current hash chain, saved by a previous pass. */
509 s_reqnum
= cb
->args
[4];
514 entry
.family
= sk
->sk_family
;
516 read_lock_bh(&tp
->accept_queue
.syn_wait_lock
);
518 lopt
= tp
->accept_queue
.listen_opt
;
/* No listen state or an empty queue; the action taken is not shown. */
519 if (!lopt
|| !lopt
->qlen
)
/* Attribute data after the request means filter bytecode is present. */
522 if (cb
->nlh
->nlmsg_len
> 4 + NLMSG_SPACE(sizeof(*r
))) {
523 bc
= (struct rtattr
*)(r
+ 1);
524 entry
.sport
= inet
->num
;
525 entry
.userlocks
= sk
->sk_userlocks
;
528 for (j
= s_j
; j
< TCP_SYNQ_HSIZE
; j
++) {
529 struct request_sock
*req
, *head
= lopt
->syn_table
[j
];
532 for (req
= head
; req
; reqnum
++, req
= req
->dl_next
) {
533 struct inet_request_sock
*ireq
= inet_rsk(req
);
/* Entries below the resume index were handled by an earlier pass. */
535 if (reqnum
< s_reqnum
)
537 if (r
->id
.tcpdiag_dport
!= ireq
->rmt_port
&&
/* Filter addresses: the IPv6 request addresses when the family is AF_INET6. */
543 #ifdef CONFIG_IP_TCPDIAG_IPV6
544 (entry
.family
== AF_INET6
) ?
545 tcp6_rsk(req
)->loc_addr
.s6_addr32
:
549 #ifdef CONFIG_IP_TCPDIAG_IPV6
550 (entry
.family
== AF_INET6
) ?
551 tcp6_rsk(req
)->rmt_addr
.s6_addr32
:
554 entry
.dport
= ntohs(ireq
->rmt_port
);
556 if (!tcpdiag_bc_run(RTA_DATA(bc
),
557 RTA_PAYLOAD(bc
), &entry
))
561 err
= tcpdiag_fill_req(skb
, sk
, req
,
562 NETLINK_CB(cb
->skb
).pid
,
/* Save the position so a later pass can resume from this request. */
566 cb
->args
[4] = reqnum
;
575 read_unlock_bh(&tp
->accept_queue
.syn_wait_lock
);
/*
 * tcpdiag_dump - netlink dump callback that walks the TCP socket tables.
 * @skb: dump buffer being filled
 * @cb:  dump state; cb->args[0] selects the phase and cb->args[2] the
 *       position within the current chain
 *
 * Phase 0 (cb->args[0] == 0) walks tcp_listening_hash, reporting listeners
 * and, when TCPF_SYN_RECV is requested, their pending requests. The next
 * phase walks tcp_ehash under each bucket's read lock, followed by the
 * chain at index i + tcp_ehash_size when TCPF_TIME_WAIT is requested.
 * NOTE(review): declarations of i, num, s_i, s_num and sk, the
 * continue/goto statements, the saving of the cursor and the return are on
 * lines missing from this listing.
 */
580 static int tcpdiag_dump(struct sk_buff
*skb
, struct netlink_callback
*cb
)
584 struct tcpdiagreq
*r
= NLMSG_DATA(cb
->nlh
);
587 s_num
= num
= cb
->args
[2];
589 if (cb
->args
[0] == 0) {
/* Nothing to do in this phase unless LISTEN or SYN_RECV is requested. */
590 if (!(r
->tcpdiag_states
&(TCPF_LISTEN
|TCPF_SYN_RECV
)))
593 for (i
= s_i
; i
< TCP_LHTABLE_SIZE
; i
++) {
595 struct hlist_node
*node
;
598 sk_for_each(sk
, node
, &tcp_listening_hash
[i
]) {
599 struct inet_sock
*inet
= inet_sk(sk
);
606 if (r
->id
.tcpdiag_sport
!= inet
->sport
&&
610 if (!(r
->tcpdiag_states
&TCPF_LISTEN
) ||
611 r
->id
.tcpdiag_dport
||
615 if (tcpdiag_dump_sock(skb
, sk
, cb
) < 0) {
621 if (!(r
->tcpdiag_states
&TCPF_SYN_RECV
))
624 if (tcpdiag_dump_reqs(skb
, sk
, cb
) < 0) {
/* Reset the cursors once the listening table has been walked. */
642 s_i
= num
= s_num
= 0;
/* Skip the ehash walk when only LISTEN/SYN_RECV states are requested. */
645 if (!(r
->tcpdiag_states
&~(TCPF_LISTEN
|TCPF_SYN_RECV
)))
648 for (i
= s_i
; i
< tcp_ehash_size
; i
++) {
649 struct tcp_ehash_bucket
*head
= &tcp_ehash
[i
];
651 struct hlist_node
*node
;
656 read_lock_bh(&head
->lock
);
659 sk_for_each(sk
, node
, &head
->chain
) {
660 struct inet_sock
*inet
= inet_sk(sk
);
/* Report only sockets whose state bit is set in the requested mask. */
664 if (!(r
->tcpdiag_states
& (1 << sk
->sk_state
)))
666 if (r
->id
.tcpdiag_sport
!= inet
->sport
&&
669 if (r
->id
.tcpdiag_dport
!= inet
->dport
&& r
->id
.tcpdiag_dport
)
/* Drop the bucket lock before leaving the loop on a fill failure. */
671 if (tcpdiag_dump_sock(skb
, sk
, cb
) < 0) {
672 read_unlock_bh(&head
->lock
);
/* This chain sits tcp_ehash_size entries after the bucket just walked. */
679 if (r
->tcpdiag_states
&TCPF_TIME_WAIT
) {
680 sk_for_each(sk
, node
,
681 &tcp_ehash
[i
+ tcp_ehash_size
].chain
) {
682 struct inet_sock
*inet
= inet_sk(sk
);
686 if (r
->id
.tcpdiag_sport
!= inet
->sport
&&
689 if (r
->id
.tcpdiag_dport
!= inet
->dport
&&
692 if (tcpdiag_dump_sock(skb
, sk
, cb
) < 0) {
693 read_unlock_bh(&head
->lock
);
700 read_unlock_bh(&head
->lock
);
/*
 * tcpdiag_dump_done - completion hook for a dump.
 * NOTE(review): the body is not part of this listing and the visible
 * netlink_dump_start() call is truncated, so neither its behaviour nor its
 * registration can be confirmed from here.
 */
709 static int tcpdiag_dump_done(struct netlink_callback
*cb
)
/*
 * tcpdiag_rcv_msg - validate and dispatch one request message.
 * @skb: skb holding the request
 * @nlh: header of the request
 *
 * Checks for NLM_F_REQUEST, type TCPDIAG_GETSOCK and a payload large enough
 * for a struct tcpdiagreq. NLM_F_DUMP requests have their optional bytecode
 * attribute audited and then start a dump; all others go to
 * tcpdiag_get_exact().
 * NOTE(review): the return statements of the failed checks are on lines
 * missing from this listing.
 */
715 static __inline__
int
716 tcpdiag_rcv_msg(struct sk_buff
*skb
, struct nlmsghdr
*nlh
)
718 if (!(nlh
->nlmsg_flags
&NLM_F_REQUEST
))
721 if (nlh
->nlmsg_type
!= TCPDIAG_GETSOCK
)
724 if (NLMSG_LENGTH(sizeof(struct tcpdiagreq
)) > skb
->len
)
727 if (nlh
->nlmsg_flags
&NLM_F_DUMP
) {
/* Data beyond the request structure is expected to be a bytecode attribute. */
728 if (nlh
->nlmsg_len
> 4 + NLMSG_SPACE(sizeof(struct tcpdiagreq
))) {
729 struct rtattr
*rta
= (struct rtattr
*)(NLMSG_DATA(nlh
) + sizeof(struct tcpdiagreq
));
/*
 * The attribute must be TCPDIAG_REQ_BYTECODE and must fit inside the
 * message; a middle clause of this condition is not shown.
 */
730 if (rta
->rta_type
!= TCPDIAG_REQ_BYTECODE
||
732 rta
->rta_len
> nlh
->nlmsg_len
- NLMSG_SPACE(sizeof(struct tcpdiagreq
)))
/* A non-zero audit result is handled on a line not shown. */
734 if (tcpdiag_bc_audit(RTA_DATA(rta
), RTA_PAYLOAD(rta
)))
737 return netlink_dump_start(tcpnl
, skb
, nlh
,
741 return tcpdiag_get_exact(skb
, nlh
);
/*
 * tcpdiag_rcv_skb - process the netlink message at the start of an skb.
 * @skb: received skb
 *
 * Skbs shorter than an empty netlink message are ignored. Otherwise the
 * header is length-checked, the message is handed to tcpdiag_rcv_msg(), and
 * an ack is sent when that returned non-zero or the sender set NLM_F_ACK.
 * NOTE(review): the declaration of err, the action on a bad length and the
 * release of the skb are on lines missing from this listing.
 */
749 static inline void tcpdiag_rcv_skb(struct sk_buff
*skb
)
752 struct nlmsghdr
* nlh
;
754 if (skb
->len
>= NLMSG_SPACE(0)) {
755 nlh
= (struct nlmsghdr
*)skb
->data
;
/* Reject a claimed length below the header size or beyond the skb data. */
756 if (nlh
->nlmsg_len
< sizeof(*nlh
) || skb
->len
< nlh
->nlmsg_len
)
758 err
= tcpdiag_rcv_msg(skb
, nlh
);
759 if (err
|| nlh
->nlmsg_flags
& NLM_F_ACK
)
760 netlink_ack(skb
, nlh
, err
);
/*
 * tcpdiag_rcv - input handler passed to netlink_kernel_create().
 * @sk:  the kernel netlink socket
 * @len: not used in the visible lines
 *
 * Processes at most as many skbs as were queued on entry: the queue length
 * is sampled once and the loop also stops when the queue runs empty.
 * NOTE(review): the declaration of skb is on a line missing from this listing.
 */
764 static void tcpdiag_rcv(struct sock
*sk
, int len
)
767 unsigned int qlen
= skb_queue_len(&sk
->sk_receive_queue
);
769 while (qlen
-- && (skb
= skb_dequeue(&sk
->sk_receive_queue
))) {
770 tcpdiag_rcv_skb(skb
);
/*
 * tcpdiag_init - module entry point, registered through module_init().
 * Creates the NETLINK_TCPDIAG kernel socket with tcpdiag_rcv() as its input
 * handler and stores it in tcpnl.
 * NOTE(review): the check of the result and the return value are on lines
 * missing from this listing.
 */
775 static int __init
tcpdiag_init(void)
777 tcpnl
= netlink_kernel_create(NETLINK_TCPDIAG
, tcpdiag_rcv
);
/*
 * tcpdiag_exit - module exit point, registered through module_exit().
 * Releases the socket behind the kernel netlink sock stored in tcpnl.
 */
783 static void __exit
tcpdiag_exit(void)
785 sock_release(tcpnl
->sk_socket
);
/* Module registration: load/unload hooks and the licence declaration. */
788 module_init(tcpdiag_init
);
789 module_exit(tcpdiag_exit
);
790 MODULE_LICENSE("GPL");