PY3: fix "TabError: inconsistent use of tabs and spaces"
[Samba.git] / script / traffic_summary.pl
blob05c1cf03702b34264e46fbd713c6ffe26d08abda
1 #! /usr/bin/perl
3 # Summarise tshark pdml output into a form suitable for the load test tool
5 # Copyright (C) Catalyst.Net Ltd 2017
7 # Catalyst.Net's contribution was written by Gary Lockyer
8 # <gary@catalyst.net.nz>.
10 # This program is free software; you can redistribute it and/or modify
11 # it under the terms of the GNU General Public License as published by
12 # the Free Software Foundation; either version 3 of the License, or
13 # (at your option) any later version.
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU General Public License for more details.
20 # You should have received a copy of the GNU General Public License
21 # along with this program. If not, see <http://www.gnu.org/licenses/>.
24 use warnings;
25 use strict;
27 use Getopt::Long;
28 use Pod::Usage;
30 BEGIN {
31 unless (eval "require XML::Twig") {
32 warn "traffic_summary requires the perl module XML::Twig\n" .
33 "on Ubuntu/Debian releases run\n".
34 " sudo apt install libxml-twig-perl \n".
35 "or install from CPAN\n".
36 "\nThe reported error was:\n$@";
37 exit(1);
42 my %ip_map; # Map of IP address to sequence number
43 my $ip_sequence = 0; # count of unique IP addresses seen
46 my $timestamp; # Packet timestamp
47 my $stream; # Wireshark stream number
48 my $ip_proto; # IP protocol (IANA protocl number)
49 my $source; # source IP address
50 my $dest; # destination address
51 my $proto; # application protocol name
52 my $description; # protocol specific description
53 my %proto_data; # protocol specific data captured for the current packet
54 my $malformed_packet; # Indicates the current packet has errors
55 my $ldap_filter; # cleaned ldap filter
56 my $ldap_attributes; # attributes requested in an ldap query
60 # Dispatch table mapping the wireshark variables of interest to the
61 # functions responsible for processing them
62 my %field_dispatch_table = (
63 'timestamp' => \&timestamp,
64 'ip.src' => \&ip_src,
65 'ipv6.src' => \&ip_src,
66 'ip.dst' => \&ip_dst,
67 'ipv6.dst' => \&ip_dst,
68 'ip.proto' => \&ip_proto,
69 'udp.stream' => \&stream,
70 'tcp.stream' => \&stream,
71 'dns.flags.opcode' => \&field_data,
72 'dns.flags.response' => \&field_data,
73 'netlogon.opnum' => \&field_data,
74 'kerberos.msg_type' => \&field_data,
75 'smb.cmd' => \&field_data,
76 'smb2.cmd' => \&field_data,
77 'ldap.protocolOp' => \&field_data,
78 'gss-api.OID' => \&field_data,
79 'ldap.gssapi_encrypted_payload' => \&field_data,
80 'ldap.baseObject' => \&field_data,
81 'ldap.scope' => \&field_data,
82 'ldap.AttributeDescription' => \&ldap_attribute,
83 'ldap.modification_element' => \&ldap_add_modify,
84 'ldap.AttributeList_item_element' => \&ldap_add_modify,
85 'ldap.operation' => \&field_data,
86 'ldap.authentication' => \&field_data,
87 'lsarpc.opnum' => \&field_data,
88 'samr.opnum' => \&field_data,
89 'dcerpc.pkt_type' => \&field_data,
90 'epm.opnum' => \&field_data,
91 'dnsserver.opnum' => \&field_data,
92 'drsuapi.opnum' => \&field_data,
93 'browser.command' => \&field_data,
94 'smb_netlogon.command' => \&field_data,
95 'srvsvc.opnum' => \&field_data,
96 'nbns.flags.opcode' => \&field_data,
97 'nbns.flags.response' => \&field_data,
98 '_ws.expert.message' => \&field_data,
101 # Dispatch table mapping protocols to the routine responsible for formatting
102 # their output. Protocols not in this table are ignored.
104 my %proto_dispatch_table = (
105 'dns' => sub { return format_opcode( 'dns.flags.response')},
106 'rpc_netlogon' => sub { return format_opcode( 'netlogon.opnum')},
107 'kerberos' => \&format_kerberos,
108 'smb' => sub { return format_opcode( 'smb.cmd')},
109 'smb2' => sub { return format_opcode( 'smb2.cmd')},
110 'ldap' => \&format_ldap,
111 'cldap' => \&format_ldap,
112 'lsarpc' => sub { return format_opcode( 'lsarpc.opnum')},
113 'samr' => sub { return format_opcode( 'samr.opnum')},
114 'dcerpc' => sub { return format_opcode( 'dcerpc.pkt_type')},
115 'epm' => sub { return format_opcode( 'epm.opnum')},
116 'dnsserver' => sub { return format_opcode( 'dnsserver.opnum')},
117 'drsuapi' => sub { return format_opcode( 'drsuapi.opnum')},
118 'browser' => sub { return format_opcode( 'browser.command')},
119 'smb_netlogon' => sub { return format_opcode( 'smb_netlogon.command')},
120 'srvsvc' => sub { return format_opcode( 'srvsvc.opnum')},
121 'nbns' => sub { return format_opcode( 'nbns.flags.response')},
124 # XPath entry to extract the kerberos cname
125 my $kerberos_cname_path =
126 'packet/proto/field[@name = "kerberos.as_req_element"]'
127 . '/field[@name = "kerberos.req_body_element"]'
128 . '/field[@name = "kerberos.cname_element"]'
129 . '/field[@name = "kerberos.name_string"]'
130 . '/field[@name = "kerberos.KerberosString"]';
132 # XPath entry to extract the ldap filter
133 my $ldap_filter_path =
134 'field[@name = "ldap.searchRequest_element"]/field';
137 # Create an XML Twig parser and register the event handlers.
139 my $t = XML::Twig->new(
140 start_tag_handlers => {
141 'packet' => \&packet_start,
143 twig_handlers => {
144 'packet' => \&packet,
145 'proto' => \&protocol,
146 'field' => \&field,
147 $kerberos_cname_path => \&kerberos_cname,
148 $ldap_filter_path => \&ldap_filter,
152 #------------------------------------------------------------------------------
153 # Main loop
155 #------------------------------------------------------------------------------
156 my $help = 0;
157 GetOptions( 'help|h' => \$help) or pod2usage(2);
158 pod2usage(1) if $help;
160 if (@ARGV) {
161 foreach my $file (@ARGV) {
162 eval {
163 $t->parsefile( $file);
165 if ($@) {
166 print STDERR "Unable to process $file, ".
167 "did you run tshark with the -T pdml option?";
170 } else {
171 pod2usage(1) if -t STDIN;
172 eval {
173 $t->parse( \*STDIN);
175 if ($@) {
176 print STDERR "Unable to process input, ".
177 "are you running tshark with the -T pdml option?";
182 #------------------------------------------------------------------------------
183 # New packet detected reset the globals
184 #------------------------------------------------------------------------------
185 sub packet_start
187 my ($t, $packet) = @_;
188 $timestamp = "";
189 $stream = "";
190 $ip_proto = "";
191 $source = "";
192 $dest = "";
193 $description = undef;
194 %proto_data = ();
195 $malformed_packet = undef;
196 $ldap_filter = "";
197 $ldap_attributes = "";
200 #------------------------------------------------------------------------------
201 # Complete packet element parsed from the XML feed
202 # output the protocol summary if required
203 #------------------------------------------------------------------------------
204 sub packet
206 my ($t, $packet) = @_;
208 my $data;
209 if (exists $proto_dispatch_table{$proto}) {
210 if ($malformed_packet) {
211 $data = "\t\t** Malformed Packet ** " . ($proto_data{'_ws.expert.message.show'} || '');
212 } else {
213 my $rsub = $proto_dispatch_table{$proto};
214 $data = &$rsub();
216 print "$timestamp\t$ip_proto\t$stream\t$source\t$dest\t$proto\t$data\n";
218 $t->purge;
221 #------------------------------------------------------------------------------
222 # Complete protocol element parsed from the XML input
223 # Update the protocol name
224 #------------------------------------------------------------------------------
225 sub protocol
227 my ($t, $protocol) = @_;
228 if ($protocol->{att}->{showname}) {
230 # Tag a packet as malformed if the protocol is _ws.malformed
231 # and the hide attribute is not 'yes'
232 if ($protocol->{att}->{name} eq '_ws.malformed'
233 && !($protocol->{att}->{hide} && $protocol->{att}->{hide} eq 'yes')
235 $malformed_packet = 1;
237 # Don't set the protocol name if it's a wireshark malformed
238 # protocol entry, or the packet was truncated during capture
239 my $p = $protocol->{att}->{name};
240 if ($p ne '_ws.malformed' && $p ne '_ws.short') {
241 $proto = $p;
246 #------------------------------------------------------------------------------
247 # Complete field element parsed, extract any data of interest
248 #------------------------------------------------------------------------------
249 sub field
251 my ($t, $field) = @_;
252 my $name = $field->{att}->{name};
254 # Only process the field if it has a corresponding entry in
255 # %field_dispatch_table
256 if (exists $field_dispatch_table{$name}) {
257 my $rsub = $field_dispatch_table{$name};
258 &$rsub( $field);
262 #------------------------------------------------------------------------------
263 # Process a timestamp field element
264 #------------------------------------------------------------------------------
265 sub timestamp
267 my ($field) = @_;
268 $timestamp = $field->{att}->{value};
271 #------------------------------------------------------------------------------
272 # Process a wireshark stream element, used to group a sequence of requests
273 # and responses between two IP addresses
274 #------------------------------------------------------------------------------
275 sub stream
277 my ($field) = @_;
278 $stream = $field->{att}->{show};
281 #------------------------------------------------------------------------------
282 # Process a source ip address field, mapping the IP address to it's
283 # corresponding sequence number.
284 #------------------------------------------------------------------------------
285 sub ip_src
287 my ($field) = @_;
288 $source = map_ip( $field);
291 #------------------------------------------------------------------------------
292 # Process a destination ip address field, mapping the IP address to it's
293 # corresponding sequence number.
294 #------------------------------------------------------------------------------
295 sub ip_dst
297 my ($field) = @_;
298 $dest = map_ip( $field);
301 #------------------------------------------------------------------------------
302 # Process an ip protocol element, extracting IANA protocol number
303 #------------------------------------------------------------------------------
304 sub ip_proto
306 my ($field) = @_;
307 $ip_proto = $field->{att}->{value};
312 #------------------------------------------------------------------------------
313 # Extract an ldap attribute and append it to ldap_attributes
314 #------------------------------------------------------------------------------
315 sub ldap_attribute
317 my ($field) = @_;
318 my $attribute = $field->{att}->{show};
320 if (defined $attribute) {
321 $ldap_attributes .= "," if $ldap_attributes;
322 $ldap_attributes .= $attribute;
326 #------------------------------------------------------------------------------
327 # Process a field element, extract the value, show and showname attributes
328 # and store them in the %proto_data hash.
330 #------------------------------------------------------------------------------
331 sub field_data
333 my ($field) = @_;
334 my $name = $field->{att}->{name};
335 $proto_data{$name.'.value'} = $field->{att}->{value};
336 $proto_data{$name.'.show'} = $field->{att}->{show};
337 $proto_data{$name.'.showname'} = $field->{att}->{showname};
340 #------------------------------------------------------------------------------
341 # Process a kerberos cname element, if the cname ends with a $ it's a machine
342 # name. Otherwise it's a user name.
344 #------------------------------------------------------------------------------
345 sub kerberos_cname
347 my ($t, $field) = @_;
348 my $cname = $field->{att}->{show};
349 my $type;
350 if( $cname =~ /\$$/) {
351 $type = 'machine';
352 } else {
353 $type = 'user';
355 $proto_data{'kerberos.cname.type'} = $type;
359 #------------------------------------------------------------------------------
360 # Process an ldap filter, remove the values but keep the attribute names
361 #------------------------------------------------------------------------------
362 sub ldap_filter
364 my ($t, $field) = @_;
365 if ( $field->{att}->{show} && $field->{att}->{show} =~ /^Filter:/) {
366 my $filter = $field->{att}->{show};
368 # extract and save the objectClass to keep the value
369 my @object_classes;
370 while ( $filter =~ m/\((objectClass=.*?)\)/g) {
371 push @object_classes, $1;
374 # extract and save objectCategory and the top level value
375 my @object_categories;
376 while ( $filter =~ m/(\(objectCategory=.*?,|\(objectCategory=.*?\))/g
378 push @object_categories, $1;
381 # Remove all the values from the attributes
382 # Input
383 # Filter: (nCName=DC=DomainDnsZones,DC=sub1,DC=ad,DC=rh,DC=at,DC=net)
384 # Output
385 # (nCName)
386 $filter =~ s/^Filter:\s*//; # Remove the 'Filter: ' prefix
387 $filter =~ s/=.*?\)/\)/g; # Remove from the = to the first )
389 # Now restore the parts of objectClass and objectCategory that are being
390 # retained
392 for my $cat (@object_categories) {
393 $filter =~ s/\(objectCategory\)/$cat/;
396 for my $class (@object_classes) {
397 $filter =~ s/\(objectClass\)/($class)/;
400 $ldap_filter = $filter;
401 } else {
402 # Ok not an ldap filter so call the default field handler
403 field( $t, $field);
408 #------------------------------------------------------------------------------
409 # Extract the attributes from ldap modification and add requests
410 #------------------------------------------------------------------------------
411 sub ldap_add_modify
413 my ($field) = @_;
414 my $type = $field->first_child('field[@name="ldap.type"]');
415 my $attribute = $type->{att}->{show} if $type;
416 if (defined $attribute) {
417 $ldap_attributes .= "," if $ldap_attributes;
418 $ldap_attributes .= $attribute;
421 #------------------------------------------------------------------------------
422 # Map an IP address to a unique sequence number. Assigning it a sequence number
423 # if one has not already been assigned.
425 #------------------------------------------------------------------------------
426 sub map_ip
428 my ($field) = @_;
429 my $ip = $field->{att}->{show};
430 if ( !exists( $ip_map{$ip})) {
431 $ip_sequence++;
432 $ip_map{$ip} = $ip_sequence;
434 return $ip_map{$ip};
437 #------------------------------------------------------------------------------
438 # Format a protocol operation code for output.
440 #------------------------------------------------------------------------------
441 sub format_opcode
443 my ($name) = @_;
444 my $operation = $proto_data{$name.'.show'};
445 my $description = $proto_data{$name.'.showname'} || '';
447 # Strip off the common prefix text, and the trailing (n).
448 # This tidies up most but not all descriptions.
449 $description =~ s/^[^:]*?: ?// if $description;
450 $description =~ s/^Message is a // if $description;
451 $description =~ s/\(\d+\)\s*$// if $description;
452 $description =~ s/\s*$// if $description;
454 return "$operation\t$description";
457 #------------------------------------------------------------------------------
458 # Format ldap protocol details for output
459 #------------------------------------------------------------------------------
460 sub format_ldap
462 my ($name) = @_;
463 if ( exists( $proto_data{'ldap.protocolOp.show'})
464 || exists( $proto_data{'gss-api.OID.show'})
466 my $operation = $proto_data{'ldap.protocolOp.show'};
467 my $description = $proto_data{'ldap.protocolOp.showname'} || '';
468 my $oid = $proto_data{'gss-api.OID.show'} || '';
469 my $base_object = $proto_data{'ldap.baseObject.show'} || '';
470 my $scope = $proto_data{'ldap.scope.show'} || '';
472 # Now extract operation specific data
473 my $extra;
474 my $extra_desc;
475 $operation = '' if !defined $operation;
476 if ($operation eq 6) {
477 # Modify operation
478 $extra = $proto_data{'ldap.operation.show'};
479 $extra_desc = $proto_data{'ldap.operation.showname'};
480 } elsif ($operation eq 0) {
481 # Bind operation
482 $extra = $proto_data{'ldap.authentication.show'};
483 $extra_desc = $proto_data{'ldap.authentication.showname'};
485 $extra = '' if !defined $extra;
486 $extra_desc = '' if !defined $extra_desc;
489 # strip the values out of the base object
490 if ($base_object) {
491 $base_object =~ s/^<//; # leading '<' if present
492 $base_object =~ s/>$//; # trailing '>' if present
493 $base_object =~ s/=.*?,/,/g; # from = up to the next comma
494 $base_object =~ s/=.*?$//; # from = up to the end of string
497 # strip off the leading prefix on the extra_description
498 # and the trailing (n);
499 $extra_desc =~ s/^[^:]*?: ?// if $extra_desc;
500 $extra_desc =~ s/\(\d+\)\s*$// if $extra_desc;
501 $extra_desc =~ s/\s*$// if $extra_desc;
503 # strip off the common prefix on the description
504 # and the trailing (n);
505 $description =~ s/^[^:]*?: ?// if $description;
506 $description =~ s/\(\d+\)\s*$// if $description;
507 $description =~ s/\s*$// if $description;
509 return "$operation\t$description\t$scope\t$base_object"
510 ."\t$ldap_filter\t$ldap_attributes\t$extra\t$extra_desc\t$oid";
511 } else {
512 return "\t*** Unknown ***";
516 #------------------------------------------------------------------------------
517 # Format kerberos protocol details for output.
518 #------------------------------------------------------------------------------
519 sub format_kerberos
521 my $msg_type = $proto_data{'kerberos.msg_type.show'} || '';
522 my $cname_type = $proto_data{'kerberos.cname.type'} || '';
523 my $description = $proto_data{'kerberos.msg_type.showname'} || '';
525 # Tidy up the description
526 $description =~ s/^[^:]*?: ?// if $description;
527 $description =~ s/\(\d+\)\s*$// if $description;
528 $description =~ s/\s*$// if $description;
529 return "$msg_type\t$description\t$cname_type";
532 =pod
534 =head1 NAME
536 traffic_summary.pl - summarise tshark pdml output
538 =head1 USAGE
540 B<traffic_summary.pl> [FILE...]
542 Summarise samba network traffic from tshark pdml output. Produces a tsv
543 delimited summary of samba activity.
545 To process unencrypted traffic
547 tshark -r capture.file -T pdml | traffic_summary.pl
549 To process encrypted kerberos traffic
551 tshark -r capture.file -K krb5.keytab -o kerberos.decrypt:true -T pdml | traffic_summary.pl
553 To display more detailed documentation, including details of the output format
555 perldoc traffic_summary.pl
557 NOTE: tshark pdml output is very verbose, so it's better to pipe the tshark
558 output directly to traffic_summary, rather than generating
559 intermediate pdml format files.
561 =head1 OPTIONS
562 B<--help> Display usage message and exit.
564 =head1 DESCRIPTION
566 Summarises tshark pdml output into a format suitable for load analysis
567 and input into load generation tools.
569 It reads the pdml input from stdin or the list of files passed on the command line.
572 =head2 Output format
573 The output is tab delimited fields and one line per summarised packet.
575 =head3 Fields
576 B<timestamp> Packet timestamp
577 B<IP protocol> The IANA protocol number
578 B<Wireshark Stream Number> Calculated by wireshark groups related requests and responses
579 B<Source IP> The unique sequence number for the source IP address
580 B<Destination IP> The unique sequence number for the destination IP address
581 B<protocl> The protocol name
582 B<opcode> The protocol operation code
583 B<Description> The protocol or operation description
584 B<extra> Extra protocol specific data, may be more than one field
587 =head2 IP address mapping
588 Rather than capturing and printing the IP addresses. Each unique IP address
589 seen is assigned a sequence number. So the first IP address seen will be 1,
590 the second 2 ...
592 =head2 Packets collected
593 Packets containing the following protocol records are summarised:
595 rpc_netlogon
596 kerberos
598 smb2
599 ldap
600 cldap
601 lsarpc
602 samr
603 dcerpc
605 dnsserver
606 drsuapi
607 browser
608 smb_netlogon
609 srvsvc
610 nbns
612 Any other packets are ignored.
614 In addition to the standard elements extra data is returned for the following
615 protocol record.
617 =head3 kerberos
618 cname_type machine cname ends with a $
619 user cname does not end with a $
621 =head3 ldap
623 scope Query Scope
624 0 - Base
625 1 - One level
626 2 - sub tree
627 base_object ldap base object
628 ldap_filter the ldap filter, attribute names are retained but the values
629 are removed.
630 ldap_attributes ldap attributes, only the names are retained any values are
631 discarded, with the following two exceptions
632 objectClass all the attribute values are retained
633 objectCategory the top level value is retained
634 i.e. everything from the = to the first ,
636 =head3 ldap modifiyRequest
637 In addition to the standard ldap fields the modification type is also captured
639 modify_operator for modifyRequests this contains the modifiy operation
640 0 - add
641 1 - delete
642 2 - replace
643 modify_description a description of the operation if available
645 =head3 modify bindRequest
646 In addition to the standard ldap fields details of the authentication
647 type are captured
649 authentication type 0 - Simple
650 3 - SASL
651 description Description of the authentication mechanism
652 oid GSS-API OID's
653 1.2.840.113554.1.2.2 - Kerberos v5
654 1.2.840.48018.1.2.2 - Kerberos V5
655 (incorrect, used by old Windows versions)
656 1.3.6.1.5.5.2 - SPNEGO
657 1.3.6.1.5.2.5 - IAKERB
658 1.3.6.1.4.1.311.2.2.10 - NTLM SSP
659 1.3.6.1.5.5.14 - SCRAM-SHA-1
660 1.3.6.1.5.5.18 - SCRAM-SHA-256
661 1.3.6.1.5.5.15.1.1.* - GSS-EAP
662 1.3.6.1.5.2.7 - PKU2U
663 1.3.6.1.5.5.1.1 - SPKM-1
664 1.3.6.1.5.5.1.2 - SPKM-2
665 1.3.6.1.5.5.1.3 - SPKM-3
666 1.3.6.1.5.5.9 - LIPKEY
667 1.2.752.43.14.2 - NETLOGON
669 =head1 DEPENDENCIES
670 tshark
671 XML::Twig For Ubuntu libxml-twig-perl, or from CPAN
672 use Getopt::Long
673 use Pod::Usage
676 =head1 Diagnostics
678 =head2 ** Unknown **
679 Unable to determine the operation being performed, for ldap it typically
680 indicates a kerberos encrypted operation.
682 =head2 ** Malformed Packet **
683 tshark indicated that the packet was malformed, for ldap it usually indicates TLS
684 encrypted traffic.
686 =head1 LISENCE AND COPYRIGHT
688 Copyright (C) Catalyst.Net Ltd 2017
690 Catalyst.Net's contribution was written by Gary Lockyer
691 <gary@catalyst.net.nz>.
693 This program is free software; you can redistribute it and/or modify
694 it under the terms of the GNU General Public License as published by
695 the Free Software Foundation; either version 3 of the License, or
696 (at your option) any later version.
698 This program is distributed in the hope that it will be useful,
699 but WITHOUT ANY WARRANTY; without even the implied warranty of
700 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
701 GNU General Public License for more details.
703 You should have received a copy of the GNU General Public License
704 along with this program. If not, see <http://www.gnu.org/licenses/>.
707 =cut