From 1a9fbac03c684f29cff9ac44875bd9504a89f54e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 15 Mar 2013 10:41:48 +0100 Subject: [PATCH] all: import netsniff-ng 0.5.8-rc0 source We decided to get rid of the old Git history and start a new one for several reasons: *) Allow / enforce only high-quality commits (which was not the case for many commits in the history), have a policy that is closer to the one from the Linux kernel. By high-quality commits, we mean code that is logically split into commits and commit messages that are signed-off and have a proper subject and message body. We do not allow automatic GitHub merges anymore, since they are total bullshit. However, we will either cherry-pick your patches or pull them manually. *) The old archive was about 27MB for no particular good reason. This basically derived from the bad decision that some PDF files were also stored there. From this moment onwards, no binary objects are allowed to be stored in this repository anymore. The old archive is not wiped away from the Internet. You will still be able to find it, e.g. on git.cryptoism.org etc. 
Signed-off-by: Daniel Borkmann Signed-off-by: Tobias Klauser --- .gitignore | 42 + AUTHORS | 52 + COPYING | 347 + Documentation/CodingStyle | 833 ++ Documentation/Downstream | 140 + Documentation/KnownIssues | 97 + Documentation/Mirrors | 17 + Documentation/Performance | 278 + Documentation/RelatedWork | 87 + Documentation/Sponsors | 14 + Documentation/SubmittingPatches | 122 + Documentation/Summary | 59 + INSTALL | 160 + Makefile | 307 + README | 58 + REPORTING-BUGS | 18 + astraceroute.c | 1077 +++ astraceroute/.gitignore | 5 + astraceroute/Makefile | 13 + bpf.c | 765 ++ bpf.h | 135 + bpf_comp.c | 43 + bpf_lexer.l | 126 + bpf_parser.y | 673 ++ bpfc.c | 132 + bpfc/.gitignore | 4 + bpfc/Makefile | 12 + built_in.h | 344 + colors.h | 31 + configs/ether.conf | 291 + configs/geoip.conf | 2 + configs/oui.conf | 17557 ++++++++++++++++++++++++++++++++++++++ configs/stddef.h | 197 + configs/tcp.conf | 1100 +++ configs/udp.conf | 1056 +++ cpusched.c | 180 + cpusched.h | 16 + csum.h | 179 + ct_client.c | 440 + ct_server.c | 811 ++ ct_servmgmt.c | 278 + ct_servmgmt.h | 21 + ct_usermgmt.c | 772 ++ ct_usermgmt.h | 49 + curve.c | 281 + curve.h | 218 + curvetun.c | 692 ++ curvetun.h | 42 + curvetun/.gitignore | 7 + curvetun/Makefile | 17 + curvetun/abiname.c | 46 + curvetun/build_nacl.sh | 46 + curvetun/nacl_path.sh | 18 + die.h | 69 + dissector.c | 115 + dissector.h | 64 + dissector_80211.c | 54 + dissector_80211.h | 43 + dissector_eth.c | 219 + dissector_eth.h | 48 + flowtop.c | 1208 +++ flowtop/.gitignore | 4 + flowtop/Makefile | 19 + geoip.c | 595 ++ geoip.h | 29 + hash.c | 169 + hash.h | 86 + ifpps.c | 949 ++ ifpps/.gitignore | 4 + ifpps/Makefile | 6 + ipv4.h | 36 + ipv6.h | 39 + locking.h | 97 + mac80211.c | 223 + mac80211.h | 13 + netsniff-ng.c | 1369 +++ netsniff-ng/.gitignore | 4 + netsniff-ng/Makefile | 50 + oui.c | 104 + oui.h | 19 + patricia.c | 353 + patricia.h | 49 + pcap_io.h | 581 ++ pcap_mm.c | 197 + pcap_rw.c | 79 + pcap_sg.c | 193 + pkt_buff.h | 112 + proto.h | 
34 + proto_80211_mac_hdr.c | 3627 ++++++++ proto_arp.c | 163 + proto_ethernet.c | 80 + proto_icmpv4.c | 68 + proto_icmpv6.c | 1641 ++++ proto_igmp.c | 560 ++ proto_ip_authentication_hdr.c | 87 + proto_ip_esp.c | 53 + proto_ipv4.c | 201 + proto_ipv6.c | 112 + proto_ipv6_dest_opts.c | 101 + proto_ipv6_fragm.c | 70 + proto_ipv6_hop_by_hop.c | 100 + proto_ipv6_in_ipv4.c | 25 + proto_ipv6_mobility_hdr.c | 311 + proto_ipv6_no_nxt_hdr.c | 41 + proto_ipv6_routing.c | 163 + proto_lldp.c | 469 + proto_mpls_unicast.c | 109 + proto_none.c | 83 + proto_tcp.c | 155 + proto_udp.c | 89 + proto_vlan.c | 61 + proto_vlan_q_in_q.c | 63 + protos.h | 37 + ring.h | 174 + ring_rx.c | 130 + ring_rx.h | 31 + ring_tx.c | 136 + ring_tx.h | 40 + stun.c | 190 + test/dissector_fuzz.sh | 68 + tprintf.c | 169 + tprintf.h | 27 + trafgen.c | 1064 +++ trafgen/.gitignore | 4 + trafgen/Makefile | 16 + trafgen_conf.h | 55 + trafgen_lexer.l | 152 + trafgen_parser.y | 611 ++ trie.c | 117 + trie.h | 21 + update-oui.py | 81 + xio.c | 248 + xio.h | 22 + xmalloc.c | 151 + xmalloc.h | 35 + xutils.c | 1024 +++ xutils.h | 94 + 137 files changed, 50669 insertions(+) create mode 100644 .gitignore create mode 100644 AUTHORS create mode 100644 COPYING create mode 100644 Documentation/CodingStyle create mode 100644 Documentation/Downstream create mode 100644 Documentation/KnownIssues create mode 100644 Documentation/Mirrors create mode 100644 Documentation/Performance create mode 100644 Documentation/RelatedWork create mode 100644 Documentation/Sponsors create mode 100644 Documentation/SubmittingPatches create mode 100644 Documentation/Summary create mode 100644 INSTALL create mode 100644 Makefile create mode 100644 README create mode 100644 REPORTING-BUGS create mode 100644 astraceroute.c create mode 100644 astraceroute/.gitignore create mode 100644 astraceroute/Makefile create mode 100644 bpf.c create mode 100644 bpf.h create mode 100644 bpf_comp.c create mode 100644 bpf_lexer.l create mode 100644 bpf_parser.y 
create mode 100644 bpfc.c create mode 100644 bpfc/.gitignore create mode 100644 bpfc/Makefile create mode 100644 built_in.h create mode 100644 colors.h create mode 100644 configs/ether.conf create mode 100644 configs/geoip.conf create mode 100644 configs/oui.conf create mode 100644 configs/stddef.h create mode 100644 configs/tcp.conf create mode 100644 configs/udp.conf create mode 100644 cpusched.c create mode 100644 cpusched.h create mode 100644 csum.h create mode 100644 ct_client.c create mode 100644 ct_server.c create mode 100644 ct_servmgmt.c create mode 100644 ct_servmgmt.h create mode 100644 ct_usermgmt.c create mode 100644 ct_usermgmt.h create mode 100644 curve.c create mode 100644 curve.h create mode 100644 curvetun.c create mode 100644 curvetun.h create mode 100644 curvetun/.gitignore create mode 100644 curvetun/Makefile create mode 100644 curvetun/abiname.c create mode 100755 curvetun/build_nacl.sh create mode 100755 curvetun/nacl_path.sh create mode 100644 die.h create mode 100644 dissector.c create mode 100644 dissector.h create mode 100644 dissector_80211.c create mode 100644 dissector_80211.h create mode 100644 dissector_eth.c create mode 100644 dissector_eth.h create mode 100644 flowtop.c create mode 100644 flowtop/.gitignore create mode 100644 flowtop/Makefile create mode 100644 geoip.c create mode 100644 geoip.h create mode 100644 hash.c create mode 100644 hash.h create mode 100644 ifpps.c create mode 100644 ifpps/.gitignore create mode 100644 ifpps/Makefile create mode 100644 ipv4.h create mode 100644 ipv6.h create mode 100644 locking.h create mode 100644 mac80211.c create mode 100644 mac80211.h create mode 100644 netsniff-ng.c create mode 100644 netsniff-ng/.gitignore create mode 100644 netsniff-ng/Makefile create mode 100644 oui.c create mode 100644 oui.h create mode 100644 patricia.c create mode 100644 patricia.h create mode 100644 pcap_io.h create mode 100644 pcap_mm.c create mode 100644 pcap_rw.c create mode 100644 pcap_sg.c create mode 
100644 pkt_buff.h create mode 100644 proto.h create mode 100644 proto_80211_mac_hdr.c create mode 100644 proto_arp.c create mode 100644 proto_ethernet.c create mode 100644 proto_icmpv4.c create mode 100644 proto_icmpv6.c create mode 100644 proto_igmp.c create mode 100644 proto_ip_authentication_hdr.c create mode 100644 proto_ip_esp.c create mode 100644 proto_ipv4.c create mode 100644 proto_ipv6.c create mode 100644 proto_ipv6_dest_opts.c create mode 100644 proto_ipv6_fragm.c create mode 100644 proto_ipv6_hop_by_hop.c create mode 100644 proto_ipv6_in_ipv4.c create mode 100644 proto_ipv6_mobility_hdr.c create mode 100644 proto_ipv6_no_nxt_hdr.c create mode 100644 proto_ipv6_routing.c create mode 100644 proto_lldp.c create mode 100644 proto_mpls_unicast.c create mode 100644 proto_none.c create mode 100644 proto_tcp.c create mode 100644 proto_udp.c create mode 100644 proto_vlan.c create mode 100644 proto_vlan_q_in_q.c create mode 100644 protos.h create mode 100644 ring.h create mode 100644 ring_rx.c create mode 100644 ring_rx.h create mode 100644 ring_tx.c create mode 100644 ring_tx.h create mode 100644 stun.c create mode 100755 test/dissector_fuzz.sh create mode 100644 tprintf.c create mode 100644 tprintf.h create mode 100644 trafgen.c create mode 100644 trafgen/.gitignore create mode 100644 trafgen/Makefile create mode 100644 trafgen_conf.h create mode 100644 trafgen_lexer.l create mode 100644 trafgen_parser.y create mode 100644 trie.c create mode 100644 trie.h create mode 100755 update-oui.py create mode 100644 xio.c create mode 100644 xio.h create mode 100644 xmalloc.c create mode 100644 xmalloc.h create mode 100644 xutils.c create mode 100644 xutils.h diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..dadfb3d9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,42 @@ +# Editor crap (emacs, vim, ...) 
+*.swp +*.orig +\#*\# +.\#* +.*.sw[a-z] +*.un~ + +# Hidden files, general things +.* +*~ + +# Compiled object files +*.slo +*.lo +*.o + +# Compiled dynamic libraries +*.so + +# Compiled static libraries +*.lai +*.la +*.a + +# Testing folders +test/fuzzing/ + +# Ignore if someone adapts Makefile +Makefile + +# Other documentation ignores +*.md +*.ps + +# Compressed archives +*.tar.bz2 +*.tar.bz +*.tar.xz + +# Excluded from ignorance +!.gitignore diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 00000000..29bab771 --- /dev/null +++ b/AUTHORS @@ -0,0 +1,52 @@ +Maintainer: + + * Daniel Borkmann + * Tobias Klauser + +Major contributors (> 30 commits): + + * Daniel Borkmann + * Emmanuel Roullit + * Markus Amend + * Tobias Klauser + * Christoph Jaeger + * Herbert Haas * + +Minor contributors (<= 30 commits): + + * Scott Moeller + * Jesper Dangaard Brouer + * Jaroslav Škarvada + * Dennis Gilmore + * Dan Horák + * Doug Burks + * Kartik Mistry + * Ulrich Weber + * Teguh + * Markus Kötter + * Jim Binder + * Ronald W. Henderson + * Stefan Seering + * Jon Schipp + * Sibir Chakraborty + +Notes: + +The order of authors with > 30 commits listed here is sorted by contributions +from high to low through ``git log --no-merges $@ | grep Author: | \ +cut -d: -f2 | cut -d\< -f1 | sort | uniq -c | sort -n -r''. The list of minor +contributors is currently unsorted and contains people who have contributed +code ``in some way'' (e.g. on the upstream or maintenance repositories), either +through Git or email. + +Note that we have taken over the maintenance and further development of Herbert +Haas' mausezahn [ˈmauzəˌtsa:n] utility after he passed away in 2011. There were +no Git commit statistics available from the import of his project. The project +is currently in an experimental branch only, but will be fully integrated +soon. (*) + +Want to join the core team? 
Submit enough great patches over a long time, +implement what's in the TODO file and show an ongoing, active interest in +supporting netsniff-ng. What's in it for you? If you ever come to Switzerland, +you get a free beer on Daniel and you can meet some great people involved in +this project. ;) diff --git a/COPYING b/COPYING new file mode 100644 index 00000000..a60c0c72 --- /dev/null +++ b/COPYING @@ -0,0 +1,347 @@ + Note that the only valid version of the GPL as far as this project is + concerned is _this_ particular version of the license (i.e. v2, not v2.2 or + v3.x or whatever), unless explicitly otherwise stated. + + Daniel Borkmann + +----------------------------------------------------------------------------- + + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. 
diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle new file mode 100644 index 00000000..31265b48 --- /dev/null +++ b/Documentation/CodingStyle @@ -0,0 +1,833 @@ + The coding conventions of the netsniff-ng toolkit match the Linux kernel + style guidelines. So here we go with a copy of linux/Documentation/CodingStyle + written by Linus. + + In general, keep this in mind: (i) simplicity, (ii) brevity, (iii) elegance. + Also treat files in Documentation/ with the same care and quality as code. + + Daniel Borkmann + +------------------------------------------------------------------------------- + + Linux kernel coding style + +This is a short document describing the preferred coding style for the +linux kernel. Coding style is very personal, and I won't _force_ my +views on anybody, but this is what goes for anything that I have to be +able to maintain, and I'd prefer it for most other things too. Please +at least consider the points made here. + +First off, I'd suggest printing out a copy of the GNU coding standards, +and NOT read it. Burn them, it's a great symbolic gesture. + +Anyway, here goes: + + + Chapter 1: Indentation + +Tabs are 8 characters, and thus indentations are also 8 characters. +There are heretic movements that try to make indentations 4 (or even 2!) +characters deep, and that is akin to trying to define the value of PI to +be 3. + +Rationale: The whole idea behind indentation is to clearly define where +a block of control starts and ends. Especially when you've been looking +at your screen for 20 straight hours, you'll find it a lot easier to see +how the indentation works if you have large indentations. + +Now, some people will claim that having 8-character indentations makes +the code move too far to the right, and makes it hard to read on a +80-character terminal screen. The answer to that is that if you need +more than 3 levels of indentation, you're screwed anyway, and should fix +your program. 
+ +In short, 8-char indents make things easier to read, and have the added +benefit of warning you when you're nesting your functions too deep. +Heed that warning. + +The preferred way to ease multiple indentation levels in a switch statement is +to align the "switch" and its subordinate "case" labels in the same column +instead of "double-indenting" the "case" labels. E.g.: + + switch (suffix) { + case 'G': + case 'g': + mem <<= 30; + break; + case 'M': + case 'm': + mem <<= 20; + break; + case 'K': + case 'k': + mem <<= 10; + /* fall through */ + default: + break; + } + + +Don't put multiple statements on a single line unless you have +something to hide: + + if (condition) do_this; + do_something_everytime; + +Don't put multiple assignments on a single line either. Kernel coding style +is super simple. Avoid tricky expressions. + +Outside of comments, documentation and except in Kconfig, spaces are never +used for indentation, and the above example is deliberately broken. + +Get a decent editor and don't leave whitespace at the end of lines. + + + Chapter 2: Breaking long lines and strings + +Coding style is all about readability and maintainability using commonly +available tools. + +The limit on the length of lines is 80 columns and this is a strongly +preferred limit. + +Statements longer than 80 columns will be broken into sensible chunks. +Descendants are always substantially shorter than the parent and are placed +substantially to the right. The same applies to function headers with a long +argument list. Long strings are as well broken into shorter strings. The +only exception to this is where exceeding 80 columns significantly increases +readability and does not hide information. 
+ +void fun(int a, int b, int c) +{ + if (condition) + printk(KERN_WARNING "Warning this is a long printk with " + "3 parameters a: %u b: %u " + "c: %u \n", a, b, c); + else + next_statement; +} + + Chapter 3: Placing Braces and Spaces + +The other issue that always comes up in C styling is the placement of +braces. Unlike the indent size, there are few technical reasons to +choose one placement strategy over the other, but the preferred way, as +shown to us by the prophets Kernighan and Ritchie, is to put the opening +brace last on the line, and put the closing brace first, thusly: + + if (x is true) { + we do y + } + +This applies to all non-function statement blocks (if, switch, for, +while, do). E.g.: + + switch (action) { + case KOBJ_ADD: + return "add"; + case KOBJ_REMOVE: + return "remove"; + case KOBJ_CHANGE: + return "change"; + default: + return NULL; + } + +However, there is one special case, namely functions: they have the +opening brace at the beginning of the next line, thus: + + int function(int x) + { + body of function + } + +Heretic people all over the world have claimed that this inconsistency +is ... well ... inconsistent, but all right-thinking people know that +(a) K&R are _right_ and (b) K&R are right. Besides, functions are +special anyway (you can't nest them in C). + +Note that the closing brace is empty on a line of its own, _except_ in +the cases where it is followed by a continuation of the same statement, +ie a "while" in a do-statement or an "else" in an if-statement, like +this: + + do { + body of do-loop + } while (condition); + +and + + if (x == y) { + .. + } else if (x > y) { + ... + } else { + .... + } + +Rationale: K&R. + +Also, note that this brace-placement also minimizes the number of empty +(or almost empty) lines, without any loss of readability. Thus, as the +supply of new-lines on your screen is not a renewable resource (think +25-line terminal screens here), you have more empty lines to put +comments on. 
+ +Do not unnecessarily use braces where a single statement will do. + +if (condition) + action(); + +This does not apply if only one branch of a conditional statement is a single +statement. In that case, use braces in both branches. + +if (condition) { + do_this(); + do_that(); +} else { + otherwise(); +} + + 3.1: Spaces + +Linux kernel style for use of spaces depends (mostly) on +function-versus-keyword usage. Use a space after (most) keywords. The +notable exceptions are sizeof, typeof, alignof, and __attribute__, which look +somewhat like functions (and are usually used with parentheses in Linux, +although they are not required in the language, as in: "sizeof info" after +"struct fileinfo info;" is declared). + +So use a space after these keywords: + if, switch, case, for, do, while +but not with sizeof, typeof, alignof, or __attribute__. E.g., + s = sizeof(struct file); + +Do not add spaces around (inside) parenthesized expressions. This example is +*bad*: + + s = sizeof( struct file ); + +When declaring pointer data or a function that returns a pointer type, the +preferred use of '*' is adjacent to the data name or function name and not +adjacent to the type name. Examples: + + char *linux_banner; + unsigned long long memparse(char *ptr, char **retptr); + char *match_strdup(substring_t *s); + +Use one space around (on each side of) most binary and ternary operators, +such as any of these: + + = + - < > * / % | & ^ <= >= == != ? : + +but no space after unary operators: + & * + - ~ ! sizeof typeof alignof __attribute__ defined + +no space before the postfix increment & decrement unary operators: + ++ -- + +no space after the prefix increment & decrement unary operators: + ++ -- + +and no space around the '.' and "->" structure member operators. + +Do not leave trailing whitespace at the ends of lines. Some editors with +"smart" indentation will insert whitespace at the beginning of new lines as +appropriate, so you can start typing the next line of code right away.
+However, some such editors do not remove the whitespace if you end up not +putting a line of code there, such as if you leave a blank line. As a result, +you end up with lines containing trailing whitespace. + +Git will warn you about patches that introduce trailing whitespace, and can +optionally strip the trailing whitespace for you; however, if applying a series +of patches, this may make later patches in the series fail by changing their +context lines. + + + Chapter 4: Naming + +C is a Spartan language, and so should your naming be. Unlike Modula-2 +and Pascal programmers, C programmers do not use cute names like +ThisVariableIsATemporaryCounter. A C programmer would call that +variable "tmp", which is much easier to write, and not the least more +difficult to understand. + +HOWEVER, while mixed-case names are frowned upon, descriptive names for +global variables are a must. To call a global function "foo" is a +shooting offense. + +GLOBAL variables (to be used only if you _really_ need them) need to +have descriptive names, as do global functions. If you have a function +that counts the number of active users, you should call that +"count_active_users()" or similar, you should _not_ call it "cntusr()". + +Encoding the type of a function into the name (so-called Hungarian +notation) is brain damaged - the compiler knows the types anyway and can +check those, and it only confuses the programmer. No wonder MicroSoft +makes buggy programs. + +LOCAL variable names should be short, and to the point. If you have +some random integer loop counter, it should probably be called "i". +Calling it "loop_counter" is non-productive, if there is no chance of it +being mis-understood. Similarly, "tmp" can be just about any type of +variable that is used to hold a temporary value. + +If you are afraid to mix up your local variable names, you have another +problem, which is called the function-growth-hormone-imbalance syndrome. +See chapter 6 (Functions). 
+ + + Chapter 5: Typedefs + +Please don't use things like "vps_t". + +It's a _mistake_ to use typedef for structures and pointers. When you see a + + vps_t a; + +in the source, what does it mean? + +In contrast, if it says + + struct virtual_container *a; + +you can actually tell what "a" is. + +Lots of people think that typedefs "help readability". Not so. They are +useful only for: + + (a) totally opaque objects (where the typedef is actively used to _hide_ + what the object is). + + Example: "pte_t" etc. opaque objects that you can only access using + the proper accessor functions. + + NOTE! Opaqueness and "accessor functions" are not good in themselves. + The reason we have them for things like pte_t etc. is that there + really is absolutely _zero_ portably accessible information there. + + (b) Clear integer types, where the abstraction _helps_ avoid confusion + whether it is "int" or "long". + + u8/u16/u32 are perfectly fine typedefs, although they fit into + category (d) better than here. + + NOTE! Again - there needs to be a _reason_ for this. If something is + "unsigned long", then there's no reason to do + + typedef unsigned long myflags_t; + + but if there is a clear reason for why it under certain circumstances + might be an "unsigned int" and under other configurations might be + "unsigned long", then by all means go ahead and use a typedef. + + (c) when you use sparse to literally create a _new_ type for + type-checking. + + (d) New types which are identical to standard C99 types, in certain + exceptional circumstances. + + Although it would only take a short amount of time for the eyes and + brain to become accustomed to the standard types like 'uint32_t', + some people object to their use anyway. + + Therefore, the Linux-specific 'u8/u16/u32/u64' types and their + signed equivalents which are identical to standard types are + permitted -- although they are not mandatory in new code of your + own. 
+ + When editing existing code which already uses one or the other set + of types, you should conform to the existing choices in that code. + + (e) Types safe for use in userspace. + + In certain structures which are visible to userspace, we cannot + require C99 types and cannot use the 'u32' form above. Thus, we + use __u32 and similar types in all structures which are shared + with userspace. + +Maybe there are other cases too, but the rule should basically be to NEVER +EVER use a typedef unless you can clearly match one of those rules. + +In general, a pointer, or a struct that has elements that can reasonably +be directly accessed should _never_ be a typedef. + + + Chapter 6: Functions + +Functions should be short and sweet, and do just one thing. They should +fit on one or two screenfuls of text (the ISO/ANSI screen size is 80x24, +as we all know), and do one thing and do that well. + +The maximum length of a function is inversely proportional to the +complexity and indentation level of that function. So, if you have a +conceptually simple function that is just one long (but simple) +case-statement, where you have to do lots of small things for a lot of +different cases, it's OK to have a longer function. + +However, if you have a complex function, and you suspect that a +less-than-gifted first-year high-school student might not even +understand what the function is all about, you should adhere to the +maximum limits all the more closely. Use helper functions with +descriptive names (you can ask the compiler to in-line them if you think +it's performance-critical, and it will probably do a better job of it +than you would have done). + +Another measure of the function is the number of local variables. They +shouldn't exceed 5-10, or you're doing something wrong. Re-think the +function, and split it into smaller pieces. A human brain can +generally easily keep track of about 7 different things, anything more +and it gets confused. 
You know you're brilliant, but maybe you'd like +to understand what you did 2 weeks from now. + +In source files, separate functions with one blank line. If the function is +exported, the EXPORT* macro for it should follow immediately after the closing +function brace line. E.g.: + +int system_is_up(void) +{ + return system_state == SYSTEM_RUNNING; +} +EXPORT_SYMBOL(system_is_up); + +In function prototypes, include parameter names with their data types. +Although this is not required by the C language, it is preferred in Linux +because it is a simple way to add valuable information for the reader. + + + Chapter 7: Centralized exiting of functions + +Albeit deprecated by some people, the equivalent of the goto statement is +used frequently by compilers in form of the unconditional jump instruction. + +The goto statement comes in handy when a function exits from multiple +locations and some common work such as cleanup has to be done. + +The rationale is: + +- unconditional statements are easier to understand and follow +- nesting is reduced +- errors by not updating individual exit points when making + modifications are prevented +- saves the compiler work to optimize redundant code away ;) + +int fun(int a) +{ + int result = 0; + char *buffer = kmalloc(SIZE); + + if (buffer == NULL) + return -ENOMEM; + + if (condition1) { + while (loop1) { + ... + } + result = 1; + goto out; + } + ... +out: + kfree(buffer); + return result; +} + + Chapter 8: Commenting + +Comments are good, but there is also a danger of over-commenting. NEVER +try to explain HOW your code works in a comment: it's much better to +write the code so that the _working_ is obvious, and it's a waste of +time to explain badly written code. + +Generally, you want your comments to tell WHAT your code does, not HOW. +Also, try to avoid putting comments inside a function body: if the +function is so complex that you need to separately comment parts of it, +you should probably go back to chapter 6 for a while. 
You can make +small comments to note or warn about something particularly clever (or +ugly), but try to avoid excess. Instead, put the comments at the head +of the function, telling people what it does, and possibly WHY it does +it. + +When commenting the kernel API functions, please use the kernel-doc format. +See the files Documentation/kernel-doc-nano-HOWTO.txt and scripts/kernel-doc +for details. + +Linux style for comments is the C89 "/* ... */" style. +Don't use C99-style "// ..." comments. + +The preferred style for long (multi-line) comments is: + + /* + * This is the preferred style for multi-line + * comments in the Linux kernel source code. + * Please use it consistently. + * + * Description: A column of asterisks on the left side, + * with beginning and ending almost-blank lines. + */ + +It's also important to comment data, whether they are basic types or derived +types. To this end, use just one data declaration per line (no commas for +multiple data declarations). This leaves you room for a small comment on each +item, explaining its use. + + + Chapter 9: You've made a mess of it + +That's OK, we all do. You've probably been told by your long-time Unix +user helper that "GNU emacs" automatically formats the C sources for +you, and you've noticed that yes, it does do that, but the defaults it +uses are less than desirable (in fact, they are worse than random +typing - an infinite number of monkeys typing into GNU emacs would never +make a good program). + +So, you can either get rid of GNU emacs, or change it to use saner +values. 
To do the latter, you can stick the following in your .emacs file: + +(defun c-lineup-arglist-tabs-only (ignored) + "Line up argument lists by tabs, not spaces" + (let* ((anchor (c-langelem-pos c-syntactic-element)) + (column (c-langelem-2nd-pos c-syntactic-element)) + (offset (- (1+ column) anchor)) + (steps (floor offset c-basic-offset))) + (* (max steps 1) + c-basic-offset))) + +(add-hook 'c-mode-common-hook + (lambda () + ;; Add kernel style + (c-add-style + "linux-tabs-only" + '("linux" (c-offsets-alist + (arglist-cont-nonempty + c-lineup-gcc-asm-reg + c-lineup-arglist-tabs-only)))))) + +(add-hook 'c-mode-hook + (lambda () + (let ((filename (buffer-file-name))) + ;; Enable kernel mode for the appropriate files + (when (and filename + (string-match (expand-file-name "~/src/linux-trees") + filename)) + (setq indent-tabs-mode t) + (c-set-style "linux-tabs-only"))))) + +This will make emacs go better with the kernel coding style for C +files below ~/src/linux-trees. + +But even if you fail in getting emacs to do sane formatting, not +everything is lost: use "indent". + +Now, again, GNU indent has the same brain-dead settings that GNU emacs +has, which is why you need to give it a few command line options. +However, that's not too bad, because even the makers of GNU indent +recognize the authority of K&R (the GNU people aren't evil, they are +just severely misguided in this matter), so you just give indent the +options "-kr -i8" (stands for "K&R, 8 character indents"), or use +"scripts/Lindent", which indents in the latest style. + +"indent" has a lot of options, and especially when it comes to comment +re-formatting you may want to take a look at the man page. But +remember: "indent" is not a fix for bad programming. + + + Chapter 10: Kconfig configuration files + +For all of the Kconfig* configuration files throughout the source tree, +the indentation is somewhat different. 
Lines under a "config" definition +are indented with one tab, while help text is indented an additional two +spaces. Example: + +config AUDIT + bool "Auditing support" + depends on NET + help + Enable auditing infrastructure that can be used with another + kernel subsystem, such as SELinux (which requires this for + logging of avc messages output). Does not do system-call + auditing without CONFIG_AUDITSYSCALL. + +Features that might still be considered unstable should be defined as +dependent on "EXPERIMENTAL": + +config SLUB + depends on EXPERIMENTAL && !ARCH_USES_SLAB_PAGE_STRUCT + bool "SLUB (Unqueued Allocator)" + ... + +while seriously dangerous features (such as write support for certain +filesystems) should advertise this prominently in their prompt string: + +config ADFS_FS_RW + bool "ADFS write support (DANGEROUS)" + depends on ADFS_FS + ... + +For full documentation on the configuration files, see the file +Documentation/kbuild/kconfig-language.txt. + + + Chapter 11: Data structures + +Data structures that have visibility outside the single-threaded +environment they are created and destroyed in should always have +reference counts. In the kernel, garbage collection doesn't exist (and +outside the kernel garbage collection is slow and inefficient), which +means that you absolutely _have_ to reference count all your uses. + +Reference counting means that you can avoid locking, and allows multiple +users to have access to the data structure in parallel - and not having +to worry about the structure suddenly going away from under them just +because they slept or did something else for a while. + +Note that locking is _not_ a replacement for reference counting. +Locking is used to keep data structures coherent, while reference +counting is a memory management technique. Usually both are needed, and +they are not to be confused with each other. 
+ +Many data structures can indeed have two levels of reference counting, +when there are users of different "classes". The subclass count counts +the number of subclass users, and decrements the global count just once +when the subclass count goes to zero. + +Examples of this kind of "multi-level-reference-counting" can be found in +memory management ("struct mm_struct": mm_users and mm_count), and in +filesystem code ("struct super_block": s_count and s_active). + +Remember: if another thread can find your data structure, and you don't +have a reference count on it, you almost certainly have a bug. + + + Chapter 12: Macros, Enums and RTL + +Names of macros defining constants and labels in enums are capitalized. + +#define CONSTANT 0x12345 + +Enums are preferred when defining several related constants. + +CAPITALIZED macro names are appreciated but macros resembling functions +may be named in lower case. + +Generally, inline functions are preferable to macros resembling functions. + +Macros with multiple statements should be enclosed in a do - while block: + +#define macrofun(a, b, c) \ + do { \ + if (a == 5) \ + do_this(b, c); \ + } while (0) + +Things to avoid when using macros: + +1) macros that affect control flow: + +#define FOO(x) \ + do { \ + if (blah(x) < 0) \ + return -EBUGGERED; \ + } while(0) + +is a _very_ bad idea. It looks like a function call but exits the "calling" +function; don't break the internal parsers of those who will read the code. + +2) macros that depend on having a local variable with a magic name: + +#define FOO(val) bar(index, val) + +might look like a good thing, but it's confusing as hell when one reads the +code and it's prone to breakage from seemingly innocent changes. + +3) macros with arguments that are used as l-values: FOO(x) = y; will +bite you if somebody e.g. turns FOO into an inline function. + +4) forgetting about precedence: macros defining constants using expressions +must enclose the expression in parentheses. 
Beware of similar issues with +macros using parameters. + +#define CONSTANT 0x4000 +#define CONSTEXP (CONSTANT | 3) + +The cpp manual deals with macros exhaustively. The gcc internals manual also +covers RTL which is used frequently with assembly language in the kernel. + + + Chapter 13: Printing kernel messages + +Kernel developers like to be seen as literate. Do mind the spelling +of kernel messages to make a good impression. Do not use crippled +words like "dont"; use "do not" or "don't" instead. Make the messages +concise, clear, and unambiguous. + +Kernel messages do not have to be terminated with a period. + +Printing numbers in parentheses (%d) adds no value and should be avoided. + +There are a number of driver model diagnostic macros in <linux/device.h> +which you should use to make sure messages are matched to the right device +and driver, and are tagged with the right level: dev_err(), dev_warn(), +dev_info(), and so forth. For messages that aren't associated with a +particular device, <linux/printk.h> defines pr_debug() and pr_info(). + +Coming up with good debugging messages can be quite a challenge; and once +you have them, they can be a huge help for remote troubleshooting. Such +messages should be compiled out when the DEBUG symbol is not defined (that +is, by default they are not included). When you use dev_dbg() or pr_debug(), +that's automatic. Many subsystems have Kconfig options to turn on -DDEBUG. +A related convention uses VERBOSE_DEBUG to add dev_vdbg() messages to the +ones already enabled by DEBUG. + + + Chapter 14: Allocating memory + +The kernel provides the following general purpose memory allocators: +kmalloc(), kzalloc(), kcalloc(), and vmalloc(). Please refer to the API +documentation for further information about them.
+ +The preferred form for passing a size of a struct is the following: + + p = kmalloc(sizeof(*p), ...); + +The alternative form where struct name is spelled out hurts readability and +introduces an opportunity for a bug when the pointer variable type is changed +but the corresponding sizeof that is passed to a memory allocator is not. + +Casting the return value which is a void pointer is redundant. The conversion +from void pointer to any other pointer type is guaranteed by the C programming +language. + + + Chapter 15: The inline disease + +There appears to be a common misperception that gcc has a magic "make me +faster" speedup option called "inline". While the use of inlines can be +appropriate (for example as a means of replacing macros, see Chapter 12), it +very often is not. Abundant use of the inline keyword leads to a much bigger +kernel, which in turn slows the system as a whole down, due to a bigger +icache footprint for the CPU and simply because there is less memory +available for the pagecache. Just think about it; a pagecache miss causes a +disk seek, which easily takes 5 milliseconds. There are a LOT of cpu cycles +that can go into these 5 milliseconds. + +A reasonable rule of thumb is to not put inline at functions that have more +than 3 lines of code in them. An exception to this rule are the cases where +a parameter is known to be a compile-time constant, and as a result of this +constantness you *know* the compiler will be able to optimize most of your +function away at compile time. For a good example of this latter case, see +the kmalloc() inline function. + +Often people argue that adding inline to functions that are static and used +only once is always a win since there is no space tradeoff.
While this is +technically correct, gcc is capable of inlining these automatically without +help, and the maintenance issue of removing the inline when a second user +appears outweighs the potential value of the hint that tells gcc to do +something it would have done anyway. + + + Chapter 16: Function return values and names + +Functions can return values of many different kinds, and one of the +most common is a value indicating whether the function succeeded or +failed. Such a value can be represented as an error-code integer +(-Exxx = failure, 0 = success) or a "succeeded" boolean (0 = failure, +non-zero = success). + +Mixing up these two sorts of representations is a fertile source of +difficult-to-find bugs. If the C language included a strong distinction +between integers and booleans then the compiler would find these mistakes +for us... but it doesn't. To help prevent such bugs, always follow this +convention: + + If the name of a function is an action or an imperative command, + the function should return an error-code integer. If the name + is a predicate, the function should return a "succeeded" boolean. + +For example, "add work" is a command, and the add_work() function returns 0 +for success or -EBUSY for failure. In the same way, "PCI device present" is +a predicate, and the pci_dev_present() function returns 1 if it succeeds in +finding a matching device or 0 if it doesn't. + +All EXPORTed functions must respect this convention, and so should all +public functions. Private (static) functions need not, but it is +recommended that they do. + +Functions whose return value is the actual result of a computation, rather +than an indication of whether the computation succeeded, are not subject to +this rule. Generally they indicate failure by returning some out-of-range +result. Typical examples would be functions that return pointers; they use +NULL or the ERR_PTR mechanism to report failure. 
+ + + Chapter 17: Don't re-invent the kernel macros + +The header file include/linux/kernel.h contains a number of macros that +you should use, rather than explicitly coding some variant of them yourself. +For example, if you need to calculate the length of an array, take advantage +of the macro + + #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +Similarly, if you need to calculate the size of some structure member, use + + #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) + +There are also min() and max() macros that do strict type checking if you +need them. Feel free to peruse that header file to see what else is already +defined that you shouldn't reproduce in your code. + + + Chapter 18: Editor modelines and other cruft + +Some editors can interpret configuration information embedded in source files, +indicated with special markers. For example, emacs interprets lines marked +like this: + +-*- mode: c -*- + +Or like this: + +/* +Local Variables: +compile-command: "gcc -DMAGIC_DEBUG_FLAG foo.c" +End: +*/ + +Vim interprets markers that look like this: + +/* vim:set sw=8 noet */ + +Do not include any of these in source files. People have their own personal +editor configurations, and your source files should not override them. This +includes markers for indentation and mode configuration. People may use their +own custom mode, or may have some other magic method for making indentation +work correctly. + + + + Appendix I: References + +The C Programming Language, Second Edition +by Brian W. Kernighan and Dennis M. Ritchie. +Prentice Hall, Inc., 1988. +ISBN 0-13-110362-8 (paperback), 0-13-110370-9 (hardback). +URL: http://cm.bell-labs.com/cm/cs/cbook/ + +The Practice of Programming +by Brian W. Kernighan and Rob Pike. +Addison-Wesley, Inc., 1999. +ISBN 0-201-61586-X. 
+URL: http://cm.bell-labs.com/cm/cs/tpop/ + +GNU manuals - where in compliance with K&R and this text - for cpp, gcc, +gcc internals and indent, all available from http://www.gnu.org/manual/ + +WG14 is the international standardization working group for the programming +language C, URL: http://www.open-std.org/JTC1/SC22/WG14/ + +Kernel CodingStyle, by greg@kroah.com at OLS 2002: +http://www.kroah.com/linux/talks/ols_2002_kernel_codingstyle_talk/html/ + +-- +Last updated on 2007-July-13. diff --git a/Documentation/Downstream b/Documentation/Downstream new file mode 100644 index 00000000..ba22cdd2 --- /dev/null +++ b/Documentation/Downstream @@ -0,0 +1,140 @@ +Maintainer: +/////////// + +netsniff-ng operating system distribution package maintainers are listed here +with the following attributes: 1 - OS distribution top-level site, 2 - OS +distribution netsniff-ng site, M - Maintainers name, W - Maintainers website, +E - Maintainers e-mail, C - Maintainers country. + +We'd hereby like to express a huge thanks to our awesome maintainers! Kudos! +If you are a maintainer for netsniff-ng and not listed here, please contact +us at . 
+ +Debian + * 1: http://www.debian.org/ + * 2: http://packages.debian.org/search?keywords=netsniff-ng + * M: Kartik Mistry + * W: http://people.debian.org/~kartik/ + * E: kartik@debian.org + * C: India + +Fedora / Fedora Security Lab Spin / Red Hat Enterprise Linux + * 1: http://fedoraproject.org/ + * 2: https://admin.fedoraproject.org/pkgdb/acls/name/netsniff-ng + * M: Jaroslav Škarvada + * W: https://admin.fedoraproject.org/pkgdb/users/packages/jskarvad + * E: jskarvad@redhat.com + * C: Czech Republic + +Ubuntu + * 1: http://www.ubuntu.com/ + * 2: https://launchpad.net/ubuntu/+source/netsniff-ng/ + * (pulled from Debian) + +Arch Linux + * 1: http://archlinux.org/ + * 2: http://aur.archlinux.org/packages.php?K=netsniff-ng + * M: Can Celasun + * W: http://durucancelasun.info/ + * E: dcelasun@gmail.com + * C: Turkey + +Linux Mint + * 1: http://www.linuxmint.com + * 2: http://community.linuxmint.com/software/view/netsniff-ng + * (pulled from Debian) + +Gentoo + * 1: http://www.gentoo.org/ + * 2: http://packages.gentoo.org/package/net-analyzer/netsniff-ng + * M: Michael Weber + * W: http://cia.vc/stats/author/xmw + * E: xmw@gentoo.org + * C: Germany + +Sabayon + * 1: http://www.sabayon.org/ + * 2: http://gpo.zugaina.org/net-misc/netsniff-ng + * M: Epinephrine + * E: epinephrineaddict@gmail.com + +Slackware + * 1: http://www.slackware.com/ + * 2: http://www.slackers.it/repository/netsniff-ng/ + * M: Corrado Franco + * W: http://conraid.net/ + * E: conraid@gmail.com + * C: Italy + +openSUSE / SUSE Linux Enterprise + * 1: http://opensuse.org/ + * 2: http://software.opensuse.org/search?baseproject=ALL&p=1&q=netsniff-ng + * M: Pascal Bleser + * W: http://linux01.gwdg.de/~pbleser/ + * E: pascal.bleser@skynet.be + * C: Belgium + +Mageia + * 1: http://www.mageia.org/ + * 2: https://bugs.mageia.org/show_bug.cgi?id=7268 + * M: Matteo Pasotti + * E: pasotti.matteo@gmail.com + * C: Italy + +Mandriva + * 1: http://www.mandriva.com/ + * 2: 
http://sophie.zarb.org/srpm/Mandriva,cooker,/netsniff-ng + * M: Dmitry Mikhirev + * E: dmikhirev@mandriva.org + * C: Russia + +Trisquel + * 1: http://trisquel.info/ + * 2: http://packages.trisquel.info/slaine/net/netsniff-ng + * (pulled from Debian) + +GRML + * 1: http://grml.org/ + * 2: http://grml.org/changelogs/README-grml-2010.04/ + * M: Michael Prokop + * E: mika@grml.org + * C: Austria + +Alpine Linux + * 1: http://alpinelinux.org/ + * M: Fabian Affolter + * W: http://affolter-engineering.ch + * E: fabian@affolter-engineering.ch + * C: Switzerland + +Network Security Toolkit + * 1: http://networksecuritytoolkit.org/ + * 2: http://networksecuritytoolkit.org/nst/links.html + * M: Ronald W. Henderson + * W: http://www.networksecuritytoolkit.org/nstpro/help/aboutus.html + * E: rwhalb@nycap.rr.com + * C: USA + +Network Forensic Analysis Tool (NFAT, Xplico) + * 1: http://www.xplico.org/ + * 2: http://www.xplico.org/archives/1184 + * M: Gianluca Costa + * E: g.costa@iserm.com + * C: Italy + +Backtrack + * 1: http://backtrack-linux.org/ + * 2: http://redmine.backtrack-linux.org:8080/issues/572 + * E: slyscorpion@gmail.com + +Scientific Linux by Fermilab / CERN + * 1: http://linux.web.cern.ch/linux/scientific.shtml + * E: linux.support@cern.ch + * C: Switzerland + +Security Onion + * 1: http://code.google.com/p/security-onion/ + * 2: http://code.google.com/p/security-onion/wiki/Beta + * M: Doug Burks + * E: doug.burks@gmail.com + * C: USA diff --git a/Documentation/KnownIssues b/Documentation/KnownIssues new file mode 100644 index 00000000..eb17a3f3 --- /dev/null +++ b/Documentation/KnownIssues @@ -0,0 +1,97 @@ +netsniff-ng's known issues: +/////////////////////////// + +Q: When I perform a traffic capture on the Ethernet interface, the PCAP file is + created and packets are received but without 802.1Q header. If I use + tshark, I get all headers but netsniff-ng removes 802.1Q headers. Is that + normal behavior? +A: Yes and no. 
The way VLAN headers are handled in PF_PACKET sockets by the + kernel is somewhat problematic [1]. The problem in the Linux kernel is that + some drivers already handle VLAN, others do not. Those that handle it have + different implementations, e.g. hardware acceleration and so on. So in some + cases the VLAN tag is even stripped before entering the protocol stack, in + some cases probably not. The bottom line is that the netdev hackers introduced + a "hack" in PF_PACKET so that a VLAN ID is visible in some helper data + structure that is accessible from the RX_RING. And then it gets really messy + in user space to artificially put the VLAN header back into the right + place, not to mention the resulting performance implications for + /all/ libpcap tools, since parts of the packet need to be copied for + reassembly. A user reported the following, just to demonstrate this mess: + Some tests were made with two machines, and it seems that the results depend on + the driver ... + + 1) AR8131 + * ethtool -k eth0 gives "rx-vlan-offload: on" + -> wireshark gets the vlan header + -> netsniff-ng doesn't get the vlan header + + * ethtool -K eth0 rxvlan off + -> wireshark gets twice the same vlan header (like QinQ even though + I never sent QinQ) + -> netsniff-ng gets the vlan header + + 2) RTL8111/8168B + * ethtool -k eth0 gives "rx-vlan-offload: on" + -> wireshark gets the vlan header + -> netsniff-ng doesn't get the vlan header + + * ethtool -K eth0 rxvlan off + -> wireshark gets the vlan header + -> netsniff-ng doesn't get the vlan header + + Even if we agreed on doing the same workaround as libpcap, we still + would not be able to see QinQ, for instance, due to the fact that only /one/ + VLAN tag is stored in this kernel helper data structure. We think that + there should be a good consensus on the kernel space side about what gets + transferred to the userland.
+ + [1] http://lkml.indiana.edu/hypermail/linux/kernel/0710.3/3816.html + + Update (28.11.2012): the Linux kernel and also bpfc have built-in support + for hardware accelerated VLAN filtering, even though tags might not be + visible in the payload itself as reported here. However, the filtering + for VLANs works reliably if your NIC supports it. bpfc example for filtering + for any tags: + + _main: + ld #vlanp + jeq #0, drop + ret #-1 + drop: + ret #0 + + Filtering for a particular VLAN tag: + + _main: + ld #vlant + jneq #10, drop + ret #-1 + drop: + ret #0 + + Where 10 is VLAN ID 10 in this example. Or, more pedantic: + + _main: + ld #vlanp + jeq #0, drop + ld #vlant + jneq #10, drop + ret #-1 + drop: + ret #0 + +Q: When I start trafgen, my kernel crashes! What is happening? +A: We have fixed this ``bug'' in the Linux kernel under commit + 7f5c3e3a80e6654cf48dfba7cf94f88c6b505467 (http://bit.ly/PcH5Nd). Either + update your kernel to the latest version, e.g. clone and build it from + git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git or don't + start multiple trafgen instances at once resp. start trafgen with flag -A + to disable temporary socket memory tuning! Although trafgen's mechanism is + written in a correct manner, some probably Linux-internal side-effects + trigger the BUG macro. Why tuning? In general, if not otherwise + specified, the netsniff-ng suite tries to get a good performance on default. + For instance, this includes things like tuning the system's socket memory, + enabling the BPF JIT compiler, migrating the NIC's interrupt affinity and + so on. If you don't want netsniff-ng to do this, look at the relevant cmd + line options that disable them with ``--help'' and explicitly specify them + on the program start. 
diff --git a/Documentation/Mirrors b/Documentation/Mirrors new file mode 100644 index 00000000..c7d7e795 --- /dev/null +++ b/Documentation/Mirrors @@ -0,0 +1,17 @@ +Mirrors: +//////// + +Official mirrors for the netsniff-ng website: + + * Germany: http://netsniff-ng.{org,net,com} + +Official mirrors for the netsniff-ng Git repository: + + * Czech Republic: git://repo.or.cz/netsniff-ng.git + * United States: git://github.com/gnumaniacs/netsniff-ng.git + * Iceland: git://git.cryptoism.org/pub/git/netsniff-ng.git + +Distribution specific maintenance/release Git repositories: + + * Debian Linux: git://anonscm.debian.org/collab-maint/netsniff-ng.git + * Fedora/RHEL Linux: git://pkgs.fedoraproject.org/netsniff-ng.git diff --git a/Documentation/Performance b/Documentation/Performance new file mode 100644 index 00000000..e51411ae --- /dev/null +++ b/Documentation/Performance @@ -0,0 +1,278 @@ +Hitchhiker's guide to high-performance with netsniff-ng: +//////////////////////////////////////////////////////// + +This is a collection of short notes in random order concerning software +and hardware for optimizing throughput (partly copied or derived from sources +that are mentioned at the end of this file): + +<=== Hardware ====> + +.-=> Use a PCI-X or PCIe server NIC +`-------------------------------------------------------------------------- +Only if it says Gigabit Ethernet on the box of your NIC, that does not +necessarily mean that you will also reach it. Especially on small packet +sizes, you won't reach wire-rate with a PCI adapter built for desktop or +consumer machines. Rather, you should buy a server adapter that has faster +interconnects such as PCIe. Also, make your choice of a server adapter, +whether it has a good support in the kernel. Check the Linux drivers +directory for your targeted chipset and look at the netdev list if the adapter +is updated frequently. 
Also, check the location/slot of the NIC adapter on +the system motherboard: Our experience resulted in significantly different +measurement values by locating the NIC adapter in different PCIe slots. +Since we did not have schematics for the system motherboard, this was a +trial and error effort. Moreover, check the specifications of the NIC +hardware: is the system bus connector I/O capable of Gigabit Ethernet +frame rate throughput? Also check the network topology: is your network +Gigabit switch capable of switching Ethernet frames at the maximum rate +or is a direct connection of two end-nodes the better solution? Is Ethernet +flow control being used? "ethtool -a eth0" can be used to determine this. +For measurement purposes, you might want to turn it off to increase throughput: + * ethtool -A eth0 autoneg off + * ethtool -A eth0 rx off + * ethtool -A eth0 tx off + +.-=> Use better (faster) hardware +`-------------------------------------------------------------------------- +Before doing software-based fine-tuning, check if you can afford better and +especially faster hardware. For instance, get a fast CPU with lots of cores +or a NUMA architecture with multi-core CPUs and a fast interconnect. If you +dump PCAP files to disc with netsniff-ng, then a fast SSD is appropriate. +If you plan to memory map PCAP files with netsniff-ng, then choose an +appropriate amount of RAM and so on and so forth. + +<=== Software (Linux kernel specific) ====> + +.-=> Use NAPI drivers +`-------------------------------------------------------------------------- +The "New API" (NAPI) is a rework of the packet processing code in the +kernel to improve performance for high speed networking. NAPI provides +two major features: + +Interrupt mitigation: High-speed networking can create thousands of +interrupts per second, all of which tell the system something it already +knew: it has lots of packets to process. 
NAPI allows drivers to run with +(some) interrupts disabled during times of high traffic, with a +corresponding decrease in system load. + +Packet throttling: When the system is overwhelmed and must drop packets, +it's better if those packets are disposed of before much effort goes into +processing them. NAPI-compliant drivers can often cause packets to be +dropped in the network adaptor itself, before the kernel sees them at all. + +Many recent NIC drivers automatically support NAPI, so you don't need to do +anything. Some drivers need you to explicitly specify NAPI in the kernel +config or on the command line when compiling the driver. If you are unsure, +check your driver documentation. + +.-=> Use a tickless kernel +`-------------------------------------------------------------------------- +The tickless kernel feature allows for on-demand timer interrupts. This +means that during idle periods, fewer timer interrupts will fire, which +should lead to power savings, cooler running systems, and fewer useless +context switches. (Kernel option: CONFIG_NO_HZ=y) + +.-=> Reduce timer interrupts +`-------------------------------------------------------------------------- +You can select the rate at which timer interrupts in the kernel will fire. +When a timer interrupt fires on a CPU, the process running on that CPU is +interrupted while the timer interrupt is handled. Reducing the rate at +which the timer fires allows for fewer interruptions of your running +processes. This option is particularly useful for servers with multiple +CPUs where processes are not running interactively. (Kernel options: +CONFIG_HZ_100=y and CONFIG_HZ=100) + +.-=> Use Intel's I/OAT DMA Engine +`-------------------------------------------------------------------------- +This kernel option enables the Intel I/OAT DMA engine that is present in +recent Xeon CPUs. 
This option increases network throughput as the DMA +engine allows the kernel to offload network data copying from the CPU to +the DMA engine. This frees up the CPU to do more useful work. + +Check to see if it's enabled: + +[foo@bar]% dmesg | grep ioat +ioatdma 0000:00:08.0: setting latency timer to 64 +ioatdma 0000:00:08.0: Intel(R) I/OAT DMA Engine found, 4 channels, [...] +ioatdma 0000:00:08.0: irq 56 for MSI/MSI-X + +There's also a sysfs interface where you can get some statistics about the +DMA engine. Check the directories under /sys/class/dma/. (Kernel options: +CONFIG_DMADEVICES=y and CONFIG_INTEL_IOATDMA=y and CONFIG_DMA_ENGINE=y and +CONFIG_NET_DMA=y and CONFIG_ASYNC_TX_DMA=y) + +.-=> Use Direct Cache Access (DCA) +`-------------------------------------------------------------------------- +Intel's I/OAT also includes a feature called Direct Cache Access (DCA). +DCA allows a driver to warm a CPU cache. A few NICs support DCA, the most +popular (to my knowledge) is the Intel 10GbE driver (ixgbe). Refer to your +NIC driver documentation to see if your NIC supports DCA. To enable DCA, +a switch in the BIOS must be flipped. Some vendors supply machines that +support DCA, but don't expose a switch for DCA. + +You can check if DCA is enabled: + +[foo@bar]% dmesg | grep dca +dca service started, version 1.8 + +If DCA is possible on your system but disabled you'll see: + +ioatdma 0000:00:08.0: DCA is disabled in BIOS + +Which means you'll need to enable it in the BIOS or manually. (Kernel +option: CONFIG_DCA=y) + +.-=> Throttle NIC Interrupts +`-------------------------------------------------------------------------- +Some drivers allow the user to specify the rate at which the NIC will +generate interrupts. The e1000e driver allows you to pass a command line +option InterruptThrottleRate when loading the module with insmod. 
For +the e1000e there are two dynamic interrupt throttle mechanisms, specified +on the command line as 1 (dynamic) and 3 (dynamic conservative). The +adaptive algorithm classifies traffic into different classes and adjusts the interrupt +rate appropriately. The difference between dynamic and dynamic conservative +is the rate for the 'Lowest Latency' traffic class, dynamic (1) has a much +more aggressive interrupt rate for this traffic class. + +As always, check your driver documentation for more information. + +With insmod: insmod e1000e.o InterruptThrottleRate=1 + +.-=> Use Process and IRQ affinity +`-------------------------------------------------------------------------- +Linux allows the user to specify which CPUs processes and interrupt +handlers are bound to. + +Processes: You can use taskset to specify which CPUs a process can run on +Interrupt Handlers: The interrupt map can be found in /proc/interrupts, and +the affinity for each interrupt can be set in the file smp_affinity in the +directory for each interrupt under /proc/irq/. + +This is useful because you can pin the interrupt handlers for your NICs +to specific CPUs so that when a shared resource is touched (a lock in the +network stack) and loaded to a CPU cache, the next time the handler runs, +it will be put on the same CPU avoiding costly cache invalidations that +can occur if the handler is put on a different CPU. + +Moreover, improvements of up to 24% have been reported if processes and +the IRQs for the NICs the processes get data from are pinned to the same +CPUs. Doing this ensures that the data loaded into the CPU cache by the +interrupt handler can be used (without invalidation) by the process; +extremely high cache locality is achieved. + +NOTE: If netsniff-ng or trafgen is bound to a specific CPU, it automatically +migrates the NIC's IRQ affinity to this CPU to achieve a high cache locality. 
+ +.-=> Tune Socket's memory allocation area +`-------------------------------------------------------------------------- +By default, each socket has a backend memory between 130KB and 160KB on +an x86/x86_64 machine with 4GB RAM. Hence, network packets can be received +on the NIC driver layer, but later dropped at the socket queue due to memory +restrictions. "sysctl -a | grep mem" will display your current memory +settings. To increase maximum and default values of read and write memory +areas, use: + * sysctl -w net.core.rmem_max=8388608 + This sets the max OS receive buffer size for all types of connections. + * sysctl -w net.core.wmem_max=8388608 + This sets the max OS send buffer size for all types of connections. + * sysctl -w net.core.rmem_default=65536 + This sets the default OS receive buffer size for all types of connections. + * sysctl -w net.core.wmem_default=65536 + This sets the default OS send buffer size for all types of connections. + +.-=> Enable Linux' BPF Just-in-Time compiler +`-------------------------------------------------------------------------- +If you're using filtering with netsniff-ng (or tcpdump, Wireshark, ...), you +should activate the Berkeley Packet Filter Just-in-Time compiler. The Linux +kernel has a built-in "virtual machine" that interprets BPF opcodes for +filtering packets. Hence, those small filter applications are applied to +each packet. (Read more about this in the Bpfc document.) The Just-in-Time +compiler is able to 'compile' such a filter application to assembler code +that can directly be run on the CPU instead of on the virtual machine. If +netsniff-ng or trafgen detects that the BPF JIT is present on the system, it +automatically enables it. 
(Kernel option: CONFIG_HAVE_BPF_JIT=y and +CONFIG_BPF_JIT=y) + +.-=> Increase the TX queue length +`-------------------------------------------------------------------------- +There are settings available to regulate the size of the queue between the +kernel network subsystems and the driver for the network interface card. Just +as with any queue, it is recommended to size it such that losses do not +occur due to local buffer overflows. Therefore careful tuning is required +to ensure that the sizes of the queues are optimal for your network +connection. + +There are two queues to consider, the txqueuelen; which is related to the +transmit queue size, and the netdev_backlog; which determines the recv +queue size. Users can manually set this queue size using the ifconfig +command on the required device: + +ifconfig eth0 txqueuelen 2000 + +The default of 100 is inadequate for long distance, or high throughput pipes. +For example, on a network with a rtt of 120ms and at Gig rates, a +txqueuelen of at least 10000 is recommended. + +.-=> Increase kernel receiver backlog queue +`-------------------------------------------------------------------------- +For the receiver side, we have a similar queue for incoming packets. This +queue will build up in size when an interface receives packets faster than +the kernel can process them. If this queue is too small (default is 300), +we will begin to lose packets at the receiver, rather than on the network. +One can set this value by: + +sysctl -w net.core.netdev_max_backlog=2000 + +.-=> Use a RAM-based filesystem if possible +`-------------------------------------------------------------------------- +If you have a considerable amount of RAM, you can also think of using a +RAM-based file system such as ramfs for dumping pcap files with netsniff-ng. +This can be useful for small to medium-sized pcap sizes or for pcap probes +that are generated with netsniff-ng. 
+ +<=== Software (netsniff-ng / trafgen specific) ====> + +.-=> Bind netsniff-ng / trafgen to a CPU +`-------------------------------------------------------------------------- +Both tools have a command-line option '--bind-cpu' that can be used like +'--bind-cpu 0' in order to pin the process to a specific CPU. This was +already mentioned earlier in this file. However, netsniff-ng and trafgen are +able to do this without an external tool. Next to this CPU pinning, they also +automatically migrate this CPU's NIC IRQ affinity. Hence, as in '--bind-cpu 0' +netsniff-ng will not be migrated to a different CPU and the NIC's IRQ affinity +will also be moved to CPU 0 to increase cache locality. + +.-=> Use netsniff-ng in silent mode +`-------------------------------------------------------------------------- +Don't print information to the console while you want to achieve high-speed, +because this significantly slows down the application. Hence, use netsniff-ng's +'--silent' option when recording or replaying PCAP files! + +.-=> Use netsniff-ng's scatter/gather or mmap for PCAP files +`-------------------------------------------------------------------------- +The scatter/gather I/O mode which is default in netsniff-ng can be used to +record large PCAP files and is slower than the memory mapped I/O. However, +you don't have the RAM size as your limit for recording. Use netsniff-ng's +memory mapped I/O option for achieving a higher speed for recording a PCAP, +but with the trade-off that the maximum allowed size is limited. + +.-=> Use static packet configurations in trafgen +`-------------------------------------------------------------------------- +Don't use counters or byte randomization in the trafgen configuration file, since +it slows down the packet generation process. Static packet bytes are the fastest +to go with. 
+ +.-=> Generate packets with different txhashes in trafgen +`-------------------------------------------------------------------------- +For 10Gbit/s multiqueue NICs, it might be good to generate packets that result +in different txhashes, thus multiple queues are used in the transmission path +(and therefore most likely also multiple CPUs). + +Sources: +~~~~~~~~ + +* http://www.linuxfoundation.org/collaborate/workgroups/networking/napi +* http://datatag.web.cern.ch/datatag/howto/tcp.html +* http://thread.gmane.org/gmane.linux.network/191115 +* http://bit.ly/3XbBrM +* http://wwwx.cs.unc.edu/~sparkst/howto/network_tuning.php +* http://bit.ly/pUFJxU diff --git a/Documentation/RelatedWork b/Documentation/RelatedWork new file mode 100644 index 00000000..ed7dba88 --- /dev/null +++ b/Documentation/RelatedWork @@ -0,0 +1,87 @@ +Work that relates to netsniff-ng and how we differ from it: +/////////////////////////////////////////////////////////// + +ntop + * W: http://www.ntop.org/ + + The ntop project offers zero-copy for network packets. Is this approach + significantly different from the one already built into the Linux kernel? + Most likely not. In both cases packets are memory mapped between both address + spaces. The biggest difference is that you get this for free, without + modifying your kernel with netsniff-ng since it uses the kernel's RX_RING + and TX_RING functionality. Unfortunately this is not really mentioned on the + ntop's website. Surely for promotional reasons. For many years the ntop + project has lived on next to the Linux kernel; attempts have been made to + integrate it [1] but discussions got stuck and both sides seem to have no + interest in it anymore, e.g. [2]. Therefore, if you want to use ntop, you are + dependent on ntop's modified drivers that are maintained out of the Linux + kernel's mainline tree. Thus, this will not provide you with the latest + improvements. 
Also, the Linux kernel's PF_PACKET is maintained by a much bigger + audience, probably better reviewed and optimized. Therefore, also we decided + to go with the Linux kernel's variant. So to keep it short: both approaches + are zero-copy, both have similar performance (if someone tells you something + different, he would lie due to their technical similarities) and we are using + the kernel's built-in variant to reach a broader audience. + + [1] http://lists.openwall.net/netdev/2009/10/14/37 + [2] http://www.spinics.net/lists/netfilter-devel/msg20212.html + +tcpdump + * W: http://www.tcpdump.org/ + + tcpdump is probably the oldest and most famous packet analyzer. It is based on + libpcap and in fact the MIT team that maintains tcpdump also maintains libpcap. + It has been ported to much more architectures and operating systems than + netsniff-ng. However, we don't aim to rebuild or clone tcpdump. We rather focus + on achieving a higher capturing speed by carefully tuning and optimizing our + code. That said doesn't mean that tcpdump people do not take care of it. It + just means that we don't have additional layers of abstractions for being as + portable as possible. This already gives us a smaller code footprint. Also, on + default we perform some system tuning such as remapping the NIC's IRQ affinity + that tcpdump probably would never do due to its generic nature. By generic, we + mean to serve as many different user groups as possible. We rather aim at + serving users for high-speed needs. By that, they have less manual work to do + since it's already performed in the background. Next to this, we also aim at + being a useful networking toolkit rather than only an analyzer. So many other + tools are provided such as trafgen for traffic generation. + +Wireshark/tshark + * W: http://www.wireshark.org/ + + Probably we could tell you the same as in the previous section. I guess it is + safe to say that Wireshark might have the best protocol dissector out there. 
+ However, this is not a free lunch. You pay for it with a performance + degradation, which is quite expensive. It is also based on libpcap (we are not) + and it comes with a graphical user interface, whereas we rather aim at being + used somewhere on a server or middle-box site where you only have access to a + shell, for instance. Again, offline analysis of /large/ pcap files might even + let it hang for a long time. Here netsniff-ng has a better performance also in + capturing pcaps. Again, we furthermore aim at being a toolkit rather than only + an analyzer. + +libpcap + * W: http://www.tcpdump.org/ + + Price question: why don't you rely on libpcap? The answer is quite simple. We + started developing netsniff-ng with its zero-copy capabilities back in 2009 + when libpcap was still doing packet copies between address spaces. Since the + API to the Linux kernel was quite simple, we felt more comfortable using it + directly and bypassing this additional layer of libpcap code. Today we feel + good about this decision, because since the TX_RING functionality was added to + the Linux kernel we have a clean integration of both, RX_RING and TX_RING. + libpcap on the other hand was designed for capturing and not for transmission + of network packets. Therefore, it only uses RX_RING on systems where it's + available but no TX_RING functionality. This would have resulted in a mess in + our code. Additionally, with netsniff-ng, one is able to do a more fine-grained + tuning of those rings. Why didn't you wrap netsniff-ng around your own library + just like tcpdump and libpcap? Because we are ignorant. If you design a library + then you have to design it well right at the beginning. A library would be a + crappy one if it changes its API ever. Or, if it changes its API, then it has + to keep its old one for the sake of being backwards compatible. Otherwise no + trust in its user or developer base can be achieved. 
Further, by keeping this + long tail of deprecated functions you will become a code bloat over time. We + wanted to keep this freedom of large-scale refactoring our code and not having + to maintain a stable API to the outer world. This is the whole story behind it. + If you desperately need our internal functionality, you still can feel free to + copy our code as long as your derived code complies with the GPL version 2.0. + So no need to whine. ;-) diff --git a/Documentation/Sponsors b/Documentation/Sponsors new file mode 100644 index 00000000..2d21600f --- /dev/null +++ b/Documentation/Sponsors @@ -0,0 +1,14 @@ +netsniff-ng is partly sponsored by: +/////////////////////////////////// + +Red Hat + * W: http://www.redhat.com/ + +Deutsche Flugsicherung GmbH + * W: https://secais.dfs.de/ + +ETH Zurich: + * W: http://csg.ethz.ch/ + +Max Planck Institute for Human Cognitive and Brain Sciences + * W: http://www.cbs.mpg.de/ diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches new file mode 100644 index 00000000..fbe72c42 --- /dev/null +++ b/Documentation/SubmittingPatches @@ -0,0 +1,122 @@ +Checklist for Patches: +////////////////////// + +Submitting patches should follow this guideline (derived from the Git project): + +If you are familiar with upstream Linux kernel development, then you do not +need to read this file, it's basically the same process. + +* Commits: + +- make sure to comply with the coding guidelines (see CodingStyle) +- make commits of logical units +- check for unnecessary whitespace with "git diff --check" before committing +- do not check in commented out code or unneeded files +- the first line of the commit message should be a short description (50 + characters is the soft limit, see DISCUSSION in git-commit(1)), and should + skip the full stop +- the body should provide a meaningful commit message, which: + . explains the problem the change tries to solve, iow, what is wrong with + the current code without the change. 
+ . justifies the way the change solves the problem, iow, why the result with + the change is better. + . alternate solutions considered but discarded, if any. +- describe changes in imperative mood, e.g. "make xyzzy do frotz" instead of + "[This patch] makes xyzzy do frotz" or "[I] changed xyzzy to do frotz", as + if you are giving orders to the codebase to change its behaviour. +- try to make sure your explanation can be understood without external + resources. Instead of giving a URL to a mailing list archive, summarize the + relevant points of the discussion. +- add a "Signed-off-by: Your Name " line to the commit message + (or just use the option "-s" when committing) to confirm that you agree to + the Developer's Certificate of Origin (see also + http://linux.yyz.us/patch-format.html or below); this is mandatory +- make sure syntax of man-pages is free of errors: podchecker .c + +* For Patches via GitHub: + +- fork the netsniff-ng project on GitHub to your local GitHub account + (https://github.com/gnumaniacs/netsniff-ng) +- make your changes to the latest master branch with respect to the commit + section above +- if you change, add, or remove a command line option or make some other user + interface change, the associated documentation should be updated as well. +- open a pull request on (https://github.com/gnumaniacs/netsniff-ng) and send + a notification to the list (netsniff-ng@googlegroups.com) and CC one of the + maintainers if (and only if) the patch is ready for inclusion. +- if your name is not writable in ASCII, make sure that you send off a message + in the correct encoding. +- add a short description what the patch or patchset is about + +* For Patches via Mail: + +- use "git format-patch -M" to create the patch +- do not PGP sign your patch +- do not attach your patch, but read in the mail body, unless you cannot teach + your mailer to leave the formatting of the patch alone. 
+- be careful doing cut & paste into your mailer, not to corrupt whitespaces. +- provide additional information (which is unsuitable for the commit message) + between the "---" and the diffstat +- if you change, add, or remove a command line option or make some other user + interface change, the associated documentation should be updated as well. +- if your name is not writable in ASCII, make sure that you send off a message + in the correct encoding. +- send the patch to the list (netsniff-ng@googlegroups.com) and CC one of the + maintainers if (and only if) the patch is ready for inclusion. If you use + git-send-email(1), please test it first by sending email to yourself. + +* What does the 'Signed-off-by' mean? + + It certifies the following (extract from the Linux kernel documentation): + + Developer's Certificate of Origin 1.1 + + By making a contribution to this project, I certify that: + (a) The contribution was created in whole or in part by me and I + have the right to submit it under the open source license + indicated in the file; or + (b) The contribution is based upon previous work that, to the best + of my knowledge, is covered under an appropriate open source + license and I have the right under that license to submit that + work with modifications, whether created in whole or in part + by me, under the same open source license (unless I am + permitted to submit under a different license), as indicated + in the file; or + (c) The contribution was provided directly to me by some other + person who certified (a), (b) or (c) and I have not modified it. + (d) I understand and agree that this project and the contribution + are public and that a record of the contribution (including all + personal information I submit with it, including my sign-off) is + maintained indefinitely and may be redistributed consistent with + this project or the open source license(s) involved. 
+ + then you just add a line saying + Signed-off-by: Random J Developer + using your real name (sorry, no pseudonyms or anonymous contributions). + +* Example commit: + + Please write good git commit messages. A good commit message looks like this: + + Header line: explaining the commit in one line + + Body of commit message is a few lines of text, explaining things + in more detail, possibly giving some background about the issue + being fixed, etc etc. + + The body of the commit message can be several paragraphs, and + please do proper word-wrap and keep columns shorter than about + 74 characters or so. That way "git log" will show things + nicely even when it's indented. + + Reported-by: whoever-reported-it + Signed-off-by: Your Name + + where that header line really should be meaningful, and really should be + just one line. That header line is what is shown by tools like gitk and + shortlog, and should summarize the change in one readable line of text, + independently of the longer explanation. + +Note that future (0.5.7 onwards) changelogs will include a summary that is +generated by 'git shortlog -n'. Hence, that's why we need you to stick to +the convention. diff --git a/Documentation/Summary b/Documentation/Summary new file mode 100644 index 00000000..2863d60d --- /dev/null +++ b/Documentation/Summary @@ -0,0 +1,59 @@ +Tools: +////// + +The toolkit is split into small, useful utilities that are or are not +necessarily related to each other. Each program for itself fills a gap as +a helper in your daily network debugging, development or audit. + +*netsniff-ng* is a high-performance network analyzer based on packet mmap(2) +mechanisms. It can record pcap files to disc, replay them and also do an +offline and online analysis. Capturing, analysis or replay of raw 802.11 +frames are supported as well. pcap files are also compatible with tcpdump +or Wireshark traces. netsniff-ng processes those pcap traces either in +scatter-gather I/O or by mmap(2) I/O. 
+ +*trafgen* is a high-performance network traffic generator based on packet +mmap(2) mechanisms. It has its own flexible, macro-based low-level packet +configuration language. Injection of raw 802.11 frames is supported as well. +trafgen has a significantly higher speed than mausezahn and comes very close +to pktgen, but runs from user space. pcap traces can also be converted into +a trafgen packet configuration. + +*mausezahn* is a performant high-level packet generator that can run on a +hardware-software appliance and comes with a Cisco-like CLI. It can craft +nearly every possible or impossible packet. Thus, it can be used, for example, +to test network behaviour under strange circumstances (stress test, malformed +packets) or to test hardware-software appliances for several kinds of attacks. + +*bpfc* is a Berkeley Packet Filter (BPF) compiler that understands the original +BPF language developed by McCanne and Jacobson. It accepts BPF mnemonics and +converts them into kernel/netsniff-ng readable BPF ``opcodes''. It also +supports undocumented Linux filter extensions. This can especially be useful +for more complicated filters, that high-level filters fail to support. + +*ifpps* is a tool which periodically provides top-like networking and system +statistics from the Linux kernel. It gathers statistical data directly from +procfs files and does not apply any user space traffic monitoring that would +falsify statistics on high packet rates. For wireless, data about link +connectivity is provided as well. + +*flowtop* is a top-like connection tracking tool that can run on an end host +or router. It is able to present TCP, UDP(lite), SCTP, DCCP, ICMP(v6) flows +that have been collected by the kernel's netfilter connection tracking +framework. GeoIP and TCP/SCTP/DCCP state machine information is displayed. +Also, on end hosts flowtop can show PIDs and application names that flows +relate to as well as aggregated packet and byte counter (if available). 
No +user space traffic monitoring is done, thus all data is gathered by the kernel. + +*curvetun* is a lightweight, high-speed ECDH multiuser VPN for Linux. curvetun +uses the Linux TUN/TAP interface and supports {IPv4,IPv6} over {IPv4,IPv6} with +UDP or TCP as carrier protocols. Packets are encrypted end-to-end by a +symmetric stream cipher (Salsa20) and authenticated by a MAC (Poly1305), where +keys have previously been computed with the ECDH key agreement +protocol (Curve25519). + +*astraceroute* is an autonomous system (AS) trace route utility. Unlike +traceroute or tcptraceroute, it not only displays hops, but also the AS +information they belong to as well as GeoIP information and other interesting +things. By default, it uses a TCP probe packet and falls back to ICMP probes +in case no ICMP answer has been received. diff --git a/INSTALL b/INSTALL new file mode 100644 index 00000000..74fc8e46 --- /dev/null +++ b/INSTALL @@ -0,0 +1,160 @@ +Currently only operating systems running on Linux kernels with the option +CONFIG_PACKET_MMAP enabled are supported. This feature can be found even back to the days of +2.4 kernels. Most operating systems ship pre-compiled kernels that have this +config option enabled and even the latest kernel versions got rid of this +option and have this functionality already built-in. However, we recommend a +kernel >= 2.6.31, because the TX_RING is officially integrated since then. In +any case, if you have the possibility, consider getting the latest kernel from +Linus' Git repository, tweak and compile it, and run this one! + +A note for distribution package maintainers can be found at the end of the file. + +What additional tools are required to build netsniff-ng? + + - ccache (optional) + - flex, bison (bpfc, trafgen) + +What libraries are required?
+ + - libncurses (ifpps, flowtop) + - libGeoIP >=1.4.8 (astraceroute, flowtop, netsniff-ng) + - libz (astraceroute, flowtop, netsniff-ng) + - libnacl (curvetun) + - libnetfilter-conntrack (flowtop) + - libpcap (netsniff-ng, for tcpdump-like filters) + - liburcu (flowtop) + - libnl3 (netsniff-ng, trafgen) + +What additional tools are recommended after the build? + + - cpp (trafgen) + - ntpd (curvetun) + - setcap (all) + +It is common, that these libraries are shipped as distribution packages +for an easy installation. We try to keep this as minimal as possible. + +One-liner installation for *all* dependencies on Debian: + + $ sudo apt-get install ccache flex bison libnl-3-dev \ + libnl-genl-3-dev libgeoip-dev libnetfilter-conntrack-dev \ + libncurses5-dev liburcu-dev libnacl-dev libpcap-dev \ + zlib1g-dev + +One-liner installation for *all* dependencies on Fedora: + + $ sudo yum install ccache flex bison ccache libnl3-devel \ + GeoIP-devel libnetfilter_conntrack-devel ncurses-devel \ + userspace-rcu-devel nacl-devel libpcap-devel zlib-devel + +After downloading the netsniff-ng toolkit, you should change to the +repository root directory: + + $ cd netsniff-ng/ + +The installation (deinstallation) process done by make is fairly simple: + + $ make + # make install + + (# make distclean) + ($ make clean) + (or for both at once: # make mrproper) + +You can also build only a particular tool, e.g.: + + $ make trafgen + # make trafgen_install + + (# make trafgen_distclean) + ($ make trafgen_clean) + +Currently mausezahn is experimental and not included in the default repository +resp. build: + + $ git pull origin with-mausezahn + +This means if you want to use mausezahn, you have to execute 'make mausezahn' +for a build. This will be changed at the time when we have cleaned up and +fixed the imported code. + +If you want to build all tools, but curvetun (i.e. 
because you don't need +the tunneling software and the NaCl build process lasts quite long): + + $ make allbutcurvetun + # make install_allbutcurvetun + + (# make mrproper) + +In order to build curvetun, libnacl must be built first. A helper script +called build_nacl.sh is there to facilitate this process. If you want to +build NaCl in the directory ~/nacl, the script should be called this way: + + $ cd curvetun + $ ./build_nacl.sh ~/nacl + +There's also an abbreviation for this by simply typing: + + $ make nacl + +This gives an initial output such as "Building NaCl for arch amd64 on host +fuuubar (grab a coffee, this takes a while) ...". If the automatically +detected architecture (such as amd64) is not the one you intend to compile +for, then edit the (cc="gcc") variable within the build_nacl.sh script to +your cross compiler. Yes, we know, the build system of NaCl is a bit of a +pain, so you might check for a pre-built package from your distribution in +case you are not cross compiling. + +If NaCl already has been built on the target, it is quicker to use +nacl_path.sh this way: + + $ cd curvetun + $ ./nacl_path.sh ~/nacl/build/include/x86 ~/nacl/build/lib/x86 + +When done, netsniff-ng's build infrastructure will read those environment +variables in order to get the needed paths to NaCl. + +If you're unsure about any make targets, check out: make help + +In order to run the toolkit as a normal user, set the following privilege +separation after the build/installation: + + $ sudo setcap cap_net_raw,cap_ipc_lock,cap_sys_admin,cap_net_admin=eip {toolname} + +For cross-compiling netsniff-ng, the process is fairly simple.
Assuming you +want to build netsniff-ng for the Microblaze architecture, update the PATH +variable first, e.g.: + + $ export PATH=/microblazeel-unknown-linux-gnu/bin:$PATH + +And then, build the toolkit like this: + + $ make CROSS_COMPILE=microblazeel-unknown-linux-gnu- \ + CROSS_LD_LIBRARY_PATH= + +Note that some adaptations might be necessary regarding the CFLAGS, since not +all might be supported by a different architecture. + +For doing a debug build of the toolkit with fewer optimizations and non-stripped +symbols, do: + + $ make DEBUG=1 + +For debugging the build system, full commands are shown if every make target is +executed with: + + $ make Q= + +Concerning packaging the toolkit for a Linux distribution, by default, +netsniff-ng has some architecture-specific tuning options enabled that don't +belong in a package binary of a distribution. Hence, you might want to adapt +some build-related things before starting to package the toolkit. All +necessary things (e.g., CFLAGS,WFLAGS) can be found in Makefile. Hence, +you need to adapt it there. You can then build and install the toolkit into +a prefixed path like: + + $ make PREFIX= + $ make PREFIX= install + +Thanks for maintaining netsniff-ng in your distribution. Further questions +will be answered on the public mailing list. diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..a211137e --- /dev/null +++ b/Makefile @@ -0,0 +1,307 @@ +# netsniff-ng build system +# Copyright 2012 - 2013 Daniel Borkmann +# Subject to the GNU GPL, version 2. + +VERSION = 0 +PATCHLEVEL = 5 +SUBLEVEL = 8 +EXTRAVERSION = -rc0 +NAME = Ziggomatic + +TOOLS = netsniff-ng trafgen astraceroute flowtop ifpps bpfc curvetun + +# For packaging purposes, prefix can define a different path. +PREFIX ?= + +# Debugging option +ifeq ("$(origin DEBUG)", "command line") + DEBUG := 1 +else + DEBUG := 0 +endif + +# Disable if you don't want it +CCACHE = ccache + +# Location of installation paths.
+BINDIR = $(PREFIX)/usr/bin +SBINDIR = $(PREFIX)/usr/sbin +INCDIR = $(PREFIX)/usr/include +ETCDIR = $(PREFIX)/etc +ETCDIRE = $(ETCDIR)/netsniff-ng +DOCDIR = $(PREFIX)/usr/share/doc +DOCDIRE = $(DOCDIR)/netsniff-ng + +# Shut up make, helper warnings, parallel compilation! +MAKEFLAGS += --no-print-directory +MAKEFLAGS += -rR +MAKEFLAGS += --warn-undefined-variables +MAKEFLAGS += --jobs=$(shell grep "^processor" /proc/cpuinfo | wc -l) + +# For packaging purposes, you might want to disable O3+arch tuning +CFLAGS = -fstack-protector +ifeq ($(DEBUG), 1) + CFLAGS += -g + CFLAGS += -O2 +else + CFLAGS += -march=native + CFLAGS += -mtune=native + CFLAGS += -O3 + CFLAGS += -fpie + CFLAGS += -pipe + CFLAGS += -fomit-frame-pointer +endif +CFLAGS += --param=ssp-buffer-size=4 +CFLAGS += -fno-strict-aliasing +CFLAGS += -fexceptions +CFLAGS += -fasynchronous-unwind-tables +CFLAGS += -fno-delete-null-pointer-checks +CFLAGS += -D_FORTIFY_SOURCE=2 +CFLAGS += -D_REENTRANT +CFLAGS += -D_FILE_OFFSET_BITS=64 +CFLAGS += -D_LARGEFILE_SOURCE +CFLAGS += -D_LARGEFILE64_SOURCE +ifneq ($(wildcard /usr/include/linux/net_tstamp.h),) + CFLAGS += -D__WITH_HARDWARE_TIMESTAMPING +endif +CFLAGS += -DVERSION_STRING=\"$(VERSION_STRING)\" +CFLAGS += -std=gnu99 + +WFLAGS = -Wall +WFLAGS += -Wformat=2 +WFLAGS += -Wmissing-prototypes +WFLAGS += -Wdeclaration-after-statement +WFLAGS += -Werror-implicit-function-declaration +WFLAGS += -Wstrict-prototypes +WFLAGS += -Wundef +WFLAGS += -Wimplicit-int + +WFLAGS_EXTRA = -Wno-unused-result +WFLAGS_EXTRA += -Wmissing-parameter-type +WFLAGS_EXTRA += -Wtype-limits +WFLAGS_EXTRA += -Wclobbered +WFLAGS_EXTRA += -Wmissing-field-initializers +WFLAGS_EXTRA += -Woverride-init +WFLAGS_EXTRA += -Wold-style-declaration +WFLAGS_EXTRA += -Wignored-qualifiers +WFLAGS_EXTRA += -Wempty-body +WFLAGS_EXTRA += -Wuninitialized + +CFLAGS += $(WFLAGS) -I. 
+CPPFLAGS = +ifeq ("$(origin CROSS_LD_LIBRARY_PATH)", "command line") + LDFLAGS = -L$(CROSS_LD_LIBRARY_PATH) +else + LDFLAGS = +endif + +ALL_CFLAGS = $(CFLAGS) +ALL_LDFLAGS = $(LDFLAGS) +TARGET_ARCH = +LEX_FLAGS = +YAAC_FLAGS = + +Q = @ + +LD = $(Q)echo -e " LD\t$@" && $(CCACHE) $(CROSS_COMPILE)gcc +CCNQ = $(CCACHE) $(CROSS_COMPILE)gcc +CC = $(Q)echo -e " CC\t$<" && $(CCNQ) +MKDIR = $(Q)echo -e " MKDIR\t$@" && mkdir +ifeq ($(DEBUG), 1) + STRIP = $(Q)true +else + STRIP = $(Q)echo -e " STRIP\t$@" && $(CROSS_COMPILE)strip +endif +LEX = $(Q)echo -e " LEX\t$<" && flex +YAAC = $(Q)echo -e " YAAC\t$<" && bison +INST = echo -e " INST\t$(1)" && install -d $(2) && \ + install --mode=644 -DC $(1) $(2)/$(shell basename $(1)) +ifeq ("$(origin PREFIX)", "command line") + INSTX = echo -e " INST\t$(1)" && install -d $(2) && \ + install -C $(1) $(2)/$(shell basename $(1)) +else + INSTX = echo -e " INST\t$(1)" && install -C $(1) $(2)/$(shell basename $(1)) +endif +RM = echo -e " RM\t$(1)" && rm -rf $(1) +RMDIR = echo -e " RM\t$(1)" && rmdir --ignore-fail-on-non-empty $(1) 2> /dev/null || true +GIT_ARCHIVE = git archive --prefix=netsniff-ng-$(VERSION_STRING)/ $(VERSION_STRING) | \ + $(1) > ../netsniff-ng-$(VERSION_STRING).tar.$(2) +GIT_TAG = git tag -a $(VERSION_STRING) -m "$(VERSION_STRING) release" + +export VERSION PATCHLEVEL SUBLEVEL EXTRAVERSION +export CROSS_COMPILE + +VERSION_STRING = $(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) +VERSION_SHORT = $(VERSION).$(PATCHLEVEL).$(SUBLEVEL) + +bold = $(shell tput bold) +normal = $(shell tput sgr0) + +ifndef NACL_LIB_DIR +ifndef NACL_INC_DIR + $(info $(bold)NACL_LIB_DIR/NACL_INC_DIR is undefined, build libnacl first for curvetun!$(normal)) +endif +endif + +ifeq ("$(origin CROSS_COMPILE)", "command line") + WHAT := Cross compiling +else + WHAT := Building +endif + +build_showinfo: + $(Q)echo "$(bold)$(WHAT) netsniff-ng toolkit ($(VERSION_STRING)) for" \ + $(shell $(CCNQ) -dumpmachine)":$(normal)" +clean_showinfo: + $(Q)echo 
"$(bold)Cleaning netsniff-ng toolkit ($(VERSION_STRING)):$(normal)" + +%.yy.o: %.l + $(LEX) -P $(shell perl -wlne 'print $$1 if /lex-func-prefix:\s([a-z]+)/' $<) \ + -o $(BUILD_DIR)/$(shell basename $< .l).yy.c $(LEX_FLAGS) $< +%.tab.o: %.y + $(YAAC) -p $(shell perl -wlne 'print $$1 if /yaac-func-prefix:\s([a-z]+)/' $<) \ + -o $(BUILD_DIR)/$(shell basename $< .y).tab.c $(YAAC_FLAGS) -d $< + +.PHONY: all toolkit $(TOOLS) clean %_prehook %_distclean %_clean %_install tag tags cscope +.FORCE: +.DEFAULT_GOAL := all +.DEFAULT: +.IGNORE: %_clean_custom %_install_custom +.NOTPARALLEL: $(TOOLS) + +DOC_FILES = Summary RelatedWork Performance KnownIssues Sponsors SubmittingPatches CodingStyle + +NCONF_FILES = ether.conf tcp.conf udp.conf oui.conf geoip.conf + +all: build_showinfo toolkit +allbutcurvetun: $(filter-out curvetun,$(TOOLS)) +allbutmausezahn: $(filter-out mausezahn,$(TOOLS)) +toolkit: $(TOOLS) +install: install_all +install_all: $(foreach tool,$(TOOLS),$(tool)_install) + $(Q)$(foreach file,$(DOC_FILES),$(call INST,Documentation/$(file),$(DOCDIRE));) +install_allbutcurvetun: $(foreach tool,$(filter-out curvetun,$(TOOLS)),$(tool)_install) + $(Q)$(foreach file,$(DOC_FILES),$(call INST,Documentation/$(file),$(DOCDIRE));) +install_allbutmausezahn: $(foreach tool,$(filter-out mausezahn,$(TOOLS)),$(tool)_install) + $(Q)$(foreach file,$(DOC_FILES),$(call INST,Documentation/$(file),$(DOCDIRE));) +clean mostlyclean: $(foreach tool,$(TOOLS),$(tool)_clean) +realclean distclean clobber: $(foreach tool,$(TOOLS),$(tool)_distclean) + $(Q)$(foreach file,$(DOC_FILES),$(call RM,$(DOCDIRE)/$(file));) + $(Q)$(call RMDIR,$(DOCDIRE)) + $(Q)$(call RMDIR,$(ETCDIRE)) +mrproper: clean distclean + +define TOOL_templ + include $(1)/Makefile + $(1) $(1)%: BUILD_DIR := $(1) + $(1)_prehook: + $(Q)echo "$(bold)$(WHAT) $(1):$(normal)" + $(1): $(1)_prehook $$($(1)-lex) $$($(1)-yaac) $$(patsubst %.o,$(1)/%.o,$$($(1)-objs)) + $(1)_clean: $(1)_clean_custom + $(Q)$$(call RM,$(1)/*.o $(1)/$(1)) + 
$(1)_install: $(1)_install_custom + $(Q)$$(call INSTX,$(1)/$(1),$$(SBINDIR)) + $(1)_distclean: $(1)_distclean_custom + $(Q)$$(call RM,$$(SBINDIR)/$(1)) + $(1)/%.yy.o: $(1)/%.yy.c + $$(CC) $$(ALL_CFLAGS) -o $$@ -c $$< + $(1)/%.tab.o: $(1)/%.tab.c + $$(CC) $$(ALL_CFLAGS) -o $$@ -c $$< + $(1)/%.o: %.c + $$(CC) $$(ALL_CFLAGS) -o $$@ -c $$< +endef + +$(foreach tool,$(TOOLS),$(eval $(call TOOL_templ,$(tool)))) + +%:: ; + +netsniff-ng: ALL_CFLAGS += -I$(INCDIR)/libnl3/ -D__WITH_PROTOS -D__WITH_TCPDUMP_LIKE_FILTER +trafgen: ALL_CFLAGS += -I.. -I$(INCDIR)/libnl3/ -D__WITH_PROTOS +bpfc: ALL_CFLAGS += -I.. +curvetun: ALL_CFLAGS += -I ${NACL_INC_DIR} +curvetun: ALL_LDFLAGS += -L ${NACL_LIB_DIR} + +bpfc_clean_custom: + $(Q)$(call RM,$(BUILD_DIR)/*.h $(BUILD_DIR)/*.c) +trafgen_clean_custom: + $(Q)$(call RM,$(BUILD_DIR)/*.h $(BUILD_DIR)/*.c) + +netsniff-ng_distclean_custom flowtop_distclean_custom: + $(Q)$(foreach file,$(NCONF_FILES),$(call RM,$(ETCDIRE)/$(file));) + $(Q)$(call RMDIR,$(ETCDIRE)) +trafgen_distclean_custom: + $(Q)$(call RM,$(ETCDIRE)/stddef.h) + $(Q)$(call RMDIR,$(ETCDIRE)) +astraceroute_distclean_custom: + $(Q)$(call RM,$(ETCDIRE)/geoip.conf) + $(Q)$(call RMDIR,$(ETCDIRE)) + +netsniff-ng_install_custom flowtop_install_custom: + $(Q)$(foreach file,$(NCONF_FILES),$(call INST,configs/$(file),$(ETCDIRE));) +trafgen_install_custom: + $(Q)$(call INST,configs/stddef.h,$(ETCDIRE)) +astraceroute_install_custom: + $(Q)$(call INST,configs/geoip.conf,$(ETCDIRE)) + +$(TOOLS): WFLAGS += $(WFLAGS_EXTRA) +$(TOOLS): + $(LD) $(ALL_LDFLAGS) -o $@/$@ $@/*.o $($@-libs) + $(STRIP) $@/$@ + +nacl: + $(Q)echo "$(bold)$(WHAT) $@:$(normal)" + $(Q)cd curvetun/ && ./build_nacl.sh ~/nacl + $(Q)source ~/.bashrc + +tarball.gz: ; $(call GIT_ARCHIVE,gzip,gz) +tarball.bz2: ; $(call GIT_ARCHIVE,bzip2,bz2) +tarball.xz: ; $(call GIT_ARCHIVE,xz,xz) +tarball: tarball.gz tarball.bz2 tarball.xz + +tag: + $(GIT_TAG) + +FIND_SOURCE_FILES = ( git ls-files '*.[hcS]' 2>/dev/null || \ + find . 
\( -name .git -type d -prune \) \ + -o \( -name '*.[hcS]' -type f -print \) ) + +tags ctags: + $(Q)$(call RM,tags) + $(FIND_SOURCE_FILES) | xargs ctags -a + +cscope: + $(Q)$(call RM,cscope*) + $(FIND_SOURCE_FILES) | xargs cscope -b + +help: + $(Q)echo "$(bold)Available tools from the toolkit:$(normal)" + $(Q)echo " :={$(TOOLS)}" + $(Q)echo "$(bold)Targets for building the toolkit:$(normal)" + $(Q)echo " all|toolkit - Build the whole toolkit" + $(Q)echo " allbutcurvetun - Build all except curvetun" + $(Q)echo " - Build only one of the tools" + $(Q)echo "$(bold)Targets for cleaning the toolkit's build files:$(normal)" + $(Q)echo " clean|mostlyclean - Remove all build files" + $(Q)echo " _clean - Remove only one of the tool's files" + $(Q)echo "$(bold)Targets for installing the toolkit:$(normal)" + $(Q)echo " install - Install the whole toolkit" + $(Q)echo " _install - Install only one of the tools" + $(Q)echo "$(bold)Targets for removing the toolkit:$(normal)" + $(Q)echo " realclean|distclean|clobber - Remove the whole toolkit from the system" + $(Q)echo " _distclean - Remove only one of the tools" + $(Q)echo " mrproper - Remove build and install files" + $(Q)echo "$(bold)Hacking/development targets:$(normal)" + $(Q)echo " tag - Generate Git tag of current version" + $(Q)echo " tarball - Generate tarball of latest version" + $(Q)echo " tags - Generate sparse ctags" + $(Q)echo " cscope - Generate cscope files" + $(Q)echo "$(bold)Misc targets:$(normal)" + $(Q)echo " nacl - Execute the build_nacl script" + $(Q)echo " help - Show this help" + $(Q)echo "$(bold)Available parameters:$(normal)" + $(Q)echo " DEBUG=1 - Enable debugging" + $(Q)echo " PREFIX=/path - Install path prefix" + $(Q)echo " CROSS_COMPILE=/path-prefix - Kernel-like cross-compiling prefix" + $(Q)echo " CROSS_LD_LIBRARY_PATH=/path - Library search path for cross-compiling" + $(Q)echo " CC=cgcc - Use sparse compiler wrapper" + $(Q)echo " Q= - Show verbose garbage" diff --git a/README b/README new file mode 
100644 index 00000000..fdf591fb --- /dev/null +++ b/README @@ -0,0 +1,58 @@ +////////////////////////////////////////////////////////////////////////////// + + netsniff-ng - the packet sniffing beast + +\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ + . . +netsniff-ng is a free, performant /( )\ +Linux network analyzer and .' {______} '. +networking toolkit. If you will, \ ^, ,^ / +the Swiss army knife for network |'O\ /O'| _.<0101011>-- +packets. > `' '` < / + ) ,.==., ( | +Web: http://netsniff-ng.org .-(|/--~~--\|)-' + ( ___ +The gain of performance is \__.=|___E +reached by built-in zero-copy +mechanisms, so that on packet reception and transmission the kernel does not +need to copy packets from kernel space to user space, and vice versa. + +The netsniff-ng toolkit's primary usage goal is to facilitate a network +developer's / hacker's daily Linux plumbing. It can be used for network +development, debugging, analysis, auditing or network reconnaissance. It +consists of the following fixed set of utilities: + + * netsniff-ng: a zero-copy packet analyzer, pcap capturing/replaying tool + * trafgen: a multithreaded low-level zero-copy network packet generator + * mausezahn: high-level packet generator for HW/SW appliances with Cisco-CLI + * ifpps: a top-like kernel networking and system statistics tool + * curvetun: a lightweight curve25519-based multiuser IP tunnel + * astraceroute: an autonomous system trace route and DPI testing utility + * flowtop: a top-like netfilter connection tracking tool + * bpfc: a Berkeley Packet Filter compiler with Linux extensions + +Have a look at the Documentation/ folder for further information, also at known +issues under Documentation/KnownIssues. Carefully read the INSTALL document for +the next steps in building netsniff-ng. Note that the toolkit is still quite +young and under heavy development, not yet feature complete and in a quality +level where we're satisfied with (i.e. for mausezahn). 
However, we're on a good +way towards tackling all these goals. + +The netsniff-ng toolkit is an open source project covered by the GNU General +Public License, version 2.0. For any questions or feedback about netsniff-ng +you are welcome to leave us a message at . + +netsniff-ng is non-profit and provided in the hope that it is found useful. +The current project status can be considered as "working". In general, all tools +have been tested by us to a great extent including their command-line options. +In fact, many of our tools are used in a lot of production systems. However, we +give no guarantee that our tools are free of bugs! If you spot some issues, +contact us as described in REPORTING-BUGS. Also, have a look at our FAQ [2] for +answering your questions. This project has received support from companies and +institutions listed in Documentation/Sponsors. Thanks for contributing, we're +thrilled to provide you with netsniff-ng! + +Happy packet hacking! + + [1] http://netsniff-ng.org/ + [2] http://netsniff-ng.org/faq.html diff --git a/REPORTING-BUGS b/REPORTING-BUGS new file mode 100644 index 00000000..6d10ce78 --- /dev/null +++ b/REPORTING-BUGS @@ -0,0 +1,18 @@ +For reporting bugs send an email to the list. + +If you use Fedora or have a RHEL subscription, you can also report bugs to: + + * https://bugzilla.redhat.com/ + +If you use Debian Linux, we might also process / track bugs there: + + * http://bugs.debian.org/cgi-bin/pkgreport.cgi?src=netsniff-ng + +In any case, you'll get a reply from us. Please do not contact individual +developers directly in case of netsniff-ng issues or patches, but rather +always our mailing list. By this, you're not wasting time of a single +developer and increase your chances of getting a reply from us. + +In general, we are also highly interested in how you use the toolkit, what +problems you are trying to solve and what kind of things you would like to have +improved. So feel free to drop us some feature requests as well.
diff --git a/astraceroute.c b/astraceroute.c new file mode 100644 index 00000000..a8c289b2 --- /dev/null +++ b/astraceroute.c @@ -0,0 +1,1077 @@ +/* + * netsniff-ng - the packet sniffing beast + * Copyright 2011 - 2013 Daniel Borkmann. + * Subject to the GPL, version 2. + */ + +#define _BSD_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bpf.h" +#include "die.h" +#include "tprintf.h" +#include "pkt_buff.h" +#include "proto.h" +#include "xmalloc.h" +#include "xio.h" +#include "csum.h" +#include "geoip.h" +#include "xutils.h" +#include "ring_rx.h" +#include "built_in.h" + +struct ctx { + char *host, *port, *dev, *payload; + int init_ttl, max_ttl, dns_resolv, queries, timeout, totlen, rcvlen; + int syn, ack, ecn, fin, psh, rst, urg, tos, nofrag, proto, show; + int sd_len, dport, latitude; +}; + +struct proto_ops { + int (*assembler)(uint8_t *packet, size_t len, int ttl, int proto, + const struct ctx *ctx, const struct sockaddr *dst, + const struct sockaddr *src); + const struct sock_filter *filter; + unsigned int flen; + unsigned int min_len_tcp, min_len_icmp; + int (*check)(uint8_t *packet, size_t len, int ttl, int id, + const struct sockaddr *src); + void (*handler)(uint8_t *packet, size_t len, int dns_resolv, + int latitude); +}; + +sig_atomic_t sigint = 0; + +static int assemble_ipv4(uint8_t *packet, size_t len, int ttl, int proto, + const struct ctx *ctx, const struct sockaddr *dst, + const struct sockaddr *src); +static int assemble_ipv6(uint8_t *packet, size_t len, int ttl, int proto, + const struct ctx *ctx, const struct sockaddr *dst, + const struct sockaddr *src); +static int check_ipv4(uint8_t *packet, size_t len, int ttl, int id, + const struct sockaddr *ss); +static void handle_ipv4(uint8_t *packet, size_t len, int dns_resolv, + int 
latitude); +static int check_ipv6(uint8_t *packet, size_t len, int ttl, int id, + const struct sockaddr *ss); +static void handle_ipv6(uint8_t *packet, size_t len, int dns_resolv, + int latitude); + +static const char *short_options = "H:p:nNf:m:i:d:q:x:SAEFPURt:Gl:hv46X:ZuL"; +static const struct option long_options[] = { + {"host", required_argument, NULL, 'H'}, + {"port", required_argument, NULL, 'p'}, + {"init-ttl", required_argument, NULL, 'f'}, + {"max-ttl", required_argument, NULL, 'm'}, + {"dev", required_argument, NULL, 'd'}, + {"num-probes", required_argument, NULL, 'q'}, + {"timeout", required_argument, NULL, 'x'}, + {"tos", required_argument, NULL, 't'}, + {"payload", required_argument, NULL, 'X'}, + {"totlen", required_argument, NULL, 'l'}, + {"numeric", no_argument, NULL, 'n'}, + {"latitude", no_argument, NULL, 'L'}, + {"update", no_argument, NULL, 'u'}, + {"dns", no_argument, NULL, 'N'}, + {"ipv4", no_argument, NULL, '4'}, + {"ipv6", no_argument, NULL, '6'}, + {"syn", no_argument, NULL, 'S'}, + {"ack", no_argument, NULL, 'A'}, + {"urg", no_argument, NULL, 'U'}, + {"fin", no_argument, NULL, 'F'}, + {"psh", no_argument, NULL, 'P'}, + {"rst", no_argument, NULL, 'R'}, + {"ecn-syn", no_argument, NULL, 'E'}, + {"show-packet", no_argument, NULL, 'Z'}, + {"nofrag", no_argument, NULL, 'G'}, + {"version", no_argument, NULL, 'v'}, + {"help", no_argument, NULL, 'h'}, + {NULL, 0, NULL, 0} +}; + +static const struct sock_filter ipv4_icmp_type_11[] = { + { 0x28, 0, 0, 0x0000000c }, /* ldh [12] */ + { 0x15, 0, 8, 0x00000800 }, /* jneq #0x800, drop */ + { 0x30, 0, 0, 0x00000017 }, /* ldb [23] */ + { 0x15, 0, 6, 0x00000001 }, /* jneq #0x1, drop */ + { 0x28, 0, 0, 0x00000014 }, /* ldh [20] */ + { 0x45, 4, 0, 0x00001fff }, /* jset #0x1fff, drop */ + { 0xb1, 0, 0, 0x0000000e }, /* ldxb 4*([14]&0xf) */ + { 0x50, 0, 0, 0x0000000e }, /* ldb [x + 14] */ + { 0x15, 0, 1, 0x0000000b }, /* jneq #0xb, drop */ + { 0x06, 0, 0, 0xffffffff }, /* ret #-1 */ + { 0x06, 0, 0, 0x00000000 
}, /* drop: ret #0 */ +}; + +static const struct sock_filter ipv6_icmp6_type_3[] = { + { 0x28, 0, 0, 0x0000000c }, /* ldh [12] */ + { 0x15, 0, 5, 0x000086dd }, /* jneq #0x86dd, drop */ + { 0x30, 0, 0, 0x00000014 }, /* ldb [20] */ + { 0x15, 0, 3, 0x0000003a }, /* jneq #0x3a, drop */ + { 0x30, 0, 0, 0x00000036 }, /* ldb [54] */ + { 0x15, 0, 1, 0x00000003 }, /* jneq #0x3, drop */ + { 0x06, 0, 0, 0xffffffff }, /* ret #-1 */ + { 0x06, 0, 0, 0x00000000 }, /* drop: ret #0 */ +}; + +static const struct proto_ops af_ops[] = { + [IPPROTO_IP] = { + .assembler = assemble_ipv4, + .handler = handle_ipv4, + .check = check_ipv4, + .filter = ipv4_icmp_type_11, + .flen = array_size(ipv4_icmp_type_11), + .min_len_tcp = sizeof(struct iphdr) + sizeof(struct tcphdr), + .min_len_icmp = sizeof(struct iphdr) + sizeof(struct icmphdr), + }, + [IPPROTO_IPV6] = { + .assembler = assemble_ipv6, + .handler = handle_ipv6, + .check = check_ipv6, + .filter = ipv6_icmp6_type_3, + .flen = array_size(ipv6_icmp6_type_3), + .min_len_tcp = sizeof(struct ip6_hdr) + sizeof(struct tcphdr), + .min_len_icmp = sizeof(struct ip6_hdr) + sizeof(struct icmp6hdr), + }, +}; + +static void signal_handler(int number) +{ + switch (number) { + case SIGINT: + sigint = 1; + default: + break; + } +} + +static void help(void) +{ + printf("\nastraceroute %s, autonomous system trace route utility\n", VERSION_STRING); + puts("http://www.netsniff-ng.org\n\n" + "Usage: astraceroute [options]\n" + "Options:\n" + " -H|--host Host/IPv4/IPv6 to lookup AS route to\n" + " -p|--port Hosts port to lookup AS route to\n" + " -i|-d|--dev Networking device, e.g. 
eth0\n" + " -f|--init-ttl Set initial TTL\n" + " -m|--max-ttl Set maximum TTL (def: 30)\n" + " -q|--num-probes Number of max probes for each hop (def: 2)\n" + " -x|--timeout Probe response timeout in sec (def: 3)\n" + " -X|--payload Specify a payload string to test DPIs\n" + " -l|--totlen Specify total packet len\n" + " -4|--ipv4 Use IPv4-only requests\n" + " -6|--ipv6 Use IPv6-only requests\n" + " -n|--numeric Do not do reverse DNS lookup for hops\n" + " -u|--update Update GeoIP databases\n" + " -L|--latitude Show latitude and longtitude\n" + " -N|--dns Do a reverse DNS lookup for hops\n" + " -S|--syn Set TCP SYN flag\n" + " -A|--ack Set TCP ACK flag\n" + " -F|--fin Set TCP FIN flag\n" + " -P|--psh Set TCP PSH flag\n" + " -U|--urg Set TCP URG flag\n" + " -R|--rst Set TCP RST flag\n" + " -E|--ecn-syn Send ECN SYN packets (RFC3168)\n" + " -t|--tos Set the IP TOS field\n" + " -G|--nofrag Set do not fragment bit\n" + " -Z|--show-packet Show returned packet on each hop\n" + " -v|--version Print version\n" + " -h|--help Print this help\n\n" + "Examples:\n" + " IPv4 trace of AS with TCP SYN probe (this will most-likely pass):\n" + " astraceroute -i eth0 -N -S -H netsniff-ng.org\n" + " IPv4 trace of AS with TCP ECN SYN probe:\n" + " astraceroute -i eth0 -N -E -H netsniff-ng.org\n" + " IPv4 trace of AS with TCP FIN probe:\n" + " astraceroute -i eth0 -N -F -H netsniff-ng.org\n" + " IPv4 trace of AS with Xmas probe:\n" + " astraceroute -i eth0 -N -FPU -H netsniff-ng.org\n" + " IPv4 trace of AS with Null probe with ASCII payload:\n" + " astraceroute -i eth0 -N -H netsniff-ng.org -X \"censor-me\" -Z\n" + " IPv6 trace of AS up to www.6bone.net:\n" + " astraceroute -6 -i eth0 -S -E -N -H www.6bone.net\n\n" + "Note:\n" + " If the TCP probe did not give any results, then astraceroute will\n" + " automatically probe for classic ICMP packets! 
/*
 * Fill the payload area of a probe packet.
 *
 * @packet:  destination buffer, must hold at least @len bytes
 * @len:     number of bytes to write
 * @payload: optional NUL-terminated string, may be NULL. As much of it as
 *           fits into @len is copied to the start of @packet; the
 *           terminating NUL is not copied.
 *
 * Every byte not covered by @payload is set to a rand() value.
 */
static void __assemble_data(uint8_t *packet, size_t len, const char *payload)
{
	size_t i, copied = 0;

	if (payload != NULL) {
		size_t plen = strlen(payload);

		/* Sizes stay in size_t; no narrowing to int. */
		copied = plen < len ? plen : len;
		if (copied > 0)
			memcpy(packet, payload, copied);
	}

	for (i = copied; i < len; ++i)
		packet[i] = (uint8_t) rand();
}
tcphdr)); + + tcph->source = htons((uint16_t) rand()); + tcph->dest = htons((uint16_t) dport); + + tcph->seq = htonl(rand()); + tcph->ack_seq = (!!ack ? htonl(rand()) : 0); + + tcph->doff = 5; + + tcph->syn = !!syn; + tcph->ack = !!ack; + tcph->urg = !!urg; + tcph->fin = !!fin; + tcph->rst = !!rst; + tcph->psh = !!psh; + tcph->ece = !!ecn; + tcph->cwr = !!ecn; + + tcph->window = htons((uint16_t) (100 + (rand() % 65435))); + tcph->urg_ptr = (!!urg ? htons((uint16_t) rand()) : 0); + tcph->check = 0; +} + +static int assemble_ipv4(uint8_t *packet, size_t len, int ttl, int proto, + const struct ctx *ctx, const struct sockaddr *dst, + const struct sockaddr *src) +{ + uint8_t *data; + size_t data_len, off_next = 0; + struct iphdr *iph = (struct iphdr *) packet; + + bug_on(!src || !dst); + bug_on(src->sa_family != PF_INET || dst->sa_family != PF_INET); + bug_on(len < sizeof(*iph) + min(sizeof(struct tcphdr), + sizeof(struct icmphdr))); + + iph->ihl = 5; + iph->version = 4; + iph->tos = (uint8_t) ctx->tos; + + iph->tot_len = htons((uint16_t) len); + iph->id = htons((uint16_t) rand()); + + iph->frag_off = ctx->nofrag ? 
IP_DF : 0; + iph->ttl = (uint8_t) ttl; + + iph->saddr = ((const struct sockaddr_in *) src)->sin_addr.s_addr; + iph->daddr = ((const struct sockaddr_in *) dst)->sin_addr.s_addr; + + iph->protocol = (uint8_t) proto; + + data = packet + sizeof(*iph); + data_len = len - sizeof(*iph); + + switch (proto) { + case IPPROTO_TCP: + __assemble_tcp(data, data_len, ctx->syn, ctx->ack, ctx->urg, + ctx->fin, ctx->rst, ctx->psh, ctx->ecn, ctx->dport); + off_next = sizeof(struct tcphdr); + break; + case IPPROTO_ICMP: + __assemble_icmp4(data, data_len); + off_next = sizeof(struct icmphdr); + break; + default: + bug(); + } + + data = packet + sizeof(*iph) + off_next; + data_len = len - sizeof(*iph) - off_next; + + __assemble_data(data, data_len, ctx->payload); + + iph->check = csum((unsigned short *) packet, ntohs(iph->tot_len) >> 1); + + return ntohs(iph->id); +} + +static int assemble_ipv6(uint8_t *packet, size_t len, int ttl, int proto, + const struct ctx *ctx, const struct sockaddr *dst, + const struct sockaddr *src) +{ + uint8_t *data; + size_t data_len, off_next = 0; + struct ip6_hdr *ip6h = (struct ip6_hdr *) packet; + + bug_on(!src || !dst); + bug_on(src->sa_family != PF_INET6 || dst->sa_family != PF_INET6); + bug_on(len < sizeof(*ip6h) + min(sizeof(struct tcphdr), + sizeof(struct icmp6hdr))); + + ip6h->ip6_flow = htonl(rand() & 0x000fffff); + ip6h->ip6_vfc = 0x60; + + ip6h->ip6_plen = htons((uint16_t) len - sizeof(*ip6h)); + ip6h->ip6_nxt = (uint8_t) proto; + ip6h->ip6_hlim = (uint8_t) ttl; + + memcpy(&ip6h->ip6_src, &(((const struct sockaddr_in6 *) + src)->sin6_addr), sizeof(ip6h->ip6_src)); + memcpy(&ip6h->ip6_dst, &(((const struct sockaddr_in6 *) + dst)->sin6_addr), sizeof(ip6h->ip6_dst)); + + data = packet + sizeof(*ip6h); + data_len = len - sizeof(*ip6h); + + switch (proto) { + case IPPROTO_TCP: + __assemble_tcp(data, data_len, ctx->syn, ctx->ack, ctx->urg, + ctx->fin, ctx->rst, ctx->psh, ctx->ecn, ctx->dport); + off_next = sizeof(struct tcphdr); + break; + case 
IPPROTO_ICMP: + case IPPROTO_ICMPV6: + __assemble_icmp6(data, data_len); + off_next = sizeof(struct icmp6hdr); + break; + default: + bug(); + } + + data = packet + sizeof(*ip6h) + off_next; + data_len = len - sizeof(*ip6h) - off_next; + + __assemble_data(data, data_len, ctx->payload); + + return ntohl(ip6h->ip6_flow) & 0x000fffff; +}
+
+/*
+ * Decide whether a captured IPv4 packet is the ICMP "time exceeded, TTL
+ * exceeded in transit" answer to our probe: it has to be ICMP, be addressed
+ * to ss, and the IP header quoted behind the ICMP header has to carry id.
+ *
+ * Returns len on a match and -EINVAL otherwise. ttl is not used.
+ *
+ * NOTE(review): the ICMP header is located at a fixed sizeof(struct iphdr)
+ * offset, which presumes the outer header carries no IP options - confirm.
+ */
+static int check_ipv4(uint8_t *packet, size_t len, int ttl, int id,
+		      const struct sockaddr *ss)
+{
+	struct iphdr *iph = (struct iphdr *) packet;
+	struct iphdr *iph_inner;
+	struct icmphdr *icmph;
+
+	if (iph->protocol != IPPROTO_ICMP)
+		return -EINVAL;
+	if (iph->daddr != ((const struct sockaddr_in *) ss)->sin_addr.s_addr)
+		return -EINVAL;
+
+	icmph = (struct icmphdr *) (packet + sizeof(struct iphdr));
+	if (icmph->type != ICMP_TIME_EXCEEDED)
+		return -EINVAL;
+	if (icmph->code != ICMP_EXC_TTL)
+		return -EINVAL;
+
+	iph_inner = (struct iphdr *) (packet + sizeof(struct iphdr) +
+				      sizeof(struct icmphdr));
+	if (ntohs(iph_inner->id) != id)
+		return -EINVAL;
+
+	return len;
+}
+
+/*
+ * Print one hop for an accepted IPv4 reply: the sender's numeric address
+ * (preceded by its reverse-resolved name when dns_resolv is set), followed
+ * by the AS name, country and city reported by the geoip4_* lookups and,
+ * when latitude is set, the coordinates. len is not used.
+ */
+static void handle_ipv4(uint8_t *packet, size_t len, int dns_resolv, int latitude)
+{
+	char hbuff[256];
+	struct iphdr *iph = (struct iphdr *) packet;
+	struct sockaddr_in sd;
+	struct hostent *hent;
+	const char *as, *country, *city;
+
+	memset(hbuff, 0, sizeof(hbuff));
+	memset(&sd, 0, sizeof(sd));
+	sd.sin_family = PF_INET;
+	sd.sin_addr.s_addr = iph->saddr;
+
+	/*
+	 * Pass the real size of hbuff; NI_MAXHOST is unrelated to the 256
+	 * bytes that are actually available here.
+	 */
+	getnameinfo((struct sockaddr *) &sd, sizeof(sd),
+		    hbuff, sizeof(hbuff), NULL, 0, NI_NUMERICHOST);
+
+	as = geoip4_as_name(sd);
+	country = geoip4_country_name(sd);
+	city = geoip4_city_name(sd);
+
+	if (dns_resolv) {
+		hent = gethostbyaddr(&sd.sin_addr, sizeof(sd.sin_addr), PF_INET);
+		if (hent)
+			printf(" %s (%s)", hent->h_name, hbuff);
+		else
+			printf(" %s", hbuff);
+	} else {
+		printf(" %s", hbuff);
+	}
+	if (as)
+		printf(" in %s", as);
+	if (country) {
+		printf(" in %s", country);
+		if (city)
+			printf(", %s", city);
+	}
+	if (latitude)
+		printf(" (%f/%f)", geoip4_latitude(sd), geoip4_longitude(sd));
+}
+
+/*
+ * IPv6 counterpart of check_ipv4(): accept the packet only if its next
+ * header is 0x3a (ICMPv6), it is addressed to ss, the ICMPv6 type/code are
+ * "time exceeded / hop limit exceeded", and the flow label of the IPv6
+ * header quoted behind the ICMPv6 header equals id.
+ *
+ * Returns len on a match and -EINVAL otherwise. ttl is not used.
+ */
+static int check_ipv6(uint8_t *packet, size_t len, int ttl, int id,
+		      const struct sockaddr *ss)
+{
+	struct ip6_hdr *ip6h = (struct ip6_hdr *) packet;
+	struct ip6_hdr *ip6h_inner;
+	struct icmp6hdr *icmp6h;
+
+	if (ip6h->ip6_nxt != 0x3a)
+		return -EINVAL;
+	if (memcmp(&ip6h->ip6_dst, &(((const struct sockaddr_in6 *)
+		   ss)->sin6_addr), sizeof(ip6h->ip6_dst)))
+		return -EINVAL;
+
+	icmp6h = (struct icmp6hdr *) (packet + sizeof(*ip6h));
+	if (icmp6h->icmp6_type != ICMPV6_TIME_EXCEED)
+		return -EINVAL;
+	if (icmp6h->icmp6_code != ICMPV6_EXC_HOPLIMIT)
+		return -EINVAL;
+
+	ip6h_inner = (struct ip6_hdr *) (packet + sizeof(*ip6h) + sizeof(*icmp6h));
+	if ((ntohl(ip6h_inner->ip6_flow) & 0x000fffff) != id)
+		return -EINVAL;
+
+	return len;
+}
+
+/*
+ * IPv6 counterpart of handle_ipv4(): print the replying hop's address,
+ * optional reverse-resolved name and the geoip6_* AS/country/city (and
+ * coordinates when latitude is set). len is not used.
+ */
+static void handle_ipv6(uint8_t *packet, size_t len, int dns_resolv, int latitude)
+{
+	char hbuff[256];
+	struct ip6_hdr *ip6h = (struct ip6_hdr *) packet;
+	struct sockaddr_in6 sd;
+	struct hostent *hent;
+	const char *as, *country, *city;
+
+	memset(hbuff, 0, sizeof(hbuff));
+	memset(&sd, 0, sizeof(sd));
+	sd.sin6_family = PF_INET6;
+	memcpy(&sd.sin6_addr, &ip6h->ip6_src, sizeof(ip6h->ip6_src));
+
+	/* As in handle_ipv4(): report the true buffer size, not NI_MAXHOST. */
+	getnameinfo((struct sockaddr *) &sd, sizeof(sd),
+		    hbuff, sizeof(hbuff), NULL, 0, NI_NUMERICHOST);
+
+	as = geoip6_as_name(sd);
+	country = geoip6_country_name(sd);
+	city = geoip6_city_name(sd);
+
+	if (dns_resolv) {
+		hent = gethostbyaddr(&sd.sin6_addr, sizeof(sd.sin6_addr), PF_INET6);
+		if (hent)
+			printf(" %s (%s)", hent->h_name, hbuff);
+		else
+			printf(" %s", hbuff);
+	} else {
+		printf(" %s", hbuff);
+	}
+	if (as)
+		printf(" in %s", as);
+	if (country) {
+		printf(" in %s", country);
+		if (city)
+			printf(", %s", city);
+	}
+	if (latitude)
+		printf(" (%f/%f)", geoip6_latitude(sd), geoip6_longitude(sd));
+}
+
+static void show_trace_info(struct ctx *ctx, const struct sockaddr_storage *ss, + const struct sockaddr_storage *sd) +{ + char hbuffs[256], hbuffd[256]; + + memset(hbuffd, 0, sizeof(hbuffd)); + 
getnameinfo((struct sockaddr *) sd, sizeof(*sd), + hbuffd, sizeof(hbuffd), NULL, 0, NI_NUMERICHOST); + + memset(hbuffs, 0, sizeof(hbuffs)); + getnameinfo((struct sockaddr *) ss, sizeof(*ss), + hbuffs, sizeof(hbuffs), NULL, 0, NI_NUMERICHOST); + + printf("AS path IPv%d TCP trace from %s to %s:%s (%s) with len %d " + "Bytes, %u max hops\n", ctx->proto == IPPROTO_IP ? 4 : 6, + hbuffs, hbuffd, ctx->port, ctx->host, ctx->totlen, ctx->max_ttl); + + printf("Using flags SYN:%d,ACK:%d,ECN:%d,FIN:%d,PSH:%d,RST:%d,URG:%d\n", + ctx->syn, ctx->ack, ctx->ecn, ctx->fin, ctx->psh, ctx->rst, ctx->urg); + + if (ctx->payload) + printf("With payload: \'%s\'\n", ctx->payload); +} + +static int get_remote_fd(struct ctx *ctx, struct sockaddr_storage *ss, + struct sockaddr_storage *sd) +{ + int fd = -1, ret, one = 1; + struct addrinfo hints, *ahead, *ai; + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = PF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_protocol = IPPROTO_TCP; + hints.ai_flags = AI_NUMERICSERV; + + ret = getaddrinfo(ctx->host, ctx->port, &hints, &ahead); + if (ret < 0) + panic("Cannot get address info!\n"); + + for (ai = ahead; ai != NULL && fd < 0; ai = ai->ai_next) { + if (!((ai->ai_family == PF_INET6 && ctx->proto == IPPROTO_IPV6) || + (ai->ai_family == PF_INET && ctx->proto == IPPROTO_IP))) + continue; + + fd = socket(ai->ai_family, SOCK_RAW, IPPROTO_RAW); + if (fd < 0) + continue; + + memset(ss, 0, sizeof(*ss)); + ret = device_address(ctx->dev, ai->ai_family, ss); + if (ret < 0) + panic("Cannot get own device address!\n"); + + ret = bind(fd, (struct sockaddr *) ss, sizeof(*ss)); + if (ret < 0) + panic("Cannot bind socket!\n"); + + memset(sd, 0, sizeof(*sd)); + memcpy(sd, ai->ai_addr, ai->ai_addrlen); + + ctx->sd_len = ai->ai_addrlen; + ctx->dport = strtoul(ctx->port, NULL, 10); + + ret = setsockopt(fd, ctx->proto, IP_HDRINCL, &one, sizeof(one)); + if (ret < 0) + panic("Kernel does not support IP_HDRINCL!\n"); + + if (ai->ai_family == PF_INET6) { + 
struct sockaddr_in6 *sd6 = (struct sockaddr_in6 *) sd; + + sd6->sin6_port = 0; + } + + break; + } + + freeaddrinfo(ahead); + + if (fd < 0) + panic("Cannot create socket! Does remote " + "support IPv%d?!\n", + ctx->proto == IPPROTO_IP ? 4 : 6); + + return fd; +}
+
+/*
+ * Try to enable the kernel BPF JIT and attach the filter that af_ops holds
+ * for ctx->proto to socket fd.
+ */
+static void inject_filter(struct ctx *ctx, int fd)
+{
+	struct sock_fprog bpf_ops;
+
+	enable_kernel_bpf_jit_compiler();
+
+	memset(&bpf_ops, 0, sizeof(bpf_ops));
+	bpf_ops.filter = (struct sock_filter *) af_ops[ctx->proto].filter;
+	bpf_ops.len = af_ops[ctx->proto].flen;
+
+	bpf_attach_to_sock(fd, &bpf_ops);
+}
+
+/*
+ * Send one probe with the given ttl over fd and wait up to ctx->timeout
+ * seconds (2 if unset) for a packet on the capture socket fd_cap.
+ *
+ * When a packet arrives, the elapsed time since sending is stored in *diff
+ * (if diff is non-NULL) before the packet is validated, so *diff may be
+ * written even when a negative value is returned.
+ *
+ * Returns whatever the address family's check() callback returns for the
+ * received packet, or -EIO on timeout, interruption, receive error or a
+ * packet shorter than the Ethernet header plus min_len_icmp.
+ */
+static int __process_node(struct ctx *ctx, int fd, int fd_cap, int ttl,
+			  int inner_proto, uint8_t *pkt_snd, uint8_t *pkt_rcv,
+			  const struct sockaddr_storage *ss,
+			  const struct sockaddr_storage *sd, struct timeval *diff)
+{
+	int pkt_id, ret, timeout;
+	struct pollfd pfd;
+	struct timeval start, end;
+
+	prepare_polling(fd_cap, &pfd);
+
+	memset(pkt_snd, 0, ctx->totlen);
+	pkt_id = af_ops[ctx->proto].assembler(pkt_snd, ctx->totlen, ttl,
+					      inner_proto, ctx,
+					      (const struct sockaddr *) sd,
+					      (const struct sockaddr *) ss);
+
+	ret = sendto(fd, pkt_snd, ctx->totlen, 0, (struct sockaddr *) sd,
+		     ctx->sd_len);
+	if (ret < 0)
+		panic("sendto failed: %s\n", strerror(errno));
+
+	bug_on(gettimeofday(&start, NULL));
+
+	timeout = (ctx->timeout > 0 ? ctx->timeout : 2) * 1000;
+
+	ret = poll(&pfd, 1, timeout);
+	if (ret <= 0 || !(pfd.revents & POLLIN) || sigint != 0)
+		return -EIO;
+
+	bug_on(gettimeofday(&end, NULL));
+	if (diff)
+		timersub(&end, &start, diff);
+
+	ret = recvfrom(fd_cap, pkt_rcv, ctx->rcvlen, 0, NULL, NULL);
+	/*
+	 * Test for failure first: comparing a negative int against the
+	 * size_t sum below would convert it to a huge unsigned value and
+	 * let a failed recvfrom() slip through.
+	 */
+	if (ret < 0 ||
+	    (size_t) ret < sizeof(struct ethhdr) + af_ops[ctx->proto].min_len_icmp)
+		return -EIO;
+
+	return af_ops[ctx->proto].check(pkt_rcv + sizeof(struct ethhdr),
+					ret - sizeof(struct ethhdr), ttl,
+					pkt_id, (const struct sockaddr *) ss);
+}
+
+/* Store tv divided by divisor in result; the division is done in microseconds. */
+static void timerdiv(const unsigned long divisor, const struct timeval *tv,
+		     struct timeval *result)
+{
+	uint64_t x = ((uint64_t) tv->tv_sec * 1000 * 1000 + tv->tv_usec) / divisor;
+
+	result->tv_sec = x / 1000 / 1000;
+	result->tv_usec = x % (1000 * 1000);
+}
+
+/* qsort() comparator ordering struct timeval values ascending. */
+static int timevalcmp(const void *t1, const void *t2)
+{
+	const struct timeval *a = t1, *b = t2;
+
+	if (timercmp(a, b, <))
+		return -1;
+	if (timercmp(a, b, >))
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Probe one hop up to array_size(probes) times and print the median round
+ * trip time of the replies that passed validation.
+ *
+ * The first validated reply is kept in pkt_rcv, later ones are received
+ * into a scratch buffer. Probing stops early when no reply was validated by
+ * the time ctx->queries probes were sent, or when sigint is set.
+ *
+ * Returns the __process_node() result of the first validated reply, or
+ * -EIO if there was none.
+ */
+static int __process_time(struct ctx *ctx, int fd, int fd_cap, int ttl,
+			  int inner_proto, uint8_t *pkt_snd, uint8_t *pkt_rcv,
+			  const struct sockaddr_storage *ss,
+			  const struct sockaddr_storage *sd)
+{
+	int good = 0, i, spin = 0, ret = -EIO, idx, ret_good = -EIO;
+	struct timeval probes[9], rtt, sum, res;
+	uint8_t *trash = xmalloc(ctx->rcvlen);
+	char *cwait[] = { "-", "\\", "|", "/" };
+	const char *proto_short[] = {
+		[IPPROTO_TCP] = "t",
+		[IPPROTO_ICMP] = "i",
+		[IPPROTO_ICMPV6] = "i",
+	};
+
+	memset(probes, 0, sizeof(probes));
+	for (i = 0; i < array_size(probes) && sigint == 0; ++i) {
+		timerclear(&rtt);
+		ret = __process_node(ctx, fd, fd_cap, ttl, inner_proto,
+				     pkt_snd, good == 0 ? pkt_rcv : trash,
+				     ss, sd, &rtt);
+		if (ret > 0) {
+			if (good == 0)
+				ret_good = ret;
+			/*
+			 * Keep only timings of validated replies, packed at
+			 * the front. __process_node() also fills in the time
+			 * for replies it then rejects, so sizing a copy by
+			 * 'good' while selecting by non-zero time could
+			 * overrun that copy.
+			 */
+			probes[good++] = rtt;
+		}
+
+		if (good == 0 && ctx->queries == i)
+			break;
+
+		usleep(50000);
+
+		printf("\r%2d: %s", ttl, cwait[spin++]);
+		fflush(stdout);
+		if (spin >= array_size(cwait))
+			spin = 0;
+	}
+
+	if (good == 0) {
+		xfree(trash);
+		return -EIO;
+	}
+
+	qsort(probes, good, sizeof(probes[0]), timevalcmp);
+
+	/* Median: middle sample, or the mean of the two middle samples. */
+	idx = good / 2;
+	if (good % 2 == 0) {
+		timeradd(&probes[idx], &probes[idx - 1], &sum);
+		timerdiv(2, &sum, &res);
+	} else {
+		res = probes[idx];
+	}
+
+	printf("\r%2d: %s[", ttl, proto_short[inner_proto]);
+	if (res.tv_sec > 0)
+		printf("%lu sec ", (unsigned long) res.tv_sec);
+	printf("%7lu us", (unsigned long) res.tv_usec);
+	printf("]");
+
+	xfree(trash);
+
+	return ret_good;
+}
+
+/*
+ * Run __process_time() for one inner protocol up to ctx->queries times.
+ * On the first success the reply in pkt_rcv is handed to the address
+ * family's handler() and, if ctx->show is set, dumped via hex_ascii().
+ *
+ * Returns the last __process_time() result (negative if every try failed).
+ *
+ * NOTE(review): ret appears to already be the length without the Ethernet
+ * header (see the check() call in __process_node()), yet it is reduced by
+ * sizeof(struct ethhdr) again for handler() and used unchanged as the
+ * length of the full frame for pkt_alloc() - confirm the intended lengths.
+ */
+static int __probe_remote(struct ctx *ctx, int fd, int fd_cap, int ttl,
+			  uint8_t *pkt_snd, uint8_t *pkt_rcv,
+			  const struct sockaddr_storage *ss,
+			  const struct sockaddr_storage *sd,
+			  int inner_proto)
+{
+	int ret = -EIO, tries = ctx->queries;
+
+	while (tries-- > 0 && sigint == 0) {
+		ret = __process_time(ctx, fd, fd_cap, ttl, inner_proto,
+				     pkt_snd, pkt_rcv, ss, sd);
+		if (ret < 0)
+			continue;
+
+		af_ops[ctx->proto].handler(pkt_rcv + sizeof(struct ethhdr),
+					   ret - sizeof(struct ethhdr),
+					   ctx->dns_resolv, ctx->latitude);
+		if (ctx->show) {
+			struct pkt_buff *pkt;
+
+			printf("\n");
+			pkt = pkt_alloc(pkt_rcv, ret);
+			hex_ascii(pkt);
+			tprintf_flush();
+			pkt_free(pkt);
+		}
+
+		break;
+	}
+
+	return ret;
+}
+
+static int __process_ttl(struct ctx *ctx, int fd, int fd_cap, int ttl, + uint8_t *pkt_snd, uint8_t *pkt_rcv, + const struct sockaddr_storage *ss, + 
const struct sockaddr_storage *sd) +{ + int ret = -EIO, i; + const int inner_protos[] = { + IPPROTO_TCP, + IPPROTO_ICMP, + }; + + printf("%2d: ", ttl); + fflush(stdout); + + for (i = 0; i < array_size(inner_protos) && sigint == 0; ++i) { + ret = __probe_remote(ctx, fd, fd_cap, ttl, pkt_snd, pkt_rcv, ss, sd, + inner_protos[i]); + if (ret > 0) + break; + } + + if (ret <= 0) + printf("\r%2d: ?[ no answer]", ttl); + if (ctx->show == 0) + printf("\n"); + if (ctx->show && ret <= 0) + printf("\n\n"); + + fflush(stdout); + return 0; +} + +static int main_trace(struct ctx *ctx) +{ + int fd, fd_cap, ifindex, ttl; + struct ring dummy_ring; + struct sockaddr_storage ss, sd; + uint8_t *pkt_snd, *pkt_rcv; + + fd = get_remote_fd(ctx, &ss, &sd); + fd_cap = pf_socket(); + + inject_filter(ctx, fd_cap); + + ifindex = device_ifindex(ctx->dev); + bind_rx_ring(fd_cap, &dummy_ring, ifindex); + + if (ctx->totlen < af_ops[ctx->proto].min_len_tcp) { + ctx->totlen = af_ops[ctx->proto].min_len_tcp; + if (ctx->payload) + ctx->totlen += strlen(ctx->payload); + } + + ctx->rcvlen = device_mtu(ctx->dev) - sizeof(struct ethhdr); + if (ctx->totlen >= ctx->rcvlen) + panic("Packet len exceeds device MTU!\n"); + + pkt_snd = xmalloc(ctx->totlen); + pkt_rcv = xmalloc(ctx->rcvlen); + + show_trace_info(ctx, &ss, &sd); + + for (ttl = ctx->init_ttl; ttl <= ctx->max_ttl && sigint == 0; ++ttl) + __process_ttl(ctx, fd, fd_cap, ttl, pkt_snd, pkt_rcv, + &ss, &sd); + + xfree(pkt_snd); + xfree(pkt_rcv); + + close(fd_cap); + close(fd); + + return 0; +} + +int main(int argc, char **argv) +{ + int c, opt_index, ret; + struct ctx ctx; + + setfsuid(getuid()); + setfsgid(getgid()); + + srand(time(NULL)); + + memset(&ctx, 0, sizeof(ctx)); + ctx.init_ttl = 1; + ctx.max_ttl = 30; + ctx.queries = 2; + ctx.timeout = 2; + ctx.proto = IPPROTO_IP; + ctx.payload = NULL; + ctx.dev = xstrdup("eth0"); + ctx.port = xstrdup("80"); + + while ((c = getopt_long(argc, argv, short_options, long_options, + &opt_index)) != EOF) { + switch 
(c) { + case 'h': + help(); + break; + case 'v': + version(); + break; + case 'u': + update_geoip(); + die(); + break; + case 'H': + ctx.host = xstrdup(optarg); + break; + case 'p': + if (ctx.port) + xfree(ctx.port); + ctx.port = xstrdup(optarg); + break; + case 'n': + ctx.dns_resolv = 0; + break; + case '4': + ctx.proto = IPPROTO_IP; + break; + case '6': + ctx.proto = IPPROTO_IPV6; + break; + case 'Z': + ctx.show = 1; + break; + case 'N': + ctx.dns_resolv = 1; + break; + case 'f': + ctx.init_ttl = atoi(optarg); + if (ctx.init_ttl <= 0) + help(); + break; + case 'm': + ctx.max_ttl = atoi(optarg); + if (ctx.max_ttl <= 0) + help(); + break; + case 'i': + case 'd': + free(ctx.dev); + ctx.dev = xstrdup(optarg); + break; + case 'q': + ctx.queries = atoi(optarg); + if (ctx.queries <= 0) + help(); + break; + case 'x': + ctx.timeout = atoi(optarg); + if (ctx.timeout <= 0) + help(); + break; + case 'L': + ctx.latitude = 1; + break; + case 'S': + ctx.syn = 1; + break; + case 'A': + ctx.ack = 1; + break; + case 'F': + ctx.fin = 1; + break; + case 'U': + ctx.urg = 1; + break; + case 'P': + ctx.psh = 1; + break; + case 'R': + ctx.rst = 1; + break; + case 'E': + ctx.syn = 1; + ctx.ecn = 1; + break; + case 't': + ctx.tos = atoi(optarg); + if (ctx.tos < 0) + help(); + break; + case 'G': + ctx.nofrag = 1; + break; + case 'X': + ctx.payload = xstrdup(optarg); + break; + case 'l': + ctx.totlen = atoi(optarg); + if (ctx.totlen <= 0) + help(); + break; + case '?': + switch (optopt) { + case 'H': + case 'p': + case 'f': + case 'm': + case 'i': + case 'd': + case 'q': + case 'x': + case 'X': + case 't': + case 'l': + panic("Option -%c requires an argument!\n", + optopt); + default: + if (isprint(optopt)) + printf("Unknown option character `0x%X\'!\n", optopt); + die(); + } + default: + break; + } + } + + if (argc < 3 || !ctx.host || !ctx.port || ctx.init_ttl > ctx.max_ttl || + ctx.init_ttl > MAXTTL || ctx.max_ttl > MAXTTL) + help(); + + if (!device_up_and_running(ctx.dev)) + 
panic("Networking device not up and running!\n"); + if (device_mtu(ctx.dev) <= ctx.totlen) + panic("Packet larger than device MTU!\n"); + + register_signal(SIGHUP, signal_handler); + register_signal(SIGINT, signal_handler); + + tprintf_init(); + init_geoip(1); + + ret = main_trace(&ctx); + + destroy_geoip(); + tprintf_cleanup(); + + free(ctx.dev); + free(ctx.host); + free(ctx.port); + free(ctx.payload); + + return ret; +} diff --git a/astraceroute/.gitignore b/astraceroute/.gitignore new file mode 100644 index 00000000..63434023 --- /dev/null +++ b/astraceroute/.gitignore @@ -0,0 +1,5 @@ +*.* + +!.gitignore +!Makefile +!build_geoip.sh diff --git a/astraceroute/Makefile b/astraceroute/Makefile new file mode 100644 index 00000000..91f0a9e5 --- /dev/null +++ b/astraceroute/Makefile @@ -0,0 +1,13 @@ +astraceroute-libs = -lGeoIP \ + -lpthread \ + -lz + +astraceroute-objs = xmalloc.o \ + xio.o \ + xutils.o \ + proto_none.o \ + tprintf.o \ + bpf.o \ + geoip.o \ + ring_rx.o \ + astraceroute.o diff --git a/bpf.c b/bpf.c new file mode 100644 index 00000000..dc7e3cba --- /dev/null +++ b/bpf.c @@ -0,0 +1,765 @@ +/* + * netsniff-ng - the packet sniffing beast + * Copyright 2009 - 2012 Daniel Borkmann. + * Copyright 2009, 2010 Emmanuel Roullit. + * Copyright 1990-1996 The Regents of the University of + * California. All rights reserved. (3-clause BSD license) + * Subject to the GPL, version 2. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "bpf.h" +#include "xmalloc.h" +#include "xutils.h" +#include "die.h" + +#define EXTRACT_SHORT(packet) \ + ((unsigned short) ntohs(*(unsigned short *) packet)) +#define EXTRACT_LONG(packet) \ + (ntohl(*(unsigned long *) packet)) + +#ifndef BPF_MEMWORDS +# define BPF_MEMWORDS 16 +#endif + +#define BPF_LD_B (BPF_LD | BPF_B) +#define BPF_LD_H (BPF_LD | BPF_H) +#define BPF_LD_W (BPF_LD | BPF_W) +#define BPF_LDX_B (BPF_LDX | BPF_B) +#define BPF_LDX_W (BPF_LDX | BPF_W) +#define BPF_JMP_JA (BPF_JMP | BPF_JA) +#define BPF_JMP_JEQ (BPF_JMP | BPF_JEQ) +#define BPF_JMP_JGT (BPF_JMP | BPF_JGT) +#define BPF_JMP_JGE (BPF_JMP | BPF_JGE) +#define BPF_JMP_JSET (BPF_JMP | BPF_JSET) +#define BPF_ALU_ADD (BPF_ALU | BPF_ADD) +#define BPF_ALU_SUB (BPF_ALU | BPF_SUB) +#define BPF_ALU_MUL (BPF_ALU | BPF_MUL) +#define BPF_ALU_DIV (BPF_ALU | BPF_DIV) +#define BPF_ALU_MOD (BPF_ALU | BPF_MOD) +#define BPF_ALU_NEG (BPF_ALU | BPF_NEG) +#define BPF_ALU_AND (BPF_ALU | BPF_AND) +#define BPF_ALU_OR (BPF_ALU | BPF_OR) +#define BPF_ALU_XOR (BPF_ALU | BPF_XOR) +#define BPF_ALU_LSH (BPF_ALU | BPF_LSH) +#define BPF_ALU_RSH (BPF_ALU | BPF_RSH) +#define BPF_MISC_TAX (BPF_MISC | BPF_TAX) +#define BPF_MISC_TXA (BPF_MISC | BPF_TXA) + +static const char *op_table[] = { + [BPF_LD_B] = "ldb", + [BPF_LD_H] = "ldh", + [BPF_LD_W] = "ld", + [BPF_LDX] = "ldx", + [BPF_LDX_B] = "ldxb", + [BPF_ST] = "st", + [BPF_STX] = "stx", + [BPF_JMP_JA] = "ja", + [BPF_JMP_JEQ] = "jeq", + [BPF_JMP_JGT] = "jgt", + [BPF_JMP_JGE] = "jge", + [BPF_JMP_JSET] = "jset", + [BPF_ALU_ADD] = "add", + [BPF_ALU_SUB] = "sub", + [BPF_ALU_MUL] = "mul", + [BPF_ALU_DIV] = "div", + [BPF_ALU_MOD] = "mod", + [BPF_ALU_NEG] = "neg", + [BPF_ALU_AND] = "and", + [BPF_ALU_OR] = "or", + [BPF_ALU_XOR] = "xor", + [BPF_ALU_LSH] = "lsh", + [BPF_ALU_RSH] = "rsh", + [BPF_RET] = "ret", + [BPF_MISC_TAX] = "tax", + [BPF_MISC_TXA] = "txa", +}; + +void bpf_dump_op_table(void) +{ + int i; + for 
(i = 0; i < array_size(op_table); ++i) { + if (op_table[i]) + printf("%s\n", op_table[i]); + } +} + +static const char *bpf_dump_linux_k(uint32_t k) +{ + switch (k) { + default: + return "[%d]"; + case SKF_AD_OFF + SKF_AD_PROTOCOL: + return "#proto"; + case SKF_AD_OFF + SKF_AD_PKTTYPE: + return "#type"; + case SKF_AD_OFF + SKF_AD_IFINDEX: + return "#ifidx"; + case SKF_AD_OFF + SKF_AD_NLATTR: + return "#nla"; + case SKF_AD_OFF + SKF_AD_NLATTR_NEST: + return "#nlan"; + case SKF_AD_OFF + SKF_AD_MARK: + return "#mark"; + case SKF_AD_OFF + SKF_AD_QUEUE: + return "#queue"; + case SKF_AD_OFF + SKF_AD_HATYPE: + return "#hatype"; + case SKF_AD_OFF + SKF_AD_RXHASH: + return "#rxhash"; + case SKF_AD_OFF + SKF_AD_CPU: + return "#cpu"; + case SKF_AD_OFF + SKF_AD_VLAN_TAG: + return "#vlant"; + case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT: + return "#vlanp"; + } +} + +static char *__bpf_dump(const struct sock_filter bpf, int n) +{ + int v; + const char *fmt, *op; + static char image[256]; + char operand[64]; + + v = bpf.k; + switch (bpf.code) { + default: + op = "unimp"; + fmt = "0x%x"; + v = bpf.code; + break; + case BPF_RET | BPF_K: + op = op_table[BPF_RET]; + fmt = "#0x%x"; + break; + case BPF_RET | BPF_A: + op = op_table[BPF_RET]; + fmt = "a"; + break; + case BPF_RET | BPF_X: + op = op_table[BPF_RET]; + fmt = "x"; + break; + case BPF_LD_W | BPF_ABS: + op = op_table[BPF_LD_W]; + fmt = bpf_dump_linux_k(bpf.k); + break; + case BPF_LD_H | BPF_ABS: + op = op_table[BPF_LD_H]; + fmt = bpf_dump_linux_k(bpf.k); + break; + case BPF_LD_B | BPF_ABS: + op = op_table[BPF_LD_B]; + fmt = bpf_dump_linux_k(bpf.k); + break; + case BPF_LD_W | BPF_LEN: + op = op_table[BPF_LD_W]; + fmt = "#len"; + break; + case BPF_LD_W | BPF_IND: + op = op_table[BPF_LD_W]; + fmt = "[x + %d]"; + break; + case BPF_LD_H | BPF_IND: + op = op_table[BPF_LD_H]; + fmt = "[x + %d]"; + break; + case BPF_LD_B | BPF_IND: + op = op_table[BPF_LD_B]; + fmt = "[x + %d]"; + break; + case BPF_LD | BPF_IMM: + op = op_table[BPF_LD_W]; 
+ fmt = "#0x%x"; + break; + case BPF_LDX | BPF_IMM: + op = op_table[BPF_LDX]; + fmt = "#0x%x"; + break; + case BPF_LDX_B | BPF_MSH: + op = op_table[BPF_LDX_B]; + fmt = "4*([%d]&0xf)"; + break; + case BPF_LD | BPF_MEM: + op = op_table[BPF_LD_W]; + fmt = "M[%d]"; + break; + case BPF_LDX | BPF_MEM: + op = op_table[BPF_LDX]; + fmt = "M[%d]"; + break; + case BPF_ST: + op = op_table[BPF_ST]; + fmt = "M[%d]"; + break; + case BPF_STX: + op = op_table[BPF_STX]; + fmt = "M[%d]"; + break; + case BPF_JMP_JA: + op = op_table[BPF_JMP_JA]; + fmt = "%d"; + v = n + 1 + bpf.k; + break; + case BPF_JMP_JGT | BPF_K: + op = op_table[BPF_JMP_JGT]; + fmt = "#0x%x"; + break; + case BPF_JMP_JGE | BPF_K: + op = op_table[BPF_JMP_JGE]; + fmt = "#0x%x"; + break; + case BPF_JMP_JEQ | BPF_K: + op = op_table[BPF_JMP_JEQ]; + fmt = "#0x%x"; + break; + case BPF_JMP_JSET | BPF_K: + op = op_table[BPF_JMP_JSET]; + fmt = "#0x%x"; + break; + case BPF_JMP_JGT | BPF_X: + op = op_table[BPF_JMP_JGT]; + fmt = "x"; + break; + case BPF_JMP_JGE | BPF_X: + op = op_table[BPF_JMP_JGE]; + fmt = "x"; + break; + case BPF_JMP_JEQ | BPF_X: + op = op_table[BPF_JMP_JEQ]; + fmt = "x"; + break; + case BPF_JMP_JSET | BPF_X: + op = op_table[BPF_JMP_JSET]; + fmt = "x"; + break; + case BPF_ALU_ADD | BPF_X: + op = op_table[BPF_ALU_ADD]; + fmt = "x"; + break; + case BPF_ALU_SUB | BPF_X: + op = op_table[BPF_ALU_SUB]; + fmt = "x"; + break; + case BPF_ALU_MUL | BPF_X: + op = op_table[BPF_ALU_MUL]; + fmt = "x"; + break; + case BPF_ALU_DIV | BPF_X: + op = op_table[BPF_ALU_DIV]; + fmt = "x"; + break; + case BPF_ALU_MOD | BPF_X: + op = op_table[BPF_ALU_MOD]; + fmt = "x"; + break; + case BPF_ALU_AND | BPF_X: + op = op_table[BPF_ALU_AND]; + fmt = "x"; + break; + case BPF_ALU_OR | BPF_X: + op = op_table[BPF_ALU_OR]; + fmt = "x"; + break; + case BPF_ALU_XOR | BPF_X: + op = op_table[BPF_ALU_XOR]; + fmt = "x"; + break; + case BPF_ALU_LSH | BPF_X: + op = op_table[BPF_ALU_LSH]; + fmt = "x"; + break; + case BPF_ALU_RSH | BPF_X: + op = 
op_table[BPF_ALU_RSH]; + fmt = "x"; + break; + case BPF_ALU_ADD | BPF_K: + op = op_table[BPF_ALU_ADD]; + fmt = "#%d"; + break; + case BPF_ALU_SUB | BPF_K: + op = op_table[BPF_ALU_SUB]; + fmt = "#%d"; + break; + case BPF_ALU_MUL | BPF_K: + op = op_table[BPF_ALU_MUL]; + fmt = "#%d"; + break; + case BPF_ALU_DIV | BPF_K: + op = op_table[BPF_ALU_DIV]; + fmt = "#%d"; + break; + case BPF_ALU_MOD | BPF_K: + op = op_table[BPF_ALU_MOD]; + fmt = "#%d"; + break; + case BPF_ALU_AND | BPF_K: + op = op_table[BPF_ALU_AND]; + fmt = "#0x%x"; + break; + case BPF_ALU_OR | BPF_K: + op = op_table[BPF_ALU_OR]; + fmt = "#0x%x"; + break; + case BPF_ALU_XOR | BPF_K: + op = op_table[BPF_ALU_XOR]; + fmt = "#0x%x"; + break; + case BPF_ALU_LSH | BPF_K: + op = op_table[BPF_ALU_LSH]; + fmt = "#%d"; + break; + case BPF_ALU_RSH | BPF_K: + op = op_table[BPF_ALU_RSH]; + fmt = "#%d"; + break; + case BPF_ALU_NEG: + op = op_table[BPF_ALU_NEG]; + fmt = ""; + break; + case BPF_MISC_TAX: + op = op_table[BPF_MISC_TAX]; + fmt = ""; + break; + case BPF_MISC_TXA: + op = op_table[BPF_MISC_TXA]; + fmt = ""; + break; + } + + slprintf_nocheck(operand, sizeof(operand), fmt, v); + slprintf_nocheck(image, sizeof(image), + (BPF_CLASS(bpf.code) == BPF_JMP && + BPF_OP(bpf.code) != BPF_JA) ? 
+ " L%d: %s %s, L%d, L%d" : " L%d: %s %s", + n, op, operand, n + 1 + bpf.jt, n + 1 + bpf.jf); + return image; +}
+
+/* Print a disassembly of every instruction of bpf, one per line. */
+void bpf_dump_all(struct sock_fprog *bpf)
+{
+	int i;
+	for (i = 0; i < bpf->len; ++i)
+		printf("%s\n", __bpf_dump(bpf->filter[i], i));
+}
+
+/*
+ * Attach bpf to sock via SO_ATTACH_FILTER; panics on failure. Nothing is
+ * attached when the first instruction is "ret #0xFFFFFFFF".
+ */
+void bpf_attach_to_sock(int sock, struct sock_fprog *bpf)
+{
+	int ret;
+
+	if (bpf->filter[0].code == BPF_RET &&
+	    bpf->filter[0].k == 0xFFFFFFFF)
+		return;
+
+	ret = setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER,
+			 bpf, sizeof(*bpf));
+	if (ret < 0)
+		panic("Cannot attach filter to socket!\n");
+}
+
+/* Remove the filter attached to sock via SO_DETACH_FILTER; panics on failure. */
+void bpf_detach_from_sock(int sock)
+{
+	int ret, empty = 0;
+
+	ret = setsockopt(sock, SOL_SOCKET, SO_DETACH_FILTER,
+			 &empty, sizeof(empty));
+	if (ret < 0)
+		panic("Cannot detach filter from socket!\n");
+}
+
+/*
+ * Write "1" to /proc/sys/net/core/bpf_jit_enable. Returns -1 if the file
+ * cannot be opened for writing, otherwise the result of write().
+ */
+int enable_kernel_bpf_jit_compiler(void)
+{
+	int fd;
+	ssize_t ret;
+	char *file = "/proc/sys/net/core/bpf_jit_enable";
+
+	fd = open(file, O_WRONLY);
+	if (fd < 0)
+		return -1;
+
+	ret = write(fd, "1", strlen("1"));
+
+	close(fd);
+	return ret;
+}
+
+/*
+ * Statically validate a BPF program. Returns non-zero if bpf is non-NULL,
+ * non-empty, every instruction belongs to a known class with a known
+ * mode/operation, scratch memory indices are below BPF_MEMWORDS, constant
+ * divisors are non-zero, all jump targets lie inside the program and the
+ * last instruction is a return. Returns 0 otherwise.
+ */
+int __bpf_validate(const struct sock_fprog *bpf)
+{
+	uint32_t i, from;
+	const struct sock_filter *p;
+
+	if (!bpf)
+		return 0;
+	if (bpf->len < 1)
+		return 0;
+
+	for (i = 0; i < bpf->len; ++i) {
+		p = &bpf->filter[i];
+		switch (BPF_CLASS(p->code)) {
+		/* Check that memory operations use valid addresses. */
+		case BPF_LD:
+		case BPF_LDX:
+			switch (BPF_MODE(p->code)) {
+			case BPF_IMM:
+				break;
+			case BPF_ABS:
+			case BPF_IND:
+			case BPF_MSH:
+				/* There's no maximum packet data size
+				 * in userland. The runtime packet length
+				 * check suffices.
+				 */
+				break;
+			case BPF_MEM:
+				if (p->k >= BPF_MEMWORDS)
+					return 0;
+				break;
+			case BPF_LEN:
+				break;
+			default:
+				return 0;
+			}
+			break;
+		case BPF_ST:
+		case BPF_STX:
+			if (p->k >= BPF_MEMWORDS)
+				return 0;
+			break;
+		case BPF_ALU:
+			switch (BPF_OP(p->code)) {
+			case BPF_ADD:
+			case BPF_SUB:
+			case BPF_MUL:
+			case BPF_OR:
+			case BPF_XOR:
+			case BPF_AND:
+			case BPF_LSH:
+			case BPF_RSH:
+			case BPF_NEG:
+				break;
+			case BPF_DIV:
+			case BPF_MOD:
+				/* Check for constant division by 0 (undefined
+				 * for div and mod). The operand source is
+				 * selected by the BPF_SRC() bit; BPF_RVAL()
+				 * also covers a bit of the operation field,
+				 * which is set for both div and mod, so it
+				 * can never compare equal to BPF_K here.
+				 */
+				if (BPF_SRC(p->code) == BPF_K && p->k == 0)
+					return 0;
+				break;
+			default:
+				return 0;
+			}
+			break;
+		case BPF_JMP:
+			/* Check that jumps are within the code block.
+			 * Unconditional branches have a 32-bit offset,
+			 * so from + k could overflow; the offset is
+			 * therefore compared against the number of
+			 * instructions remaining after this one
+			 * (from <= len always holds). Conditional
+			 * branches have an 8-bit offset, so adding
+			 * them to from cannot overflow.
+			 *
+			 * We don't check for backward branches, as we
+			 * currently support them in userland for the
+			 * protochain operation.
+			 */
+			from = i + 1;
+			switch (BPF_OP(p->code)) {
+			case BPF_JA:
+				if (p->k >= bpf->len - from)
+					return 0;
+				break;
+			case BPF_JEQ:
+			case BPF_JGT:
+			case BPF_JGE:
+			case BPF_JSET:
+				if (from + p->jt >= bpf->len ||
+				    from + p->jf >= bpf->len)
+					return 0;
+				break;
+			default:
+				return 0;
+			}
+			break;
+		case BPF_RET:
+			break;
+		case BPF_MISC:
+			break;
+		default:
+			return 0;
+		}
+	}
+
+	return BPF_CLASS(bpf->filter[bpf->len - 1].code) == BPF_RET;
+}
+
+uint32_t bpf_run_filter(const struct sock_fprog * fcode, uint8_t * packet, + size_t plen) +{ + /* XXX: caplen == len */ + uint32_t A, X; + uint32_t k; + struct sock_filter *bpf; + int32_t mem[BPF_MEMWORDS] = { 0, }; + + if (fcode == NULL || fcode->filter == NULL || fcode->len == 0) + return 0xFFFFFFFF; + + A = 0; + X = 0; + + bpf = fcode->filter; + --bpf; + while (1) { + ++bpf; + switch (bpf->code) { + default: + return 0; + case BPF_RET | BPF_K: + return (uint32_t) bpf->k; + case BPF_RET | BPF_A: + return (uint32_t) A; + case BPF_LD_W | BPF_ABS: + /* No Linux extensions supported here! */ + k = bpf->k; + if (k + sizeof(int32_t) > plen) + return 0; + A = EXTRACT_LONG(&packet[k]); + continue; + case BPF_LD_H | BPF_ABS: + /* No Linux extensions supported here! */ + k = bpf->k; + if (k + sizeof(short) > plen) + return 0; + A = EXTRACT_SHORT(&packet[k]); + continue; + case BPF_LD_B | BPF_ABS: + /* No Linux extensions supported here! 
*/ + k = bpf->k; + if (k >= plen) + return 0; + A = packet[k]; + continue; + case BPF_LD_W | BPF_LEN: + A = plen; + continue; + case BPF_LDX_W | BPF_LEN: + X = plen; + continue; + case BPF_LD_W | BPF_IND: + k = X + bpf->k; + if (k + sizeof(int32_t) > plen) + return 0; + A = EXTRACT_LONG(&packet[k]); + continue; + case BPF_LD_H | BPF_IND: + k = X + bpf->k; + if (k + sizeof(short) > plen) + return 0; + A = EXTRACT_SHORT(&packet[k]); + continue; + case BPF_LD_B | BPF_IND: + k = X + bpf->k; + if (k >= plen) + return 0; + A = packet[k]; + continue; + case BPF_LDX_B | BPF_MSH: + k = bpf->k; + if (k >= plen) + return 0; + X = (packet[bpf->k] & 0xf) << 2; + continue; + case BPF_LD | BPF_IMM: + A = bpf->k; + continue; + case BPF_LDX | BPF_IMM: + X = bpf->k; + continue; + case BPF_LD | BPF_MEM: + A = mem[bpf->k]; + continue; + case BPF_LDX | BPF_MEM: + X = mem[bpf->k]; + continue; + case BPF_ST: + mem[bpf->k] = A; + continue; + case BPF_STX: + mem[bpf->k] = X; + continue; + case BPF_JMP_JA: + bpf += bpf->k; + continue; + case BPF_JMP_JGT | BPF_K: + bpf += (A > bpf->k) ? bpf->jt : bpf->jf; + continue; + case BPF_JMP_JGE | BPF_K: + bpf += (A >= bpf->k) ? bpf->jt : bpf->jf; + continue; + case BPF_JMP_JEQ | BPF_K: + bpf += (A == bpf->k) ? bpf->jt : bpf->jf; + continue; + case BPF_JMP_JSET | BPF_K: + bpf += (A & bpf->k) ? bpf->jt : bpf->jf; + continue; + case BPF_JMP_JGT | BPF_X: + bpf += (A > X) ? bpf->jt : bpf->jf; + continue; + case BPF_JMP_JGE | BPF_X: + bpf += (A >= X) ? bpf->jt : bpf->jf; + continue; + case BPF_JMP_JEQ | BPF_X: + bpf += (A == X) ? bpf->jt : bpf->jf; + continue; + case BPF_JMP_JSET | BPF_X: + bpf += (A & X) ? 
bpf->jt : bpf->jf; + continue; + case BPF_ALU_ADD | BPF_X: + A += X; + continue; + case BPF_ALU_SUB | BPF_X: + A -= X; + continue; + case BPF_ALU_MUL | BPF_X: + A *= X; + continue; + case BPF_ALU_DIV | BPF_X: + if (X == 0) + return 0; + A /= X; + continue; + case BPF_ALU_MOD | BPF_X: + if (X == 0) + return 0; + A %= X; + continue; + case BPF_ALU_AND | BPF_X: + A &= X; + continue; + case BPF_ALU_OR | BPF_X: + A |= X; + continue; + case BPF_ALU_XOR | BPF_X: + A ^= X; + continue; + case BPF_ALU_LSH | BPF_X: + A <<= X; + continue; + case BPF_ALU_RSH | BPF_X: + A >>= X; + continue; + case BPF_ALU_ADD | BPF_K: + A += bpf->k; + continue; + case BPF_ALU_SUB | BPF_K: + A -= bpf->k; + continue; + case BPF_ALU_MUL | BPF_K: + A *= bpf->k; + continue; + case BPF_ALU_DIV | BPF_K: + A /= bpf->k; + continue; + case BPF_ALU_MOD | BPF_K: + A %= bpf->k; + continue; + case BPF_ALU_AND | BPF_K: + A &= bpf->k; + continue; + case BPF_ALU_OR | BPF_K: + A |= bpf->k; + continue; + case BPF_ALU_XOR | BPF_K: + A ^= bpf->k; + continue; + case BPF_ALU_LSH | BPF_K: + A <<= bpf->k; + continue; + case BPF_ALU_RSH | BPF_K: + A >>= bpf->k; + continue; + case BPF_ALU_NEG: + A = -A; + continue; + case BPF_MISC_TAX: + X = A; + continue; + case BPF_MISC_TXA: + A = X; + continue; + } + } +} + +void bpf_parse_rules(char *rulefile, struct sock_fprog *bpf, uint32_t link_type) +{ + int ret; + char buff[256]; + struct sock_filter sf_single = { 0x06, 0, 0, 0xFFFFFFFF }; + FILE *fp; + + if (rulefile == NULL) { + bpf->len = 1; + bpf->filter = xmalloc(sizeof(sf_single)); + fmemcpy(&bpf->filter[0], &sf_single, sizeof(sf_single)); + return; + } + + fp = fopen(rulefile, "r"); + if (!fp) { + bpf_try_compile(rulefile, bpf, link_type); + return; + } + + fmemset(buff, 0, sizeof(buff)); + while (fgets(buff, sizeof(buff), fp) != NULL) { + buff[sizeof(buff) - 1] = 0; + if (buff[0] != '{') { + fmemset(buff, 0, sizeof(buff)); + continue; + } + + fmemset(&sf_single, 0, sizeof(sf_single)); + ret = sscanf(buff, "{ 0x%x, %u, %u, 
0x%08x },", + (unsigned int *) &sf_single.code, + (unsigned int *) &sf_single.jt, + (unsigned int *) &sf_single.jf, + (unsigned int *) &sf_single.k); + if (ret != 4) + panic("BPF syntax error!\n"); + + bpf->len++; + bpf->filter = xrealloc(bpf->filter, 1, + bpf->len * sizeof(sf_single)); + + fmemcpy(&bpf->filter[bpf->len - 1], &sf_single, + sizeof(sf_single)); + fmemset(buff, 0, sizeof(buff)); + } + + fclose(fp); + + if (__bpf_validate(bpf) == 0) + panic("This is not a valid BPF program!\n"); +} diff --git a/bpf.h b/bpf.h new file mode 100644 index 00000000..84cf0b87 --- /dev/null +++ b/bpf.h @@ -0,0 +1,135 @@ +/* + * netsniff-ng - the packet sniffing beast + * Copyright 2009, 2010 Daniel Borkmann. + * Subject to the GPL, version 2. + */ + +#ifndef BPF_I_H +#define BPF_I_H + +#include +#include +#include + +#include "xmalloc.h" + +extern void bpf_dump_op_table(void); +extern void bpf_dump_all(struct sock_fprog *bpf); +extern int __bpf_validate(const struct sock_fprog *bpf); +extern uint32_t bpf_run_filter(const struct sock_fprog *bpf, uint8_t *packet, + size_t plen); +extern void bpf_attach_to_sock(int sock, struct sock_fprog *bpf); +extern void bpf_detach_from_sock(int sock); +extern int enable_kernel_bpf_jit_compiler(void); +extern void bpf_parse_rules(char *rulefile, struct sock_fprog *bpf, uint32_t link_type); +#ifdef __WITH_TCPDUMP_LIKE_FILTER +extern void bpf_try_compile(const char *rulefile, struct sock_fprog *bpf, + uint32_t link_type); +#else +static inline void bpf_try_compile(const char *rulefile, struct sock_fprog *bpf, + uint32_t link_type) +{ + panic("Cannot open file %s!\n", rulefile); +} +#endif + +static inline void bpf_release(struct sock_fprog *bpf) +{ + free(bpf->filter); +} + +#define BPF_CLASS(code) ((code) & 0x07) +#define BPF_LD 0x00 +#define BPF_LDX 0x01 +#define BPF_ST 0x02 +#define BPF_STX 0x03 +#define BPF_ALU 0x04 +#define BPF_JMP 0x05 +#define BPF_RET 0x06 +#define BPF_MISC 0x07 + +#define BPF_SIZE(code) ((code) & 0x18) +#define BPF_W 
0x00
+#define BPF_H 0x08
+#define BPF_B 0x10
+
+/* Bits 5-7 of an opcode (mask 0xe0), followed by their encodings. */
+#define BPF_MODE(code) ((code) & 0xe0)
+#define BPF_IMM 0x00
+#define BPF_ABS 0x20
+#define BPF_IND 0x40
+#define BPF_MEM 0x60
+#define BPF_LEN 0x80
+#define BPF_MSH 0xa0
+
+/* High nibble of an opcode (mask 0xf0), followed by the operation codes. */
+#define BPF_OP(code) ((code) & 0xf0)
+#define BPF_ADD 0x00
+#define BPF_SUB 0x10
+#define BPF_MUL 0x20
+#define BPF_DIV 0x30
+#define BPF_OR 0x40
+#define BPF_AND 0x50
+#define BPF_LSH 0x60
+#define BPF_RSH 0x70
+#define BPF_NEG 0x80
+#define BPF_MOD 0x90
+#define BPF_XOR 0xa0
+
+/*
+ * These occupy the same bit positions as the BPF_OP() values above;
+ * presumably they are the operation codes of the BPF_JMP class -- confirm.
+ */
+#define BPF_JA 0x00
+#define BPF_JEQ 0x10
+#define BPF_JGT 0x20
+#define BPF_JGE 0x30
+#define BPF_JSET 0x40
+
+/* Bit 3 of an opcode (mask 0x08), followed by its two encodings. */
+#define BPF_SRC(code) ((code) & 0x08)
+#define BPF_K 0x00
+#define BPF_X 0x08
+
+/* ret - BPF_K and BPF_X also apply */
+#define BPF_RVAL(code) ((code) & 0x18)
+#define BPF_A 0x10
+
+/* Upper five bits of an opcode (mask 0xf8), followed by their encodings. */
+#define BPF_MISCOP(code) ((code) & 0xf8)
+#define BPF_TAX 0x00
+#define BPF_TXA 0x80
+
+/*
+ * Fallback definitions: each SKF_AD_* macro below is only defined here when
+ * no header included earlier has provided it.
+ * NOTE(review): the values presumably mirror the ancillary-data offsets of
+ * the Linux socket filter headers -- confirm when touching them.
+ */
+#ifndef SKF_AD_OFF
+# define SKF_AD_OFF (-0x1000)
+#endif
+#ifndef SKF_AD_PROTOCOL
+# define SKF_AD_PROTOCOL 0
+#endif
+#ifndef SKF_AD_PKTTYPE
+# define SKF_AD_PKTTYPE 4
+#endif
+#ifndef SKF_AD_IFINDEX
+# define SKF_AD_IFINDEX 8
+#endif
+#ifndef SKF_AD_NLATTR
+# define SKF_AD_NLATTR 12
+#endif
+#ifndef SKF_AD_NLATTR_NEST
+# define SKF_AD_NLATTR_NEST 16
+#endif
+#ifndef SKF_AD_MARK
+# define SKF_AD_MARK 20
+#endif
+#ifndef SKF_AD_QUEUE
+# define SKF_AD_QUEUE 24
+#endif
+#ifndef SKF_AD_HATYPE
+# define SKF_AD_HATYPE 28
+#endif
+#ifndef SKF_AD_RXHASH
+# define SKF_AD_RXHASH 32
+#endif
+#ifndef SKF_AD_CPU
+# define SKF_AD_CPU 36
+#endif
+#ifndef SKF_AD_VLAN_TAG
+# define SKF_AD_VLAN_TAG 44
+#endif
+#ifndef SKF_AD_VLAN_TAG_PRESENT
+# define SKF_AD_VLAN_TAG_PRESENT 48
+#endif
+
+#endif /* BPF_I_H */
diff --git a/bpf_comp.c b/bpf_comp.c
new file mode 100644
index 00000000..27f7a000
--- /dev/null
+++ b/bpf_comp.c
@@ -0,0 +1,43 @@
+/*
+ * netsniff-ng - the packet sniffing beast
+ * Copyright 2013 Daniel Borkmann.
+ * Subject to the GPL, version 2.
+ */ + +#include +#include + +#include "xmalloc.h" +#include "bpf.h" +#include "die.h" + +void bpf_try_compile(const char *rulefile, struct sock_fprog *bpf, uint32_t link_type) +{ + int i, ret; + const struct bpf_insn *ins; + struct sock_filter *out; + struct bpf_program _bpf; + + ret = pcap_compile_nopcap(65535, link_type, &_bpf, rulefile, 1, 0xffffffff); + if (ret < 0) + panic("Cannot compile filter %s\n", rulefile); + + bpf->len = _bpf.bf_len; + bpf->filter = xrealloc(bpf->filter, 1, bpf->len * sizeof(*out)); + + for (i = 0, ins = _bpf.bf_insns, out = bpf->filter; i < bpf->len; ++i, ++ins, ++out) { + + out->code = ins->code; + out->jt = ins->jt; + out->jf = ins->jf; + out->k = ins->k; + + if (out->code == 0x06 && out->k > 0) + out->k = 0xFFFFFFFF; + } + + pcap_freecode(&_bpf); + + if (__bpf_validate(bpf) == 0) + panic("This is not a valid BPF program!\n"); +} diff --git a/bpf_lexer.l b/bpf_lexer.l new file mode 100644 index 00000000..d4b6947b --- /dev/null +++ b/bpf_lexer.l @@ -0,0 +1,126 @@ +/* + * netsniff-ng - the packet sniffing beast + * By Daniel Borkmann + * Copyright 2012 Daniel Borkmann , + * Swiss federal institute of technology (ETH Zurich) + * Subject to the GPL, version 2. 
+ */
+
+/* lex-func-prefix: yy */
+
+%{
+
+#include
+#include
+#include
+#include
+
+#include "bpf_parser.tab.h"
+#include "xmalloc.h"
+
+extern void yyerror(const char *);
+
+%}
+
+%option align
+%option nounput
+%option noyywrap
+%option noreject
+%option 8bit
+%option caseless
+%option noinput
+%option nodefault
+
+/*
+ * Numeric literals: hex takes a "0x" prefix, binary a "0b" prefix, octal a
+ * leading "0"; decimal may carry a sign. The prefixes of octal and binary
+ * literals are skipped before conversion.
+ * NOTE(review): number_oct also admits the digits 8 and 9, at which the
+ * base-8 strtol() conversion stops -- confirm whether that is intended.
+ */
+number_oct ([0][0-9]+)
+number_hex ([0][x][a-fA-F0-9]+)
+number_bin ([0][b][0-1]+)
+number_dec (([0])|([-+]?[1-9][0-9]*))
+
+/* A label needs at least two characters; single letters are tokens below. */
+label [a-zA-Z_][a-zA-Z0-9_]+
+
+%%
+
+"ldb"		{ return OP_LDB; }
+"ldh"		{ return OP_LDH; }
+"ld"		{ return OP_LD; }
+"ldi"		{ return OP_LDI; }
+"ldx"		{ return OP_LDX; }
+"ldxi"		{ return OP_LDXI; }
+"ldxb"		{ return OP_LDXB; }
+"st"		{ return OP_ST; }
+"stx"		{ return OP_STX; }
+"jmp"|"ja"	{ return OP_JMP; }
+"jeq"		{ return OP_JEQ; }
+"jneq"|"jne"	{ return OP_JNEQ; }
+"jlt"		{ return OP_JLT; }
+"jle"		{ return OP_JLE; }
+"jgt"		{ return OP_JGT; }
+"jge"		{ return OP_JGE; }
+"jset"		{ return OP_JSET; }
+"add"		{ return OP_ADD; }
+"sub"		{ return OP_SUB; }
+"mul"		{ return OP_MUL; }
+"div"		{ return OP_DIV; }
+"mod"		{ return OP_MOD; }
+"neg"		{ return OP_NEG; }
+"and"		{ return OP_AND; }
+"xor"		{ return OP_XOR; }
+"or"		{ return OP_OR; }
+"lsh"		{ return OP_LSH; }
+"rsh"		{ return OP_RSH; }
+"ret"		{ return OP_RET; }
+"tax"		{ return OP_TAX; }
+"txa"		{ return OP_TXA; }
+
+"#"?("len"|"pktlen")	{ return K_PKT_LEN; }
+"#"?("pto"|"proto")	{ return K_PROTO; }
+"#"?("type")		{ return K_TYPE; }
+"#"?("ifx"|"ifidx")	{ return K_IFIDX; }
+"#"?("nla")		{ return K_NLATTR; }
+"#"?("nlan")		{ return K_NLATTR_NEST; }
+"#"?("mark")		{ return K_MARK; }
+"#"?("que"|"queue"|"Q")	{ return K_QUEUE; }
+"#"?("hat"|"hatype")	{ return K_HATYPE; }
+"#"?("rxh"|"rxhash")	{ return K_RXHASH; }
+"#"?("cpu")		{ return K_CPU; }
+"#"?("vlant"|"vlan_tci") { return K_VLANT; }
+"#"?("vlana"|"vlan_acc") { return K_VLANP; }
+"#"?("vlanp")		{ return K_VLANP; }
+
+":"		{ return ':'; }
+","		{ return ','; }
+"#"		{ return '#'; }
+"["		{ return '['; }
+"]"		{ return ']'; }
+"("		{ return '('; }
+")"		{ return ')'; }
+"x"		{ return 'x'; }
+"a"		{ return 'a'; }
+"+"		{ return '+'; }
+"M"		{ return 'M'; }
+"*"		{ return '*'; }
+"&"		{ return '&'; }
+
+{number_hex}	{ yylval.number = strtoul(yytext, NULL, 16);
+		  return number; }
+
+{number_oct}	{ yylval.number = strtol(yytext + 1, NULL, 8);
+		  return number; }
+
+{number_bin}	{ yylval.number = strtol(yytext + 2, NULL, 2);
+		  return number; }
+
+{number_dec}	{ yylval.number = strtol(yytext, NULL, 10);
+		  return number; }
+
+{label}		{ yylval.label = xstrdup(yytext);
+		  return label; }
+
+"/*"([^*]|\*+[^*/])*\*+"/"	{
+		/*
+		 * Block comment. The pattern copes with runs of '*' right
+		 * before the closing slash, which the former
+		 * ([^\*]|\*[^/])* form could not match. A comment may span
+		 * several lines, so the newlines it swallows are counted to
+		 * keep yylineno in step with the input.
+		 */
+		const char *p;
+
+		for (p = yytext; *p; p++)
+			if (*p == '\n')
+				yylineno++;
+	}
+";"[^\n]*	{/* NOP */}
+"\n"		{ yylineno++; }
+[ \t]+		{/* NOP */ }
+.		{ printf("Unknown character '%s'", yytext);
+		  yyerror("lex Unknown character"); }
+
+%%
diff --git a/bpf_parser.y b/bpf_parser.y
new file mode 100644
index 00000000..4c35e07e
--- /dev/null
+++ b/bpf_parser.y
@@ -0,0 +1,673 @@
+/*
+ * netsniff-ng - the packet sniffing beast
+ * By Daniel Borkmann
+ * Copyright 2011 Daniel Borkmann ,
+ * Swiss federal institute of technology (ETH Zurich)
+ * Subject to the GPL, version 2.
+ */
+
+/* yaac-func-prefix: yy */
+
+%{
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "bpf.h"
+#include "xmalloc.h"
+#include "bpf_parser.tab.h"
+#include "built_in.h"
+#include "die.h"
+
+#define MAX_INSTRUCTIONS 4096
+
+int compile_filter(char *file, int verbose, int bypass, int decimal);
+
+static int curr_instr = 0;
+
+static struct sock_filter out[MAX_INSTRUCTIONS];
+
+static char *labels[MAX_INSTRUCTIONS];
+
+static char *labels_jt[MAX_INSTRUCTIONS];
+static char *labels_jf[MAX_INSTRUCTIONS];
+static char *labels_k[MAX_INSTRUCTIONS];
+
+#define YYERROR_VERBOSE 0
+#define YYDEBUG 0
+#define YYENABLE_NLS 1
+#define YYLTYPE_IS_TRIVIAL 1
+#define ENABLE_NLS 1
+
+extern FILE *yyin;
+extern int yylex(void);
+extern void yyerror(const char *);
+extern int yylineno;
+extern char *yytext;
+
+/* Complain once curr_instr no longer indexes a free slot of the tables. */
+static inline void check_max_instr(void)
+{
+	if (curr_instr < MAX_INSTRUCTIONS)
+		return;
+
+	panic("Exceeded maximal number of instructions!\n");
+}
+
+/* Store one instruction in the current slot of out[] and advance to the next. */
+static inline void set_curr_instr(uint16_t code, uint8_t jt, uint8_t jf, uint32_t k)
+{
+	struct sock_filter *slot;
+
+	check_max_instr();
+
+	slot = &out[curr_instr++];
+	slot->code = code;
+	slot->jt = jt;
+	slot->jf = jf;
+	slot->k = k;
+}
+
+/* Remember @label as the name of the instruction slot that is filled next. */
+static inline void set_curr_label(char *label)
+{
+	check_max_instr();
+
+	labels[curr_instr] = label;
+}
+
+#define JTL 1
+#define JFL 2
+#define JKL 3
+
+/*
+ * Record @label as a jump target of the current instruction slot; @which
+ * (JTL, JFL or JKL) picks the labels_jt, labels_jf or labels_k table.
+ * Any other selector is reported through bug().
+ */
+static inline void set_jmp_label(char *label, int which)
+{
+	char **table;
+
+	check_max_instr();
+
+	if (which == JTL) {
+		table = labels_jt;
+	} else if (which == JFL) {
+		table = labels_jf;
+	} else if (which == JKL) {
+		table = labels_k;
+	} else {
+		bug();
+		return;
+	}
+
+	table[curr_instr] = label;
+}
+
+/*
+ * Return the index of the first instruction slot below curr_instr whose
+ * label equals @label_to_search; a missing label is reported via panic().
+ */
+static int find_intr_offset_or_panic(char *label_to_search)
+{
+	int idx, limit = curr_instr;
+
+	bug_on(!label_to_search);
+
+	for (idx = 0; idx < limit; idx++) {
+		if (labels[idx] == NULL)
+			continue;
+		/* Both strings are \0-terminated, so strcmp() is fine here. */
+		if (strcmp(label_to_search, labels[idx]) == 0)
+			return idx;
+	}
+
+	panic("No such label!\n");
+
+	return -ENOENT;
+}
+
+%}
+
+%union {
+	char *label;
+	long int number;
+}
+
+%token OP_LDB OP_LDH OP_LD OP_LDX OP_ST OP_STX OP_JMP OP_JEQ OP_JGT OP_JGE
+%token OP_JSET OP_ADD OP_SUB OP_MUL OP_DIV OP_AND OP_OR OP_XOR OP_LSH OP_RSH
+%token OP_RET OP_TAX OP_TXA OP_LDXB OP_MOD OP_NEG OP_JNEQ OP_JLT OP_JLE OP_LDI
+%token OP_LDXI
+
+%token K_PKT_LEN K_PROTO K_TYPE K_NLATTR K_NLATTR_NEST K_MARK K_QUEUE K_HATYPE
+%token K_RXHASH K_CPU K_IFIDX K_VLANT K_VLANP
+
+%token ':' ',' '[' ']' '(' ')' 'x' 'a' '+' 'M' '*' '&' '#'
+
+%token number label
+
+%type number
+%type