From 8dd5a3d8301e15d96f39e5c1b53347ed2b340249 Mon Sep 17 00:00:00 2001
From: Peter Palfrader
Date: Tue, 2 Sep 2008 09:42:55 +0000
Subject: [PATCH] Add my scripts to dump directories to contrib

svn:r16736
---
 contrib/directory-archive/crontab.sample         |   3 +
 contrib/directory-archive/fetch-all              |  77 ++++++++++++++
 contrib/directory-archive/fetch-all-functions    |  72 +++++++++++++
 contrib/directory-archive/fetch-all-v3           | 109 ++++++++++++++++++++
 contrib/directory-archive/sort-into-month-folder |  74 ++++++++++++++
 contrib/directory-archive/tar-them-up            | 125 +++++++++++++++++++++++
 6 files changed, 460 insertions(+)
 create mode 100644 contrib/directory-archive/crontab.sample
 create mode 100755 contrib/directory-archive/fetch-all
 create mode 100644 contrib/directory-archive/fetch-all-functions
 create mode 100755 contrib/directory-archive/fetch-all-v3
 create mode 100755 contrib/directory-archive/sort-into-month-folder
 create mode 100755 contrib/directory-archive/tar-them-up

diff --git a/contrib/directory-archive/crontab.sample b/contrib/directory-archive/crontab.sample
new file mode 100644
index 0000000000..e2821aa938
--- /dev/null
+++ b/contrib/directory-archive/crontab.sample
@@ -0,0 +1,3 @@
+10 * * * * cd projects/tor-v2dir && ./fetch-all-v3
+40 * * * * cd projects/tor-v2dir && ./fetch-all
+15 3 6 * * cd projects/tor-v2dir && ./sort-into-month-folder > /dev/null && ./tar-them-up last > /dev/null
diff --git a/contrib/directory-archive/fetch-all b/contrib/directory-archive/fetch-all
new file mode 100755
index 0000000000..745c0609b4
--- /dev/null
+++ b/contrib/directory-archive/fetch-all
@@ -0,0 +1,77 @@
+#!/bin/bash
+
+# Download all current v2 directory status documents, then download
+# the descriptors and extra info documents.
+
+# Copyright (c) 2005, 2006, 2007, 2008 Peter Palfrader
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+TZ=UTC
+export TZ
+
+DIRSERVERS=""
+DIRSERVERS="$DIRSERVERS 86.59.21.38:80" # tor26
+DIRSERVERS="$DIRSERVERS 128.31.0.34:9031" # moria1
+DIRSERVERS="$DIRSERVERS 128.31.0.34:9032" # moria2
+#DIRSERVERS="$DIRSERVERS 140.247.60.64:80" # lefkada
+DIRSERVERS="$DIRSERVERS 194.109.206.212:80" # dizum
+DATEDIR=$(date "+%Y/%m/%d")
+TIME=$(date "+%Y%m%d-%H%M%S")
+
+. fetch-all-functions
+
+statuses=""
+for dirserver in $DIRSERVERS; do
+  authorities=$(wget -q -O - "http://$dirserver/tor/status/all" | grep -E '^fingerprint ' | awk '{print $2}')
+  if [ "$authorities" == "" ]; then
+    echo "Did not get a list of authorities from $dirserver, going to next" >&2
+    continue
+  fi
+
+  dir="status/$DATEDIR"
+  [ -d "$dir" ] || mkdir -p "$dir"
+  # A status that fails to download is dropped instead of being archived as an empty file.
+  authprefix="$dir/$TIME-"
+  for fp in $authorities; do
+    wget -q -O "$authprefix$fp" "http://$dirserver/tor/status/fp/$fp" || { rm -f "$authprefix$fp"; continue; }
+    bzip2 "$authprefix$fp"
+    statuses="$statuses $authprefix$fp.bz2"
+  done
+  if [ "$statuses" == "" ]; then
+    echo "Did not get any statuses from $dirserver, going to next" >&2
+    continue
+  else
+    break
+  fi
+done
+
+if [ "$statuses" = "" ]; then
+  echo "No statuses available" >&2
+  exit 1
+fi
+
+digests=$( for i in $( bzcat $statuses | awk '$1 == "r" {printf "%s===\n", $4}' | sort -u ); do
+    echo "$i" | \
+    base64-decode | \
+    perl -e 'undef $/; $a=<>; print unpack("H\*", $a),"\n";';
+  done )
+for digest in $digests; do
+  fetch_digest "$digest" "server-descriptor"
+done
diff --git a/contrib/directory-archive/fetch-all-functions b/contrib/directory-archive/fetch-all-functions
new file mode 100644
index 0000000000..6d5a0e469d
--- /dev/null
+++ b/contrib/directory-archive/fetch-all-functions
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+# function used by fetch-all* to download server descriptors and
+# extra info documents
+
+# Copyright (c) 2005, 2006, 2007, 2008 Peter Palfrader
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+
+fetch_digest() {
+  # Usage: fetch_digest DIGEST OBJECTTYPE
+  # Store the document with lowercase hex DIGEST as OBJECTTYPE/d1/d2/DIGEST
+  # (d1, d2 = first two digest characters), asking each $DIRSERVERS entry in
+  # turn. OBJECTTYPE is "server-descriptor" or "extra-info"; an extra-info
+  # digest named in a fetched document is fetched as well.
+  local digest objecttype urlpart pathpart target targetdir dirserver ei
+
+  digest="$1"
+  objecttype="$2"
+  # The digest comes from a downloaded document and becomes part of a file name.
+  case "$digest" in ''|?|*[!0-9a-f]*) echo "Ignoring malformed digest '$digest'" >&2; return 1 ;; esac
+  if [ "$objecttype" = "server-descriptor" ] ; then
+    urlpart="server"
+    pathpart="server-descriptor"
+  elif [ "$objecttype" = "extra-info" ] ; then
+    urlpart="extra"
+    pathpart="extra-info"
+  else
+    echo "Called fetch_digest with illegal objecttype '$objecttype'" >&2
+    exit 1
+  fi
+  target=$( printf '%s\n' "$digest" | sed -e 's#^\(.\)\(.\)#'"$pathpart"'/\1/\2/\1\2#' )
+  targetdir=$( dirname "$target" )
+  [ -d "$targetdir" ] || mkdir -p "$targetdir"
+  if ! [ -e "$target" ]; then
+    for dirserver in $DIRSERVERS; do
+      wget -q -O "$target" "http://$dirserver/tor/$urlpart/d/$digest" || rm -f "$target"
+      if [ -s "$target" ]; then
+        if grep -Eq '^opt extra-info-digest ' "$target"; then
+          ei=$( grep -E '^opt extra-info-digest ' "$target" | awk '{print $3}' | tr 'A-F' 'a-f' )
+          fetch_digest "$ei" "extra-info"
+        elif grep -Eq '^extra-info-digest ' "$target"; then
+          ei=$( grep -E '^extra-info-digest ' "$target" | awk '{print $2}' | tr 'A-F' 'a-f' )
+          fetch_digest "$ei" "extra-info"
+        fi
+        break
+      else
+        rm -f "$target"
+      fi
+    done
+  fi
+  #if ! [ -e "$target" ]; then
+  #  echo "$objecttype $digest" >> failed
+  #fi
+}
diff --git a/contrib/directory-archive/fetch-all-v3 b/contrib/directory-archive/fetch-all-v3
new file mode 100755
index 0000000000..fe07ad7ef9
--- /dev/null
+++ b/contrib/directory-archive/fetch-all-v3
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+# Download all current v3 directory status votes and the consensus document,
+# then download the descriptors and extra info documents.
+
+# Copyright (c) 2005, 2006, 2007, 2008 Peter Palfrader
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+TZ=UTC
+export TZ
+
+DIRSERVERS=""
+DIRSERVERS="$DIRSERVERS 86.59.21.38:80" # tor26
+DIRSERVERS="$DIRSERVERS 128.31.0.34:9031" # moria1
+DIRSERVERS="$DIRSERVERS 216.224.124.114:9030" # ides
+DIRSERVERS="$DIRSERVERS 88.198.7.215:80" # gabelmoo
+#DIRSERVERS="$DIRSERVERS 140.247.60.64:80" # lefkada
+DIRSERVERS="$DIRSERVERS 194.109.206.212:80" # dizum
+DIRSERVERS="$DIRSERVERS 128.31.0.34:9032" # moria2
+TIME=$(date "+%Y%m%d-%H%M%S")
+
+. fetch-all-functions
+
+consensus=""
+tmpdir="consensus/tmp"
+[ -d "$tmpdir" ] || mkdir -p "$tmpdir"
+for dirserver in $DIRSERVERS; do
+  wget -q -O "$tmpdir/$TIME-consensus" "http://$dirserver/tor/status-vote/current/consensus"
+  if [ "$?" != 0 ]; then
+    rm -f "$tmpdir/$TIME-consensus"
+    continue
+  fi
+
+  freshconsensus="$tmpdir/$TIME-consensus"
+
+  timestamp=$(awk '$1=="valid-after" {printf "%s-%s", $2, $3}' < "$freshconsensus")
+  datedir=$(awk '$1=="valid-after" {printf "%s", $2}' < "$freshconsensus" | tr '-' '/')
+  # timestamp and datedir become path names: accept only YYYY-MM-DD-HH:MM:SS, else try the next server.
+  [[ "$timestamp" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}-[0-9]{2}:[0-9]{2}:[0-9]{2}$ ]] || { echo "Unusable consensus from $dirserver" >&2; rm -f "$freshconsensus"; continue; }
+  dir="consensus/$datedir"
+  [ -d "$dir" ] || mkdir -p "$dir"
+  consensus="$dir/$timestamp-consensus.bz2"
+  if ! [ -e "$consensus" ]; then
+    # the consensus is new, or at least we don't have it yet
+    bzip2 "$freshconsensus"
+    mv "$freshconsensus.bz2" "$consensus"
+    break
+  fi
+
+  rm -f "$freshconsensus"
+  echo "Consensus from $timestamp (gotten from $dirserver) already exists!" >&2
+  # maybe there is a newer one on a different authority, so try again.
+done
+
+if [ "$consensus" = "" ]; then
+  echo "No consensus available" >&2
+  exit 1
+fi
+
+
+votes=$(bzcat "$consensus" | awk '$1 == "vote-digest" {print $2}')
+for vote in $votes; do
+  for dirserver in $DIRSERVERS; do
+    wget -q -O "$dir/$TIME-vote-$vote" "http://$dirserver/tor/status-vote/current/d/$vote"
+    if [ "$?" != 0 ]; then
+      rm -f "$dir/$TIME-vote-$vote"
+      continue
+    fi
+    break
+  done
+  if [ -e "$dir/$TIME-vote-$vote" ]; then
+    voteridentity=$(awk '$1=="fingerprint" {print $2}' < "$dir/$TIME-vote-$vote")
+    if [ -e "$dir/$timestamp-vote-$voteridentity-$vote.bz2" ]; then
+      echo "Vote $vote from $voteridentity already exists!" >&2
+      rm -f "$dir/$TIME-vote-$vote"
+      continue;
+    fi
+    mv "$dir/$TIME-vote-$vote" "$dir/$timestamp-vote-$voteridentity-$vote"
+    bzip2 "$dir/$timestamp-vote-$voteridentity-$vote"
+  else
+    echo "Failed to get vote $vote!" >&2
+  fi
+done
+
+digests=$( for i in $( bzcat "$consensus" | awk '$1 == "r" {printf "%s===\n", $4}' | sort -u ); do
+    echo "$i" | \
+    base64-decode | \
+    perl -e 'undef $/; $a=<>; print unpack("H\*", $a),"\n";';
+  done )
+for digest in $digests; do
+  fetch_digest "$digest" "server-descriptor"
+done
diff --git a/contrib/directory-archive/sort-into-month-folder b/contrib/directory-archive/sort-into-month-folder
new file mode 100755
index 0000000000..95033c58df
--- /dev/null
+++ b/contrib/directory-archive/sort-into-month-folder
@@ -0,0 +1,74 @@
+#!/usr/bin/perl -w
+
+# Sort dumped consensuses, statuses, descriptors etc into per-month folders.
+
+# Copyright (c) 2006, 2007, 2008 Peter Palfrader
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+use strict;
+use File::Find;
+use File::Basename;
+use File::stat;
+use Time::Local;
+
+# Cutoff as an epoch time; files modified at or after it are left in place.
+my $cutofftime;
+
+# File::Find callback: moves one regular file into a per-month folder chosen by its mtime (UTC).
+sub wanted() {
+    return unless -f;
+    my $mtime = stat($_)->mtime;
+    return if $mtime >= $cutofftime;
+    # month (0-based) and year (offset from 1900) of the last modification, in UTC
+    my (undef,undef,undef,undef,$mon,$year,undef,undef,undef) = gmtime $mtime;
+    # the top-level directory gets an "s-YYYY-MM" suffix, e.g. server-descriptor/a/b/F -> server-descriptors-2008-08/a/b/F
+    my $bn = basename $_;
+    my $dn = dirname $_;
+    my @path = split /\//, $dn;
+    $path[0] .= sprintf 's-%4d-%02d', 1900+$year, $mon+1;
+    $dn = join '/', @path;
+    # create the destination directory one path component at a time
+    if (! -d $dn) {
+        my $p = '.';
+        for my $component (@path) {
+            $p .= '/'.$component;
+            if (! -d $p) {
+                mkdir $p or die ("Cannot mkdir $p: $!\n");
+            };
+        };
+    };
+
+    print "$_ -> $dn/$bn\n";
+    rename $_, $dn.'/'.$bn or die ("Cannot rename $_ to $dn/$bn: $!\n");
+};
+# The cutoff is 00:00 UTC on the first day of the month that contains the moment five days ago.
+my (undef,undef,undef,undef,$mon,$year,undef,undef,undef) = gmtime(time - 5*24*3600);
+$cutofftime = timegm(0,0,0,1,$mon,$year);
+find( {
+        wanted => \&wanted,
+        no_chdir => 1
+    },
+    'server-descriptor');
+
+find( {
+        wanted => \&wanted,
+        no_chdir => 1
+    },
+    'extra-info');
diff --git a/contrib/directory-archive/tar-them-up b/contrib/directory-archive/tar-them-up
new file mode 100755
index 0000000000..2e0f6ec03f
--- /dev/null
+++ b/contrib/directory-archive/tar-them-up
@@ -0,0 +1,125 @@
+#!/bin/sh
+
+# Tar up dumped consensuses, statuses, descriptors etc from per-month folders
+# into per-month tarballs.
+
+# Copyright (c) 2006, 2007, 2008 Peter Palfrader
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+set -e
+set -x
+set -u
+
+usage() {
+  echo "Usage: $0 YEAR MONTH" >&2
+  echo "       $0 last (does last month)" >&2
+  exit 1
+}
+
+if [ -z "${1:-}" ]; then
+  usage
+fi
+# "last" counts back from the 15th: a plain "last month" can land in the current month on the 29th-31st.
+if [ "$1" = "last" ]; then
+  year=$(date --utc --date="$(date --utc +%Y-%m-15) -1 month" +'%Y')
+  month=$(date --utc --date="$(date --utc +%Y-%m-15) -1 month" +'%m')
+elif [ -z "${2:-}" ]; then
+  usage
+else
+  year="$1"
+  month="$2"
+fi
+# No upper bound on the year: months that have not ended yet are rejected further down.
+if [ "$year" -lt 2000 ] ||
+   [ "$month" -lt 1 ] || [ "$month" -gt 12 ] ||
+   [ "${#month}" != 2 ]; then
+  usage
+fi
+
+
+this_year=$(date --utc +'%Y')
+this_month=$(date --utc +'%m')
+
+if [ "$(date -d "$this_year-$this_month-01" +%s)" -le "$(date -d "$year-$month-01" +%s)" ]; then
+  echo "Date in the future or current month?" >&2
+  exit 1
+fi
+
+
+
+
+
+for file in \
+  "extra-infos-$year-$month.tar.bz2" \
+  "server-descriptors-$year-$month.tar.bz2" \
+  "consensuses-$year-$month.tar.bz2" \
+  "statuses-$year-$month.tar.bz2" \
+  ; do
+  if [ -e "$file" ]; then
+    echo "$file already exists" >&2
+    exit 1
+  fi
+done
+
+for dir in \
+  "extra-infos-$year-$month" \
+  "server-descriptors-$year-$month" \
+  "consensus/$year/$month" \
+  "status/$year/$month" \
+  ; do
+  if ! [ -d "$dir" ]; then
+    echo "$dir not found" >&2
+    exit 1
+  fi
+done
+
+for dir in \
+  "consensuses-$year-$month" \
+  "statuses-$year-$month" \
+  ; do
+  if [ -e "$dir" ]; then
+    echo "$dir already exists" >&2
+    exit 1
+  fi
+done
+
+for kind in consensus status; do
+  mv "$kind/$year/$month" "${kind}es-$year-$month"
+  find "${kind}es-$year-$month" -type f -name '*.bz2' -print0 | xargs -0 -r bunzip2 -v
+  tar cjvf "${kind}es-$year-$month.tar.bz2" "${kind}es-$year-$month"
+  rm -rf "${kind}es-$year-$month"
+done
+
+for kind in extra-infos server-descriptors; do
+  tar cjvf "$kind-$year-$month.tar.bz2" "$kind-$year-$month"
+  rm -rf "$kind-$year-$month"
+done
+
+# Archive must be a directory: without one, mv would rename each tarball to a file called Archive.
+mkdir -p Archive
+for kind in consensus status; do
+  t="${kind}es-$year-$month.tar.bz2"
+  [ -e "Archive/$t" ] || mv "$t" Archive/
+done
+
+for kind in extra-infos server-descriptors; do
+  t="$kind-$year-$month.tar.bz2"
+  [ -e "Archive/$t" ] || mv "$t" Archive/
+done
-- 
2.11.4.GIT