2 # extractor <prefix> <reportfile> <source1> <source2> ...
3 # must be run in directory of sources
4 # will process each source according to its configuration file
5 # and save new clean text (if available) in same directory
6 # with <prefix> as start of name and <reportfile> for reporting
7 # time of earliest next polling (for job planning) and other information
# Setup: source the shared constants file, then derive the scratch-file
# names, the one-character prefix, the output suffix and the log file.
# NOTE(review): logfile, logit, configread and hashfunction are not defined in
# the visible lines; presumably constants.sh provides them - confirm.
10 .
$SCRIPTDIR/constants.sh
# Scratch files under $TMPDIR; $$ (the shell's PID) is embedded in the names.
11 tmpf1
=$TMPDIR/extractor1$$.tmp
12 tmpf2
=$TMPDIR/extractor2$$.tmp
# Running minimum of the polling values seen so far; starts at $MAXPOLLING,
# or 999999 when MAXPOLLING is unset or empty.
14 minpoll
=${MAXPOLLING:-999999}
17 # only one character for prefix
# The sed substitution keeps just the first character of $1.
18 prefix
=`echo $1 | sed -e 's/\(.\).*/\1/'`
# Suffix for generated file names; .txt when SOURCEFILESUFFIX is unset or empty.
21 suffix
=${SOURCEFILESUFFIX:-.txt}
# Obtain the log file via the logfile helper and record that the run started.
# NOTE(review): $starttime is not assigned in the visible lines - confirm
# where it is set.
23 logf
=`logfile extractor-$prefix`
24 logit
$logf extractor working
at $starttime:
# Per-source processing. With a readable $SOURCECONFIG: load the stored status,
# decide whether polling is due, and if so fetch the source, run its recipe,
# hash the result and compare it with the stored hash; finally update minpoll
# and rewrite $SOURCESTATUS. Without a readable config the source is only
# logged as ignored (the "else logit ... no configfile found" branch).
# NOTE(review): the enclosing loop header and the assignments of $source,
# $SOURCECONFIG, $SOURCESTATUS and $now are not visible in this view - confirm.
29 # ignore directories without readable configuration file
30 if test -r $SOURCECONFIG
32 # read status values or set default ones
33 if test -r $SOURCESTATUS
# Stored values from the previous run: INDEX, LASTHASH, LASTPOLL.
35 index
=`configread $SOURCESTATUS INDEX`
36 lasthash
=`configread $SOURCESTATUS LASTHASH`
37 lastpoll
=`configread $SOURCESTATUS LASTPOLL`
39 # for index, use source configuration or global start value
# An empty per-source MININDEX/MAXINDEX falls back to the global
# $MININDEX/$MAXINDEX.
40 minindex
=`configread $SOURCECONFIG MININDEX`
41 minindex
=${minindex:-$MININDEX}
42 maxindex
=`configread $SOURCECONFIG MAXINDEX`
43 maxindex
=${maxindex:-$MAXINDEX}
# Defaults when no status value was read: index starts at minindex,
# lasthash is the literal "nil", lastpoll is 0.
44 index
=${index:-$minindex}
45 lasthash
=${lasthash:-nil}
46 lastpoll
=${lastpoll:-0}
48 # get polling interval, set to maximum if unset
# The fallback to $MAXPOLLING is taken when configread exits non-zero.
49 polling
=`configread $SOURCECONFIG POLLING` || polling
=$MAXPOLLING
50 #echo : lastpoll $lastpoll : pollinterval $polling : now $now
51 # calculate time of next polling
# polling now holds lastpoll + interval - now, i.e. the time remaining until
# the next poll is due; zero or negative means the source is due.
52 polling
=`expr $lastpoll + $polling - $now`
54 if test $polling -le 0
56 logit
$logf $source needs polling
57 # source descriptor (url, file, etc)
# Fallbacks when configread exits non-zero: file:///dev/null as descriptor,
# $DEFAULTPRIORITY as priority. RECIPE has no fallback.
58 desc
=`configread $SOURCECONFIG SOURCE` || desc
=file:///dev
/null
59 prio
=`configread $SOURCECONFIG PRIORITY` || prio
=$DEFAULTPRIORITY
60 recipe
=`configread $SOURCECONFIG RECIPE`
# A recipe that is not an executable file is only logged.
# NOTE(review): how the remaining steps are skipped in that case is not
# visible here - confirm.
61 if test ! -x "$recipe"
63 logit
$logf $source ignoring non-executable recipe
$recipe
# Fetch the descriptor into tmpf1; the fetcher's exit status gates the rest.
66 if $SCRIPTDIR/fetcher
"$desc" > $tmpf1
# The recipe reads the fetched data on stdin and writes its output to tmpf2.
68 $recipe <$tmpf1 >$tmpf2
# Hash the recipe output; a hash different from the stored one is treated as
# new content.
69 newhash
=`hashfunction $tmpf2`
70 if test $lasthash != $newhash
# sed drops everything from line 1 through the first line starting with
# "---"; the remainder is piped to sniptime together with $SPEED.
72 duration
=`sed -e '1,/^---/d' $tmpf2 | $SCRIPTDIR/morse/sniptime $SPEED`
# Output name is prefix + priority + index + suffix.
# NOTE(review): the lines that use $stname and $duration are not visible in
# this view.
73 stname
="$prefix$prio$index$suffix"
# Advance the index and compare it with maxindex.
# NOTE(review): the action taken when index exceeds maxindex is not visible;
# presumably it wraps back to minindex - confirm.
84 index
=`expr $index + 1`
85 if test $index -gt $maxindex
88 logit
$logf $source fetched with new
hash $newhash
92 logit
$logf $source has same
hash $newhash
95 logit
$logf $source fetching or filtering failed
97 # re-read polling interval
# Needed because $polling was overwritten above with the remaining time.
98 polling
=`configread $SOURCECONFIG POLLING` || polling
=$MAXPOLLING
100 # calculate future minimal polling time
# Keep the smallest polling value seen so far in minpoll.
101 if test $minpoll -gt $polling
102 then minpoll
=$polling
# The here-document below overwrites $SOURCESTATUS.
# NOTE(review): its body and EOT terminator are not visible in this view;
# presumably it stores INDEX, LASTHASH and LASTPOLL - confirm.
104 # save current status
105 cat <<EOT >$SOURCESTATUS
110 else logit
$logf $source no configfile found
, ignoring
112 # go back to channel directory
116 # save closest next polling time for controller
# Overwrites $reportfile with the word NEXTPOLL followed by
# starttime + minpoll, computed with expr.
# NOTE(review): $reportfile and $starttime are not assigned in the visible
# lines; the usage header suggests the report file is the second argument -
# confirm.
117 echo NEXTPOLL
`expr $starttime + $minpoll` > $reportfile