Released as 20240522 ('Tbilisi')
[parallel.git] / src / testurls
blob8fa97449383cdb8d950f81a2007a1de62a4a29cf
1 #!/bin/bash
3 # Copyright (C) 2022-2024 Ole Tange, http://ole.tange.dk and Free
4 # Software Foundation, Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, see <https://www.gnu.org/licenses/>
18 # or write to the Free Software Foundation, Inc., 51 Franklin St,
19 # Fifth Floor, Boston, MA 02110-1301 USA
21 # SPDX-FileCopyrightText: 2022-2024 Ole Tange, http://ole.tange.dk and Free Software Foundation, Inc.
22 # SPDX-License-Identifier: GPL-3.0-or-later
# Collect every URL mentioned in the files under ../src and try to fetch
# each of them with wget, many at a time, through GNU parallel.
#
# Needs on PATH: GNU grep (for -P), perl, sort, GNU parallel, wget, neno.
# Side effect:   GNU parallel writes its job log to ./joblog.
# NOTE(review):  ../src is relative to the current directory, so the
#                script has to be started from a sibling of src/ (or
#                from src/ itself) - confirm the intended invocation.

# Read all files, dropping lines that are already marked '(dead)'
grep -hv '(dead)' ../src/* |
  # Merge lines ending in . (joins a URL that was wrapped right after a dot)
  perl -pe 's{(http\S+\.)\s}{$1}s' |
  # Grep out URLs: one match per output line, cut at the first space,
  # '$', '<', '>', '"', ')' or '}'
  grep -h -Po 'https?://[^ $<>")}]+' |
  # Remove anchor
  perl -pe 's/#.*//' |
  # Strip markup residue: everything from '&gt' or '{', the escapes '\-'
  # and '\n' (literal backslash sequences), '&amp;' -> '&', everything
  # from '&#', leading whitespace, and everything from the first '
  perl -pe 's/(&gt|\{).*//;s/\\-/-/g;s/\\n//g;s/&amp;/&/g;s/&#.*//;s/\s*//;'"s/'.*//;" |
  # Remove spacing (normalize trailing whitespace to a single newline)
  perl -pe 's/\s*$/\n/' |
  # Drop a trailing '&quot;' or '&' and any remaining backslashes
  perl -pe 's/&quot;$//g; s/&$//g; s/\\//g;' |
  # Skip URLs that end in 'parallel-' / 'parallel-20' (presumably
  # truncated download links - confirm) and the coolwebsite.biz host.
  # The dot is escaped so it only matches a literal '.'.
  grep -Ev 'parallel-(20)?$|coolwebsite\.biz' |
  sort -u |
  # Skip example.com URLs and URLs containing '##' or '*('.
  # grep -E replaces the obsolescent egrep (GNU grep >= 3.8 warns on it).
  grep -Ev 'example\.com|##|\*\(' |
  # Fetch every URL: unlimited job slots (-j0), 10 s timeout per job, up
  # to 3 tries, progress bar, output tagged with the URL, results logged
  # to ./joblog.
  # The User-Agent word reaches parallel as the literal text
  #   '"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)"'
  # (including both kinds of quotes); presumably so that it survives the
  # shell that parallel starts for each job - confirm.
  # {=$_=Q($_)=} applies parallel's Q() shell quoting to the URL.
  # NOTE(review): neno is an external wrapper command not defined here.
  parallel -j0 --timeout 10 --bar --tag --joblog joblog --retries 3 neno wget -l1 -Q1 -U "'"'"'"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)"'"'"'" '{=$_=Q($_)=}'