beta-0.89.2
[luatex.git] / source / texk / web2c / luatexdir / luasocket / etc / check-links.lua
blobd2e4266e809943c812740f180ccdcbdda9083cec
1 -----------------------------------------------------------------------------
2 -- Little program that checks links in HTML files, using coroutines and
3 -- non-blocking I/O via the dispatcher module.
4 -- LuaSocket sample files
5 -- Author: Diego Nehab
6 -----------------------------------------------------------------------------
7 local url = require("socket.url")
8 local dispatch = require("dispatch")
9 local http = require("socket.http")
-- timeout value handed to the dispatch module
-- (presumably in seconds -- confirm against dispatch.lua)
dispatch.TIMEOUT = 10

-- make sure the user knows how to invoke us
arg = arg or {}
-- the length operator replaces table.getn, which no longer exists in Lua 5.2
if #arg < 1 then
    print("Usage:\n luasocket check-links.lua [-n] {<url>}")
    -- Lua has no global exit(); os.exit is the standard way to terminate.
    -- A non-zero status signals that the invocation was wrong.
    os.exit(1)
end
-- Choose the dispatcher flavour from the optional leading '-n' flag.
-- With '-n' the flag is removed from the argument list and the "coroutine"
-- handler (real, non-blocking dispatcher interface) is created; without it
-- the "sequential" handler (fake dispatcher interface, blocking I/O) is used.
-- 'handler' is a global: getstatus and the final stepping loop refer to it
-- by that name.
local nonblocking = (arg[1] == "-n")
if nonblocking then
    table.remove(arg, 1)
end
handler = dispatch.newhandler(nonblocking and "coroutine" or "sequential")
-- number of requests started by getstatus that have not finished yet
local nthreads = 0

-- Get the status of a URL using the dispatcher and print the outcome.
-- link: URL to check. Only links whose scheme is "http" are requested; a
--       link without a scheme is parsed with "file" as its default scheme
--       and, like every other non-http link, is silently skipped.
-- For each requested link one line goes to standard output:
-- "\t<link>" when the HEAD request answers 200, "\t<link>: <code>" otherwise.
-- Returns nothing.
function getstatus(link)
    local parsed = url.parse(link, {scheme = "file"})
    -- url.parse may not hand back a table for an unusable link
    -- (assumption -- confirm against socket.url); treat that as "skip"
    if not parsed or parsed.scheme ~= "http" then return end
    nthreads = nthreads + 1
    handler:start(function()
        -- only the first two results are used; the rest are discarded
        local r, c = http.request{
            method = "HEAD",
            url = link,
            create = handler.tcp
        }
        if r and c == 200 then io.write('\t', link, '\n')
        else io.write('\t', link, ': ', tostring(c), '\n') end
        -- this request is done; the main loop stops stepping at zero
        nthreads = nthreads - 1
    end)
end
-- Read a whole local file whose path was taken from a URL.
-- path: file system path in URL-escaped form; it is passed through
--       url.unescape before the file is opened.
-- Returns the complete file contents, or nil plus the message reported by
-- io.open when the file cannot be opened.
function readfile(path)
    local fh, why = io.open(url.unescape(path), "r")
    if not fh then
        return nil, why
    end
    local contents = fh:read("*a")
    fh:close()
    return contents
end
-- Fetch the document that u points to.
-- u: URL of the document; a URL without a scheme is treated as "file".
-- Returns three values: base, body, err
--   base: u itself, or the 'location' response header when an http request
--         answered 200 and that header is present
--   body: the document contents, or nil on failure
--   err:  nil on success; for http the second result of http.request, for
--         files the message from readfile, otherwise an
--         "unhandled scheme" message
-- NOTE: as a global function named 'load', this definition replaces Lua's
-- built-in load for the rest of the program.
function load(u)
    local parsed = url.parse(u, { scheme = "file" })
    local scheme = parsed.scheme

    if scheme == "http" then
        local body, code, headers = http.request(u)
        local base = u
        if code == 200 then
            -- if there was a redirect, update base to reflect it
            base = headers.location or base
        end
        if body then
            return base, body, nil
        end
        -- without a body, the second result is reported as the error
        return base, nil, code
    end

    if scheme == "file" then
        local body, err = readfile(parsed.path)
        return u, body, err
    end

    return u, nil, string.format("unhandled scheme '%s'", parsed.scheme)
end
-- Collect the targets of all href attributes found in an HTML document.
-- body: HTML text to scan
-- base: URL that each href is combined with through url.absolute
-- Returns an array of the results of url.absolute, ordered by quoting style:
-- double-quoted hrefs first, then single-quoted, then unquoted ones.
-- hrefs inside HTML comments are ignored.
function getlinks(body, base)
    -- get rid of comments
    body = string.gsub(body, "%<%!%-%-.-%-%-%>", "")
    local links = {}
    -- Record one href and return "" so that gsub erases the attribute from
    -- the text. When the callback returns nothing, gsub (Lua 5.1 and later)
    -- keeps the match, and the unquoted pass below would then pick up every
    -- quoted href a second time, quotes included.
    local function collect(href)
        table.insert(links, url.absolute(base, href))
        return ""
    end
    -- extract links
    body = string.gsub(body, '[Hh][Rr][Ee][Ff]%s*=%s*"([^"]*)"', collect)
    body = string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*'([^']*)'", collect)
    -- an unquoted value ends at whitespace, a quote or '>', so attributes
    -- that follow the href are not swallowed into the link
    string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*([^%s\"'>]+)", collect)
    return links
end
-- Load one document and check every link found in it.
-- address: URL of the document to examine
-- When the document cannot be loaded, the error reported by load is printed
-- and nothing else happens. Otherwise "Checking <base>" is printed and each
-- link returned by getlinks is handed to getstatus.
function checklinks(address)
    -- 'err' rather than 'error', so the built-in error function is not shadowed
    local base, body, err = load(address)
    if not body then print(err) return end
    print("Checking ", base)
    local links = getlinks(body, base)
    for _, link in ipairs(links) do
        getstatus(link)
    end
end
-- check every address given on the command line; each one is first combined
-- with the base "file:" through url.absolute (presumably so that plain paths
-- are treated as local files -- confirm against socket.url)
for _, address in ipairs(arg) do
    checklinks(url.absolute("file:", address))
end

-- keep stepping the dispatcher until every request started by getstatus
-- has finished (nthreads drops back to zero)
while nthreads > 0 do
    handler:step()
end