-----------------------------------------------------------------------------
-- Little program that checks links in HTML files, using coroutines and
-- non-blocking I/O via the dispatcher module.
-- LuaSocket sample files
-----------------------------------------------------------------------------
7 local url
= require("socket.url")
8 local dispatch
= require("dispatch")
9 local http
= require("socket.http")
12 -- make sure the user knows how to invoke us
14 if table.getn(arg
) < 1 then
15 print("Usage:\n luasocket check-links.lua [-n] {<url>}")
19 -- '-n' means we are running in non-blocking mode
20 if arg
[1] == "-n" then
21 -- if non-blocking I/O was requested, use real dispatcher interface
23 handler
= dispatch
.newhandler("coroutine")
25 -- if using blocking I/O, use fake dispatcher interface
26 handler
= dispatch
.newhandler("sequential")
31 -- get the status of a URL using the dispatcher
32 function getstatus(link
)
33 local parsed
= url
.parse(link
, {scheme
= "file"})
34 if parsed
.scheme
== "http" then
35 nthreads
= nthreads
+ 1
36 handler
:start(function()
37 local r
, c
, h
, s
= http
.request
{
42 if r
and c
== 200 then io
.write('\t', link
, '\n')
43 else io
.write('\t', link
, ': ', tostring(c
), '\n') end
44 nthreads
= nthreads
- 1
-- Read the entire contents of a local file.
-- @param path  URL-escaped file path; it is unescaped before being opened
-- @return the file contents as a string on success, or nil followed by the
--         message reported by io.open when the file cannot be opened
function readfile(path)
    path = url.unescape(path)
    -- named 'err' so the builtin error() is not shadowed inside this function
    local file, err = io.open(path, "r")
    if not file then return nil, err end
    local body = file:read("*a")
    -- release the handle right away instead of waiting for the collector
    file:close()
    return body
end
60 local parsed
= url
.parse(u
, { scheme
= "file" })
61 local body
, headers
, code
, error
63 if parsed
.scheme
== "http" then
64 body
, code
, headers
= http
.request(u
)
66 -- if there was a redirect, update base to reflect it
67 base
= headers
.location
or base
72 elseif parsed
.scheme
== "file" then
73 body
, error = readfile(parsed
.path
)
74 else error = string.format("unhandled scheme '%s'", parsed
.scheme
) end
75 return base
, body
, error
-- Collect the targets of all href attributes found in a piece of HTML.
-- @param body  HTML text to scan
-- @param base  URL against which each href is resolved with url.absolute
-- @return an array of resolved URLs: double-quoted hrefs first, then
--         single-quoted ones, then unquoted ones
function getlinks(body, base)
    -- get rid of comments
    body = string.gsub(body, "%<%!%-%-.-%-%-%>", "")
    local links = {}
    -- Record one href and return "" so gsub deletes the matched attribute.
    -- A callback that returns nothing leaves the match in place, which let
    -- the unquoted pass below pick up every quoted href a second time.
    local function collect(href)
        table.insert(links, url.absolute(base, href))
        return ""
    end
    -- extract links, consuming quoted attributes as they are found
    body = string.gsub(body, '[Hh][Rr][Ee][Ff]%s*=%s*"([^"]*)"', collect)
    body = string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*'([^']*)'", collect)
    -- unquoted values end at whitespace or '>', so attributes that follow
    -- the href are not swallowed into the link
    string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*([^%s>]+)", collect)
    return links
end
-- Load one document and hand every link found in it to getstatus.
-- @param address  URL of the document to scan (the driver loop passes
--                 addresses made absolute against "file:")
-- Prints the failure reason and returns early when the document body
-- cannot be obtained.
function checklinks(address)
    -- NOTE(review): 'load' is called with a URL and expected to return
    -- base, body, error; presumably this file's own loader rather than the
    -- builtin chunk loader; confirm.
    -- The failure value is named 'err' so the builtin error() is not shadowed.
    local base, body, err = load(address)
    if not body then print(err) return end
    print("Checking ", base)
    for _, link in ipairs(getlinks(body, base)) do
        getstatus(link)
    end
end
105 for _
, address
in ipairs(arg
) do
106 checklinks(url
.absolute("file:", address
))
109 while nthreads
> 0 do