3 """GUI interface to webchecker.
5 This works as a Grail applet too! E.g.
7 <APPLET CODE=wcgui.py NAME=CheckerWindow></APPLET>
9 Checkpoints are not (yet??? ever???) supported.
13 Enter a root to check in the text entry box. To enter more than one root,
14 enter them one at a time and press <Return> for each one.
Command buttons Run, Stop and "Check one" govern the checking process in
the obvious way. Run and "Check one" also enter the root from the text
entry box if one is present. "Start over" resets the checker. There's also
a check box labeled "Check nonlocal links" (enabled by default) to decide
whether actually to follow external links (since this can slow the checking
down considerably). Finally there's a Quit button.
22 A series of checkbuttons determines whether the corresponding output panel
is shown. List panels are also automatically shown or hidden when their
status changes between empty and non-empty. There are six panels:
26 Log -- raw output from the checker (-v, -q affect this)
27 To check -- links discovered but not yet checked
28 Checked -- links that have been checked
29 Bad links -- links that failed upon checking
Pages w/ bad links -- pages containing at least one bad link
31 Details -- details about one URL; double click on a URL in any of
32 the above list panels (not in Log) will show details
35 Use your window manager's Close command to quit.
39 -m bytes -- skip HTML pages larger than this size (default %(MAXPAGE)d)
40 -q -- quiet operation (also suppresses external links report)
41 -v -- verbose operation; repeating -v will increase verbosity
42 -t root -- specify root dir which should be treated as internal (can repeat)
43 -a -- don't check name anchors
45 Command line arguments:
47 rooturl -- URL to start checking
50 XXX The command line options (-m, -q, -v) should be GUI accessible.
52 XXX The roots should be visible as a list (?).
54 XXX The multipanel user interface is clumsy.
# Override some webchecker module-level settings for a weaker platform.
# The test matches sys.platform == 'mac' (presumably the classic Mac OS
# port of Python -- confirm before relying on it elsewhere).
if sys.platform == 'mac':
    # Default root URL; also used as the default ``root`` argument of
    # CheckerWindow.__init__.
    webchecker.DEFROOT = "http://grail.cnri.reston.va.us/"
    # Default page-size limit reported for the -m option in the usage text.
    webchecker.MAXPAGE = 50000
    # NOTE(review): the option-parsing code further down reassigns
    # webchecker.verbose from webchecker.VERBOSE, so this value appears to
    # matter only when that code does not run -- confirm.
    webchecker.verbose = 4
75 opts
, args
= getopt
.getopt(sys
.argv
[1:], 't:m:qva')
76 except getopt
.error
, msg
:
77 sys
.stdout
= sys
.stderr
79 print __doc__
%vars(webchecker
)
81 webchecker
.verbose
= webchecker
.VERBOSE
82 webchecker
.nonames
= webchecker
.NONAMES
83 webchecker
.maxpage
= webchecker
.MAXPAGE
87 webchecker
.maxpage
= int(a
)
89 webchecker
.verbose
= 0
91 webchecker
.verbose
= webchecker
.verbose
+ 1
95 webchecker
.nonames
= not webchecker
.nonames
96 root
= Tk(className
='Webchecker')
97 root
.protocol("WM_DELETE_WINDOW", root
.quit
)
98 c
= CheckerWindow(root
)
99 c
.setflags(verbose
=webchecker
.verbose
, maxpage
=webchecker
.maxpage
,
100 nonames
=webchecker
.nonames
)
102 for arg
in args
[:-1]:
104 c
.suggestroot(args
[-1])
105 # Usually conditioned on whether external links
106 # will be checked, but since that's not a command
107 # line option, just toss them in.
108 for url_root
in extra_roots
:
109 # Make sure it's terminated by a slash,
110 # so that addroot doesn't discard the last
111 # directory component.
112 if url_root
[-1] != "/":
113 url_root
= url_root
+ "/"
114 c
.addroot(url_root
, add_to_do
= 0)
118 class CheckerWindow(webchecker
.Checker
):
120 def __init__(self
, parent
, root
=webchecker
.DEFROOT
):
121 self
.__parent
= parent
123 self
.__topcontrols
= Frame(parent
)
124 self
.__topcontrols
.pack(side
=TOP
, fill
=X
)
125 self
.__label
= Label(self
.__topcontrols
, text
="Root URL:")
126 self
.__label
.pack(side
=LEFT
)
127 self
.__rootentry
= Entry(self
.__topcontrols
, width
=60)
128 self
.__rootentry
.pack(side
=LEFT
)
129 self
.__rootentry
.bind('<Return>', self
.enterroot
)
130 self
.__rootentry
.focus_set()
132 self
.__controls
= Frame(parent
)
133 self
.__controls
.pack(side
=TOP
, fill
=X
)
135 self
.__start
= Button(self
.__controls
, text
="Run", command
=self
.start
)
136 self
.__start
.pack(side
=LEFT
)
137 self
.__stop
= Button(self
.__controls
, text
="Stop", command
=self
.stop
,
139 self
.__stop
.pack(side
=LEFT
)
140 self
.__step
= Button(self
.__controls
, text
="Check one",
142 self
.__step
.pack(side
=LEFT
)
143 self
.__cv
= BooleanVar(parent
)
144 self
.__cv
.set(self
.checkext
)
145 self
.__checkext
= Checkbutton(self
.__controls
, variable
=self
.__cv
,
146 command
=self
.update_checkext
,
147 text
="Check nonlocal links",)
148 self
.__checkext
.pack(side
=LEFT
)
149 self
.__reset
= Button(self
.__controls
, text
="Start over", command
=self
.reset
)
150 self
.__reset
.pack(side
=LEFT
)
151 if __name__
== '__main__': # No Quit button under Grail!
152 self
.__quit
= Button(self
.__controls
, text
="Quit",
153 command
=self
.__parent
.quit
)
154 self
.__quit
.pack(side
=RIGHT
)
156 self
.__status
= Label(parent
, text
="Status: initial", anchor
=W
)
157 self
.__status
.pack(side
=TOP
, fill
=X
)
158 self
.__checking
= Label(parent
, text
="Idle", anchor
=W
)
159 self
.__checking
.pack(side
=TOP
, fill
=X
)
160 self
.__mp
= mp
= MultiPanel(parent
)
161 sys
.stdout
= self
.__log
= LogPanel(mp
, "Log")
162 self
.__todo
= ListPanel(mp
, "To check", self
, self
.showinfo
)
163 self
.__done
= ListPanel(mp
, "Checked", self
, self
.showinfo
)
164 self
.__bad
= ListPanel(mp
, "Bad links", self
, self
.showinfo
)
165 self
.__errors
= ListPanel(mp
, "Pages w/ bad links", self
, self
.showinfo
)
166 self
.__details
= LogPanel(mp
, "Details")
167 self
.root_seed
= None
168 webchecker
.Checker
.__init
__(self
)
170 root
= str(root
).strip()
172 self
.suggestroot(root
)
176 webchecker
.Checker
.reset(self
)
177 for p
in self
.__todo
, self
.__done
, self
.__bad
, self
.__errors
:
180 self
.suggestroot(self
.root_seed
)
def suggestroot(self, root):
    """Put *root* into the root-URL entry box and remember it.

    The entry's current text is replaced by ``root``, the new text is
    selected in full, and ``root`` is stored in ``self.root_seed`` (the
    value the reset code passes back to this method).
    """
    entry = self.__rootentry
    entry.delete(0, END)        # clear the previous contents
    entry.insert(END, root)
    entry.select_range(0, END)  # select the whole suggestion
    self.root_seed = root
188 def enterroot(self
, event
=None):
189 root
= self
.__rootentry
.get()
192 self
.__checking
.config(text
="Adding root "+root
)
193 self
.__checking
.update_idletasks()
195 self
.__checking
.config(text
="Idle")
197 i
= self
.__todo
.items
.index(root
)
198 except (ValueError, IndexError):
201 self
.__todo
.list.select_clear(0, END
)
202 self
.__todo
.list.select_set(i
)
203 self
.__todo
.list.yview(i
)
204 self
.__rootentry
.delete(0, END
)
207 self
.__start
.config(state
=DISABLED
, relief
=SUNKEN
)
208 self
.__stop
.config(state
=NORMAL
)
209 self
.__step
.config(state
=DISABLED
)
215 self
.__stop
.config(state
=DISABLED
, relief
=SUNKEN
)
219 self
.__start
.config(state
=DISABLED
)
220 self
.__step
.config(state
=DISABLED
, relief
=SUNKEN
)
227 self
.__parent
.after_idle(self
.dosomething
)
229 self
.__checking
.config(text
="Idle")
230 self
.__start
.config(state
=NORMAL
, relief
=RAISED
)
231 self
.__stop
.config(state
=DISABLED
, relief
=RAISED
)
232 self
.__step
.config(state
=NORMAL
, relief
=RAISED
)
236 def dosomething(self
):
237 if self
.__busy
: return
240 l
= self
.__todo
.selectedindices()
245 self
.__todo
.list.select_set(i
)
246 self
.__todo
.list.yview(i
)
247 url
= self
.__todo
.items
[i
]
248 self
.__checking
.config(text
="Checking "+self
.format_url(url
))
249 self
.__parent
.update()
256 def showinfo(self
, url
):
259 d
.put("URL: %s\n" % self
.format_url(url
))
260 if self
.bad
.has_key(url
):
261 d
.put("Error: %s\n" % str(self
.bad
[url
]))
262 if url
in self
.roots
:
263 d
.put("Note: This is a root URL\n")
264 if self
.done
.has_key(url
):
265 d
.put("Status: checked\n")
267 elif self
.todo
.has_key(url
):
268 d
.put("Status: to check\n")
271 d
.put("Status: unknown (!)\n")
273 if (not url
[1]) and self
.errors
.has_key(url
[0]):
274 d
.put("Bad links from this page:\n")
275 for triple
in self
.errors
[url
[0]]:
276 link
, rawlink
, msg
= triple
277 d
.put(" HREF %s" % self
.format_url(link
))
278 if self
.format_url(link
) != rawlink
: d
.put(" (%s)" %rawlink
)
280 d
.put(" error %s\n" % str(msg
))
281 self
.__mp
.showpanel("Details")
282 for source
, rawlink
in o
:
283 d
.put("Origin: %s" % source
)
284 if rawlink
!= self
.format_url(url
):
285 d
.put(" (%s)" % rawlink
)
289 def setbad(self
, url
, msg
):
290 webchecker
.Checker
.setbad(self
, url
, msg
)
291 self
.__bad
.insert(url
)
294 def setgood(self
, url
):
295 webchecker
.Checker
.setgood(self
, url
)
296 self
.__bad
.remove(url
)
299 def newlink(self
, url
, origin
):
300 webchecker
.Checker
.newlink(self
, url
, origin
)
301 if self
.done
.has_key(url
):
302 self
.__done
.insert(url
)
303 elif self
.todo
.has_key(url
):
304 self
.__todo
.insert(url
)
307 def markdone(self
, url
):
308 webchecker
.Checker
.markdone(self
, url
)
309 self
.__done
.insert(url
)
310 self
.__todo
.remove(url
)
313 def seterror(self
, url
, triple
):
314 webchecker
.Checker
.seterror(self
, url
, triple
)
315 self
.__errors
.insert((url
, ''))
319 self
.__status
.config(text
="Status: "+self
.status())
320 self
.__parent
.update()
322 def update_checkext(self
):
323 self
.checkext
= self
.__cv
.get()
328 def __init__(self
, mp
, name
, checker
, showinfo
=None):
331 self
.showinfo
= showinfo
332 self
.checker
= checker
333 self
.panel
= mp
.addpanel(name
)
334 self
.list, self
.frame
= tktools
.make_list_box(
335 self
.panel
, width
=60, height
=5)
336 self
.list.config(exportselection
=0)
338 self
.list.bind('<Double-Button-1>', self
.doubleclick
)
343 self
.list.delete(0, END
)
344 self
.mp
.hidepanel(self
.name
)
346 def doubleclick(self
, event
):
347 l
= self
.selectedindices()
349 self
.showinfo(self
.items
[l
[0]])
351 def selectedindices(self
):
352 l
= self
.list.curselection()
356 def insert(self
, url
):
357 if url
not in self
.items
:
359 self
.mp
.showpanel(self
.name
)
360 # (I tried sorting alphabetically, but the display is too jumpy)
362 self
.list.insert(i
, self
.checker
.format_url(url
))
364 self
.items
.insert(i
, url
)
366 def remove(self
, url
):
368 i
= self
.items
.index(url
)
369 except (ValueError, IndexError):
372 was_selected
= i
in self
.selectedindices()
376 self
.mp
.hidepanel(self
.name
)
378 if i
>= len(self
.items
):
379 i
= len(self
.items
) - 1
380 self
.list.select_set(i
)
385 def __init__(self
, mp
, name
):
388 self
.panel
= mp
.addpanel(name
)
389 self
.text
, self
.frame
= tktools
.make_text_box(self
.panel
, height
=10)
390 self
.text
.config(wrap
=NONE
)
393 self
.text
.delete("1.0", END
)
394 self
.text
.yview("1.0")
397 self
.text
.insert(END
, s
)
402 self
.text
.insert(END
, s
)
410 def __init__(self
, parent
):
412 self
.frame
= Frame(self
.parent
)
413 self
.frame
.pack(expand
=1, fill
=BOTH
)
414 self
.topframe
= Frame(self
.frame
, borderwidth
=2, relief
=RAISED
)
415 self
.topframe
.pack(fill
=X
)
416 self
.botframe
= Frame(self
.frame
)
417 self
.botframe
.pack(expand
=1, fill
=BOTH
)
421 def addpanel(self
, name
, on
=0):
422 v
= StringVar(self
.parent
)
427 check
= Checkbutton(self
.topframe
, text
=name
,
428 offvalue
="", onvalue
=name
, variable
=v
,
429 command
=self
.checkpanel
)
430 check
.pack(side
=LEFT
)
431 panel
= Frame(self
.botframe
)
432 label
= Label(panel
, text
=name
, borderwidth
=2, relief
=RAISED
, anchor
=W
)
433 label
.pack(side
=TOP
, fill
=X
)
435 self
.panelnames
.append(name
)
436 self
.panels
[name
] = t
438 panel
.pack(expand
=1, fill
=BOTH
)
441 def showpanel(self
, name
):
442 v
, check
, panel
= self
.panels
[name
]
444 panel
.pack(expand
=1, fill
=BOTH
)
446 def hidepanel(self
, name
):
447 v
, check
, panel
= self
.panels
[name
]
451 def checkpanel(self
):
452 for name
in self
.panelnames
:
453 v
, check
, panel
= self
.panels
[name
]
455 for name
in self
.panelnames
:
456 v
, check
, panel
= self
.panels
[name
]
458 panel
.pack(expand
=1, fill
=BOTH
)
461 if __name__
== '__main__':