From f3186e82427cc8d7afc2aaaed596a40ca70c19a9 Mon Sep 17 00:00:00 2001
From: Fred Drake
Date: Wed, 4 Apr 2001 17:47:25 +0000
Subject: [PATCH] A number of improvements based on a discussion with Chris McCafferty :

Add javascript: and telnet: to the types of URLs we ignore.

Add support for several additional URL-valued attributes on the BODY,
FRAME, IFRAME, LINK, OBJECT, and SCRIPT elements.
---
Review note (this section is ignored when the patch is applied):
urllib.splittype() returns a (scheme, rest) pair rather than a bare scheme
string, so the first hunk now takes element [0] before the membership test;
comparing the whole pair against the scheme names could never match.
NOTE(review): the post-image blob id on the "index" line was computed for
the unmodified patch and has not been regenerated.

 Tools/webchecker/webchecker.py | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/Tools/webchecker/webchecker.py b/Tools/webchecker/webchecker.py
index e79e7f10c09..b369ab809ce 100755
--- a/Tools/webchecker/webchecker.py
+++ b/Tools/webchecker/webchecker.py
@@ -481,8 +481,9 @@ class Checker:
         if self.name_table.has_key(url):
             return self.name_table[url]
 
-        if url[:7] == 'mailto:' or url[:5] == 'news:':
-            self.note(1, " Not checking mailto/news URL")
+        scheme = urllib.splittype(url)[0]
+        if scheme in ('mailto', 'news', 'javascript', 'telnet'):
+            self.note(1, " Not checking %s URL" % scheme)
             return None
         isint = self.inroots(url)
 
@@ -792,10 +793,31 @@ class MyHTMLParser(sgmllib.SGMLParser):
     def do_area(self, attributes):
         self.link_attr(attributes, 'href')
 
+    def do_body(self, attributes):
+        self.link_attr(attributes, 'background')
+
     def do_img(self, attributes):
         self.link_attr(attributes, 'src', 'lowsrc')
 
     def do_frame(self, attributes):
+        self.link_attr(attributes, 'src', 'longdesc')
+
+    def do_iframe(self, attributes):
+        self.link_attr(attributes, 'src', 'longdesc')
+
+    def do_link(self, attributes):
+        for name, value in attributes:
+            if name == "rel":
+                parts = string.split(string.lower(value))
+                if (  parts == ["stylesheet"]
+                      or parts == ["alternate", "stylesheet"]):
+                    self.link_attr(attributes, "href")
+                    break
+
+    def do_object(self, attributes):
+        self.link_attr(attributes, 'data', 'usemap')
+
+    def do_script(self, attributes):
         self.link_attr(attributes, 'src')
 
     def link_attr(self, attributes, *args):