"
sys.exit(1)
# Download the page to validate; its URL is the first command-line argument.
contents = urllib.urlopen(sys.argv[1]).read()
# Find the index of the first empty line in the page. The error report later
# subtracts this offset from the line numbers the validator returns. When the
# page has no empty line at all, the offset is 0.
# NOTE(review): the search target is an empty string here -- confirm that a
# blank line (rather than some marker line) is the intended anchor.
page_lines = contents.splitlines()
firstline = page_lines.index('') if '' in page_lines else 0
# Build the request payload for the validator form.
# Presumably the helper takes (plain fields, file uploads) and each upload is
# (field name, file name, file data) -- verify against encode_multipart_formdata.
form_fields = [
    ('charset', 'utf-8'),
    ('doctype', 'inline'),
    ('group', '0'),
    ('verbose', '1'),
]
uploads = [('uploaded_file', 'test.html', contents)]
body = encode_multipart_formdata(form_fields, uploads)
# Submit the form body to the W3C validator: POST /check on validator.w3.org
# through the legacy httplib.HTTP interface.
h = httplib.HTTP("validator.w3.org")
h.putrequest("POST", "/check")
# putheader() takes the header name and its value as separate arguments.
# Passing "Name: value" as one string makes httplib append its own ": " after
# it, which sends a malformed User-Agent value.
h.putheader("User-Agent", "localcheck-tester/0.0")
h.putheader("content-type", "multipart/form-data; boundary=%s" % BOUNDARY)
h.putheader("content-length", str(len(body)))
h.endheaders()
h.send(body)
# getreply() yields (status code, reason phrase, response headers). The
# headers are inspected below for the validation verdict, and the response
# body is scanned for the individual error details.
errcode, errmsg, headers = h.getreply()
rbody = h.getfile().read()
# Report the verdict found in the x-w3c-validator-status response header.
# 'Valid' prints a success message and exits with status 0; 'Invalid' prints
# the error/warning counts plus per-error details; anything else is treated
# as an unknown status.
if headers['x-w3c-validator-status'] == 'Valid':
print "Page validates!"
sys.exit(0)
elif headers['x-w3c-validator-status'] == 'Invalid':
print "Invalid!"
print "Errors: %s" % headers['x-w3c-validator-errors']
print "Warnings: %s" % headers['x-w3c-validator-warnings']
# Parser instance used only for unescape(), to turn HTML entities in the
# extracted text back into plain characters.
hp = HTMLParser.HTMLParser()
# NOTE(review): the regex literals below appear truncated and split across
# lines in this copy (as if markup inside them had been stripped), so what
# they are meant to match cannot be confirmed from here. TODO: restore them
# from the canonical file before relying on this section.
# Presumably each match `m` is one validator message from the response body.
for m in re.findall('
.*?', rbody, re.DOTALL):
# Group 1 is the line number the validator reported; group 2 is the text
# printed after it.
r = re.search('Line (\d+).*(.*?)', m, re.DOTALL)
# Subtracting firstline gives an estimate of the matching line locally.
print "Line %s (should be around %s): %s" % (r.group(1), int(r.group(1)) - firstline, hp.unescape(r.group(2)))
# Optional source excerpt: the match is decoded from UTF-8 so the three
# captured pieces can be joined as unicode, unescaped, and then re-encoded
# as UTF-8 for printing.
r2 = re.search('(.*?)(.*?)(.*?)
', unicode(m, 'utf8'), re.DOTALL)
if r2:
s = u"%s%s%s" % r2.groups()
print "Source: %s" % hp.unescape(s).encode('utf-8')
print ""
else:
# Neither 'Valid' nor 'Invalid': print the status value and the headers.
print "Unknown status: %s" % headers['x-w3c-validator-status']
print headers
sys.exit(1)