#!/usr/bin/env python
#
# localhtmlvalidate.py - validate local HTML for pgweb
#
# This is a small tool that you run to validate the HTML output of your
# localhost pgweb installation against the W3C validator. Give it the
# localhost:8000 URL (or other, depending on what port you're running the
# local server on), and it will give you a list of possible issues with
# the page.
#
# In theory it can be used just fine for non-pgweb pages as well, but
# for obvious reasons the functionality to show the source line number
# based on the pgweb templates won't work.
#
# The script runs on both Python 2 and Python 3. Every print call below
# takes exactly one argument so that it behaves identically as a Python 2
# print statement and as the Python 3 print function.
#

import contextlib
import re
import sys

try:
    # Python 2 module names (what this script was originally written for).
    import httplib
    import HTMLParser
    import urllib
except ImportError:
    # Python 3 moved these modules; bind the same local names.
    import html
    import http.client as httplib
    import urllib.request
    _urlopen = urllib.request.urlopen
    _unescape = html.unescape
else:
    _urlopen = urllib.urlopen
    _unescape = HTMLParser.HTMLParser().unescape

BOUNDARY = "-=--=foobar-=--="

# Line that marks where the page's own content starts in the fetched HTML.
# NOTE(review): splitlines() strips line terminators, so a bare newline can
# never equal one of its elements and the computed offset is always 0.
# Presumably this was meant to be a marker line taken from the pgweb
# templates - TODO confirm and restore the intended value.
CONTENT_START_MARKER = b'\n'

# Patterns used to pick the individual errors out of the validator's reply.
# NOTE(review): these look like they have lost their HTML tag text. As
# written, the last group of ERROR_LINE_RE can only ever match the empty
# string, and ERROR_SOURCE_RE matches three empty groups at position 0.
# Verify them against the validator's real markup before relying on the
# per-error output.
ERROR_ITEM_RE = u'\n  \u2022 .*?\n  \u2022 '
ERROR_LINE_RE = r'Line (\d+).*(.*?)'
ERROR_SOURCE_RE = r'(.*?)(.*?)(.*?)'


def _to_bytes(value):
    """Return value as a byte string, UTF-8 encoding it if it is text."""
    if isinstance(value, bytes):
        return value
    return value.encode('utf-8')


def encode_multipart_formdata(fields, files):
    """Build a multipart/form-data request body.

    fields -- iterable of (name, value) pairs sent as plain form fields.
    files  -- iterable of (name, filename, value) triples; each one is
              sent as a file upload with Content-Type text/html.

    Returns the body as a byte string (a plain str on Python 2) with CRLF
    line endings, using BOUNDARY as the part delimiter. Text values are
    encoded as UTF-8; byte strings are passed through untouched.
    """
    delimiter = b'--' + _to_bytes(BOUNDARY)
    parts = []
    for key, value in fields:
        parts.extend([
            delimiter,
            _to_bytes('Content-Disposition: form-data; name="%s"' % key),
            b'',
            _to_bytes(value),
        ])
    for key, filename, value in files:
        parts.extend([
            delimiter,
            _to_bytes('Content-Disposition: form-data; name="%s"; filename="%s"' % (key, filename)),
            b'Content-Type: text/html',
            b'',
            _to_bytes(value),
        ])
    # Closing delimiter, followed by an empty element so that the joined
    # body ends with a CRLF.
    parts.extend([delimiter + b'--', b''])
    return b'\r\n'.join(parts)


def _content_offset(contents):
    """Return the index of the content-start marker line in contents, or 0."""
    try:
        return contents.splitlines().index(CONTENT_START_MARKER)
    except ValueError:
        return 0


def _submit_to_validator(body):
    """POST body to the W3C validator; return (response, response body)."""
    conn = httplib.HTTPConnection("validator.w3.org")
    try:
        conn.putrequest("POST", "/check")
        # putheader() takes the header name and its value as separate
        # arguments; passing "Name: value" as the name alone produces a
        # malformed header line.
        conn.putheader("User-Agent", "localcheck-tester/0.0")
        conn.putheader("content-type", "multipart/form-data; boundary=%s" % BOUNDARY)
        conn.putheader("content-length", str(len(body)))
        conn.endheaders()
        conn.send(body)
        response = conn.getresponse()
        # Read the reply before the connection is closed below.
        return response, response.read()
    finally:
        conn.close()


def _emit(text):
    """Print a text string, as UTF-8 bytes when running on Python 2."""
    if str is bytes:
        # Python 2: encode explicitly so that output does not depend on
        # the encoding of stdout (which is ASCII when piped).
        text = text.encode('utf-8')
    print(text)


def _report_errors(rbody, firstline):
    """Print each error found in the validator reply rbody.

    firstline is subtracted from the validator's line number to estimate
    the line number in the page's own source.
    """
    text = rbody.decode('utf-8', 'replace')
    for item in re.findall(ERROR_ITEM_RE, text, re.DOTALL):
        located = re.search(ERROR_LINE_RE, item, re.DOTALL)
        if located is None:
            # Not an entry that carries a line number; nothing to report.
            continue
        line = located.group(1)
        _emit(u"Line %s (should be around %s): %s" % (
            line, int(line) - firstline, _unescape(located.group(2))))
        source = re.search(ERROR_SOURCE_RE, item, re.DOTALL)
        if source:
            _emit(u"Source: %s" % _unescape(u"%s%s%s" % source.groups()))
        print("")


def main(argv):
    """Validate the page at argv[1] and print the result.

    Returns the process exit status: 1 for a usage error or an
    unrecognised validator status, 0 otherwise (including when the page
    is reported as invalid).
    """
    if len(argv) != 2:
        print("Usage: localhtmlvalidate.py <url>")
        return 1

    with contextlib.closing(_urlopen(argv[1])) as page:
        contents = page.read()

    # Try to figure out where the actual contents start :)
    firstline = _content_offset(contents)

    # Generate a form body
    body = encode_multipart_formdata([
        ('charset', 'utf-8'),
        ('doctype', 'inline'),
        ('group', '0'),
        ('verbose', '1'),
    ], [('uploaded_file', 'test.html', contents)])

    # Now submit it to the w3c validator
    response, rbody = _submit_to_validator(body)

    # getheader() returns None for a missing header, so a reply without
    # the validator headers ends up in the "Unknown status" branch.
    status = response.getheader('x-w3c-validator-status')
    if status == 'Valid':
        print("Page validates!")
        return 0
    if status == 'Invalid':
        print("Invalid!")
        print("Errors: %s" % response.getheader('x-w3c-validator-errors'))
        print("Warnings: %s" % response.getheader('x-w3c-validator-warnings'))
        _report_errors(rbody, firstline)
        return 0

    print("Unknown status: %s" % status)
    print(response.msg)
    return 1


if __name__ == "__main__":
    sys.exit(main(sys.argv))