From d8169e0f0526c59370a822148a36fadb08da686c Mon Sep 17 00:00:00 2001 From: Magnus Hagander Date: Mon, 10 Sep 2012 14:02:19 +0200 Subject: [PATCH] Add small tool to run w3c validator on local pages before deployment --- tools/localhtmlvalidate/localhtmlvalidate.py | 92 ++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100755 tools/localhtmlvalidate/localhtmlvalidate.py diff --git a/tools/localhtmlvalidate/localhtmlvalidate.py b/tools/localhtmlvalidate/localhtmlvalidate.py new file mode 100755 index 00000000..a41ad659 --- /dev/null +++ b/tools/localhtmlvalidate/localhtmlvalidate.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python +# +# localhtmlvalidate.py - validate local HTML for pgweb +# +# This is a small tool that you run to validate the HTML output of your +# localhost pgweb installation against the W3C validator. Give it the +# localhost:8000 URL (or other, depending on what port you're running the +# local server on), and it will give you a list of possible issues with +# the page. +# +# In theory it can be used just fine for non-pgweb pages as well, but +# for obvious reasons the functionality to show the source line number +# based on the pgweb templates won't work. +# + +import sys +import urllib +import httplib +import re +import HTMLParser + +BOUNDARY="-=--=foobar-=--=" + +def encode_multipart_formdata(fields, files): + L = [] + for (key, value) in fields: + L.append('--' + BOUNDARY) + L.append('Content-Disposition: form-data; name="%s"' % key) + L.append('') + L.append(value) + for (key, filename, value) in files: + L.append('--' + BOUNDARY) + L.append('Content-Disposition: form-data; name="%s"; filename="%s"' % (key, filename)) + L.append('Content-Type: text/html') + L.append('') + L.append(value) + L.append('--' + BOUNDARY + '--') + L.append('') + body = "\r\n".join(L) + return body + +if __name__=="__main__": + if len(sys.argv) != 2: + print "Usage: localhtmlvalidate.py " + sys.exit(1) + + contents = urllib.urlopen(sys.argv[1]).read() + + # Try to figure out where the actual contents start :) + firstline = contents.splitlines().index('
') + + # Generate a form body + body = encode_multipart_formdata([ + ('charset', 'utf-8'), + ('doctype', 'inline'), + ('group', '0'), + ('verbose', '1'), + ], + [('uploaded_file', 'test.html', contents)]) + + # Now submit it to the w3c validator + h = httplib.HTTP("validator.w3.org") + h.putrequest("POST", "/check") + h.putheader("content-type", "multipart/form-data; boundary=%s" % BOUNDARY) + h.putheader("content-length", str(len(body))) + h.endheaders() + h.send(body) + errcode, errmsg, headers = h.getreply() + rbody = h.getfile().read() + if headers['x-w3c-validator-status'] == 'Valid': + print "Page validates!" + sys.exit(0) + elif headers['x-w3c-validator-status'] == 'Invalid': + print "Invalid!" + print "Errors: %s" % headers['x-w3c-validator-errors'] + print "Warnings: %s" % headers['x-w3c-validator-warnings'] + hp = HTMLParser.HTMLParser() + for m in re.findall('
  • .*?
  • ', rbody, re.DOTALL): + r = re.search('Line (\d+).*(.*?)', m, re.DOTALL) + print "Line %s (should be around %s): %s" % (r.group(1), int(r.group(1)) - firstline, hp.unescape(r.group(2))) + + r2 = re.search('(.*?)(.*?)(.*?)', unicode(m, 'utf8'), re.DOTALL) + if r2: + s = u"%s%s%s" % r2.groups() + print "Source: %s" % hp.unescape(s).encode('utf-8') + print "" + else: + print "Unknown status: %s" % headers['x-w3c-validator-status'] + print headers + sys.exit(1) + +