Files
postgres-web/tools/localhtmlvalidate/localhtmlvalidate.py
2019-01-26 16:19:26 +01:00

99 lines
3.3 KiB
Python
Executable File

#!/usr/bin/env python3
#
# localhtmlvalidate.py - validate local HTML for pgweb
#
# This is a small tool that you run to validate the HTML output of your
# localhost pgweb installation against the W3C validator. Give it the
# localhost:8000 URL (or other, depending on what port you're running the
# local server on), and it will give you a list of possible issues with
# the page.
#
# In theory it can be used just fine for non-pgweb pages as well, but
# for obvious reasons the functionality to show the source line number
# based on the pgweb templates won't work.
#
import re
import sys
import urllib

try:
    # Python 3 module names (the interpreter named in the shebang).
    import html
    import http.client
    import urllib.request
except ImportError:
    # Python 2 spellings of the same functionality.
    import httplib
    import HTMLParser
# Delimiter placed between the parts of the multipart/form-data body.
BOUNDARY = "-=--=foobar-=--="


def encode_multipart_formdata(fields, files):
    """Build a multipart/form-data request body as a single string.

    fields is an iterable of (name, value) pairs that become plain form
    fields. files is an iterable of (name, filename, value) triples that
    become file parts; every file part is labelled ``text/html``.

    Values may be ``str`` or ``bytes``. Bytes (for example what
    ``urlopen().read()`` returns on Python 3) are decoded as UTF-8 so they
    can be joined with the textual part headers instead of raising
    ``TypeError``.

    Returns the body with parts separated by BOUNDARY and lines terminated
    by CRLF. Raises UnicodeDecodeError if a bytes value is not valid UTF-8.
    """
    def _as_text(value):
        # str.join() cannot mix str and bytes, so normalise to text.
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    delimiter = '--' + BOUNDARY
    lines = []
    for (key, value) in fields:
        lines.append(delimiter)
        lines.append('Content-Disposition: form-data; name="%s"' % key)
        # Empty line separates the part headers from the part content.
        lines.append('')
        lines.append(_as_text(value))
    for (key, filename, value) in files:
        lines.append(delimiter)
        lines.append('Content-Disposition: form-data; name="%s"; filename="%s"' % (key, filename))
        lines.append('Content-Type: text/html')
        lines.append('')
        lines.append(_as_text(value))
    # Closing delimiter, followed by an empty entry so the body ends in CRLF.
    lines.append(delimiter + '--')
    lines.append('')
    return "\r\n".join(lines)
if __name__ == "__main__":
    # Script entry point: download the page named on the command line, upload
    # it to the W3C validator as a multipart form, and print the verdict.
    # Exits 0 only when the validator reports the page as valid.
    if len(sys.argv) != 2:
        print("Usage: localhtmlvalidate.py <local url>")
        sys.exit(1)

    # urlopen() hands back bytes on Python 3; decode once so the line search
    # and the form encoder below both operate on text.
    with urllib.request.urlopen(sys.argv[1]) as page:
        contents = page.read().decode('utf-8')

    # Try to figure out where the actual contents start :)
    # The index of the wrapper line is subtracted from the validator's line
    # numbers further down; without the marker no offset is applied.
    try:
        firstline = contents.splitlines().index('<div id="pgContentWrap">')
    except ValueError:
        firstline = 0

    # Generate a form body
    body = encode_multipart_formdata(
        [
            ('charset', 'utf-8'),
            ('doctype', 'inline'),
            ('group', '0'),
            ('verbose', '1'),
        ],
        [('uploaded_file', 'test.html', contents)]
    )
    # Encode before measuring: Content-Length counts bytes, not characters.
    payload = body.encode('utf-8')

    # Now submit it to the w3c validator
    # NOTE(review): plain HTTP is kept from the original script - confirm
    # whether the validator still accepts POSTs without HTTPS.
    conn = http.client.HTTPConnection("validator.w3.org")
    try:
        conn.putrequest("POST", "/check")
        # putheader() takes the header name and its value as separate arguments.
        conn.putheader("User-Agent", "localcheck-tester/0.0")
        conn.putheader("content-type", "multipart/form-data; boundary=%s" % BOUNDARY)
        conn.putheader("content-length", str(len(payload)))
        conn.endheaders()
        conn.send(payload)
        resp = conn.getresponse()
        headers = resp.headers
        rbody = resp.read().decode('utf-8')
    finally:
        conn.close()

    # .get() yields None for a missing header, which then falls through to
    # the "Unknown status" branch instead of raising.
    status = headers.get('x-w3c-validator-status')
    if status == 'Valid':
        print("Page validates!")
        sys.exit(0)

    if status == 'Invalid':
        print("Invalid!")
        print("Errors: %s" % headers.get('x-w3c-validator-errors'))
        print("Warnings: %s" % headers.get('x-w3c-validator-warnings'))
        # Each error is scraped out of its <li class="msg_err"> element in
        # the validator's HTML report.
        for m in re.findall(r'<li class="msg_err">.*?</li>', rbody, re.DOTALL):
            r = re.search(r'<em>Line (\d+).*<span class="msg">(.*?)</span>', m, re.DOTALL)
            if r is None:
                # Entry does not have the expected layout; report and move on
                # rather than crash on r.group().
                print("Could not parse validator message, skipping")
                print("")
                continue
            print("Line %s (should be around %s): %s" % (r.group(1), int(r.group(1)) - firstline, html.unescape(r.group(2))))
            r2 = re.search(r'<code class="input">(.*?)<strong title=".*?">(.*?)</strong>(.*?)</code>', m, re.DOTALL)
            if r2:
                # Stitch the text before, inside and after the <strong>
                # element back into one source excerpt.
                s = "%s%s%s" % r2.groups()
                print("Source: %s" % html.unescape(s))
            # Blank line between reported messages.
            print("")
    else:
        print("Unknown status: %s" % status)
        print(headers)

    # Anything other than a clean validation counts as failure.
    sys.exit(1)