mirror of
https://github.com/postgres/pgweb.git
synced 2025-08-10 00:42:06 +00:00
Update docsload to use tidylib instead of tidy
The old library is not supported on py3. This is the same migration we've done for other projects like pgarchives.
This commit is contained in:
@ -6,7 +6,7 @@ import sys
|
||||
import os
|
||||
import tarfile
|
||||
import re
|
||||
import tidy
|
||||
import tidylib
|
||||
from optparse import OptionParser
|
||||
from configparser import ConfigParser
|
||||
|
||||
@ -34,7 +34,8 @@ def load_doc_file(filename, f):
|
||||
# Postgres 10 started using xml toolchain and now produces documentation in utf8. So we need
|
||||
# to figure out which version it is.
|
||||
rawcontents = f.read()
|
||||
if rawcontents.startswith('<?xml version="1.0" encoding="UTF-8"'):
|
||||
rawfirst = rawcontents[:50].decode('utf8', errors='ignore')
|
||||
if rawfirst.startswith('<?xml version="1.0" encoding="UTF-8"'):
|
||||
# Version 10, use utf8
|
||||
encoding = 'utf-8'
|
||||
# XML builds also don't need clean=1, and that one adds some interesting CSS properties
|
||||
@ -58,12 +59,13 @@ def load_doc_file(filename, f):
|
||||
if not quiet:
|
||||
print("--- file: %s (%s) ---" % (filename, title))
|
||||
|
||||
s = tidy.parseString(contents.encode('utf-8'), **tidyopts)
|
||||
(html, errors) = tidylib.tidy_document(contents, options=tidyopts)
|
||||
|
||||
curs.execute("INSERT INTO docs (file, version, title, content) VALUES (%(f)s, %(v)s, %(t)s, %(c)s)", {
|
||||
'f': filename,
|
||||
'v': ver,
|
||||
't': title,
|
||||
'c': str(s),
|
||||
'c': html,
|
||||
})
|
||||
global pagecount
|
||||
pagecount += 1
|
||||
|
Reference in New Issue
Block a user