Add a lot more code documentation to the documentation loader.
@@ -14,14 +14,19 @@ from configparser import ConfigParser

 import psycopg2

+# a counter that keeps track of the total number of pages (HTML, SVG) that are loaded
+# into the database
 pagecount = 0

+# if set to "True", mutes any output from the script; controlled by a command-line option
 quiet = False

+# regular expression used to find and extract the title of a given piece of
+# documentation, for further use in the application
 re_titlematch = re.compile('<title\s*>([^<]+)</title\s*>', re.IGNORECASE)


 # Load a single page
 def load_doc_file(filename, f, c):
+    """Prepares and loads an HTML file for import into the documentation database."""
     tidyopts = dict(
         drop_proprietary_attributes=1,
         alt_text='',
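
As a side note for reviewers, a minimal standalone sketch (not part of the commit) of how the title regex behaves on a sample page; the `sample` string here is hypothetical:

    import re

    re_titlematch = re.compile(r'<title\s*>([^<]+)</title\s*>', re.IGNORECASE)

    # hypothetical sample page; the pattern is case-insensitive, so <TITLE> matches too
    sample = '<html><head><TITLE>PostgreSQL: Documentation</TITLE></head></html>'
    tm = re_titlematch.search(sample)
    print(tm.group(1) if tm else "")  # -> PostgreSQL: Documentation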
@@ -51,18 +56,26 @@ def load_doc_file(filename, f, c):
     if float(ver) < 11 and float(ver) > 0:
         tidyopts['indent'] = 'no'

+    # convert the raw contents to the appropriate encoding for the content that will
+    # be stored in the database
     contents = str(rawcontents, encoding)

+    # extract the title of the page, which is rendered in a few places in the documentation
     tm = re_titlematch.search(contents)
     if tm:
         title = tm.group(1)
     else:
         title = ""

+    # if not in quiet mode, print the (filename, title) pair for the doc page being processed
     if not quiet:
         print("--- file: %s (%s) ---" % (filename, title))

+    # run libtidy on the content
     (html, errors) = tidylib.tidy_document(contents, options=tidyopts)

+    # add all of the information to the CSV that will be used to load the updated
+    # documentation pages into the database
     c.writerow([filename, ver, title, html])

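
For reference, a self-contained sketch of the libtidy call, assuming the pytidylib package and the underlying HTML Tidy shared library are installed:

    from tidylib import tidy_document

    # clean up a deliberately malformed fragment; tidy returns the fixed
    # document plus a string of any warnings/errors it encountered
    html, errors = tidy_document('<p>an unclosed paragraph',
                                 options={'drop_proprietary_attributes': 1, 'alt_text': ''})
    print(html)
    print(errors)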
@@ -88,13 +101,16 @@ quiet = options.quiet
 ver = sys.argv[1]
 tarfilename = sys.argv[2]

+# load the configuration that is used to connect to the database
 config = ConfigParser()
 config.read(os.path.join(os.path.abspath(os.path.dirname(__file__)), 'docload.ini'))

+# determine if the referenced tarball exists; if not, exit
 if not os.path.isfile(tarfilename):
     print("File %s not found" % tarfilename)
     sys.exit(1)

+# open up the tarball as well as a connection to the database
 tf = tarfile.open(tarfilename)

 connection = psycopg2.connect(config.get('db', 'dsn'))
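
The expected layout of docload.ini can be inferred from the config.get('db', 'dsn') call; a minimal sketch, where the DSN value itself is hypothetical:

    from configparser import ConfigParser

    # a minimal docload.ini, inferred from the config.get('db', 'dsn') call above
    config = ConfigParser()
    config.read_string("[db]\ndsn = dbname=pgweb user=pgweb host=localhost\n")
    print(config.get('db', 'dsn'))  # -> dbname=pgweb user=pgweb host=localhost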
@@ -109,11 +125,18 @@ if len(r) != 1:

 iscurrent = r[0][0]

+# begin creating a CSV that will be used to import the documentation into the database
 s = io.StringIO()
 c = csv.writer(s, delimiter=';', quotechar='"', quoting=csv.QUOTE_NONNUMERIC)

+# this regular expression is for "newer" versions of PostgreSQL, which ship with all
+# of the HTML documentation built out
 re_htmlfile = re.compile('[^/]*/doc/src/sgml/html/.*')
+# this regular expression is for "older" versions of PostgreSQL, which keep the
+# HTML documentation in a tarball within the tarball
 re_tarfile = re.compile('[^/]*/doc/postgres.tar.gz$')

+# go through each file in the tarball to determine whether it is documentation
+# that should be imported
 for member in tf:
     # newer versions of PostgreSQL will go down this path to find docfiles
     if re_htmlfile.match(member.name):
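
A standalone sketch of how the two patterns classify tarball members; the tarball filename here is hypothetical:

    import re
    import tarfile

    re_htmlfile = re.compile('[^/]*/doc/src/sgml/html/.*')
    re_tarfile = re.compile('[^/]*/doc/postgres.tar.gz$')

    # walk every member of the distribution tarball and report which ones the
    # loader would treat as documentation
    with tarfile.open('postgresql-16.0.tar.gz') as tf:  # hypothetical filename
        for member in tf:
            if re_htmlfile.match(member.name):
                print('html doc page:', member.name)
            elif re_tarfile.match(member.name):
                print('nested doc tarball:', member.name)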
@@ -151,12 +174,18 @@ if not quiet:

 s.seek(0)

+# Start loading the documentation into the database.
+# First, load the newly discovered documentation into a temporary table, where we
+# can validate that we loaded exactly the number of docs that we thought we would,
+# based on the page counter.
 curs.execute("CREATE TEMP TABLE docsload (file varchar(64) NOT NULL, version numeric(3,1) NOT NULL, title varchar(256) NOT NULL, content text)")
 curs.copy_expert("COPY docsload FROM STDIN WITH CSV DELIMITER AS ';'", s)
 if curs.rowcount != pagecount:
     print("Loaded invalid number of rows! {} rows for {} pages!".format(curs.rowcount, pagecount))
     sys.exit(1)

+# If the previous step succeeded, delete any documentation for the specified version
+# that is no longer present, and insert into / update the docs table with the content
+# that was loaded into the temporary table
 curs.execute("DELETE FROM docs WHERE version=%(version)s AND NOT EXISTS (SELECT 1 FROM docsload WHERE docsload.file=docs.file)", {
     'version': ver,
 })
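
A minimal sketch of the CSV-buffer-to-COPY round trip used above: write rows into an in-memory buffer, rewind it, and stream it into the temp table with psycopg2's copy_expert, after which rowcount reflects the number of rows copied. The DSN is hypothetical:

    import csv
    import io
    import psycopg2

    s = io.StringIO()
    c = csv.writer(s, delimiter=';', quotechar='"', quoting=csv.QUOTE_NONNUMERIC)
    c.writerow(['index.html', 16.0, 'Index', '<html>...</html>'])
    s.seek(0)  # rewind so COPY reads from the start

    conn = psycopg2.connect('dbname=pgweb')  # hypothetical DSN
    curs = conn.cursor()
    curs.execute("CREATE TEMP TABLE docsload (file varchar(64), version numeric(3,1), title varchar(256), content text)")
    curs.copy_expert("COPY docsload FROM STDIN WITH CSV DELIMITER AS ';'", s)
    print(curs.rowcount)  # -> 1, one row copied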
@@ -187,6 +216,7 @@ curs.execute("SELECT varnish_purge('^/docs/' || %(v)s || '/')", {'v': ver})
 if iscurrent:
     curs.execute("SELECT varnish_purge('^/docs/current/')")

+# ensure the changes are committed, and close the connection
 connection.commit()
 connection.close()