mirror of
https://github.com/postgres/pgweb.git
synced 2025-07-25 16:02:27 +00:00
Get the information for the ftp browser from a file that is generated
on the ftp server, instead of crawling the directories directly. This removes the requirement to sync almost 10Gb worth of ftp site onto the web server... The pickle file for this is currently around 1Mb, so it's not a huge burden on the server. If it grows larger in the future, we may want to re-think this and split it up, or put it in a database format or something like that.
This commit is contained in:
@ -1,5 +1,5 @@
|
||||
from django.shortcuts import render_to_response, get_object_or_404
|
||||
from django.http import HttpResponse, Http404, HttpResponseRedirect
|
||||
from django.http import HttpResponse, Http404, HttpResponseRedirect, HttpResponseServerError
|
||||
from django.template import TemplateDoesNotExist, loader, Context
|
||||
from django.contrib.auth.decorators import login_required
|
||||
from django.db import connection, transaction
|
||||
@ -8,6 +8,7 @@ from django.conf import settings
|
||||
import os
|
||||
from datetime import datetime
|
||||
import urlparse
|
||||
import cPickle as pickle
|
||||
|
||||
from pgweb.util.decorators import ssl_required, nocache
|
||||
from pgweb.util.contexts import NavContext
|
||||
@ -19,46 +20,6 @@ from forms import *
|
||||
#######
|
||||
# FTP browser
|
||||
#######
|
||||
def _getfiledata(root, paths):
|
||||
for path in paths:
|
||||
fn = "%s/%s" % (root,path)
|
||||
if not os.path.isfile(fn):
|
||||
continue
|
||||
stat = os.stat(fn)
|
||||
yield {
|
||||
'name':path,
|
||||
'mtime': datetime.fromtimestamp(stat.st_mtime),
|
||||
'size': stat.st_size,
|
||||
}
|
||||
|
||||
def _getdirectorydata(root, paths):
|
||||
for path in paths:
|
||||
fn = "%s/%s" % (root,path)
|
||||
if not os.path.isdir(fn):
|
||||
continue
|
||||
if os.path.islink(fn):
|
||||
# This is a link, so change the url to point directly
|
||||
# to the link target. We'll just assume the link
|
||||
# is safe. Oh, and links must be relative
|
||||
yield {
|
||||
'link': path,
|
||||
'url': os.readlink(fn),
|
||||
}
|
||||
else:
|
||||
yield {
|
||||
'link': path,
|
||||
'url': path,
|
||||
}
|
||||
|
||||
def _getfile(root, filename):
|
||||
fn = "%s/%s" % (root,filename)
|
||||
if os.path.isfile(fn):
|
||||
f = open(fn)
|
||||
r = f.read()
|
||||
f.close()
|
||||
return r
|
||||
return None
|
||||
|
||||
def ftpbrowser(request, subpath):
|
||||
if subpath:
|
||||
# An actual path has been selected. Fancy!
|
||||
@ -67,20 +28,35 @@ def ftpbrowser(request, subpath):
|
||||
# Just claim it doesn't exist if the user tries to do this
|
||||
# type of bad thing
|
||||
raise Http404
|
||||
fspath = os.path.join(settings.FTP_ROOT, subpath)
|
||||
subpath = subpath.strip('/')
|
||||
else:
|
||||
fspath = settings.FTP_ROOT
|
||||
subpath=""
|
||||
|
||||
if not os.path.isdir(fspath):
|
||||
# Pickle up the list of things we need
|
||||
try:
|
||||
f = open(settings.FTP_PICKLE, "rb")
|
||||
allnodes = pickle.load(f)
|
||||
f.close()
|
||||
except Exception, e:
|
||||
return HttpResponseServerError("Failed to load ftp site information: %s" % e)
|
||||
|
||||
if not allnodes.has_key(subpath):
|
||||
raise Http404
|
||||
|
||||
everything = [n for n in os.listdir(fspath) if not n.startswith('.')]
|
||||
node = allnodes[subpath]
|
||||
del allnodes
|
||||
|
||||
directories = list(_getdirectorydata(fspath, everything))
|
||||
# Add all directories
|
||||
directories = [{'link': k, 'url': k} for k,v in node.items() if v['t'] == 'd']
|
||||
# Add all symlinks (only directories supported)
|
||||
directories.extend([{'link': k, 'url': v['d']} for k,v in node.items() if v['t'] == 'l'])
|
||||
|
||||
# Add a link to the parent directory
|
||||
if subpath:
|
||||
directories.append({'link':'[Parent Directory]', 'url':'..'})
|
||||
files = list(_getfiledata(fspath, everything))
|
||||
|
||||
# Fetch files
|
||||
files = [{'name': k, 'mtime': v['t'], 'size': v['s']} for k,v in node.items() if v['t'] == 'f']
|
||||
|
||||
breadcrumbs = []
|
||||
if subpath:
|
||||
@ -95,14 +71,21 @@ def ftpbrowser(request, subpath):
|
||||
breadroot = pathpiece
|
||||
breadcrumbs.append({'name': pathpiece, 'path': breadroot});
|
||||
|
||||
# Check if there are any "content files" we should render directly on the webpage
|
||||
file_readme = node.has_key('README') and node['README']['c'] or None;
|
||||
file_message = node.has_key('.message') and node['.message']['c'] or None;
|
||||
file_maintainer = node.has_key('CURRENT_MAINTAINER') and node['CURRENT_MAINTAINER']['c'] or None;
|
||||
|
||||
del node
|
||||
|
||||
return render_to_response('downloads/ftpbrowser.html', {
|
||||
'basepath': subpath.rstrip('/'),
|
||||
'directories': sorted(directories),
|
||||
'files': sorted(files),
|
||||
'breadcrumbs': breadcrumbs,
|
||||
'readme': _getfile(fspath, 'README'),
|
||||
'messagesfile': _getfile(fspath, '.messages'),
|
||||
'maintainer': _getfile(fspath, 'CURRENT_MAINTAINER'),
|
||||
'readme': file_readme,
|
||||
'messagefile': file_message,
|
||||
'maintainer': file_maintainer,
|
||||
}, NavContext(request, 'download'))
|
||||
|
||||
def _get_numeric_ip(request):
|
||||
|
@ -111,6 +111,8 @@ INSTALLED_APPS = [
|
||||
SITE_ROOT="http://www.postgresql.org"
|
||||
MASTERSITE_ROOT="http://wwwmaster.postgresql.org"
|
||||
|
||||
FTP_PICKLE="/usr/local/pgweb/ftpsite.pickle"
|
||||
|
||||
# Load local settings overrides
|
||||
from settings_local import *
|
||||
|
||||
|
70
tools/ftp/spider_ftp.py
Executable file
70
tools/ftp/spider_ftp.py
Executable file
@ -0,0 +1,70 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
#
|
||||
# spider_ftp.py - spider the ftp site and generate an output file with all
|
||||
# the metadata we require, that can be transferred over to
|
||||
# the wwwmaster server.
|
||||
#
|
||||
|
||||
import sys
|
||||
import os
|
||||
from datetime import datetime
|
||||
import cPickle as pickle
|
||||
#from pprint import pprint
|
||||
|
||||
allnodes = {}
|
||||
|
||||
def read_file(fn):
    """Return the entire contents of the file *fn*."""
    f = open(fn, "r")
    # try/finally so the handle is released even if read() raises
    try:
        return f.read()
    finally:
        f.close()
||||
|
||||
def parse_directory(dirname, rootlen):
    """Recursively scan *dirname* and record one metadata node per
    directory in the global *allnodes* dict, keyed by the path relative
    to the ftp root (*rootlen* is the length of the root prefix to strip).

    Each node maps an entry name to a dict with 't' giving the entry
    type: 'd' (directory), 'l' (symlink, with 'd' = link target) or
    'f' (file, with 's' = size, 'd' = mtime, and 'c' = file contents for
    the few special files rendered inline on the website).
    """
    node = {}
    for entry in os.listdir(dirname):
        # Skip hidden entries, except the ftp-style .message files
        if entry.startswith(".") and entry != ".message":
            continue
        # Internal sync marker, not part of the site contents
        if entry == "sync_timestamp":
            continue

        full = os.path.join(dirname, entry)
        if not os.path.isdir(full):
            # Regular file (or symlink to one): record size and mtime
            st = os.stat(full)
            node[entry] = {
                't': 'f',
                's': st.st_size,
                'd': datetime.fromtimestamp(st.st_mtime),
            }
            # These files are rendered directly on the web page, so
            # ship their contents along inside the pickle
            if entry in ("README", "CURRENT_MAINTAINER", ".message"):
                node[entry]['c'] = read_file(full)
        elif os.path.islink(full):
            # Symbolic link to a directory: store only the link target
            node[entry] = {
                't': 'l',
                'd': os.readlink(full),
            }
        else:
            # Real subdirectory: recurse into it, then record it
            parse_directory(full, rootlen)
            node[entry] = {
                't': 'd',
            }

    allnodes[dirname[rootlen:].strip("/")] = node
|
||||
|
||||
def Usage():
    """Print command line usage information and exit with status 1.

    The parenthesized single-argument print form produces identical
    output under both Python 2 (print statement) and Python 3 (print
    function), unlike the bare Python-2-only statement it replaces.
    """
    print("Usage: spider_ftp.py <ftp_root> <pickle_file>")
    sys.exit(1)
||||
|
||||
# Entry point: spider_ftp.py <ftp_root> <pickle_file>
if len(sys.argv) != 3:
    Usage()

# Build the global allnodes dict from the ftp tree
parse_directory(sys.argv[1], len(sys.argv[1]))

# Write the pickle to a temporary file and rename it into place, so a
# concurrent reader never sees a partially written file.
tmpname = sys.argv[2] + ".tmp"
f = open(tmpname, "wb")
# try/finally so the handle is closed even if pickling fails
try:
    pickle.dump(allnodes, f)
finally:
    f.close()
os.rename(tmpname, sys.argv[2])

#pprint(allnodes)
|
Reference in New Issue
Block a user