mirror of
https://github.com/php/web-doc.git
synced 2025-08-13 14:40:31 +00:00
massive re-working of the checkent script
This commit is contained in:
@ -3,6 +3,6 @@
|
|||||||
. `dirname $0`/../../build-ops
|
. `dirname $0`/../../build-ops
|
||||||
|
|
||||||
cd ${SCRIPTSDIR}
|
cd ${SCRIPTSDIR}
|
||||||
${PHP} checkent.php phpdoc > ${DOCWEB}/www/checkent_php.php
|
${PHP} checkent.php phpdoc > /dev/null
|
||||||
${PHP} checkent.php peardoc > ${DOCWEB}/www/checkent_pear.php
|
${PHP} checkent.php peardoc > /dev/null
|
||||||
${PHP} checkent.php smarty > ${DOCWEB}/www/checkent_smarty.php
|
${PHP} checkent.php smarty > /dev/null
|
||||||
|
@ -1,43 +1,46 @@
|
|||||||
<?php
|
<?php
|
||||||
/**
|
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
|
||||||
* +----------------------------------------------------------------------+
|
+----------------------------------------------------------------------+
|
||||||
* | PHP Documentation Site Source Code |
|
| PHP Documentation Site Source Code |
|
||||||
* +----------------------------------------------------------------------+
|
+----------------------------------------------------------------------+
|
||||||
* | Copyright (c) 1997-2005 The PHP Group |
|
| Copyright (c) 1997-2005 The PHP Group |
|
||||||
* +----------------------------------------------------------------------+
|
+----------------------------------------------------------------------+
|
||||||
* | This source file is subject to version 3.0 of the PHP license, |
|
| This source file is subject to version 3.0 of the PHP license, |
|
||||||
* | that is bundled with this package in the file LICENSE, and is |
|
| that is bundled with this package in the file LICENSE, and is |
|
||||||
* | available at through the world-wide-web at |
|
| available at through the world-wide-web at |
|
||||||
* | http://www.php.net/license/3_0.txt. |
|
| http://www.php.net/license/3_0.txt. |
|
||||||
* | If you did not receive a copy of the PHP license and are unable to |
|
| If you did not receive a copy of the PHP license and are unable to |
|
||||||
* | obtain it through the world-wide-web, please send a note to |
|
| obtain it through the world-wide-web, please send a note to |
|
||||||
* | license@php.net so we can mail you a copy immediately. |
|
| license@php.net so we can mail you a copy immediately. |
|
||||||
* +----------------------------------------------------------------------+
|
+----------------------------------------------------------------------+
|
||||||
* | Authors: Georg Richter <georg@php.net> |
|
| Authors: Georg Richter <georg@php.net> |
|
||||||
* | Gabor Hojsty <goba@php.net> |
|
| Gabor Hojsty <goba@php.net> |
|
||||||
* | Docweb port: Nuno Lopes <nlopess@php.net> |
|
| Docweb port: Nuno Lopes <nlopess@php.net> |
|
||||||
* | Mehdi Achour <didou@php.net> |
|
| Mehdi Achour <didou@php.net> |
|
||||||
* +----------------------------------------------------------------------+
|
| Sean Coates <sean@php.net> |
|
||||||
* $Id$
|
+----------------------------------------------------------------------+
|
||||||
*/
|
$Id$
|
||||||
|
*/
|
||||||
|
|
||||||
set_time_limit(0);
|
set_time_limit(0);
|
||||||
$inCli = true;
|
$inCli = true;
|
||||||
include '../include/init.inc.php';
|
require_once '../include/init.inc.php';
|
||||||
|
require_once '../include/lib_url_entities.inc.php';
|
||||||
define('DOCWEB_CRAWLER_USER_AGENT', 'DocWeb Link Crawler (http://doc.php.net)');
|
|
||||||
|
|
||||||
switch (isset($_SERVER['argv'][1]) ? $_SERVER['argv'][1] : false) {
|
switch (isset($_SERVER['argv'][1]) ? $_SERVER['argv'][1] : false) {
|
||||||
case 'phpdoc':
|
case 'phpdoc':
|
||||||
$filename = CVS_DIR . '/phpdoc-all/entities/global.ent';
|
$filename = CVS_DIR . '/phpdoc-all/entities/global.ent';
|
||||||
|
$entType = 'php';
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'peardoc':
|
case 'peardoc':
|
||||||
$filename = CVS_DIR . '/peardoc/global.ent';
|
$filename = CVS_DIR . '/peardoc/global.ent';
|
||||||
|
$entType = 'pear';
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'smarty':
|
case 'smarty':
|
||||||
$filename = CVS_DIR . '/smarty/docs/entities/global.ent';
|
$filename = CVS_DIR . '/smarty/docs/entities/global.ent';
|
||||||
|
$entType = 'smarty';
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
@ -56,157 +59,38 @@ if (extension_loaded('openssl')) {
|
|||||||
$schemes[] = 'https';
|
$schemes[] = 'https';
|
||||||
}
|
}
|
||||||
|
|
||||||
// constants for errors
|
$dbFile = SQLITE_DIR . "checkent_{$entType}.sqlite";
|
||||||
define('UNKNOWN_HOST', 1);
|
if (is_file($dbFile) && !unlink($dbFile)) {
|
||||||
define('FTP_CONNECT', 2);
|
echo "Error removing old database.\n";
|
||||||
define('FTP_LOGIN', 3);
|
die();
|
||||||
define('FTP_NO_FILE', 4);
|
|
||||||
define('HTTP_CONNECT', 5);
|
|
||||||
define('HTTP_MOVED', 6);
|
|
||||||
define('HTTP_WRONG_HEADER', 7);
|
|
||||||
define('HTTP_INTERNAL_ERROR', 8);
|
|
||||||
define('HTTP_NOT_FOUND', 9);
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Handles relative HTTP URLs
|
|
||||||
*
|
|
||||||
* @param string $url URL to handle
|
|
||||||
* @param array $parsed result of parse_url()
|
|
||||||
* @return string fixed URL
|
|
||||||
*/
|
|
||||||
function fix_relative_url ($url, $parsed)
|
|
||||||
{
|
|
||||||
if ($url{0} == '/') {
|
|
||||||
return "{$parsed['scheme']}://{$parsed['host']}{$url}";
|
|
||||||
}
|
|
||||||
|
|
||||||
if (preg_match('@(?:f|ht)tps?://@S', $url)) {
|
|
||||||
return $url;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* try to be RFC 1808 compliant */
|
|
||||||
$path = $parsed['path'] . $url;
|
|
||||||
$old = '';
|
|
||||||
|
|
||||||
do {
|
|
||||||
$old = $path;
|
|
||||||
$path = preg_replace('@[^/:?]+/\.\./|\./@S', '', $path);
|
|
||||||
} while ($old != $path);
|
|
||||||
|
|
||||||
return "{$parsed['scheme']}://{$parsed['host']}{$path}";
|
|
||||||
}
|
}
|
||||||
|
if (!($sqlite = sqlite_open($dbFile, 0666))) {
|
||||||
/**
|
echo "Error creating database.\n";
|
||||||
* Checks a URL (actually fetches the URL and returns the status)
|
|
||||||
*
|
|
||||||
* @param int $num sequence number of URL
|
|
||||||
* @param string $entity_url URL to check
|
|
||||||
* @return array
|
|
||||||
*/
|
|
||||||
function check_url ($num, $entity_url)
|
|
||||||
{
|
|
||||||
static $old_host = '';
|
|
||||||
|
|
||||||
// Get the parts of the URL
|
|
||||||
$url = parse_url($entity_url);
|
|
||||||
$entity = $GLOBALS['entity_names'][$num];
|
|
||||||
|
|
||||||
// sleep if accessing the same host more than once in a row
|
|
||||||
if ($url['host'] == $old_host) {
|
|
||||||
sleep(5);
|
|
||||||
} else {
|
|
||||||
$old_host = $url['host'];
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try to find host
|
|
||||||
if (gethostbyname($url['host']) == $url['host']) {
|
|
||||||
return array(UNKNOWN_HOST, array($num));
|
|
||||||
}
|
|
||||||
|
|
||||||
switch($url['scheme']) {
|
|
||||||
|
|
||||||
case 'http':
|
|
||||||
case 'https':
|
|
||||||
if (isset($url['path'])) {
|
|
||||||
$url['path'] = $url['path'] . (isset($url['query']) ? '?' . $url['query'] : '');
|
|
||||||
} else {
|
|
||||||
$url['path'] = '/';
|
|
||||||
}
|
|
||||||
|
|
||||||
/* check if using secure http */
|
|
||||||
if ($url['scheme'] == 'https') {
|
|
||||||
$port = 443;
|
|
||||||
$scheme = 'ssl://';
|
|
||||||
} else {
|
|
||||||
$port = 80;
|
|
||||||
$scheme = '';
|
|
||||||
}
|
|
||||||
$port = isset($url['port']) ? $url['port'] : $port;
|
|
||||||
|
|
||||||
if (!$fp = @fsockopen($scheme . $url['host'], $port)) {
|
|
||||||
return array(HTTP_CONNECT, array($num));
|
|
||||||
|
|
||||||
} else {
|
|
||||||
fputs($fp, "HEAD {$url['path']} HTTP/1.0\r\nHost: {$url['host']}\r\nUser-agent: ". DOCWEB_CRAWLER_USER_AGENT ."\r\nConnection: close\r\n\r\n");
|
|
||||||
|
|
||||||
$str = '';
|
|
||||||
while (!feof($fp)) {
|
|
||||||
$str .= @fgets($fp, 2048);
|
|
||||||
}
|
|
||||||
fclose ($fp);
|
|
||||||
|
|
||||||
if (preg_match('@HTTP/1.\d (\d+)(?: .+)?@S', $str, $match)) {
|
|
||||||
if ($match[1] != '200') {
|
|
||||||
switch ($match[1])
|
|
||||||
{
|
|
||||||
case '500' :
|
|
||||||
case '501' :
|
|
||||||
return array(HTTP_INTERNAL_ERROR, array($num));
|
|
||||||
break;
|
|
||||||
|
|
||||||
case '404' :
|
|
||||||
return array(HTTP_NOT_FOUND, array($num));
|
|
||||||
break;
|
|
||||||
|
|
||||||
case '301' :
|
|
||||||
case '302' :
|
|
||||||
if (preg_match('/Location: (.+)/', $str, $redir)) {
|
|
||||||
return array(HTTP_MOVED, array($num, fix_relative_url($redir[1], $url)));
|
|
||||||
} else {
|
|
||||||
return array(HTTP_WRONG_HEADER, array($num, $str));
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
default :
|
|
||||||
return array(HTTP_WRONG_HEADER, array($num, $str));
|
|
||||||
}
|
|
||||||
} // error != 200
|
|
||||||
} else {
|
|
||||||
return array(HTTP_WRONG_HEADER, array($num, $str));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 'ftp':
|
|
||||||
if ($ftp = @ftp_connect($url['host'])) {
|
|
||||||
|
|
||||||
if (@ftp_login($ftp, 'anonymous', 'IEUser@')) {
|
|
||||||
$flist = ftp_nlist($ftp, $url['path']);
|
|
||||||
if (!count($flist)) {
|
|
||||||
return array(FTP_NO_FILE, array($num));
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
return array(FTP_LOGIN, array($num));
|
|
||||||
}
|
|
||||||
@ftp_quit($ftp);
|
|
||||||
} else {
|
|
||||||
return array(FTP_CONNECT, array($num));
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
$sqlCreateMeta = "
|
||||||
|
CREATE
|
||||||
|
TABLE
|
||||||
|
meta_info
|
||||||
|
(
|
||||||
|
start_time DATETIME,
|
||||||
|
end_time DATETIME,
|
||||||
|
schemes VARCHAR(100)
|
||||||
|
);
|
||||||
|
";
|
||||||
|
$sqlCreateChecked = "
|
||||||
|
CREATE
|
||||||
|
TABLE
|
||||||
|
checked_urls
|
||||||
|
(
|
||||||
|
url_num INT,
|
||||||
|
entity VARCHAR(255),
|
||||||
|
url VARCHAR(255),
|
||||||
|
check_result INT,
|
||||||
|
return_val VARCHAR(255)
|
||||||
|
);
|
||||||
|
";
|
||||||
|
sqlite_query($sqlite, $sqlCreateMeta);
|
||||||
|
sqlite_query($sqlite, $sqlCreateChecked);
|
||||||
|
|
||||||
if (!$file = @file_get_contents($filename)) {
|
if (!$file = @file_get_contents($filename)) {
|
||||||
// output the html
|
// output the html
|
||||||
@ -230,14 +114,48 @@ $entity_urls = $entities_found[3];
|
|||||||
$errors = array();
|
$errors = array();
|
||||||
$numb = 0;
|
$numb = 0;
|
||||||
|
|
||||||
|
$sql = "
|
||||||
|
INSERT
|
||||||
|
INTO
|
||||||
|
meta_info (start_time, end_time, schemes)
|
||||||
|
VALUES
|
||||||
|
(". time() .", NULL, '". sqlite_escape_string(implode(',', $schemes)) ."')
|
||||||
|
";
|
||||||
|
sqlite_query($sqlite, $sql);
|
||||||
|
|
||||||
|
echo "Found: ". count($entity_urls) ."URLs\n";
|
||||||
|
|
||||||
// Walk through entities found
|
// Walk through entities found
|
||||||
foreach ($entity_urls as $num => $entity_url) {
|
foreach ($entity_urls as $num => $entity_url) {
|
||||||
|
|
||||||
++$numb;
|
++$numb;
|
||||||
|
echo "Checking: $entity_url\n";
|
||||||
$err = check_url($num, $entity_url);
|
$err = check_url($num, $entity_url);
|
||||||
$errors[$err[0]][] = $err[1];
|
$errors[$err[0]][] = $err[1];
|
||||||
|
|
||||||
|
$return_val = isset($err[1][1]) ? $err[1][1] : '';
|
||||||
|
$sql = "
|
||||||
|
INSERT
|
||||||
|
INTO
|
||||||
|
checked_urls (url_num, entity, url, check_result, return_val)
|
||||||
|
VALUES
|
||||||
|
(
|
||||||
|
$num,
|
||||||
|
'". sqlite_escape_string($entity_names[$num]) ."',
|
||||||
|
'". sqlite_escape_string($entity_url) ."',
|
||||||
|
{$err[0]},
|
||||||
|
'". sqlite_escape_string($return_val) ."'
|
||||||
|
)
|
||||||
|
";
|
||||||
|
sqlite_query($sqlite, $sql);
|
||||||
}
|
}
|
||||||
|
$sql = "
|
||||||
|
UPDATE
|
||||||
|
meta_info
|
||||||
|
SET
|
||||||
|
end_time = ". time() ."
|
||||||
|
";
|
||||||
|
sqlite_query($sqlite, $sql);
|
||||||
|
|
||||||
// output the html
|
// output the html
|
||||||
echo "<?php include_once '../include/init.inc.php';
|
echo "<?php include_once '../include/init.inc.php';
|
||||||
|
@ -1,13 +1,73 @@
|
|||||||
<?php
|
<?php
|
||||||
|
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
|
||||||
|
+----------------------------------------------------------------------+
|
||||||
|
| PHP Documentation Site Source Code |
|
||||||
|
+----------------------------------------------------------------------+
|
||||||
|
| Copyright (c) 1997-2005 The PHP Group |
|
||||||
|
+----------------------------------------------------------------------+
|
||||||
|
| This source file is subject to version 3.0 of the PHP license, |
|
||||||
|
| that is bundled with this package in the file LICENSE, and is |
|
||||||
|
| available at through the world-wide-web at |
|
||||||
|
| http://www.php.net/license/3_0.txt. |
|
||||||
|
| If you did not receive a copy of the PHP license and are unable to |
|
||||||
|
| obtain it through the world-wide-web, please send a note to |
|
||||||
|
| license@php.net so we can mail you a copy immediately. |
|
||||||
|
+----------------------------------------------------------------------+
|
||||||
|
| Authors: Nuno Lopes <nlopess@php.net> |
|
||||||
|
| Mehdi Achour <didou@php.net> |
|
||||||
|
| Sean Coates <sean@php.net> |
|
||||||
|
+----------------------------------------------------------------------+
|
||||||
|
$Id$
|
||||||
|
*/
|
||||||
|
|
||||||
include '../include/init.inc.php';
|
require_once '../include/init.inc.php';
|
||||||
|
require_once '../include/lib_url_entities.inc.php';
|
||||||
|
|
||||||
if(is_file('checkent_' . SITE . '.php')) {
|
$dbFile = SQLITE_DIR . "checkent_" . SITE . ".sqlite";
|
||||||
include 'checkent_' . SITE . '.php';
|
if (!($sqlite = @sqlite_open($dbFile))) {
|
||||||
} else {
|
|
||||||
echo site_header('docweb.common.header.checkent');
|
echo site_header('docweb.common.header.checkent');
|
||||||
echo '<p>checkent not found!</p>';
|
echo '<p>checkent not found!</p>'; // @@@ template this
|
||||||
echo site_footer();
|
echo site_footer();
|
||||||
|
exit();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$sql = "
|
||||||
|
SELECT
|
||||||
|
start_time, end_time, schemes
|
||||||
|
FROM
|
||||||
|
meta_info
|
||||||
|
";
|
||||||
|
list($startTime, $endTime, $schemes) = sqlite_fetch_array(sqlite_query($sqlite, $sql));
|
||||||
|
|
||||||
|
$entData = array();
|
||||||
|
$sql = "
|
||||||
|
SELECT
|
||||||
|
url_num, entity, url, check_result, return_val
|
||||||
|
FROM
|
||||||
|
checked_urls
|
||||||
|
WHERE
|
||||||
|
check_result > 0
|
||||||
|
ORDER BY
|
||||||
|
check_result, entity
|
||||||
|
";
|
||||||
|
$urlsQ = sqlite_query($sqlite, $sql);
|
||||||
|
while ($row = sqlite_fetch_array($urlsQ)) {
|
||||||
|
$entData[$row['check_result']][] = $row;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
echo site_header('docweb.common.header.checkent');
|
||||||
|
echo DocWeb_Template::get(
|
||||||
|
'checkent.tpl.php',
|
||||||
|
array(
|
||||||
|
'startTime' => $startTime,
|
||||||
|
'isComplete' => $endTime ? TRUE : FALSE,
|
||||||
|
'schemes' => $schemes,
|
||||||
|
'entData' => $entData,
|
||||||
|
'resultLkp' => $urlResultLookup,
|
||||||
|
'extraCol' => $urlResultExtraCol,
|
||||||
|
)
|
||||||
|
);
|
||||||
|
echo site_footer();
|
||||||
|
|
||||||
?>
|
?>
|
||||||
|
Reference in New Issue
Block a user