mirror of
https://gitlab.com/gnuwget/wget2.git
synced 2025-08-20 16:24:12 +00:00
212 lines
5.3 KiB
C
212 lines
5.3 KiB
C
/*
|
|
* Copyright(c) 2012 Tim Ruehsen
|
|
* Copyright(c) 2015-2016 Free Software Foundation, Inc.
|
|
*
|
|
* This file is part of Wget.
|
|
*
|
|
* Wget is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* Wget is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with Wget. If not, see <https://www.gnu.org/licenses/>.
|
|
*
|
|
*
|
|
* test routines
|
|
*
|
|
* Changelog
|
|
* 16.08.2013 Tim Ruehsen created
|
|
*
|
|
*/
|
|
|
|
#if HAVE_CONFIG_H
|
|
#include <config.h>
|
|
#endif
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <c-ctype.h>
|
|
#include <dirent.h>
|
|
#include <fcntl.h>
|
|
#include <sys/stat.h>
|
|
#ifdef HAVE_MMAP
|
|
#include <sys/mman.h>
|
|
#endif
|
|
|
|
#include <wget.h>
|
|
#include "../libwget/private.h"
|
|
#include "../src/wget_options.h"
|
|
|
|
/* Number of attribute position checks that succeeded so far. */
static int ok;

/* Number of attribute position checks that failed so far. */
static int failed;
|
|
|
|
/*
 * In-memory HTML documents fed to the HTML parser by test_parse_buffer().
 * Each array element is one complete document; adjacent string literals
 * are concatenated by the compiler into a single string.
 */
static const char *test_data[] = {
	"<!doctype html>\n"
	"<!--[if lt IE 7 ]> <html class=\"no-js ie6\" lang=\"en\"> <![endif]-->\n"
	"<!--[if (gte IE 9)|!(IE)]><!--> <html class=\"no-js\"> <!--<![endif]-->\n"
	"<head>\n"
	" <meta charset=\"utf-8\">\n"
	" <script>window.jQuery || document.write(\"<script src='/common/inc/js/jquery-1.5.1.min.js'><\\/script>\")</script>\n"
	" <script src=\"/common/inc/js/core.js?v=1\"></script>\n"
	"</head>\n"
	"<body>\n"
	" <ul>\n"
	" <li><a href=\"/solutions/platform.html#server\">Server</a></li>\n"
	" <li><a href=\"/solutions/platform.html#desktop\">Desktop</a></li>\n"
	" </ul>\n"
	"</body>\n"
	"</html>\n"
};
|
|
|
|
/*
 * Parser callback: verifies the position reported for 'href' and 'src'
 * attribute values.
 *
 * user_ctx  start of the document being parsed (the callers in this file
 *           pass the same buffer they hand to the parser)
 * flags     XML_FLG_* bits describing the reported item
 * dir       unused
 * attr      attribute name
 * val       attribute value (not necessarily NUL-terminated, see len), or NULL
 * len       length of val in bytes
 * pos       offset of val from the start of the document, as reported by the parser
 *
 * Items that are not attributes, attributes without a value, and attributes
 * other than 'href' / 'src' are ignored. For the remaining ones the bytes at
 * user_ctx + pos are compared with val: a match increments 'ok', a mismatch
 * increments 'failed' and is reported through error_printf_exit().
 */
static void html_dump(void *user_ctx, int flags, const char *dir G_GNUC_WGET_UNUSED, const char *attr, const char *val, size_t len, size_t pos)
{
	// only attributes that carry a value are of interest
	if (!(flags & XML_FLG_ATTRIBUTE) || !val)
		return;

	// very simplified selection of URL-carrying attributes, dispatching on
	// the first character before doing the full case-insensitive compare
	// see http://stackoverflow.com/questions/2725156/complete-list-of-html-tag-attributes-which-have-a-url-value
	const int first = c_tolower(*attr);
	const char *wanted;

	if (first == 'h')
		wanted = "href";
	else if (first == 's')
		wanted = "src";
	else
		return;

	if (wget_strcasecmp_ascii(attr, wanted) != 0)
		return;

	// the reported offset and length must address exactly the value's bytes
	const char *document = (const char *) user_ctx;

	if (memcmp(document + pos, val, len) == 0) {
		ok++;
		return;
	}

	failed++;
	error_printf_exit("Not found: '%.*s' expected at pos %zu with length %zu\n", (int) len, val, pos, len);
}
|
|
|
|
static void test_parse_buffer(void)
|
|
{
|
|
unsigned it;
|
|
|
|
for (it = 0; it < countof(test_data); it++) {
|
|
wget_html_parse_buffer(test_data[it], html_dump, (void *)test_data[it], 0);
|
|
}
|
|
}
|
|
|
|
static void test_parse_files(void)
|
|
{
|
|
DIR *dirp;
|
|
struct dirent *dp;
|
|
const char *ext;
|
|
char fname[296];
|
|
int xml = 0, html = 0, css = 0, type;
|
|
|
|
// test the XML / HTML parser, you should start the test with valgrind
|
|
// to detect memory faults
|
|
if ((dirp = opendir(SRCDIR "/files")) != NULL) {
|
|
while ((dp = readdir(dirp)) != NULL) {
|
|
if (*dp->d_name == '.') continue;
|
|
if ((ext = strrchr(dp->d_name, '.'))) {
|
|
if (!wget_strcasecmp_ascii(ext, ".xml"))
|
|
type = 1;
|
|
else if (!wget_strcasecmp_ascii(ext, ".html"))
|
|
type = 2;
|
|
else
|
|
continue;
|
|
|
|
snprintf(fname, sizeof(fname), "%s/files/%s", SRCDIR, dp->d_name);
|
|
info_printf("parsing %s\n", fname);
|
|
|
|
char *data;
|
|
|
|
if ((data = wget_read_file(fname, NULL))) {
|
|
if (type == 1) {
|
|
wget_xml_parse_buffer(data, html_dump, data, 0);
|
|
xml++;
|
|
} else {
|
|
wget_html_parse_buffer(data, html_dump, data, 0);
|
|
html++;
|
|
}
|
|
|
|
xfree(data);
|
|
}
|
|
}
|
|
}
|
|
closedir(dirp);
|
|
}
|
|
|
|
info_printf("%d XML, %d HTML and %d CSS files parsed\n", xml, html, css);
|
|
}
|
|
|
|
int main(int argc, const char **argv)
|
|
{
|
|
if (init(argc, argv) < 0) // allows us to test with options (e.g. with --debug)
|
|
return -1;
|
|
test_parse_buffer();
|
|
test_parse_files();
|
|
|
|
deinit(); // free resources allocated by init()
|
|
|
|
if (failed) {
|
|
info_printf("Summary: %d out of %d tests failed\n", failed, ok + failed);
|
|
return 1;
|
|
}
|
|
|
|
info_printf("Summary: All %d tests passed\n", ok + failed);
|
|
return 0;
|
|
}
|