Files
wget2/examples/http_get2.c
2013-04-07 21:54:27 +02:00

164 lines
4.9 KiB
C

/*
* Copyright(c) 2013 Tim Ruehsen
*
* This file is part of libmget.
*
* Libmget is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Libmget is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with libmget. If not, see <http://www.gnu.org/licenses/>.
*
*
* Example for retrieving and parsing an HTTP URI
*
* Changelog
* 16.01.2013 Tim Ruehsen created
*
* Simple demonstration of how to download a URI.
*
*/
#if HAVE_CONFIG_H
# include <config.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <libmget.h>
#define COOKIE_SUPPORT
/*
 * Example program: fetch http://www.example.org with the low-level libmget
 * HTTP API and print the response header and body through the info logger.
 *
 * With COOKIE_SUPPORT defined, cookies are loaded from "cookies.txt", sent
 * with the request, and the cookies received are written back to that file.
 *
 * argc and argv are unused.
 *
 * Returns EXIT_SUCCESS if a response was received and printed,
 * EXIT_FAILURE otherwise.
 */
int main(int argc G_GNUC_MGET_UNUSED, const char *const *argv G_GNUC_MGET_UNUSED)
{
	MGET_IRI *uri;
	MGET_HTTP_CONNECTION *conn = NULL;
	MGET_HTTP_REQUEST *req;
	MGET_HTTP_RESPONSE *resp;
	int rc = EXIT_FAILURE; // only switched to success once a response was printed
#ifdef COOKIE_SUPPORT
	// 'keep_session_cookies' should be 0 or 1
	int keep_session_cookies = 1;
	const char *cookie_string;
#endif

/*
 * todo: create a libmget init function like this:
	mget_global_init(
		MGET_DEBUG_FILE, stderr,
		MGET_ERROR_FILE, stderr,
		MGET_INFO_FILE, stdout,
		MGET_DNS_CACHING, 1,
		NULL);
 */

	// We want the libmget debug messages be printed to STDERR.
	// From here on, we can call mget_debug_printf, etc.
	mget_logger_set_stream(mget_get_logger(MGET_LOGGER_DEBUG), stderr);

	// We want the libmget error messages be printed to STDERR.
	// From here on, we can call mget_error_printf, etc.
	mget_logger_set_stream(mget_get_logger(MGET_LOGGER_ERROR), stderr);

	// We want the libmget info messages be printed to STDOUT.
	// From here on, we can call mget_info_printf, etc.
	mget_logger_set_stream(mget_get_logger(MGET_LOGGER_INFO), stdout);

	// 1. parse the URL into a URI
	// if you want use a non-ascii (international) domain, the second
	// parameter should be the character encoding of this file (e.g. "iso-8859-1")
	uri = mget_iri_parse("http://www.example.org", NULL);
	if (!uri) {
		// nothing has been acquired yet, so there is nothing to clean up
		fprintf(stderr, "Failed to parse the URL\n");
		return EXIT_FAILURE;
	}

	// 2. create a HTTP/1.1 GET request.
	// the only default header is 'Host: www.example.org' (taken from uri)
	req = http_create_request(uri, "GET");
	if (!req) {
		fprintf(stderr, "Failed to create the HTTP request\n");
		goto out_uri;
	}

	// 3. add HTTP headers as you wish
	http_add_header(req, "User-Agent", "TheUserAgent/0.5");

	// libmget also supports gzip'ed or deflated response bodies
	http_add_header_line(req, "Accept-Encoding: gzip, deflate\r\n");
	http_add_header_line(req, "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n");
	http_add_header_line(req, "Accept-Language: en-us,en;q=0.5\r\n");

	// use keep-alive if you want to send more requests on the same connection
	// http_add_header_line(req, "Connection: keep-alive\r\n");

	// you need cookie support ? just #define COOKIE_SUPPORT or remove the #ifdef/#endif
#ifdef COOKIE_SUPPORT
	// load public suffixes for cookie validation
	mget_cookie_load_public_suffixes("public_suffixes.txt");

	// load cookie-store
	mget_cookie_load("cookies.txt", keep_session_cookies);

	// enrich the HTTP request with the uri-related cookies we have
	if ((cookie_string = mget_cookie_create_request_header(uri))) {
		http_add_header(req, "Cookie", cookie_string);
		free((void *)cookie_string);
	}
#endif

	// 4. establish connection to the host/port given by uri
	// well, we could have done this directly after mget_iri_parse(), since
	// http_open() works semi-async and returns immediately after domain name lookup.
	conn = http_open(uri);
	if (!conn) {
		fprintf(stderr, "Failed to open the HTTP connection\n");
		goto out;
	}

	if (http_send_request(conn, req) != 0) {
		fprintf(stderr, "Failed to send the HTTP request\n");
		goto out;
	}

	resp = http_get_response(conn, NULL, req, MGET_HTTP_RESPONSE_KEEPHEADER);
	if (!resp) {
		fprintf(stderr, "Failed to receive the HTTP response\n");
		goto out;
	}

	// server doesn't support or want keep-alive
	if (!resp->keep_alive)
		http_close(&conn);

#ifdef COOKIE_SUPPORT
	// check and normalization of received cookies
	mget_cookie_normalize_cookies(uri, resp->cookies);

	// put cookies into cookie-store (also known as cookie-jar)
	mget_cookie_store_cookies(resp->cookies);

	// save cookie-store to file
	mget_cookie_save("cookies.txt", keep_session_cookies);
#endif

	// let's assume the body isn't binary (doesn't contain \0)
	// NOTE(review): the guards assume the library may leave header/body unset
	// (e.g. a response without a body); they are harmless if it never does.
	mget_info_printf("%s%s\n",
		resp->header ? resp->header->data : "",
		resp->body ? resp->body->data : "");

	http_free_response(&resp);
	rc = EXIT_SUCCESS;

/*
 * todo: create this kind of high-level function:
	resp = http_get("http://example.com",
		HTTP_SERVER_PORT, 8000,
		HTTP_URL_CHARACTERSET, "iso-8859-1",
		HTTP_COOKIE_STORE, "cookies.txt",
		HTTP_COOKIE_KEEPSESSIONCOOKIES, 1,
		HTTP_ADD_HEADER, "Accept-Encoding: gzip, deflate",
		HTTP_USE_PROXY, "myproxy.com:9375",
		NULL);
 */

out:
	// reached only after the cookie data was loaded and the request was created
#ifdef COOKIE_SUPPORT
	mget_cookie_free_public_suffixes();
	mget_cookie_free_cookies();
#endif
	// conn may already be NULL here, exactly as in the failed-http_open() path
	http_close(&conn);
	http_free_request(&req);

out_uri:
	mget_iri_free(&uri);

	return rc;
}