Replace PCRE with PCRE2 where it is available.

This patch removes the needless assignment of re_erroffset in the conf pool
by the worker threads; such mistakes break the shared copy-on-write pages of
memory that should have remained common between all httpd worker processes.

Two de-optimizations are inherent in this patch; in particular, the former
ovector-on-stack opportunity is lost unless it is implemented as a new general
context. It would be safer to either create a new general context using pool
allocation, or recycle a per-pool or per-thread match_data buffer of some
arbitrary size (10 elements or so).

Submitted by: wrowe, Petr Pisar <ppisar@redhat.com>


git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@1773454 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
William A. Rowe Jr
2016-12-09 19:06:06 +00:00
parent ff7722bc9a
commit 12cfcf08ff
2 changed files with 128 additions and 54 deletions

View File

@ -223,18 +223,18 @@ fi
AC_ARG_WITH(pcre, AC_ARG_WITH(pcre,
APACHE_HELP_STRING(--with-pcre=PATH,Use external PCRE library)) APACHE_HELP_STRING(--with-pcre=PATH,Use external PCRE library))
AC_PATH_PROG(PCRE_CONFIG, pcre-config, false) AC_CHECK_TARGET_TOOLS(PCRE_CONFIG, [pcre2-config pcre-config],
if test -d "$with_pcre" && test -x "$with_pcre/bin/pcre-config"; then [`which $with_pcre 2>/dev/null`],
PCRE_CONFIG=$with_pcre/bin/pcre-config [$with_pcre/bin:$with_pcre])
elif test -x "$with_pcre"; then
PCRE_CONFIG=$with_pcre
fi
if test "$PCRE_CONFIG" != "false"; then if test "x$PCRE_CONFIG" != "x"; then
if $PCRE_CONFIG --version >/dev/null 2>&1; then :; else if $PCRE_CONFIG --version >/dev/null 2>&1; then :; else
AC_MSG_ERROR([Did not find pcre-config script at $PCRE_CONFIG]) AC_MSG_ERROR([Did not find working script at $PCRE_CONFIG])
fi fi
case `$PCRE_CONFIG --version` in case `$PCRE_CONFIG --version` in
[1[0-9].*])
AC_DEFINE(HAVE_PCRE2, 1, [Detected PCRE2])
;;
[[1-5].*]) [[1-5].*])
AC_MSG_ERROR([Need at least pcre version 6.7]) AC_MSG_ERROR([Need at least pcre version 6.7])
;; ;;
@ -244,10 +244,10 @@ if test "$PCRE_CONFIG" != "false"; then
esac esac
AC_MSG_NOTICE([Using external PCRE library from $PCRE_CONFIG]) AC_MSG_NOTICE([Using external PCRE library from $PCRE_CONFIG])
APR_ADDTO(PCRE_INCLUDES, [`$PCRE_CONFIG --cflags`]) APR_ADDTO(PCRE_INCLUDES, [`$PCRE_CONFIG --cflags`])
APR_ADDTO(PCRE_LIBS, [`$PCRE_CONFIG --libs`]) APR_ADDTO(PCRE_LIBS, [`$PCRE_CONFIG --libs8 2>/dev/null || $PCRE_CONFIG --libs`])
APR_ADDTO(HTTPD_LIBS, [\$(PCRE_LIBS)]) APR_ADDTO(HTTPD_LIBS, [\$(PCRE_LIBS)])
else else
AC_MSG_ERROR([pcre-config for libpcre not found. PCRE is required and available from http://pcre.org/]) AC_MSG_ERROR([pcre(2)-config for libpcre not found. PCRE is required and available from http://pcre.org/])
fi fi
APACHE_SUBST(PCRE_LIBS) APACHE_SUBST(PCRE_LIBS)

View File

@ -46,10 +46,18 @@ POSSIBILITY OF SUCH DAMAGE.
#include "httpd.h" #include "httpd.h"
#include "apr_strings.h" #include "apr_strings.h"
#include "apr_tables.h" #include "apr_tables.h"
#ifdef HAVE_PCRE2
#define PCRE2_CODE_UNIT_WIDTH 8
#include "pcre2.h"
#define PCREn(x) PCRE2_ ## x
#else
#include "pcre.h" #include "pcre.h"
#define PCREn(x) PCRE_ ## x
#endif
/* PCRE_DUPNAMES is only present since version 6.7 of PCRE */ /* PCRE_DUPNAMES is only present since version 6.7 of PCRE */
#ifndef PCRE_DUPNAMES #if !defined(PCRE_DUPNAMES) && !defined(HAVE_PCRE2)
#error PCRE Version 6.7 or later required! #error PCRE Version 6.7 or later required!
#else #else
@ -74,11 +82,19 @@ static const char *const pstring[] = {
AP_DECLARE(const char *) ap_pcre_version_string(int which) AP_DECLARE(const char *) ap_pcre_version_string(int which)
{ {
#ifdef HAVE_PCRE2
static char buf[80];
#endif
switch (which) { switch (which) {
case AP_REG_PCRE_COMPILED: case AP_REG_PCRE_COMPILED:
return APR_STRINGIFY(PCRE_MAJOR) "." APR_STRINGIFY(PCRE_MINOR) " " APR_STRINGIFY(PCRE_DATE); return APR_STRINGIFY(PCREn(MAJOR)) "." APR_STRINGIFY(PCREn(MINOR)) " " APR_STRINGIFY(PCREn(DATE));
case AP_REG_PCRE_LOADED: case AP_REG_PCRE_LOADED:
#ifdef HAVE_PCRE2
pcre2_config(PCRE2_CONFIG_VERSION, buf);
return buf;
#else
return pcre_version(); return pcre_version();
#endif
default: default:
return "Unknown"; return "Unknown";
} }
@ -118,7 +134,11 @@ AP_DECLARE(apr_size_t) ap_regerror(int errcode, const ap_regex_t *preg,
AP_DECLARE(void) ap_regfree(ap_regex_t *preg) AP_DECLARE(void) ap_regfree(ap_regex_t *preg)
{ {
#ifdef HAVE_PCRE2
pcre2_code_free(preg->re_pcre);
#else
(pcre_free)(preg->re_pcre); (pcre_free)(preg->re_pcre);
#endif
} }
@ -139,34 +159,48 @@ AP_DECLARE(void) ap_regfree(ap_regex_t *preg)
*/ */
AP_DECLARE(int) ap_regcomp(ap_regex_t * preg, const char *pattern, int cflags) AP_DECLARE(int) ap_regcomp(ap_regex_t * preg, const char *pattern, int cflags)
{ {
#ifdef HAVE_PCRE2
uint32_t capcount;
size_t erroffset;
#else
const char *errorptr; const char *errorptr;
int erroffset; int erroffset;
#endif
int errcode = 0; int errcode = 0;
int options = PCRE_DUPNAMES; int options = PCREn(DUPNAMES);
if ((cflags & AP_REG_ICASE) != 0) if ((cflags & AP_REG_ICASE) != 0)
options |= PCRE_CASELESS; options |= PCREn(CASELESS);
if ((cflags & AP_REG_NEWLINE) != 0) if ((cflags & AP_REG_NEWLINE) != 0)
options |= PCRE_MULTILINE; options |= PCREn(MULTILINE);
if ((cflags & AP_REG_DOTALL) != 0) if ((cflags & AP_REG_DOTALL) != 0)
options |= PCRE_DOTALL; options |= PCREn(DOTALL);
#ifdef HAVE_PCRE2
preg->re_pcre = pcre2_compile((const unsigned char *)pattern,
PCRE2_ZERO_TERMINATED, options, &errcode,
&erroffset, NULL);
#else
preg->re_pcre = pcre_compile2(pattern, options, &errcode,
&errorptr, &erroffset, NULL);
#endif
preg->re_pcre =
pcre_compile2(pattern, options, &errcode, &errorptr, &erroffset, NULL);
preg->re_erroffset = erroffset; preg->re_erroffset = erroffset;
if (preg->re_pcre == NULL) { if (preg->re_pcre == NULL) {
/* /* Internal ERR21 is "failed to get memory" according to pcreapi(3) */
* There doesn't seem to be constants defined for compile time error
* codes. 21 is "failed to get memory" according to pcreapi(3).
*/
if (errcode == 21) if (errcode == 21)
return AP_REG_ESPACE; return AP_REG_ESPACE;
return AP_REG_INVARG; return AP_REG_INVARG;
} }
#ifdef HAVE_PCRE2
pcre2_pattern_info((const pcre2_code *)preg->re_pcre,
PCRE2_INFO_CAPTURECOUNT, &capcount);
preg->re_nsub = capcount;
#else
pcre_fullinfo((const pcre *)preg->re_pcre, NULL, pcre_fullinfo((const pcre *)preg->re_pcre, NULL,
PCRE_INFO_CAPTURECOUNT, &(preg->re_nsub)); PCRE_INFO_CAPTURECOUNT, &(preg->re_nsub));
#endif
return 0; return 0;
} }
@ -198,17 +232,33 @@ AP_DECLARE(int) ap_regexec_len(const ap_regex_t *preg, const char *buff,
{ {
int rc; int rc;
int options = 0; int options = 0;
int *ovector = NULL; #ifdef HAVE_PCRE2
pcre2_match_data *matchdata;
size_t *ovector;
#else
int small_ovector[POSIX_MALLOC_THRESHOLD * 3]; int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
int allocated_ovector = 0; int allocated_ovector = 0;
int *ovector = NULL;
#endif
if ((eflags & AP_REG_NOTBOL) != 0) if ((eflags & AP_REG_NOTBOL) != 0)
options |= PCRE_NOTBOL; options |= PCREn(NOTBOL);
if ((eflags & AP_REG_NOTEOL) != 0) if ((eflags & AP_REG_NOTEOL) != 0)
options |= PCRE_NOTEOL; options |= PCREn(NOTEOL);
((ap_regex_t *)preg)->re_erroffset = (apr_size_t)(-1); /* Only has meaning after compile */
#ifdef HAVE_PCRE2
/* TODO: create a generic TLS matchdata buffer of some nmatch limit,
* e.g. 10 matches, to avoid a malloc-per-call. If it must be alloced,
* implement a general context using palloc and no free implementation.
*/
matchdata = pcre2_match_data_create(nmatch, NULL);
if (matchdata == NULL)
return AP_REG_ESPACE;
ovector = pcre2_get_ovector_pointer(matchdata);
rc = pcre2_match((const pcre2_code *)preg->re_pcre,
(const unsigned char *)buff, len,
0, options, matchdata, NULL);
#else
if (nmatch > 0) { if (nmatch > 0) {
if (nmatch <= POSIX_MALLOC_THRESHOLD) { if (nmatch <= POSIX_MALLOC_THRESHOLD) {
ovector = &(small_ovector[0]); ovector = &(small_ovector[0]);
@ -220,52 +270,64 @@ AP_DECLARE(int) ap_regexec_len(const ap_regex_t *preg, const char *buff,
allocated_ovector = 1; allocated_ovector = 1;
} }
} }
rc = pcre_exec((const pcre *)preg->re_pcre, NULL, buff, (int)len, rc = pcre_exec((const pcre *)preg->re_pcre, NULL, buff, (int)len,
0, options, ovector, nmatch * 3); 0, options, ovector, nmatch * 3);
#endif
if (rc == 0) if (rc == 0)
rc = nmatch; /* All captured slots were filled in */ rc = nmatch; /* All captured slots were filled in */
if (rc >= 0) { if (rc >= 0) {
apr_size_t i; apr_size_t i;
for (i = 0; i < (apr_size_t)rc; i++) { apr_size_t nlim = (apr_size_t)rc < nmatch ? (apr_size_t)rc : nmatch;
for (i = 0; i < nlim; i++) {
pmatch[i].rm_so = ovector[i * 2]; pmatch[i].rm_so = ovector[i * 2];
pmatch[i].rm_eo = ovector[i * 2 + 1]; pmatch[i].rm_eo = ovector[i * 2 + 1];
} }
if (allocated_ovector)
free(ovector);
for (; i < nmatch; i++) for (; i < nmatch; i++)
pmatch[i].rm_so = pmatch[i].rm_eo = -1; pmatch[i].rm_so = pmatch[i].rm_eo = -1;
return 0;
} }
else { #ifdef HAVE_PCRE2
pcre2_match_data_free(matchdata);
#else
if (allocated_ovector) if (allocated_ovector)
free(ovector); free(ovector);
#endif
if (rc >= 0) {
return 0;
}
else {
#ifdef HAVE_PCRE2
if (rc <= PCRE2_ERROR_UTF8_ERR1 && rc >= PCRE2_ERROR_UTF8_ERR21)
return AP_REG_INVARG;
#endif
switch (rc) { switch (rc) {
case PCRE_ERROR_NOMATCH: case PCREn(ERROR_NOMATCH):
return AP_REG_NOMATCH; return AP_REG_NOMATCH;
case PCRE_ERROR_NULL: case PCREn(ERROR_NULL):
return AP_REG_INVARG; return AP_REG_INVARG;
case PCRE_ERROR_BADOPTION: case PCREn(ERROR_BADOPTION):
return AP_REG_INVARG; return AP_REG_INVARG;
case PCRE_ERROR_BADMAGIC: case PCREn(ERROR_BADMAGIC):
return AP_REG_INVARG; return AP_REG_INVARG;
case PCREn(ERROR_NOMEMORY):
return AP_REG_ESPACE;
#if defined(HAVE_PCRE2) || defined(PCRE_ERROR_MATCHLIMIT)
case PCREn(ERROR_MATCHLIMIT):
return AP_REG_ESPACE;
#endif
#if defined(PCRE_ERROR_UNKNOWN_NODE)
case PCRE_ERROR_UNKNOWN_NODE: case PCRE_ERROR_UNKNOWN_NODE:
return AP_REG_ASSERT; return AP_REG_ASSERT;
case PCRE_ERROR_NOMEMORY:
return AP_REG_ESPACE;
#ifdef PCRE_ERROR_MATCHLIMIT
case PCRE_ERROR_MATCHLIMIT:
return AP_REG_ESPACE;
#endif #endif
#ifdef PCRE_ERROR_BADUTF8 #if defined(PCRE_ERROR_BADUTF8)
case PCRE_ERROR_BADUTF8: case PCREn(ERROR_BADUTF8):
return AP_REG_INVARG; return AP_REG_INVARG;
#endif #endif
#ifdef PCRE_ERROR_BADUTF8_OFFSET #if defined(PCRE_ERROR_BADUTF8_OFFSET)
case PCRE_ERROR_BADUTF8_OFFSET: case PCREn(ERROR_BADUTF8_OFFSET):
return AP_REG_INVARG; return AP_REG_INVARG;
#endif #endif
default: default:
@ -278,17 +340,29 @@ AP_DECLARE(int) ap_regname(const ap_regex_t *preg,
apr_array_header_t *names, const char *prefix, apr_array_header_t *names, const char *prefix,
int upper) int upper)
{ {
char *nametable;
#ifdef HAVE_PCRE2
uint32_t namecount;
uint32_t nameentrysize;
uint32_t i;
pcre2_pattern_info((const pcre2_code *)preg->re_pcre,
PCRE2_INFO_NAMECOUNT, &namecount);
pcre2_pattern_info((const pcre2_code *)preg->re_pcre,
PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize);
pcre2_pattern_info((const pcre2_code *)preg->re_pcre,
PCRE2_INFO_NAMETABLE, &nametable);
#else
int namecount; int namecount;
int nameentrysize; int nameentrysize;
int i; int i;
char *nametable;
pcre_fullinfo((const pcre *)preg->re_pcre, NULL, pcre_fullinfo((const pcre *)preg->re_pcre, NULL,
PCRE_INFO_NAMECOUNT, &namecount); PCRE_INFO_NAMECOUNT, &namecount);
pcre_fullinfo((const pcre *)preg->re_pcre, NULL, pcre_fullinfo((const pcre *)preg->re_pcre, NULL,
PCRE_INFO_NAMEENTRYSIZE, &nameentrysize); PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
pcre_fullinfo((const pcre *)preg->re_pcre, NULL, pcre_fullinfo((const pcre *)preg->re_pcre, NULL,
PCRE_INFO_NAMETABLE, &nametable); PCRE_INFO_NAMETABLE, &nametable);
#endif
for (i = 0; i < namecount; i++) { for (i = 0; i < namecount; i++) {
const char *offset = nametable + i * nameentrysize; const char *offset = nametable + i * nameentrysize;