ap_regex: Use Thread Local Storage (if efficient) to avoid allocations.

PCRE2 wants an opaque context by providing the API to allocate and free it, so
to minimize these calls we maintain one opaque context per thread (in Thread
Local Storage, TLS) grown as needed, and while at it we do the same for PCRE1
int vectors. Note that this requires a fast TLS mechanism to be worth it,
which is the case of apr_thread_data_get/set() from/to apr_thread_current()
when APR_HAS_THREAD_LOCAL; otherwise we'll do the allocation and freeing for
each ap_regexec().

The small stack vector is now used only for PCRE1 && !APR_HAS_THREAD_LOCAL.



git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@1897240 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yann Ylavic
2022-01-20 11:09:34 +00:00
parent e0889457b5
commit 747df57e08
2 changed files with 174 additions and 47 deletions

View File

@ -338,6 +338,15 @@ static void reset_process_pconf(process_rec *process)
apr_pool_pre_cleanup_register(process->pconf, NULL, deregister_all_hooks); apr_pool_pre_cleanup_register(process->pconf, NULL, deregister_all_hooks);
} }
#if APR_HAS_THREAD_LOCAL
static apr_status_t main_thread_exit_cleanup(void *arg)
{
apr_thread_t *thd = arg;
apr_pool_destroy(apr_thread_pool_get(thd));
return APR_SUCCESS;
}
#endif
static process_rec *init_process(int *argc, const char * const * *argv) static process_rec *init_process(int *argc, const char * const * *argv)
{ {
process_rec *process; process_rec *process;
@ -388,6 +397,34 @@ static process_rec *init_process(int *argc, const char * const * *argv)
process->argc = *argc; process->argc = *argc;
process->argv = *argv; process->argv = *argv;
process->short_name = apr_filepath_name_get((*argv)[0]); process->short_name = apr_filepath_name_get((*argv)[0]);
#if APR_HAS_THREAD_LOCAL
/* Create an apr_thread_t for the main thread to set up its
* Thread Local Storage. Since it's detached and it won't
* apr_thread_exit(), destroy its pool before exiting via
* a process->pool cleanup
*/
{
apr_thread_t *main_thd;
apr_threadattr_t *main_thd_attr = NULL;
/* Three steps chained with ||: create the thread attributes, mark them
 * detached, then create the apr_thread_t for the current thread. Any
 * non-zero status short-circuits into the fatal path below.
 */
if (apr_threadattr_create(&main_thd_attr, process->pool)
|| apr_threadattr_detach_set(main_thd_attr, 1)
|| apr_thread_current_create(&main_thd, main_thd_attr,
process->pool)) {
char ctimebuff[APR_CTIME_LEN];
apr_ctime(ctimebuff, apr_time_now());
/* NOTE(review): `stat` and `failed` are declared outside the lines shown
 * here, and none of the calls above assigns to them, so the status code
 * and name printed are whatever the enclosing function last stored --
 * confirm they describe this failure.
 */
fprintf(stderr, "[%s] [crit] (%d) %s: %s failed "
"to initialize thread context, exiting\n",
ctimebuff, stat, (*argv)[0], failed);
apr_terminate();
exit(1);
}
/* Tie the main thread's pool lifetime to process->pool: the cleanup
 * destroys it when process->pool is cleaned up; apr_pool_cleanup_null
 * is given as the child cleanup.
 */
apr_pool_cleanup_register(process->pool, main_thd,
main_thread_exit_cleanup,
apr_pool_cleanup_null);
}
#endif
return process; return process;
} }

View File

@ -53,6 +53,8 @@ POSSIBILITY OF SUCH DAMAGE.
*/ */
#include "httpd.h" #include "httpd.h"
#include "apr_version.h"
#include "apr_portable.h"
#include "apr_strings.h" #include "apr_strings.h"
#include "apr_tables.h" #include "apr_tables.h"
@ -263,7 +265,122 @@ AP_DECLARE(int) ap_regcomp(ap_regex_t * preg, const char *pattern, int cflags)
* ints. However, if the number of possible capturing brackets is small, use a * ints. However, if the number of possible capturing brackets is small, use a
* block of store on the stack, to reduce the use of malloc/free. The threshold * block of store on the stack, to reduce the use of malloc/free. The threshold
* is in a macro that can be changed at configure time. * is in a macro that can be changed at configure time.
* Yet more unfortunately, PCRE2 wants an opaque context by providing the API
* to allocate and free it, so to minimize these calls we maintain one opaque
* context per thread (in Thread Local Storage, TLS) grown as needed, and while
* at it we do the same for PCRE1 int vectors. Note that this requires a fast
* TLS mechanism to be worth it, which is the case of apr_thread_data_get/set()
* from/to apr_thread_current() when APR_HAS_THREAD_LOCAL; otherwise we'll do
* the allocation and freeing for each ap_regexec().
*/ */
/* Types shared by get_match_data()/put_match_data():
 *  - match_data_pt is the buffer handle that is obtained and released;
 *  - match_vector_pt is the offsets vector that results are read from.
 * With PCRE2 the handle is an opaque pcre2_match_data and the vector is
 * obtained from it via pcre2_get_ovector_pointer(); with PCRE1 the handle
 * is the int vector itself.
 */
#ifdef HAVE_PCRE2
typedef pcre2_match_data* match_data_pt;
typedef size_t* match_vector_pt;
#else
typedef int* match_data_pt;
typedef int* match_vector_pt;
#endif
#if APR_HAS_THREAD_LOCAL
/*
 * Thread-local variant: return the calling thread's cached match buffer,
 * (re)allocating it when it does not exist yet or holds fewer than `size`
 * slots, and store the offsets vector to use in *ovector.
 *
 * The cache is kept as the "apreg" thread data of apr_thread_current().
 * When it must grow, the capacity is doubled, or set to `size` (but at
 * least POSIX_MALLOC_THRESHOLD) if doubling is still not enough.
 *
 * size          number of match slots the caller needs
 * ovector       out: vector to pass to / read back from the matcher
 * small_vector  unused in this variant
 *
 * Returns the match data, or NULL if allocation failed (the cached size is
 * reset to 0 so the next call allocates again).
 */
static match_data_pt get_match_data(apr_size_t size,
                                    match_vector_pt *ovector,
                                    match_vector_pt small_vector)
{
    apr_thread_t *current;
    struct {
        match_data_pt data;
        apr_size_t size;
    } *tls = NULL;

    /* APR_HAS_THREAD_LOCAL guarantees this works */
    current = apr_thread_current();
    ap_assert(current != NULL);

    apr_thread_data_get((void **)&tls, "apreg", current);
    if (!tls || tls->size < size) {
        apr_pool_t *tp = apr_thread_pool_get(current);

        if (tls) {
#ifdef HAVE_PCRE2
            /* Release the too-small context before creating a bigger one */
            pcre2_match_data_free(tls->data); /* NULL safe */
#endif
            /* PCRE1: the previous vector came from the thread's pool and
             * is not released individually.
             */
        }
        else {
            /* First use on this thread: create the zeroed cache record */
            tls = apr_pcalloc(tp, sizeof(*tls));
            apr_thread_data_set(tls, "apreg", NULL, current);
        }

        tls->size *= 2;
        if (tls->size < size) {
            tls->size = size;
            if (tls->size < POSIX_MALLOC_THRESHOLD) {
                tls->size = POSIX_MALLOC_THRESHOLD;
            }
        }

#ifdef HAVE_PCRE2
        tls->data = pcre2_match_data_create(tls->size, NULL);
#else
        tls->data = apr_palloc(tp, tls->size * sizeof(int) * 3);
#endif
        if (!tls->data) {
            tls->size = 0;
            return NULL;
        }
    }

#ifdef HAVE_PCRE2
    *ovector = pcre2_get_ovector_pointer(tls->data);
#else
    /* With PCRE1 the match data is the ints vector itself */
    *ovector = tls->data;
#endif
    return tls->data;
}
/* Thread-local variant: the buffer handed out by get_match_data() remains
 * attached to the thread for reuse, so releasing it is a no-op.
 */
static APR_INLINE void put_match_data(match_data_pt data,
                                      apr_size_t size)
{
    (void)data;
    (void)size;
}
#else /* !APR_HAS_THREAD_LOCAL */
/*
 * Variant without thread local storage: always allocate here and free in
 * put_match_data().
 *
 * size          number of match slots the caller needs
 * ovector       out: vector to pass to / read back from the matcher
 *               (NULL when the returned match data is NULL)
 * small_vector  PCRE1 only: caller-provided vector used instead of malloc()
 *               when size does not exceed POSIX_MALLOC_THRESHOLD
 *
 * Returns the match data, or NULL if it could not be obtained.
 */
static match_data_pt get_match_data(apr_size_t size,
                                    match_vector_pt *ovector,
                                    match_vector_pt small_vector)
{
    match_data_pt data;

#ifdef HAVE_PCRE2
    data = pcre2_match_data_create(size, NULL);
    /* pcre2_get_ovector_pointer() is not documented as accepting NULL, so
     * only ask for the vector once the context is known to exist.
     */
    *ovector = data ? pcre2_get_ovector_pointer(data) : NULL;
#else
    if (size > POSIX_MALLOC_THRESHOLD) {
        /* Three ints per slot, as in the thread-local variant */
        data = malloc(size * sizeof(int) * 3);
    }
    else {
        data = small_vector;
    }
    /* With PCRE1 the match data is the ints vector itself */
    *ovector = data;
#endif

    return data;
}
/* Variant without thread local storage: release what get_match_data()
 * allocated. `size` must be the value that was passed to get_match_data()
 * so that a caller-provided small vector (PCRE1) is never freed.
 */
static APR_INLINE void put_match_data(match_data_pt data,
                                      apr_size_t size)
{
#ifndef HAVE_PCRE2
    /* Up to the threshold the vector was the caller's small_vector, not
     * heap memory.
     */
    if (size <= POSIX_MALLOC_THRESHOLD) {
        return;
    }
    free(data);
#else
    pcre2_match_data_free(data);
#endif
}
#endif /* !APR_HAS_THREAD_LOCAL */
AP_DECLARE(int) ap_regexec(const ap_regex_t *preg, const char *string, AP_DECLARE(int) ap_regexec(const ap_regex_t *preg, const char *string,
apr_size_t nmatch, ap_regmatch_t *pmatch, apr_size_t nmatch, ap_regmatch_t *pmatch,
int eflags) int eflags)
@ -278,16 +395,20 @@ AP_DECLARE(int) ap_regexec_len(const ap_regex_t *preg, const char *buff,
{ {
int rc; int rc;
int options = 0; int options = 0;
apr_size_t nlim; match_vector_pt ovector = NULL;
#ifdef HAVE_PCRE2 apr_size_t nlim = ((apr_size_t)preg->re_nsub + 1) > nmatch
pcre2_match_data *matchdata; ? ((apr_size_t)preg->re_nsub + 1) : nmatch;
size_t *ovector; #if defined(HAVE_PCRE2) || APR_HAS_THREAD_LOCAL
match_data_pt data = get_match_data(nlim, &ovector, NULL);
#else #else
int small_ovector[POSIX_MALLOC_THRESHOLD * 3]; int small_vector[POSIX_MALLOC_THRESHOLD * 3];
int allocated_ovector = 0; match_data_pt data = get_match_data(nlim, &ovector, small_vector);
int *ovector = NULL;
#endif #endif
if (!data) {
return AP_REG_ESPACE;
}
if ((eflags & AP_REG_NOTBOL) != 0) if ((eflags & AP_REG_NOTBOL) != 0)
options |= PCREn(NOTBOL); options |= PCREn(NOTBOL);
if ((eflags & AP_REG_NOTEOL) != 0) if ((eflags & AP_REG_NOTEOL) != 0)
@ -298,61 +419,30 @@ AP_DECLARE(int) ap_regexec_len(const ap_regex_t *preg, const char *buff,
options |= PCREn(ANCHORED); options |= PCREn(ANCHORED);
#ifdef HAVE_PCRE2 #ifdef HAVE_PCRE2
/* TODO: create a generic TLS matchdata buffer of some nmatch limit,
* e.g. 10 matches, to avoid a malloc-per-call. If it must be allocated,
* implement a general context using palloc and no free implementation.
*/
nlim = ((apr_size_t)preg->re_nsub + 1) > nmatch
? ((apr_size_t)preg->re_nsub + 1) : nmatch;
matchdata = pcre2_match_data_create(nlim, NULL);
if (matchdata == NULL)
return AP_REG_ESPACE;
ovector = pcre2_get_ovector_pointer(matchdata);
rc = pcre2_match((const pcre2_code *)preg->re_pcre, rc = pcre2_match((const pcre2_code *)preg->re_pcre,
(const unsigned char *)buff, len, (const unsigned char *)buff, len,
0, options, matchdata, NULL); 0, options, data, NULL);
if (rc == 0)
rc = nlim; /* All captured slots were filled in */
#else #else
if (nmatch > 0) {
if (nmatch <= POSIX_MALLOC_THRESHOLD) {
ovector = &(small_ovector[0]);
}
else {
ovector = (int *)malloc(sizeof(int) * nmatch * 3);
if (ovector == NULL)
return AP_REG_ESPACE;
allocated_ovector = 1;
}
}
rc = pcre_exec((const pcre *)preg->re_pcre, NULL, buff, (int)len, rc = pcre_exec((const pcre *)preg->re_pcre, NULL, buff, (int)len,
0, options, ovector, nmatch * 3); 0, options, ovector, nlim * 3);
if (rc == 0)
rc = nmatch; /* All captured slots were filled in */
#endif #endif
if (rc >= 0) { if (rc >= 0) {
apr_size_t i; apr_size_t n, i;
nlim = (apr_size_t)rc < nmatch ? (apr_size_t)rc : nmatch; if (rc == 0)
for (i = 0; i < nlim; i++) { rc = nlim; /* All captured slots were filled in */
n = (apr_size_t)rc < nmatch ? (apr_size_t)rc : nmatch;
for (i = 0; i < n; i++) {
pmatch[i].rm_so = ovector[i * 2]; pmatch[i].rm_so = ovector[i * 2];
pmatch[i].rm_eo = ovector[i * 2 + 1]; pmatch[i].rm_eo = ovector[i * 2 + 1];
} }
for (; i < nmatch; i++) for (; i < nmatch; i++)
pmatch[i].rm_so = pmatch[i].rm_eo = -1; pmatch[i].rm_so = pmatch[i].rm_eo = -1;
} put_match_data(data, nlim);
#ifdef HAVE_PCRE2
pcre2_match_data_free(matchdata);
#else
if (allocated_ovector)
free(ovector);
#endif
if (rc >= 0) {
return 0; return 0;
} }
else { else {
put_match_data(data, nlim);
#ifdef HAVE_PCRE2 #ifdef HAVE_PCRE2
if (rc <= PCRE2_ERROR_UTF8_ERR1 && rc >= PCRE2_ERROR_UTF8_ERR21) if (rc <= PCRE2_ERROR_UTF8_ERR1 && rc >= PCRE2_ERROR_UTF8_ERR21)
return AP_REG_INVARG; return AP_REG_INVARG;