ap_expr: open string expressions to the <word>.

Introduces the syntax "%{:<word>:}", modeled on the existing <var> syntax,
which can likewise be embedded anywhere in a string expression (the same
reserved character ':' is reused in an unambiguous manner).

This allows the two types of expressions (boolean and string) to now share
fully the same language set, namely: strings, lists, vars, regexes, backrefs,
functions with multiple or complex arguments, and especially combinations
thereof.

Most of them were reserved for boolean expressions only, although string
expressions can benefit from complex constructions too. The <word> construct
allows that (the syntax "%{:<word>:}" can be thought of as a temporary
variable constructed inside a string).

Since string expressions may now have to deal with lists (arrays), they also
need a way to produce strings from lists and extract lists from strings. This
can be done with the new "join" and "split" operators, while the new
substitution regexes (like "s/<pattern>/<substitute>/<flags>") may be used to
manipulate strings in place. All of this is of course available for both
string and boolean expressions.

Tests and doc updates are upcoming.



git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@1810605 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yann Ylavic
2017-10-02 21:57:26 +00:00
parent 5e90ede647
commit 08ddf7ad0a
9 changed files with 2117 additions and 1238 deletions

View File

@ -27,6 +27,7 @@
#include "util_varbuf.h"
#include "util_expr_private.h"
#include "util_md5.h"
#include "util_varbuf.h"
#include "apr_lib.h"
#include "apr_fnmatch.h"
@ -54,6 +55,8 @@ AP_IMPLEMENT_HOOK_RUN_FIRST(int, expr_lookup, (ap_expr_lookup_parms *parms),
#define LOG_MARK(info) __FILE__, __LINE__, (info)->module_index
static int ap_expr_eval_cond(ap_expr_eval_ctx_t *ctx, const ap_expr_t *node);
static const char *ap_expr_eval_string_func(ap_expr_eval_ctx_t *ctx,
const ap_expr_t *info,
const ap_expr_t *args);
@ -63,6 +66,19 @@ static const char *ap_expr_eval_var(ap_expr_eval_ctx_t *ctx,
ap_expr_var_func_t *func,
const void *data);
/*
 * Regex context attached as node_arg2 to the op_Regex nodes built by
 * ap_expr_regex_make().
 */
typedef struct {
    /* Kind of regex: 'm' (match), 's' (substitution) or 'S' (split) */
    int type;
    /* AP_REG_* bits collected from the regex flags string */
    int flags;
    /* Expression evaluated to produce the substitute, NULL for a match */
    const ap_expr_t *subst;
} ap_expr_regctx_t;
static const char *ap_expr_regexec(const char *subject,
const ap_expr_t *reg,
apr_array_header_t *list,
ap_expr_eval_ctx_t *ctx);
static apr_array_header_t *ap_expr_list_make(ap_expr_eval_ctx_t *ctx,
const ap_expr_t *node);
/* define AP_EXPR_DEBUG to log the parse tree when parsing an expression */
#ifdef AP_EXPR_DEBUG
static void expr_dump_tree(const ap_expr_t *e, const server_rec *s,
@ -71,7 +87,7 @@ static void expr_dump_tree(const ap_expr_t *e, const server_rec *s,
/*
* To reduce counting overhead, we only count calls to
* ap_expr_eval_word() and ap_expr_eval(). The max number of
* ap_expr_eval_word() and ap_expr_eval_cond(). The max number of
* stack frames is larger by some factor.
*/
#define AP_EXPR_MAX_RECURSION 20
@ -87,6 +103,37 @@ static int inc_rec(ap_expr_eval_ctx_t *ctx)
return 1;
}
/*
 * Join the strings of a list into a single string, inserting sep between
 * consecutive elements.
 *
 * p:    pool used to initialize the varbuf holding the result
 * list: array of (const char *) elements, may be NULL
 * sep:  separator inserted between elements (never after the last one)
 *
 * Returns NULL when the list is NULL or empty, the single element itself
 * (not a copy) when the list has exactly one element, and the joined string
 * otherwise.
 */
static const char *ap_expr_list_pstrcat(apr_pool_t *p,
                                        const apr_array_header_t *list,
                                        const char *sep)
{
    /* A list producer (e.g. a list function) may hand back no list at all,
     * treat that like an empty list rather than dereferencing NULL.
     */
    if (!list || list->nelts <= 0) {
        return NULL;
    }
    else if (list->nelts == 1) {
        return APR_ARRAY_IDX(list, 0, const char*);
    }
    else {
        struct ap_varbuf vb;
        int n = list->nelts - 1, i;
        apr_size_t slen = strlen(sep), vlen;
        const char *val;

        ap_varbuf_init(p, &vb, 0);
        /* All the elements but the last one are followed by the separator */
        for (i = 0; i < n; ++i) {
            val = APR_ARRAY_IDX(list, i, const char*);
            vlen = strlen(val);
            ap_varbuf_grow(&vb, vlen + slen + 1);
            ap_varbuf_strmemcat(&vb, val, vlen);
            ap_varbuf_strmemcat(&vb, sep, slen);
        }
        val = APR_ARRAY_IDX(list, n, const char*);
        ap_varbuf_strmemcat(&vb, val, strlen(val));

        return vb.buf;
    }
}
static const char *ap_expr_eval_word(ap_expr_eval_ctx_t *ctx,
const ap_expr_t *node)
{
@ -98,6 +145,12 @@ static const char *ap_expr_eval_word(ap_expr_eval_ctx_t *ctx,
case op_String:
result = node->node_arg1;
break;
case op_Word:
result = ap_expr_eval_word(ctx, node->node_arg1);
break;
case op_Bool:
result = ap_expr_eval_cond(ctx, node->node_arg1) ? "true" : "false";
break;
case op_Var:
result = ap_expr_eval_var(ctx, (ap_expr_var_func_t *)node->node_arg1,
node->node_arg2);
@ -168,7 +221,20 @@ static const char *ap_expr_eval_word(ap_expr_eval_ctx_t *ctx,
result = ap_expr_eval_string_func(ctx, info, args);
break;
}
case op_RegexBackref: {
case op_Join: {
const char *sep;
apr_array_header_t *list = ap_expr_list_make(ctx, node->node_arg1);
sep = node->node_arg2 ? ap_expr_eval_word(ctx, node->node_arg2) : "";
result = ap_expr_list_pstrcat(ctx->p, list, sep);
break;
}
case op_Regsub: {
const ap_expr_t *reg = node->node_arg2;
const char *subject = ap_expr_eval_word(ctx, node->node_arg1);
result = ap_expr_regexec(subject, reg, NULL, ctx);
break;
}
case op_Regref: {
const unsigned int *np = node->node_arg1;
result = ap_expr_eval_re_backref(ctx, *np);
break;
@ -219,15 +285,7 @@ static const char *ap_expr_eval_string_func(ap_expr_eval_ctx_t *ctx,
if (arg->node_op == op_ListElement) {
/* Evaluate the list elements and store them in apr_array_header. */
ap_expr_string_list_func_t *func = (ap_expr_string_list_func_t *)info->node_arg1;
apr_array_header_t *args = apr_array_make(ctx->p, 2, sizeof(char *));
do {
const ap_expr_t *val = arg->node_arg1;
const char **new = apr_array_push(args);
*new = ap_expr_eval_word(ctx, val);
arg = arg->node_arg2;
} while (arg != NULL);
apr_array_header_t *args = ap_expr_list_make(ctx, arg->node_arg1);
return (*func)(ctx, data, args);
}
else {
@ -249,6 +307,170 @@ static int intstrcmp(const char *s1, const char *s2)
return 1;
}
/*
 * Run the regex node reg against subject, according to the type recorded in
 * its regex context (reg->node_arg2):
 *   'm': simple match, returns subject if it matches and NULL otherwise;
 *   's': substitution, returns subject with the first match (or every match
 *        with AP_REG_MULTI) replaced by the evaluated regctx->subst;
 *   'S': split, pushes onto list the pieces of subject found between the
 *        matches (each followed by the evaluated regctx->subst, if any).
 *
 * subject: the string to match
 * reg:     node whose node_arg1 is the compiled regex and node_arg2 the
 *          ap_expr_regctx_t
 * list:    where to push the pieces when splitting, or NULL to build a
 *          substituted string instead
 * ctx:     evaluation context (pool, regex captures)
 *
 * When nothing matched and no list is given, subject itself is returned
 * (no copy). When a list is given the returned string is not meaningful.
 */
static const char *ap_expr_regexec(const char *subject,
                                   const ap_expr_t *reg,
                                   apr_array_header_t *list,
                                   ap_expr_eval_ctx_t *ctx)
{
    struct ap_varbuf vb;
    const char *val = subject;   /* where the next match is attempted */
    const char *start = subject; /* first character not emitted yet */
    const ap_regex_t *regex = reg->node_arg1;
    const ap_expr_regctx_t *regctx = reg->node_arg2;
    ap_regmatch_t *pmatch = NULL, match0;
    apr_size_t nmatch = 0;
    const char *str = "";
    apr_size_t len = 0;
    int empty = 0, rv;

    ap_varbuf_init(ctx->p, &vb, 0);
    if (ctx->re_nmatch > 0) {
        nmatch = ctx->re_nmatch;
        pmatch = ctx->re_pmatch;
    }
    else if (regctx->type != 'm') {
        /* Substitution and split need at least the bounds of $0 */
        nmatch = 1;
        pmatch = &match0;
    }

    do {
        /* If previous match was empty, we can't issue the exact same one or
         * we'd loop indefinitely. So let's instead ask for an anchored and
         * non-empty match (i.e. something not empty at the start of the value)
         * and if nothing is found advance by one character below.
         */
        rv = ap_regexec(regex, val, nmatch, pmatch,
                        empty ? AP_REG_ANCHORED | AP_REG_NOTEMPTY : 0);
        if (regctx->type == 'm') {
            /* Simple match "m//", just return whether it matched (subject)
             * or not (NULL)
             */
            return (rv == 0) ? subject : NULL;
        }
        if (rv == 0) {
            /* Substitution "s//" or split "S//" matched.
             * s// => replace $0 with evaluated regctx->subst
             * S// => split at $0 (keeping evaluated regctx->subst if any)
             */
            int pos = pmatch[0].rm_so,
                end = pmatch[0].rm_eo;
            apr_size_t plen;
            AP_DEBUG_ASSERT(pos >= 0 && pos <= end);

            /* What precedes the match is everything not emitted yet, that
             * is the characters skipped after an empty match (if any) plus
             * the ones before $0 in the current value.
             */
            plen = (apr_size_t)(val - start) + (apr_size_t)pos;

            if (regctx->subst) {
                /* The match offsets are relative to val.
                 * NOTE(review): assumes ctx->re_source is never NULL here,
                 * confirm against the callers setting up the context.
                 */
                *ctx->re_source = val;
                str = ap_expr_eval_word(ctx, regctx->subst);
                len = strlen(str);
            }
            /* Splitting makes sense into a given list only, if NULL we fall
             * back into returning a s// string...
             */
            if (list) {
                char *tmp = apr_palloc(ctx->p, plen + len + 1);
                memcpy(tmp, start, plen);
                memcpy(tmp + plen, str, len);
                tmp[plen + len] = '\0';
                APR_ARRAY_PUSH(list, const char*) = tmp;
            }
            else { /* regctx->type == 's' */
                ap_varbuf_grow(&vb, plen + len + 1);
                ap_varbuf_strmemcat(&vb, start, plen);
                ap_varbuf_strmemcat(&vb, str, len);
                if (!(regctx->flags & AP_REG_MULTI)) {
                    /* Single substitution, preserve remaining data */
                    ap_varbuf_strmemcat(&vb, val + end, strlen(val + end));
                    return vb.buf;
                }
            }
            /* Note an empty match */
            empty = (end == 0);
            val += end;
            start = val;
        }
        else if (empty) {
            /* Skip this non-matching character (or CRLF) and restart
             * another "normal" match (possibly empty) from there. Only the
             * matching position moves: start stays behind so that the
             * skipped character(s) still get emitted with the next piece.
             */
            if (val[0] == APR_ASCII_CR && val[1] == APR_ASCII_LF) {
                val += 2;
            }
            else {
                val++;
            }
            empty = 0;
        }
        else {
            /* No (more) match, emit whatever remains */
            if (list) {
                APR_ARRAY_PUSH(list, const char*) = start;
            }
            else if (vb.avail) {
                ap_varbuf_strmemcat(&vb, start, strlen(start));
            }
            else {
                /* Nothing substituted at all, no need to copy */
                return start;
            }
            return vb.buf;
        }
    } while (*val);

    /* End of subject reached: characters skipped after the last (empty)
     * match have not been emitted yet.
     */
    if (*start) {
        if (list) {
            APR_ARRAY_PUSH(list, const char*) = start;
        }
        else {
            ap_varbuf_strmemcat(&vb, start, strlen(start));
        }
    }

    return vb.buf;
}
/*
 * Evaluate a list node into an array of (const char *) strings.
 *
 * Handled nodes:
 *   op_ListRegex:    apply the regex (node_arg2) to each element of the list
 *                    made from node_arg1; a split regex ('S') pushes the
 *                    pieces of each element, any other regex pushes the
 *                    (non-NULL) result of ap_expr_regexec();
 *   op_ListElement:  evaluate each element of the chain as a word;
 *   op_ListFuncCall: call the list function with its evaluated argument;
 *   anything else:   evaluate the node as a word and run the regex found in
 *                    node_arg2 against it, pushing the results.
 *
 * Returns the list, which may be NULL if a list function returned NULL.
 */
static apr_array_header_t *ap_expr_list_make(ap_expr_eval_ctx_t *ctx,
                                             const ap_expr_t *node)
{
    apr_array_header_t *list = NULL;

    if (node->node_op == op_ListRegex) {
        const ap_expr_t *arg = node->node_arg1;
        const ap_expr_t *reg = node->node_arg2;
        const ap_expr_regctx_t *regctx = reg->node_arg2;
        const apr_array_header_t *source = ap_expr_list_make(ctx, arg);
        /* The source list may be NULL (e.g. from a list function), which
         * is handled as an empty list.
         */
        int nelts = source ? source->nelts : 0;
        int i;

        list = apr_array_make(ctx->p, nelts, sizeof(const char*));
        for (i = 0; i < nelts; ++i) {
            const char *val = APR_ARRAY_IDX(source, i, const char*);
            if (regctx->type == 'S') {
                /* Split: the pieces are pushed by ap_expr_regexec() */
                (void)ap_expr_regexec(val, reg, list, ctx);
            }
            else {
                /* Match or substitution: keep the result unless NULL */
                val = ap_expr_regexec(val, reg, NULL, ctx);
                if (val) {
                    APR_ARRAY_PUSH(list, const char*) = val;
                }
            }
        }
    }
    else if (node->node_op == op_ListElement) {
        int n = 0;
        const ap_expr_t *elem;

        /* Count the elements first to size the array at once */
        for (elem = node; elem; elem = elem->node_arg2) {
            AP_DEBUG_ASSERT(elem->node_op == op_ListElement);
            n++;
        }

        list = apr_array_make(ctx->p, n, sizeof(const char*));
        for (elem = node; elem; elem = elem->node_arg2) {
            APR_ARRAY_PUSH(list, const char*) =
                ap_expr_eval_word(ctx, elem->node_arg1);
        }
    }
    else if (node->node_op == op_ListFuncCall) {
        const ap_expr_t *info = node->node_arg1;
        ap_expr_list_func_t *func = info->node_arg1;

        AP_DEBUG_ASSERT(func != NULL);
        AP_DEBUG_ASSERT(info->node_op == op_ListFuncInfo);
        list = (*func)(ctx, info->node_arg2,
                       ap_expr_eval_word(ctx, node->node_arg2));
    }
    else {
        /* NOTE(review): presumably only nodes carrying a regex in node_arg2
         * can reach this point -- confirm against the grammar.
         */
        const char *subject = ap_expr_eval_word(ctx, node);

        list = apr_array_make(ctx->p, 8, sizeof(const char*));
        (void)ap_expr_regexec(subject, node->node_arg2, list, ctx);
    }

    return list;
}
static int ap_expr_eval_comp(ap_expr_eval_ctx_t *ctx, const ap_expr_t *node)
{
const ap_expr_t *e1 = node->node_arg1;
@ -279,30 +501,17 @@ static int ap_expr_eval_comp(ap_expr_eval_ctx_t *ctx, const ap_expr_t *node)
case op_STR_GE:
return (strcmp(ap_expr_eval_word(ctx, e1), ap_expr_eval_word(ctx, e2)) >= 0);
case op_IN: {
const char *needle = ap_expr_eval_word(ctx, e1);
if (e2->node_op == op_ListElement) {
do {
const ap_expr_t *val = e2->node_arg1;
AP_DEBUG_ASSERT(e2->node_op == op_ListElement);
if (strcmp(needle, ap_expr_eval_word(ctx, val)) == 0)
int n;
const char *needle, *subject;
apr_array_header_t *haystack;
haystack = ap_expr_list_make(ctx, e2);
if (haystack) {
needle = ap_expr_eval_word(ctx, e1);
for (n = 0; n < haystack->nelts; ++n) {
subject = APR_ARRAY_IDX(haystack, n, const char*);
if (strcmp(needle, subject) == 0) {
return 1;
e2 = e2->node_arg2;
} while (e2 != NULL);
}
else if (e2->node_op == op_ListFuncCall) {
const ap_expr_t *info = e2->node_arg1;
const ap_expr_t *arg = e2->node_arg2;
ap_expr_list_func_t *func = (ap_expr_list_func_t *)info->node_arg1;
apr_array_header_t *haystack;
AP_DEBUG_ASSERT(func != NULL);
AP_DEBUG_ASSERT(info->node_op == op_ListFuncInfo);
haystack = (*func)(ctx, info->node_arg2, ap_expr_eval_word(ctx, arg));
if (haystack == NULL) {
return 0;
}
if (ap_array_str_contains(haystack, needle)) {
return 1;
}
}
}
return 0;
@ -326,10 +535,7 @@ static int ap_expr_eval_comp(ap_expr_eval_ctx_t *ctx, const ap_expr_t *node)
result = (0 == ap_regexec(regex, word, 0, NULL, 0));
}
if (node->node_op == op_REG)
return result;
else
return !result;
return result ^ (node->node_op == op_NRE);
}
default:
*ctx->err = "Internal evaluation error: Unknown comp expression node";
@ -401,18 +607,13 @@ AP_DECLARE(const char *) ap_expr_parse(apr_pool_t *pool, apr_pool_t *ptemp,
ap_expr_parse_ctx_t ctx;
int rc;
memset(&ctx, 0, sizeof ctx);
ctx.pool = pool;
ctx.ptemp = ptemp;
ctx.inputbuf = expr;
ctx.inputlen = strlen(expr);
ctx.inputptr = ctx.inputbuf;
ctx.expr = NULL;
ctx.error = NULL; /* generic bison error message (XXX: usually not very useful, should be axed) */
ctx.error2 = NULL; /* additional error message */
ctx.flags = info->flags;
ctx.scan_del = '\0';
ctx.scan_buf[0] = '\0';
ctx.scan_ptr = ctx.scan_buf;
ctx.lookup_fn = lookup_fn ? lookup_fn : ap_expr_lookup_default;
ctx.at_start = 1;
@ -420,6 +621,11 @@ AP_DECLARE(const char *) ap_expr_parse(apr_pool_t *pool, apr_pool_t *ptemp,
ap_expr_yyset_extra(&ctx, ctx.scanner);
rc = ap_expr_yyparse(&ctx);
ap_expr_yylex_destroy(ctx.scanner);
/* ctx.error: the generic bison error message
* (XXX: usually not very useful, should be axed)
* ctx.error2: an additional error message
*/
if (ctx.error) {
if (ctx.error2)
return apr_psprintf(pool, "%s: %s", ctx.error, ctx.error2);
@ -464,7 +670,7 @@ AP_DECLARE(ap_expr_info_t*) ap_expr_parse_cmd_mi(const cmd_parms *cmd,
}
ap_expr_t *ap_expr_make(ap_expr_node_op_e op, const void *a1, const void *a2,
ap_expr_parse_ctx_t *ctx)
ap_expr_parse_ctx_t *ctx)
{
ap_expr_t *node = apr_palloc(ctx->pool, sizeof(ap_expr_t));
node->node_op = op;
@ -473,6 +679,100 @@ ap_expr_t *ap_expr_make(ap_expr_node_op_e op, const void *a1, const void *a2,
return node;
}
/*
 * Build the node for the concatenation of a1 and a2.
 *
 * When one side is an op_String node holding the empty string, no op_Concat
 * node is created and the other side is returned as is (a1 is checked
 * first).
 */
ap_expr_t *ap_expr_concat_make(const void *a1, const void *a2,
                               ap_expr_parse_ctx_t *ctx)
{
    const ap_expr_t *lhs = a1;
    const ap_expr_t *rhs = a2;

    /* "" . a2 => a2 */
    if (lhs && lhs->node_op == op_String) {
        const char *s = lhs->node_arg1;
        if (*s == '\0') {
            return (ap_expr_t *)a2;
        }
    }

    /* a1 . "" => a1 */
    if (rhs && rhs->node_op == op_String) {
        const char *s = rhs->node_arg1;
        if (*s == '\0') {
            return (ap_expr_t *)a1;
        }
    }

    return ap_expr_make(op_Concat, a1, a2, ctx);
}
/*
 * Allocate (from the parse pool) an op_Word node wrapping arg, which
 * ap_expr_eval_word() evaluates as the word arg itself.
 */
ap_expr_t *ap_expr_str_word_make(const ap_expr_t *arg,
                                 ap_expr_parse_ctx_t *ctx)
{
    ap_expr_t *word = apr_palloc(ctx->pool, sizeof *word);

    word->node_arg2 = NULL;
    word->node_arg1 = arg;
    word->node_op = op_Word;

    return word;
}
/*
 * Allocate (from the parse pool) an op_Bool node wrapping the condition
 * arg, which ap_expr_eval_word() evaluates to "true" or "false".
 */
ap_expr_t *ap_expr_str_bool_make(const ap_expr_t *arg,
                                 ap_expr_parse_ctx_t *ctx)
{
    ap_expr_t *cond = apr_palloc(ctx->pool, sizeof *cond);

    cond->node_arg2 = NULL;
    cond->node_arg1 = arg;
    cond->node_op = op_Bool;

    return cond;
}
/*
 * Compile pattern and build the corresponding op_Regex node, whose
 * node_arg1 is the compiled regex and node_arg2 its ap_expr_regctx_t.
 *
 * pattern: the regular expression
 * flags:   optional string of flag letters: 'i' (AP_REG_ICASE),
 *          'm' (AP_REG_NEWLINE), 's' (AP_REG_DOTALL), 'g' (AP_REG_MULTI);
 *          any other letter is ignored
 * subst:   substitute expression, or NULL for a simple match ('m')
 * split:   with a non-NULL subst, nonzero selects a split ('S', always
 *          AP_REG_MULTI) rather than a substitution ('s')
 * ctx:     parse context providing the pool
 *
 * Returns NULL if the pattern does not compile.
 */
ap_expr_t *ap_expr_regex_make(const char *pattern, const char *flags,
                              const ap_expr_t *subst, int split,
                              ap_expr_parse_ctx_t *ctx)
{
    ap_expr_regctx_t *rc = apr_palloc(ctx->pool, sizeof *rc);
    ap_regex_t *compiled;
    ap_expr_t *node;
    const char *f;

    rc->subst = subst;
    rc->flags = 0;

    /* The kind of regex depends on the substitute only */
    if (!subst) {
        rc->type = 'm';
    }
    else if (!split) {
        rc->type = 's';
    }
    else {
        rc->type = 'S';
        rc->flags |= AP_REG_MULTI;
    }

    /* Translate the flag letters into AP_REG_* bits */
    for (f = flags; f && *f; ++f) {
        if (*f == 'i') {
            rc->flags |= AP_REG_ICASE;
        }
        else if (*f == 'm') {
            rc->flags |= AP_REG_NEWLINE;
        }
        else if (*f == 's') {
            rc->flags |= AP_REG_DOTALL;
        }
        else if (*f == 'g') {
            rc->flags |= AP_REG_MULTI;
        }
    }

    compiled = ap_pregcomp(ctx->pool, pattern, rc->flags);
    if (!compiled) {
        return NULL;
    }

    node = apr_palloc(ctx->pool, sizeof *node);
    node->node_op = op_Regex;
    node->node_arg1 = compiled;
    node->node_arg2 = rc;

    return node;
}
static ap_expr_t *ap_expr_info_make(int type, const char *name,
ap_expr_parse_ctx_t *ctx,
const ap_expr_t *arg)
@ -533,6 +833,16 @@ ap_expr_t *ap_expr_list_func_make(const char *name, const ap_expr_t *arg,
return ap_expr_make(op_ListFuncCall, info, arg, ctx);
}
/*
 * Allocate (from the parse pool) an op_ListRegex node, applying the regex
 * node reg (node_arg2) to the list node arg (node_arg1).
 */
ap_expr_t *ap_expr_list_regex_make(const ap_expr_t *arg, const ap_expr_t *reg,
                                   ap_expr_parse_ctx_t *ctx)
{
    ap_expr_t *lreg = apr_palloc(ctx->pool, sizeof *lreg);

    lreg->node_arg2 = reg;
    lreg->node_arg1 = arg;
    lreg->node_op = op_ListRegex;

    return lreg;
}
ap_expr_t *ap_expr_unary_op_make(const char *name, const ap_expr_t *arg,
ap_expr_parse_ctx_t *ctx)
{
@ -654,10 +964,15 @@ static void expr_dump_tree(const ap_expr_t *e, const server_rec *s,
case op_IN:
case op_REG:
case op_NRE:
case op_Word:
case op_Bool:
case op_Join:
case op_Regsub:
case op_Concat:
case op_StringFuncCall:
case op_ListFuncCall:
case op_ListElement:
case op_ListRegex:
{
char *name;
switch (e->node_op) {
@ -680,10 +995,15 @@ static void expr_dump_tree(const ap_expr_t *e, const server_rec *s,
CASE_OP(op_IN);
CASE_OP(op_REG);
CASE_OP(op_NRE);
CASE_OP(op_Word);
CASE_OP(op_Bool);
CASE_OP(op_Join);
CASE_OP(op_Regsub);
CASE_OP(op_Concat);
CASE_OP(op_StringFuncCall);
CASE_OP(op_ListFuncCall);
CASE_OP(op_ListElement);
CASE_OP(op_ListRegex);
default:
ap_assert(0);
}
@ -729,8 +1049,8 @@ static void expr_dump_tree(const ap_expr_t *e, const server_rec *s,
DUMP_P("op_Regex", e->node_arg1);
break;
/* arg1: pointer to int */
case op_RegexBackref:
DUMP_IP("op_RegexBackref", e->node_arg1);
case op_Regref:
DUMP_IP("op_Regref", e->node_arg1);
break;
default:
ap_log_error(MARK, "%*sERROR: INVALID OP %d", indent, " ", e->node_op);
@ -769,7 +1089,7 @@ static int ap_expr_eval_binary_op(ap_expr_eval_ctx_t *ctx,
}
static int ap_expr_eval(ap_expr_eval_ctx_t *ctx, const ap_expr_t *node)
static int ap_expr_eval_cond(ap_expr_eval_ctx_t *ctx, const ap_expr_t *node)
{
const ap_expr_t *e1 = node->node_arg1;
const ap_expr_t *e2 = node->node_arg2;
@ -791,13 +1111,13 @@ static int ap_expr_eval(ap_expr_eval_ctx_t *ctx, const ap_expr_t *node)
case op_Or:
do {
if (e1->node_op == op_Not) {
if (!ap_expr_eval(ctx, e1->node_arg1)) {
if (!ap_expr_eval_cond(ctx, e1->node_arg1)) {
result ^= TRUE;
goto out;
}
}
else {
if (ap_expr_eval(ctx, e1)) {
if (ap_expr_eval_cond(ctx, e1)) {
result ^= TRUE;
goto out;
}
@ -809,13 +1129,13 @@ static int ap_expr_eval(ap_expr_eval_ctx_t *ctx, const ap_expr_t *node)
case op_And:
do {
if (e1->node_op == op_Not) {
if (ap_expr_eval(ctx, e1->node_arg1)) {
if (ap_expr_eval_cond(ctx, e1->node_arg1)) {
result ^= FALSE;
goto out;
}
}
else {
if (!ap_expr_eval(ctx, e1)) {
if (!ap_expr_eval_cond(ctx, e1)) {
result ^= FALSE;
goto out;
}
@ -889,7 +1209,7 @@ AP_DECLARE(int) ap_expr_exec_ctx(ap_expr_eval_ctx_t *ctx)
}
}
else {
rc = ap_expr_eval(ctx, ctx->info->root_node);
rc = ap_expr_eval_cond(ctx, ctx->info->root_node);
if (*ctx->err != NULL) {
ap_log_rerror(LOG_MARK(ctx->info), APLOG_ERR, 0, ctx->r,
APLOGNO(03299)