I have been debugging this issue for some time and found that NEsper incorrectly handles a WHERE ... regexp '...' clause. For example, if I have
SELECT *
FROM MyType
WHERE PropertyA regexp 'some valid regexp'
Then NEsper does some string formatting and validation with 'some valid regexp' and removes some important (and valid) symbols from regexp.
Proposed changes. (I don't really know the code but I tried to fix this small issue for me, this is not necessarily the best approach):
-------
1. com.espertech.esper.epl.expression.ExprRegexpNode
public object Evaluate(EventBean[] eventsPerStream, bool isNewData, ExprEvaluatorContext exprEvaluatorContext)
{...}
Two occurrences of
_pattern = new Regex(String.Format("^{0}$", patternText));
change to
_pattern = new Regex(patternText);
Reason: I think it is up to the user how the regexp is constructed (including whether it is anchored with ^ and $); this should not be imposed by the framework.
-------
2. com.espertech.esper.epl.parse.ASTConstantHelper
public static Object Parse(ITree node){...}
From
case EsperEPL2GrammarParser.STRING_TYPE:
{
return StringValue.ParseString(node.Text);
}
To
case EsperEPL2GrammarParser.STRING_TYPE:
{
bool requireUnescape = true;
if (node.Parent != null)
{
if (!String.IsNullOrEmpty(node.Parent.Text))
{
if (node.Parent.Text == "regexp")
{
requireUnescape = false;
}
}
}
return StringValue.ParseString(node.Text, requireUnescape);
}
Reason: requireUnescape stays true for all strings except the regexp pattern, because unescaping breaks a valid regexp by removing some valid symbols from it.
-------
3. com.espertech.esper.type.StringValue
public static String ParseString(String value)
{...}
From
public static String ParseString(String value)
{
if ((value.StartsWith("\"")) & (value.EndsWith("\"")) || (value.StartsWith("'")) & (value.EndsWith("'")))
{
if (value.Length > 1)
{
if (value.IndexOf('\\') != -1)
{
return Unescape(value.Substring(1, value.Length - 2));
}
return value.Substring(1, value.Length - 2);
}
}
throw new ArgumentException("String value of '" + value + "' cannot be parsed");
}
To
public static String ParseString(String value, bool requireUnescape = true)
{
if ((value.StartsWith("\"")) & (value.EndsWith("\"")) || (value.StartsWith("'")) & (value.EndsWith("'")))
{
if (value.Length > 1)
{
if (requireUnescape)
{
if (value.IndexOf('\\') != -1)
{
return Unescape(value.Substring(1, value.Length - 2));
}
}
return value.Substring(1, value.Length - 2);
}
}
throw new ArgumentException("String value of '" + value + "' cannot be parsed");
}
Reason: unescape all strings except the regexp value.
I have been debugging this issue for some time and found that NEsper incorrectly handles a WHERE ... regexp '...' clause. For example, if I have
SELECT *
FROM MyType
WHERE PropertyA regexp 'some valid regexp'
Then NEsper does some string formatting and validation with 'some valid regexp' and removes some important (and valid) symbols from regexp.
Proposed changes. (I don't really know the code but I tried to fix this small issue for me, this is not necessarily the best approach):
-------
1. com.espertech.esper.epl.expression.ExprRegexpNode
public object Evaluate(EventBean[] eventsPerStream, bool isNewData, ExprEvaluatorContext exprEvaluatorContext)
{...}
Two occurrences of
_pattern = new Regex(String.Format("^{0}$", patternText));
change to
_pattern = new Regex(patternText);
Reason: I think it is up to the user how the regexp is constructed (including whether it is anchored with ^ and $); this should not be imposed by the framework.
-------
2. com.espertech.esper.epl.parse.ASTConstantHelper
public static Object Parse(ITree node){...}
From
case EsperEPL2GrammarParser.STRING_TYPE:
{
return StringValue.ParseString(node.Text);
}
To
case EsperEPL2GrammarParser.STRING_TYPE:
{
bool requireUnescape = true;
if (node.Parent != null)
{
if (!String.IsNullOrEmpty(node.Parent.Text))
{
if (node.Parent.Text == "regexp")
{
requireUnescape = false;
}
}
}
return StringValue.ParseString(node.Text, requireUnescape);
}
Reason: requireUnescape stays true for all strings except the regexp pattern, because unescaping breaks a valid regexp by removing some valid symbols from it.
-------
3. com.espertech.esper.type.StringValue
public static String ParseString(String value)
{...}
From
public static String ParseString(String value)
{
if ((value.StartsWith("\"")) & (value.EndsWith("\"")) || (value.StartsWith("'")) & (value.EndsWith("'")))
{
if (value.Length > 1)
{
if (value.IndexOf('\\') != -1)
{
return Unescape(value.Substring(1, value.Length - 2));
}
return value.Substring(1, value.Length - 2);
}
}
throw new ArgumentException("String value of '" + value + "' cannot be parsed");
}
To
public static String ParseString(String value, bool requireUnescape = true)
{
if ((value.StartsWith("\"")) & (value.EndsWith("\"")) || (value.StartsWith("'")) & (value.EndsWith("'")))
{
if (value.Length > 1)
{
if (requireUnescape)
{
if (value.IndexOf('\\') != -1)
{
return Unescape(value.Substring(1, value.Length - 2));
}
}
return value.Substring(1, value.Length - 2);
}
}
throw new ArgumentException("String value of '" + value + "' cannot be parsed");
}
Reason: unescape all strings except the regexp value.