188 lines
5.5 KiB
C++
188 lines
5.5 KiB
C++
// Copyright 2008 The RE2 Authors. All Rights Reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// Exhaustive testing of regular expression matching.
|
|
|
|
// Each test picks an alphabet (e.g., "abc"), a maximum string length,
|
|
// a maximum regular expression length, and a maximum number of letters
|
|
// that can appear in the regular expression. Given these parameters,
|
|
// it tries every possible regular expression and string, verifying that
|
|
// the NFA, DFA, and a trivial backtracking implementation agree about
|
|
// the location of the match.
|
|
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
|
|
#ifndef LOGGING
|
|
#define LOGGING 0
|
|
#endif
|
|
|
|
#include "util/test.h"
|
|
#include "re2/testing/exhaustive_tester.h"
|
|
#include "re2/testing/tester.h"
|
|
|
|
DEFINE_bool(show_regexps, false, "show regexps during testing");
|
|
|
|
DEFINE_int32(max_bad_regexp_inputs, 1,
|
|
"Stop testing a regular expression after finding this many "
|
|
"strings that break it.");
|
|
|
|
// Compiled in debug mode, the usual tests run for over an hour.
|
|
// Have to cut it down to make the unit test machines happy.
|
|
DEFINE_bool(quick_debug_mode, true, "Run fewer tests in debug mode.");
|
|
|
|
namespace re2 {
|
|
|
|
static char* escape(const StringPiece& sp) {
|
|
static char buf[512];
|
|
char* p = buf;
|
|
*p++ = '\"';
|
|
for (int i = 0; i < sp.size(); i++) {
|
|
if(p+5 >= buf+sizeof buf)
|
|
LOG(FATAL) << "ExhaustiveTester escape: too long";
|
|
if(sp[i] == '\\' || sp[i] == '\"') {
|
|
*p++ = '\\';
|
|
*p++ = sp[i];
|
|
} else if(sp[i] == '\n') {
|
|
*p++ = '\\';
|
|
*p++ = 'n';
|
|
} else {
|
|
*p++ = sp[i];
|
|
}
|
|
}
|
|
*p++ = '\"';
|
|
*p = '\0';
|
|
return buf;
|
|
}
|
|
|
|
static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anchor, StringPiece *m, int n) {
|
|
if (!re.Match(input, 0, input.size(), anchor, m, n)) {
|
|
printf("-");
|
|
return;
|
|
}
|
|
for (int i = 0; i < n; i++) {
|
|
if (i > 0)
|
|
printf(" ");
|
|
if (m[i].begin() == NULL)
|
|
printf("-");
|
|
else
|
|
printf("%d-%d", static_cast<int>(m[i].begin() - input.begin()), static_cast<int>(m[i].end() - input.begin()));
|
|
}
|
|
}
|
|
|
|
// Processes a single generated regexp.
|
|
// Compiles it using Regexp interface and PCRE, and then
|
|
// checks that NFA, DFA, and PCRE all return the same results.
|
|
void ExhaustiveTester::HandleRegexp(const string& const_regexp) {
|
|
regexps_++;
|
|
string regexp = const_regexp;
|
|
if (!topwrapper_.empty())
|
|
regexp = StringPrintf(topwrapper_.c_str(), regexp.c_str());
|
|
|
|
if (FLAGS_show_regexps) {
|
|
printf("\r%s", regexp.c_str());
|
|
fflush(stdout);
|
|
}
|
|
|
|
if (LOGGING) {
|
|
// Write out test cases and answers for use in testing
|
|
// other implementations, such as Go's regexp package.
|
|
if (randomstrings_)
|
|
LOG(ERROR) << "Cannot log with random strings.";
|
|
if (regexps_ == 1) { // first
|
|
printf("strings\n");
|
|
strgen_.Reset();
|
|
while (strgen_.HasNext())
|
|
printf("%s\n", escape(strgen_.Next()));
|
|
printf("regexps\n");
|
|
}
|
|
printf("%s\n", escape(regexp));
|
|
|
|
RE2 re(regexp);
|
|
RE2::Options longest;
|
|
longest.set_longest_match(true);
|
|
RE2 relongest(regexp, longest);
|
|
int ngroup = re.NumberOfCapturingGroups()+1;
|
|
StringPiece* group = new StringPiece[ngroup];
|
|
|
|
strgen_.Reset();
|
|
while (strgen_.HasNext()) {
|
|
StringPiece input = strgen_.Next();
|
|
PrintResult(re, input, RE2::ANCHOR_BOTH, group, ngroup);
|
|
printf(";");
|
|
PrintResult(re, input, RE2::UNANCHORED, group, ngroup);
|
|
printf(";");
|
|
PrintResult(relongest, input, RE2::ANCHOR_BOTH, group, ngroup);
|
|
printf(";");
|
|
PrintResult(relongest, input, RE2::UNANCHORED, group, ngroup);
|
|
printf("\n");
|
|
}
|
|
delete[] group;
|
|
return;
|
|
}
|
|
|
|
Tester tester(regexp);
|
|
if (tester.error())
|
|
return;
|
|
|
|
strgen_.Reset();
|
|
strgen_.GenerateNULL();
|
|
if (randomstrings_)
|
|
strgen_.Random(stringseed_, stringcount_);
|
|
int bad_inputs = 0;
|
|
while (strgen_.HasNext()) {
|
|
tests_++;
|
|
if (!tester.TestInput(strgen_.Next())) {
|
|
failures_++;
|
|
if (++bad_inputs >= FLAGS_max_bad_regexp_inputs)
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Runs an exhaustive test on the given parameters.
|
|
void ExhaustiveTest(int maxatoms, int maxops,
|
|
const vector<string>& alphabet,
|
|
const vector<string>& ops,
|
|
int maxstrlen, const vector<string>& stralphabet,
|
|
const string& wrapper,
|
|
const string& topwrapper) {
|
|
if (DEBUG_MODE && FLAGS_quick_debug_mode) {
|
|
if (maxatoms > 1)
|
|
maxatoms--;
|
|
if (maxops > 1)
|
|
maxops--;
|
|
if (maxstrlen > 1)
|
|
maxstrlen--;
|
|
}
|
|
ExhaustiveTester t(maxatoms, maxops, alphabet, ops,
|
|
maxstrlen, stralphabet, wrapper,
|
|
topwrapper);
|
|
t.Generate();
|
|
if (!LOGGING) {
|
|
printf("%d regexps, %d tests, %d failures [%d/%d str]\n",
|
|
t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size());
|
|
}
|
|
EXPECT_EQ(0, t.failures());
|
|
}
|
|
|
|
// Runs an exhaustive test using the given parameters and
|
|
// the basic egrep operators.
|
|
void EgrepTest(int maxatoms, int maxops, const string& alphabet,
|
|
int maxstrlen, const string& stralphabet,
|
|
const string& wrapper) {
|
|
const char* tops[] = { "", "^(?:%s)", "(?:%s)$", "^(?:%s)$" };
|
|
|
|
for (int i = 0; i < arraysize(tops); i++) {
|
|
ExhaustiveTest(maxatoms, maxops,
|
|
Split("", alphabet),
|
|
RegexpGenerator::EgrepOps(),
|
|
maxstrlen,
|
|
Split("", stralphabet),
|
|
wrapper,
|
|
tops[i]);
|
|
}
|
|
}
|
|
|
|
} // namespace re2
|