// Definition of the static member that caches whether the PCRE library can
// handle UTF-8 subjects. It starts as Unknown; the RegExp constructor replaces
// it with Supported or Unsupported after asking pcre_config().
// Only compiled when the PCRE headers define PCRE_CONFIG_UTF8.
34 #ifdef PCRE_CONFIG_UTF8
35 RegExp::UTF8SupportState RegExp::utf8Support = RegExp::Unknown;
// Constructs a regular expression from the pattern text `p` and the flag bits
// `f` (tested below against IgnoreCase and Multiline).
//
// The initializer list stores the pattern in `pat` and the flags in `flgs`,
// clears m_notEmpty, marks the object valid, and nulls the `buffer` and
// `originalPos` pointers.
// The body then compiles the pattern either with PCRE (HAVE_PCREPOSIX) or with
// POSIX regcomp().
38 RegExp::RegExp(
const UString &p,
int f)
39 : pat(p), flgs(f), m_notEmpty(false), valid(true), buffer(0), originalPos(0)
// One-time probe: while the cached state is still Unknown, ask PCRE whether it
// supports UTF-8 and remember the answer in the static utf8Support.
// NOTE(review): `supported` is declared outside the lines shown here.
42 #ifdef PCRE_CONFIG_UTF8
43 if (utf8Support == Unknown) {
45 pcre_config(PCRE_CONFIG_UTF8, (
void*)&supported);
46 utf8Support = supported ? Supported : Unsupported;
// `nil` is the four-character escape text \x00 (backslash, 'x', '0', '0').
// NOTE(review): presumably substituted for NUL characters in the pattern --
// the use site is not visible here; confirm.
58 const char*
const nil =
"\\x00";
// Walk the pattern one UTF-16 unit at a time.
61 for (
int i = 0; i < p.
size(); ++i) {
// Collect up to four hex digits: each pass checks that the unit after
// position i is a hex digit and folds its value into `u`, four bits at a time.
// NOTE(review): this looks like \uXXXX escape decoding; the code that advances
// `i` between digits and initialises `u` is not visible here -- confirm.
71 for (j = 0; j < 4; ++j) {
72 if (i + 1 < p.
size() && Lexer::isHexDigit(p[i + 1].unicode())) {
73 u = (u << 4) + Lexer::convertHex(p[i + 1].unicode());
// Diagnostic to stderr reporting how many digits (j) were seen.
78 fprintf(stderr,
"KJS: saw %d digit \\u sequence.\n", j);
// ---- PCRE compilation path ----
130 #ifdef HAVE_PCREPOSIX
132 const char *perrormsg;
// Translate the object's flags into PCRE compile options.
135 if (flgs & IgnoreCase)
136 pcreflags |= PCRE_CASELESS;
138 if (flgs & Multiline)
139 pcreflags |= PCRE_MULTILINE;
// When UTF-8 is available, compile in UTF-8 mode and tell PCRE not to run its
// own UTF-8 validity check on the pattern.
141 #ifdef PCRE_CONFIG_UTF8
142 if (utf8Support == Supported)
143 pcreflags |= (PCRE_UTF8 | PCRE_NO_UTF8_CHECK);
// pcre_compile() below reads the member `buffer`, so prepareMatch(intern) is
// expected to have filled it with the converted pattern text.
// NOTE(review): `intern` is built on lines not shown here; confirm.
148 prepareMatch(intern);
150 pcregex = pcre_compile(buffer, pcreflags,
151 &perrormsg, &errorOffset, NULL);
// Report PCRE's error message on stderr.
155 fprintf(stderr,
"KJS: pcre_compile() failed with '%s'\n", perrormsg);
// Ask PCRE how many capturing groups the pattern has. The count lands in
// nrSubPatterns, which match() uses to size and translate its offset vector.
161 #ifdef PCRE_INFO_CAPTURECOUNT
163 int rc = pcre_fullinfo( pcregex, NULL, PCRE_INFO_CAPTURECOUNT, &nrSubPatterns);
// ---- POSIX regcomp path ----
// NOTE(review): presumably the #else side of HAVE_PCREPOSIX; the directive
// itself is not visible here.
172 regflags |= REG_EXTENDED;
// Uses the raw parameter `f`; flgs was initialised from the same value above.
175 if ( f & IgnoreCase )
176 regflags |= REG_ICASE;
// Compile the 8-bit form of `intern` into the member `preg`.
184 int errorCode = regcomp(&preg, intern.
ascii(), regflags);
185 if (errorCode != 0) {
// regerror() writes at most sizeof errorMessage bytes into the local buffer.
187 char errorMessage[80];
188 regerror(errorCode, &preg, errorMessage,
sizeof errorMessage);
189 fprintf(stderr,
"KJS: regcomp failed with '%s'\n", errorMessage);
199 #ifdef HAVE_PCREPOSIX
// Converts the UTF-16 string `s` into a UTF-8 byte sequence stored in the
// member `buffer`, sets `bufferSize` to the number of bytes written, and fills
// the member `originalPos`.
// match() uses originalPos to turn byte offsets reported by PCRE back into
// positions that it compares against indices into `s`.
// Both arrays are allocated with new[]; doneMatch() releases them.
208 void RegExp::prepareUtf8(
const UString& s)
211 const int length = s.
size();
// Each 16-bit unit becomes at most three bytes in the branches below.
// NOTE(review): the extra byte is presumably for a terminating NUL -- confirm.
212 buffer =
new char[length * 3 + 1];
// Sized for up to three entries per source unit plus two extra slots.
216 originalPos =
new int[length * 3 + 2];
// posOut is the write cursor into originalPos.
222 int *posOut = originalPos;
// NOTE(review): `p` (write cursor into buffer) and `d` (presumably the unit
// array of `s`) are declared on lines not shown here.
224 for (
int i = 0; i != length; ++i) {
225 unsigned short c = d[i].
unicode();
231 }
else if (c < 0x800) {
// Two-byte form: lead byte 110xxxxx holds the bits above the low six, and the
// continuation byte 10xxxxxx holds the low six bits. (c | 0x80) & 0xBF sets
// bit 7 and clears bit 6; the char cast drops everything above bit 7.
232 *p++ = (char)((c >> 6) | 0xC0);
233 *p++ = (char)((c | 0x80) & 0xBF);
// Three-byte form: lead byte 1110xxxx followed by two continuation bytes.
236 *p++ = (char)((c >> 12) | 0xE0);
237 *p++ = (char)(((c >> 6) | 0x80) & 0xBF);
238 *p++ = (char)((c | 0x80) & 0xBF);
// NOTE(review): loop body not visible; presumably writes one originalPos entry
// per byte just emitted -- confirm.
242 while (sequenceLen > 0) {
// Number of UTF-8 bytes produced.
249 bufferSize = p - buffer;
// Stores length+1 in the slot after the one posOut points at.
// NOTE(review): looks like an end marker for the originalPos scans in
// match(); confirm.
255 *(posOut+1) = length+1;
// Fills the member `buffer` with an 8-bit, NUL-terminated copy of the text
// held in `truncated`, and records its length in `bufferSize`.
// NOTE(review): `truncated` is declared on lines not shown here; presumably it
// is an 8-bit conversion of `s` -- confirm.
// originalPos is not assigned in the visible lines of this function.
258 void RegExp::prepareASCII (
const UString& s)
// One extra byte for the terminator written below.
266 buffer =
new char[truncated.size() + 1];
267 memcpy(buffer, truncated.c_str(), truncated.size());
268 buffer[truncated.size()] =
'\0';
269 bufferSize = truncated.size();
// Prepares the subject string `s` for matching.
// The constructor calls this before pcre_compile() reads `buffer`.
272 void RegExp::prepareMatch(
const UString &s)
// Drop any position table left over from an earlier call.
// delete[] on a null pointer is a no-op.
274 delete[] originalPos;
// NOTE(review): the branch bodies are not visible here; presumably this picks
// prepareUtf8(s) when UTF-8 is supported and prepareASCII(s) otherwise --
// confirm.
276 #ifdef PCRE_CONFIG_UTF8
277 if (utf8Support == Supported)
// Releases the per-match working storage: frees the originalPos table and the
// converted subject `buffer`, then resets both pointers to null so that later
// delete[] calls on them are harmless.
288 void RegExp::doneMatch()
290 delete[] originalPos; originalPos = 0;
291 delete[] buffer; buffer = 0;
// Runs the compiled expression against `s`, starting at position `i`.
//
//   s        subject string; asserted to share storage with `originalS`
//   i        start position within `s`
//   pos      out: receives the start position of the match
//   ovector  optional out: receives a new[]-allocated array of start/end
//            offsets for the whole match and its sub-patterns
//
// The POSIX branch returns the matched substring of `s`.
// NOTE(review): the PCRE branch's return statements and the no-match returns
// are on lines not shown here.
// NOTE(review): no delete[] for *ovector is visible, so the caller presumably
// owns it -- confirm.
294 UString RegExp::match(
const UString &s,
int i,
int *pos,
int **ovector)
// The subject must be the same storage as `originalS`.
// NOTE(review): where originalS is assigned is not visible here.
297 assert(s.
data() == originalS.data());
// ---- PCRE path ----
// Three ints per (sub)pattern, counting the whole match as one more pattern.
312 #ifdef HAVE_PCREPOSIX
313 int ovecsize = (nrSubPatterns+1)*3;
314 if (ovector) *ovector =
new int[ovecsize];
// In UTF-8 mode `i` has to be converted to a byte offset into `buffer`: scan
// originalPos until the entry reaches i (startPos) and i + 1 (nextPos).
// NOTE(review): the loop bodies are not visible; presumably ++startPos and
// ++nextPos.
321 #ifdef PCRE_CONFIG_UTF8
322 if (utf8Support == Supported) {
324 while (originalPos[startPos] < i)
328 while (originalPos[nextPos] < (i + 1))
// baseFlags: skip PCRE's UTF-8 validity check when running in UTF-8 mode.
338 #ifdef PCRE_CONFIG_UTF8
339 utf8Support == Supported ? PCRE_NO_UTF8_CHECK :
// First attempt at startPos. With m_notEmpty set, the match must be non-empty
// and anchored at startPos.
342 if (pcre_exec(pcregex, NULL, buffer, bufferSize, startPos,
343 m_notEmpty ? (PCRE_NOTEMPTY | PCRE_ANCHORED | baseFlags) : baseFlags,
344 ovector ? *ovector : 0L, ovecsize) == PCRE_ERROR_NOMATCH)
// For a global expression in m_notEmpty mode, retry from the next position
// (nextPos) with only baseFlags.
347 if ((flgs & Global) && m_notEmpty && ovector)
353 fprintf(stderr,
"No match after m_notEmpty. +1 and keep going.\n");
356 if (pcre_exec(pcregex, NULL, buffer, bufferSize, nextPos, baseFlags,
357 ovector ? *ovector : 0L, ovecsize) == PCRE_ERROR_NOMATCH)
// Map the offsets PCRE reported through originalPos. This covers the 2 *
// (nrSubPatterns + 1) start/end slots; -1 (unset group) is left untouched.
366 if (ovector && originalPos) {
367 for (
unsigned c = 0; c < 2 * (nrSubPatterns + 1); ++c) {
368 if ((*ovector)[c] != -1)
369 (*ovector)[c] = originalPos[(*ovector)[c]];
// ---- POSIX regexec path ----
// At most maxMatch (10) match slots are requested from regexec().
376 const uint maxMatch = 10;
377 regmatch_t rmatch[maxMatch];
// NOTE(review): `str` comes from strdup(); verify free(str) runs on every exit
// path (the frees are not visible here).
379 char *str = strdup(s.
ascii());
// Match from offset i; regexec() returns nonzero when there is no match.
380 if (regexec(&preg, str + i, maxMatch, rmatch, 0)) {
// Offsets from regexec() are relative to str + i, so add i back.
// NOTE(review): presumably the branch taken when the caller passed no
// ovector -- the guarding condition is not visible here.
387 *pos = rmatch[0].rm_so + i;
388 return s.
substr(rmatch[0].rm_so + i, rmatch[0].rm_eo - rmatch[0].rm_so);
// Walk the filled match slots; slots from the first negative rm_so on are
// unused.
393 for (uint j = 0; j < maxMatch && rmatch[j].rm_so >= 0 ; j++) {
// Detects an empty (zero-length) match while m_notEmpty is set.
397 if (m_notEmpty && rmatch[j].rm_so == rmatch[j].rm_eo)
// Always report at least the whole-match entry.
401 if (nrSubPatterns == 0) nrSubPatterns = 1;
// Build the caller's offset vector from the regmatch_t slots, shifted by i.
403 int ovecsize = (nrSubPatterns)*3;
404 *ovector =
new int[ovecsize];
405 for (uint j = 0; j < nrSubPatterns; j++) {
406 (*ovector)[2*j] = rmatch[j].rm_so + i;
407 (*ovector)[2*j+1] = rmatch[j].rm_eo + i;
// Report the start of the match and detect an empty match for a global
// expression.
// NOTE(review): the action taken for that case is not visible here.
411 *pos = (*ovector)[0];
412 if ( *pos == (*ovector)[1] && (flgs & Global) )
// The matched text is the span between the first start/end pair.
417 return s.
substr((*ovector)[0], (*ovector)[1] - (*ovector)[0]);
// Checks `s` against the compiled expression.
// The second parameter is unnamed and therefore unused.
// NOTE(review): the return statements are on lines not shown here.
421 bool RegExp::test(
const UString &s,
int)
// PCRE path: match from offset 0 with no option flags, using a 300-entry
// offset vector.
// NOTE(review): `buffer` is used here with .c_str()/.size(), unlike the char*
// member assigned in prepareUtf8()/prepareASCII(). It is presumably a local
// string object declared on a line not shown -- confirm.
423 #ifdef HAVE_PCREPOSIX
428 pcre_exec(pcregex, NULL, buffer.c_str(), buffer.size(), 0,
429 0, ovector, 300) == PCRE_ERROR_NOMATCH)
// POSIX path: only the match/no-match result is needed, so no match slots are
// passed to regexec().
// NOTE(review): `str` comes from strdup(); verify it is freed (not visible
// here).
436 char *str = strdup(s.
ascii());
437 int r = regexec(&preg, str, 0, 0, 0);