ます’s Blog - どうでもいい記事100選

どうでもいい記事100選

マルチバイト対応正規表現関数(mb_ereg系)でUTF-8の文字列が正しく処理できない件について(続き)

昨日の件ですが。
論より証拠、という事で差分を晒してみたいと思います。
まずは、Ruby1.6.8とRuby1.8.5-p2との差分。

--- ruby-1.6.8/regex.c	2002-11-19 22:36:29.000000000 +0900
+++ ruby-1.8.5-p2/regex.c	2006-08-07 12:43:42.000000000 +0900
@@ -12,7 +12,7 @@
    Library General Public License for more details.
 
    You should have received a copy of the GNU Library General Public
-   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   License along with the GNU C Library; see the file LGPL.  If not,
    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
    Boston, MA 02111-1307, USA.  */
 /* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto)
@@ -51,7 +51,7 @@
 # include <sys/types.h>
 #endif
 
-#ifndef __STDC__
+#if !defined(__STDC__) && !defined(_MSC_VER)
 # define volatile
 #endif
 
@@ -69,19 +69,11 @@
 extern int rb_trap_pending;
 void rb_trap_exec _((void));
 
-# define CHECK_INTS if (!rb_prohibit_interrupt) {\
-    if (rb_trap_pending) rb_trap_exec();\
-}
-
-#define xmalloc ruby_xmalloc
-#define xcalloc ruby_xcalloc
-#define xrealloc ruby_xrealloc
-#define xfree ruby_xfree
-
-void *xmalloc _((size_t));
-void *xcalloc _((size_t,size_t));
-void *xrealloc _((void*,size_t));
-void xfree _((void*));
+# define CHECK_INTS do {\
+    if (!rb_prohibit_interrupt) {\
+	if (rb_trap_pending) rb_trap_exec();\
+    }\
+} while (0)
 #endif
 
 /* Make alloca work the best possible way.  */
@@ -92,16 +84,19 @@
 #  endif
 # endif /* atarist */
 #else
-# if defined(HAVE_ALLOCA_H)
+# ifdef HAVE_ALLOCA_H
 #  include <alloca.h>
-# elif !defined(alloca)
-char *alloca();
-# endif
-#endif /* __GNUC__ */
+# else
+#  ifdef _AIX
+ #pragma alloca
+#  else
+#   ifndef alloca /* predefined by HP cc +Olibcalls */
+void *alloca ();
+#   endif
+#  endif /* AIX */
+# endif /* HAVE_ALLOCA_H */
 
-#ifdef _AIX
-#pragma alloca
-#endif
+#endif /* __GNUC__ */
 
 #ifdef HAVE_STRING_H
 # include <string.h>
@@ -193,6 +188,12 @@
 
 #ifdef RUBY
 #include "util.h"
+void rb_warn _((const char*, ...));
+# define re_warning(x) rb_warn(x)
+#endif
+
+#ifndef re_warning
+# define re_warning(x)
 #endif
 
 static void
@@ -429,7 +430,6 @@
     return 0;
 }
 
-
 /* Macros for re_compile_pattern, which is found below these definitions.  */
 
 #define TRANSLATE_P() ((options&RE_OPTION_IGNORECASE) && translate)
@@ -478,6 +478,19 @@
 #define WC2MBC1ST(c)							\
  ((current_mbctype != MBCTYPE_UTF8) ? ((c<0x100) ? (c) : (((c)>>8)&0xff)) : utf8_firstbyte(c))
 
+typedef unsigned int (*mbc_startpos_func_t) _((const char *string, unsigned int pos));
+
+static unsigned int asc_startpos _((const char *string, unsigned int pos));
+static unsigned int euc_startpos _((const char *string, unsigned int pos));
+static unsigned int sjis_startpos _((const char *string, unsigned int pos));
+static unsigned int utf8_startpos _((const char *string, unsigned int pos));
+
+static const mbc_startpos_func_t mbc_startpos_func[4] = {
+  asc_startpos, euc_startpos, sjis_startpos, utf8_startpos
+};
+
+#define mbc_startpos(start, pos) (*mbc_startpos_func[current_mbctype])((start), (pos))
+
 static unsigned int
 utf8_firstbyte(c)
      unsigned long c;
@@ -547,7 +560,7 @@
    reset the pointers that pointed into the old allocation to point to
    the correct places in the new allocation.  If extending the buffer
    results in it being larger than 1 << 16, then flag memory exhausted.  */
-#define EXTEND_BUFFER							\
+#define EXTEND_BUFFER						\
   do { char *old_buffer = bufp->buffer;					\
     if (bufp->allocated == (1L<<16)) goto too_big;			\
     bufp->allocated *= 2;						\
@@ -694,7 +707,18 @@
 }
 
 static int
-is_in_list(c, b)
+is_in_list_sbc(c, b)
+    unsigned long c;
+    const unsigned char *b;
+{
+  unsigned short size;
+
+  size = *b++;
+  return ((int)c / BYTEWIDTH < (int)size && b[c / BYTEWIDTH] & 1 << c % BYTEWIDTH);
+}
+  
+static int
+is_in_list_mbc(c, b)
     unsigned long c;
     const unsigned char *b;
 {
@@ -702,9 +726,6 @@
   unsigned short i, j;
 
   size = *b++;
-  if ((int)c / BYTEWIDTH < (int)size && b[c / BYTEWIDTH] & 1 << c % BYTEWIDTH) {
-    return 1;
-  }
   b += size + 2;
   size = EXTRACT_UNSIGNED(&b[-2]);
   if (size == 0) return 0;
@@ -719,9 +740,18 @@
   }
   if (i < size && EXTRACT_MBC(&b[i*8]) <= c)
     return 1;
+
   return 0;
 }
 
+static int
+is_in_list(c, b)
+    unsigned long c;
+    const unsigned char *b;
+{
+  return is_in_list_sbc(c, b) || (current_mbctype ? is_in_list_mbc(c, b) : 0);
+}
+
 static void
 print_partial_compiled_pattern(start, end)
     unsigned char *start;
@@ -828,7 +858,7 @@
 	  unsigned bit;
 	  unsigned char map_byte = p[c];
 
-	  putchar ('/');
+	  putchar('/');
 
 	  for (bit = 0; bit < BYTEWIDTH; bit++)
 	    if (map_byte & (1 << bit))
@@ -836,10 +866,10 @@
 	}
 	p += mcnt;
 	mcnt = EXTRACT_UNSIGNED_AND_INCR(p);
-	printf("/");
+	putchar('/');
 	while (mcnt--) {
 	  print_mbc(EXTRACT_MBC_AND_INCR(p));
-	  printf("-");
+	  putchar('-');
 	  print_mbc(EXTRACT_MBC_AND_INCR(p));
 	}
 	break;
@@ -984,8 +1014,8 @@
 {
   int mcnt;
   int max = 0;
-  char *p = start;
-  char *pend = end;
+  unsigned char *p = start;
+  unsigned char *pend = end;
   char *must = 0;
 
   if (start == NULL) return 0;
@@ -1011,6 +1041,7 @@
       break;
 
     case duplicate:
+    case option_set:
       p++;
       break;
 
@@ -1036,7 +1067,6 @@
     case push_dummy_failure:
     case start_paren:
     case stop_paren:
-    case option_set:
       break;
 
     case charset:
@@ -1053,7 +1083,7 @@
       EXTRACT_NUMBER_AND_INCR(mcnt, p);
       if (mcnt > 0) p += mcnt;
       if ((enum regexpcode)p[-3] == jump) {
-       p -= 2;
+	p -= 2;
 	EXTRACT_NUMBER_AND_INCR(mcnt, p);
 	if (mcnt > 0) p += mcnt;
       }
@@ -1135,7 +1165,7 @@
     PATFETCH_RAW(c);
     *pp = p;
     if (c == '\\') {
-      return read_special(p, pend, pp) | 0x80;
+      return read_special(--p, pend, pp) | 0x80;
     }
     else if (c == -1) return ~0;
     else {
@@ -1149,12 +1179,13 @@
     PATFETCH_RAW(c);
     *pp = p;
     if (c == '\\') {
-      c = read_special(p, pend, pp);
+      c = read_special(--p, pend, pp);
     }
     else if (c == '?') return 0177;
     else if (c == -1) return ~0;
     return c & 0x9f;
   default:
+    *pp = p + 1;
     return read_backslash(c);
   }
 
@@ -1272,13 +1303,9 @@
 
   if (bufp->allocated == 0) {
     bufp->allocated = INIT_BUF_SIZE;
-    if (bufp->buffer)
-      /* EXTEND_BUFFER loses when bufp->allocated is 0.  */
-      bufp->buffer = (char*)xrealloc(bufp->buffer, INIT_BUF_SIZE);
-    else
-      /* Caller did not allocate a buffer.  Do it for them.  */
-      bufp->buffer = (char*)xmalloc(INIT_BUF_SIZE);
-    if (!bufp->buffer) goto memory_exhausted;
+    /* EXTEND_BUFFER loses when bufp->allocated is 0.  */
+    bufp->buffer = (char*)xrealloc(bufp->buffer, INIT_BUF_SIZE);
+    if (!bufp->buffer) goto memory_exhausted; /* this not happen */
     begalt = b = bufp->buffer;
   }
 
@@ -1430,8 +1457,7 @@
 	int size;
 	unsigned last = (unsigned)-1;
 
-	if ((size = EXTRACT_UNSIGNED(&b[(1 << BYTEWIDTH) / BYTEWIDTH]))
-	    || current_mbctype) {
+	if ((size = EXTRACT_UNSIGNED(&b[(1 << BYTEWIDTH) / BYTEWIDTH])) || current_mbctype) {
 	  /* Ensure the space is enough to hold another interval
 	     of multi-byte chars in charset(_not)?.  */
 	  size = (1 << BYTEWIDTH) / BYTEWIDTH + 2 + size*8 + 8;
@@ -1442,12 +1468,13 @@
 	if (range && had_char_class) {
 	  FREE_AND_RETURN(stackb, "invalid regular expression; can't use character class as an end value of range");
 	}
-	PATFETCH(c);
+	PATFETCH_RAW(c);
 
 	if (c == ']') {
 	  if (p == p0 + 1) {
 	    if (p == pend)
 	      FREE_AND_RETURN(stackb, "invalid regular expression; empty character class");
+            re_warning("character class has `]' without escape");
 	  }
 	  else 
 	    /* Stop if this isn't merely a ] inside a bracket
@@ -1465,6 +1492,13 @@
 	}
 	had_char_class = 0;
 
+	if (c == '-' && ((p != p0 + 1 && *p != ']') ||
+                         (p[0] == '-' && p[1] != ']') ||
+                         range))
+          re_warning("character class has `-' without escape");
+        if (c == '[' && *p != ':')
+          re_warning("character class has `[' without escape");
+
 	/* \ escapes characters when inside [...].  */
 	if (c == '\\') {
 	  PATFETCH_RAW(c);
@@ -1547,7 +1581,7 @@
 	  case 'C':
 	  case 'c':
 	    {
-	      char *pp;
+	      const char *pp;
 
 	      --p;
 	      c = read_special(p, pend, &pp);
@@ -1566,32 +1600,7 @@
 	    break;
 	  }
 	}
-
-	/* Get a range.  */
-	if (range) {
-	  if (last > c)
-	    goto invalid_pattern;
-
-	  range = 0;
-	  if (had_mbchar == 0) {
-	    for (;last<=c;last++)
-	      SET_LIST_BIT(last);
-	  }
-	  else if (had_mbchar == 2) {
-	    set_list_bits(last, c, b);
-	  }
-	  else {
-	    /* restriction: range between sbc and mbc */
-	    goto invalid_pattern;
-	  }
-	}
-	else if (p[0] == '-' && p[1] != ']') {
-	  last = c;
-	  PATFETCH(c1);
-	  range = 1;
-	  goto range_retry;
-	}
-	else if (c == '[' && *p == ':') {
+        else if (c == '[' && *p == ':') { /* [:...:] */
 	  /* Leave room for the null.  */
 	  char str[CHAR_CLASS_MAX_LENGTH + 1];
 
@@ -1603,7 +1612,7 @@
 	    FREE_AND_RETURN(stackb, "invalid regular expression; re can't end '[[:'");
 
 	  for (;;) {
-	    PATFETCH (c);
+	    PATFETCH_RAW(c);
 	    if (c == ':' || c == ']' || p == pend
 		|| c1 == CHAR_CLASS_MAX_LENGTH)
 	      break;
@@ -1611,9 +1620,9 @@
 	  }
 	  str[c1] = '\0';
 
-	  /* If isn't a word bracketed by `[:' and:`]':
-	     undo the ending character, the letters, and leave 
-	     the leading `:' and `[' (but set bits for them).  */
+	  /* If isn't a word bracketed by `[:' and `:]':
+	     undo the ending character, the letters, and
+	     the leading `:' and `['.  */
 	  if (c == ':' && *p == ']') {
 	    int ch;
 	    char is_alnum = STREQ(str, "alnum");
@@ -1657,23 +1666,57 @@
 		SET_LIST_BIT(ch);
 	    }
 	    had_char_class = 1;
+            continue;
 	  }
 	  else {
-	    c1++;
+	    c1 += 2;
 	    while (c1--)    
 	      PATUNFETCH;
-	    SET_LIST_BIT(TRANSLATE_P()?translate['[']:'[');
-	    SET_LIST_BIT(TRANSLATE_P()?translate[':']:':');
-	    had_char_class = 0;
-	    last = ':';
+            re_warning("character class has `[' without escape");
+            c = '[';
 	  }
 	}
-	else if (had_mbchar == 0 && (!current_mbctype || !had_num_literal)) {
-	  SET_LIST_BIT(c);
-	  had_num_literal = 0;
+
+	/* Get a range.  */
+	if (range) {
+	  if (last > c)
+	    goto invalid_pattern;
+
+	  range = 0;
+	  if (had_mbchar == 0) {
+	    if (TRANSLATE_P()) {
+	      for (;last<=c;last++) 
+		SET_LIST_BIT(translate[last]);
+	    }
+	    else {
+	      for (;last<=c;last++) 
+		SET_LIST_BIT(last);
+	    }
+	  }
+	  else if (had_mbchar == 2) {
+	    set_list_bits(last, c, b);
+	  }
+	  else {
+	    /* restriction: range between sbc and mbc */
+	    goto invalid_pattern;
+	  }
+	}
+	else if (p[0] == '-' && p[1] != ']') {
+	  last = c;
+	  PATFETCH_RAW(c1);
+	  range = 1;
+	  goto range_retry;
+	}
+	else {
+	  if (TRANSLATE_P() && c < 0x100) c = (unsigned char)translate[c];
+	  if (had_mbchar == 0 && (!current_mbctype || !had_num_literal)) {
+	    SET_LIST_BIT(c);
+	    had_num_literal = 0;
+	  }
+	  else {
+	    set_list_bits(c, c, b);
+	  }
 	}
-	else
-	  set_list_bits(c, c, b);
 	had_mbchar = 0;
       }
 
@@ -1682,9 +1725,10 @@
       while ((int)b[-1] > 0 && b[b[-1] - 1] == 0) 
 	b[-1]--; 
       if (b[-1] != (1 << BYTEWIDTH) / BYTEWIDTH)
-	memmove(&b[b[-1]], &b[(1 << BYTEWIDTH) / BYTEWIDTH],
+	memmove(&b[(unsigned char)b[-1]], &b[(1 << BYTEWIDTH) / BYTEWIDTH],
 		2 + EXTRACT_UNSIGNED(&b[(1 << BYTEWIDTH) / BYTEWIDTH])*8);
-      b += b[-1] + 2 + EXTRACT_UNSIGNED(&b[b[-1]])*8;
+      b += b[-1] + 2 + EXTRACT_UNSIGNED(&b[(unsigned char)b[-1]])*8;
+      had_num_literal = 0;
       break;
 
     case '(':
@@ -1693,155 +1737,143 @@
 	int push_option = 0;
 	int casefold = 0;
 
-      PATFETCH(c);
-      if (c == '?') {
-	int negative = 0;
-
-	PATFETCH_RAW(c);
-	switch (c) {
-	case 'x': case 'p': case 'm': case 'i': case '-':
-	  for (;;) {
-	    switch (c) {
-	    case '-':
-	      negative = 1;
-	      break;
-
-	    case ':':
-	    case ')':
-	      break;
-
-	    case 'x':
-	      if (negative)
-		options &= ~RE_OPTION_EXTENDED;
-	      else
-		options |= RE_OPTION_EXTENDED;
-	      break;
+	PATFETCH(c);
+	if (c == '?') {
+	  int negative = 0;
 
-	    case 'p':
-	      if (negative) {
-		if ((options&RE_OPTION_POSIXLINE) == RE_OPTION_POSIXLINE) {
-		  options &= ~RE_OPTION_POSIXLINE;
+	  PATFETCH_RAW(c);
+	  switch (c) {
+	  case 'x': case 'm': case 'i': case '-':
+	    for (;;) {
+	      switch (c) {
+	      case '-':
+		negative = 1;
+		break;
+
+	      case ':':
+	      case ')':
+		break;
+
+	      case 'x':
+		if (negative)
+		  options &= ~RE_OPTION_EXTENDED;
+		else
+		  options |= RE_OPTION_EXTENDED;
+		break;
+
+	      case 'm':
+		if (negative) {
+		  if (options&RE_OPTION_MULTILINE) {
+		    options &= ~RE_OPTION_MULTILINE;
+		  }
 		}
-	      }
-	      else if ((options&RE_OPTION_POSIXLINE) != RE_OPTION_POSIXLINE) {
-		options |= RE_OPTION_POSIXLINE;
-	      }
-	      push_option = 1;
-	      break;
-
-	    case 'm':
-	      if (negative) {
-		if (options&RE_OPTION_MULTILINE) {
-		  options &= ~RE_OPTION_MULTILINE;
+		else if (!(options&RE_OPTION_MULTILINE)) {
+		  options |= RE_OPTION_MULTILINE;
 		}
-	      }
-	      else if (!(options&RE_OPTION_MULTILINE)) {
-		options |= RE_OPTION_MULTILINE;
-	      }
-	      push_option = 1;
-	      break;
+		push_option = 1;
+		break;
 
-	    case 'i':
-	      if (negative) {
-		if (options&RE_OPTION_IGNORECASE) {
-		  options &= ~RE_OPTION_IGNORECASE;
+	      case 'i':
+		if (negative) {
+		  if (options&RE_OPTION_IGNORECASE) {
+		    options &= ~RE_OPTION_IGNORECASE;
+		  }
+		}
+		else if (!(options&RE_OPTION_IGNORECASE)) {
+		  options |= RE_OPTION_IGNORECASE;
 		}
-	      }
-	      else if (!(options&RE_OPTION_IGNORECASE)) {
-		options |= RE_OPTION_IGNORECASE;
-	      }
 		casefold = 1;
-	      break;
+		break;
 
-	    default:
-	      FREE_AND_RETURN(stackb, "undefined (?...) inline option");
-	    }
-	    if (c == ')') {
-	      c = '#';	/* read whole in-line options */
-	      break;
+	      default:
+		FREE_AND_RETURN(stackb, "undefined (?...) inline option");
+	      }
+	      if (c == ')') {
+		c = '#';	/* read whole in-line options */
+		break;
+	      }
+	      if (c == ':') break;
+	      PATFETCH_RAW(c);
 	    }
-	    if (c == ':') break;
-	    PATFETCH_RAW(c);
-	  }
-	  break;
+	    break;
 
-	case '#':
-	  for (;;) {
-	    PATFETCH(c);
-	    if (c == ')') break;
-	  }
-	  c = '#';
-	  break;
+	  case '#':
+	    for (;;) {
+	      PATFETCH(c);
+	      if (c == ')') break;
+	    }
+	    c = '#';
+	    break;
 
-	case ':':
-	case '=':
-	case '!':
-	case '>':
-	  break;
+	  case ':':
+	  case '=':
+	  case '!':
+	  case '>':
+	    break;
 
-	default:
-	  FREE_AND_RETURN(stackb, "undefined (?...) sequence");
-	}
+	  default:
+	    FREE_AND_RETURN(stackb, "undefined (?...) sequence");
+	  }
 	}
 	else {
 	  PATUNFETCH;
 	  c = '(';
 	}
 	if (c == '#') {
-	if (push_option) {
-	  BUFPUSH(option_set);
-	  BUFPUSH(options);
-	}
+	  if (push_option) {
+	    BUFPUSH(option_set);
+	    BUFPUSH(options);
+	  }
 	  if (casefold) {
 	    if (options & RE_OPTION_IGNORECASE)
 	      BUFPUSH(casefold_on);
 	    else
 	      BUFPUSH(casefold_off);
-      }
+	  }
 	  break;
-      }
-      if (stackp+8 >= stacke) {
-	DOUBLE_STACK(int);
-      }
-
-      /* Laststart should point to the start_memory that we are about
-	 to push (unless the pattern has RE_NREGS or more ('s).  */
-      /* obsolete: now RE_NREGS is just a default register size. */
-      *stackp++ = b - bufp->buffer;    
-      *stackp++ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
-      *stackp++ = begalt - bufp->buffer;
-      switch (c) {
-      case '(':
-	BUFPUSH(start_memory);
-	BUFPUSH(regnum);
-	*stackp++ = regnum++;
-	*stackp++ = b - bufp->buffer;
-	BUFPUSH(0);
-	/* too many ()'s to fit in a byte. (max 254) */
-	if (regnum >= RE_REG_MAX) goto too_big;
-	break;
+	}
+	if (stackp+8 >= stacke) {
+	  DOUBLE_STACK(int);
+	}
 
-      case '=':
-      case '!':
-      case '>':
-	BUFPUSH(start_nowidth);
-	*stackp++ = b - bufp->buffer;
-	BUFPUSH(0);	/* temporary value */
-	BUFPUSH(0);
-	if (c != '!') break;
+	/* Laststart should point to the start_memory that we are about
+	   to push (unless the pattern has RE_NREGS or more ('s).  */
+	/* obsolete: now RE_NREGS is just a default register size. */
+	*stackp++ = b - bufp->buffer;    
+	*stackp++ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
+	*stackp++ = begalt - bufp->buffer;
+	switch (c) {
+	case '(':
+	  BUFPUSH(start_memory);
+	  BUFPUSH(regnum);
+	  *stackp++ = regnum++;
+	  *stackp++ = b - bufp->buffer;
+	  BUFPUSH(0);
+	  /* too many ()'s to fit in a byte. (max 254) */
+	  if (regnum >= RE_REG_MAX) goto too_big;
+	  break;
 
-	BUFPUSH(on_failure_jump);
-	*stackp++ = b - bufp->buffer;
-	BUFPUSH(0);	/* temporary value */
-	BUFPUSH(0);
-	break;
+	case '=':
+	case '!':
+	case '>':
+	  BUFPUSH(start_nowidth);
+	  *stackp++ = b - bufp->buffer;
+	  BUFPUSH(0);	/* temporary value */
+	  BUFPUSH(0);
+	  if (c != '!') break;
+
+	  BUFPUSH(on_failure_jump);
+	  *stackp++ = b - bufp->buffer;
+	  BUFPUSH(0);	/* temporary value */
+	  BUFPUSH(0);
+	  break;
 
-      case ':':
-	BUFPUSH(start_paren);
-	pending_exact = 0;
-      default:
-	break;
-      }
+	case ':':
+	  BUFPUSH(start_paren);
+	  pending_exact = 0;
+	default:
+	  break;
+	}
 	if (push_option) {
 	  BUFPUSH(option_set);
 	  BUFPUSH(options);
@@ -1852,11 +1884,11 @@
 	  else
 	    BUFPUSH(casefold_off);
 	}
-      *stackp++ = c;
-      *stackp++ = old_options;
-      fixup_alt_jump = 0;
-      laststart = 0;
-      begalt = b;
+	*stackp++ = c;
+	*stackp++ = old_options;
+	fixup_alt_jump = 0;
+	laststart = 0;
+	begalt = b;
       }
       break;
 
@@ -2154,6 +2186,7 @@
 
     unfetch_interval:
       /* If an invalid interval, match the characters as literals.  */
+      re_warning("regexp has invalid interval");
       p = beg_interval;
       beg_interval = 0;
 
@@ -2205,9 +2238,9 @@
 	while ((int)b[-1] > 0 && b[b[-1] - 1] == 0) 
 	  b[-1]--; 
 	if (b[-1] != (1 << BYTEWIDTH) / BYTEWIDTH)
-	  memmove(&b[b[-1]], &b[(1 << BYTEWIDTH) / BYTEWIDTH],
+	  memmove(&b[(unsigned char)b[-1]], &b[(1 << BYTEWIDTH) / BYTEWIDTH],
 		  2 + EXTRACT_UNSIGNED(&b[(1 << BYTEWIDTH) / BYTEWIDTH])*8);
-	b += b[-1] + 2 + EXTRACT_UNSIGNED(&b[b[-1]])*8;
+	b += b[-1] + 2 + EXTRACT_UNSIGNED(&b[(unsigned char)b[-1]])*8;
 	break;
 
       case 'w':
@@ -2277,22 +2310,22 @@
       case '1': case '2': case '3':
       case '4': case '5': case '6':
       case '7': case '8': case '9':
-	  PATUNFETCH;
+	PATUNFETCH;
 	p0 = p;
 
-	  had_mbchar = 0;
-	  c1 = 0;
-	  GET_UNSIGNED_NUMBER(c1);
-	  if (!ISDIGIT(c)) PATUNFETCH;
+	had_mbchar = 0;
+	c1 = 0;
+	GET_UNSIGNED_NUMBER(c1);
+	if (!ISDIGIT(c)) PATUNFETCH;
 
 	if (9 < c1 && c1 >= regnum) {
-	    /* need to get octal */
+	  /* need to get octal */
 	  c = scan_oct(p0, 3, &numlen) & 0xff;
 	  p = p0 + numlen;
-	    c1 = 0;
-	    had_num_literal = 1;
-	    goto numeric_char;
-	  }
+	  c1 = 0;
+	  had_num_literal = 1;
+	  goto numeric_char;
+	}
 
 	laststart = b;
 	BUFPUSH(duplicate);
@@ -2334,6 +2367,10 @@
 	break;
 
     default:
+      if (c == ']')
+        re_warning("regexp has `]' without escape");
+      else if (c == '}')
+        re_warning("regexp has `}' without escape");
     normal_char:		/* Expects the character in `c'.  */
       had_mbchar = 0;
       if (ismbchar(c)) {
@@ -2344,9 +2381,10 @@
       nextp = p + mbclen(c) - 1;
       if (!pending_exact || pending_exact + *pending_exact + 1 != b
 	  || *pending_exact >= (c1 ? 0176 : 0177)
-	  || *nextp == '+' || *nextp == '?'
-	  || *nextp == '*' || *nextp == '^'
-	  || *nextp == '{') {
+	  || (nextp < pend &&
+	      (   *nextp == '+' || *nextp == '?'
+	       || *nextp == '*' || *nextp == '^'
+	       || *nextp == '{'))) {
 	laststart = b;
 	BUFPUSH(exactn);
 	pending_exact = b;
@@ -2379,7 +2417,6 @@
   /* set optimize flags */
   laststart = bufp->buffer;
   if (laststart != b) {
-    if (*laststart == start_memory) laststart += 3;
     if (*laststart == dummy_failure_jump) laststart += 3;
     else if (*laststart == try_next) laststart += 3;
     if (*laststart == anychar_repeat) {
@@ -2591,9 +2628,9 @@
 #define trans_eq(c1, c2, translate) (translate?(translate[c1]==translate[c2]):((c1)==(c2)))
 static int
 slow_match(little, lend, big, bend, translate)
-     unsigned char *little, *lend;
-     unsigned char *big, *bend;
-     unsigned char *translate;
+     const unsigned char *little, *lend;
+     const unsigned char *big, *bend;
+     const unsigned char *translate;
 {
   int c;
 
@@ -2609,14 +2646,14 @@
 
 static int
 slow_search(little, llen, big, blen, translate)
-     unsigned char *little;
+     const unsigned char *little;
      int llen;
-     unsigned char *big;
+     const unsigned char *big;
      int blen;
-     char *translate;
+     const char *translate;
 {
-  unsigned char *bsave = big;
-  unsigned char *bend = big + blen;
+  const unsigned char *bsave = big;
+  const unsigned char *bend = big + blen;
   register int c;
   int fescape = 0;
 
@@ -2686,12 +2723,12 @@
 
 static int
 bm_search(little, llen, big, blen, skip, translate)
-     unsigned char *little;
+     const unsigned char *little;
      int llen;
-     unsigned char *big;
+     const unsigned char *big;
      int blen;
      int *skip;
-     unsigned char *translate;
+     const unsigned char *translate;
 {
   int i, j, k;
 
@@ -2798,8 +2835,11 @@
 
       case casefold_on:
 	bufp->options |= RE_MAY_IGNORECASE;
+	options |= RE_OPTION_IGNORECASE;
+	continue;
+
       case casefold_off:
-	options ^= RE_OPTION_IGNORECASE;
+	options &= ~RE_OPTION_IGNORECASE;
 	continue;
 
       case option_set:
@@ -2889,6 +2929,7 @@
 
       case duplicate:
 	bufp->can_be_null = 1;
+	if (*p >= bufp->re_nsub) break;
 	fastmap['\n'] = 1;
       case anychar_repeat:
       case anychar:
@@ -3080,27 +3121,18 @@
 
   /* Adjust startpos for mbc string */
   if (current_mbctype && startpos>0 && !(bufp->options&RE_OPTIMIZE_BMATCH)) {
-    int i = 0;
+    int i = mbc_startpos(string, startpos);
 
-    if (range > 0) {
-      while (i<size) {
-	i += mbclen(string[i]);
-	if (startpos <= i) {
-	  startpos = i;
-	  break;
-	}
+    if (i < startpos) {
+      if (range > 0) {
+	startpos = i + mbclen(string[i]);
       }
-    }
-    else {
-      int w;
-
-      while (i<size) {
-	w = mbclen(string[i]);
-	if (startpos < i + w) {
+      else {
+	int len = mbclen(string[i]);
+	if (i + len <= startpos)
+	  startpos = i + len;
+	else
 	  startpos = i;
-	  break;
-	}
-	i += w;
       }
     }
   }
@@ -3108,6 +3140,9 @@
 }
 
 
+static int re_match_exec _((struct re_pattern_buffer *, const char *, int, int, int,
+			    struct re_registers *));
+
 /* Using the compiled pattern in BUFP->buffer, first tries to match
    STRING, starting first at index STARTPOS, then at STARTPOS + 1, and
    so on.  RANGE is the number of places to try before giving up.  If
@@ -3128,7 +3163,7 @@
      struct re_registers *regs;
 {
   register char *fastmap = bufp->fastmap;
-  int val, anchor = 0;
+  int val, anchor = 0, initpos = startpos;
 
   /* Check for out-of-range starting position.  */
   if (startpos < 0  ||  startpos > size)
@@ -3170,7 +3205,7 @@
     }
   }
   if (bufp->options & RE_OPTIMIZE_ANCHOR) {
-    if (bufp->options&RE_OPTION_SINGLELINE) {
+    if (bufp->options&RE_OPTION_MULTILINE && range > 0) {
       goto begbuf_match;
     }
     anchor = 1;
@@ -3257,7 +3292,7 @@
     if (startpos > size) return -1;
     if ((anchor || !bufp->can_be_null) && range > 0 && size > 0 && startpos == size)
       return -1;
-    val = re_match(bufp, string, size, startpos, regs);
+    val = re_match_exec(bufp, string, size, startpos, initpos, regs);
     if (val >= 0) return startpos;
     if (val == -2) return -2;
 
@@ -3492,6 +3527,16 @@
      int size, pos;
      struct re_registers *regs;
 {
+  return re_match_exec(bufp, string_arg, size, pos, pos, regs);
+}
+
+static int
+re_match_exec(bufp, string_arg, size, pos, beg, regs)
+     struct re_pattern_buffer *bufp;
+     const char *string_arg;
+     int size, pos, beg;
+     struct re_registers *regs;
+{
   register unsigned char *p = (unsigned char*)bufp->buffer;
   unsigned char *p1;
 
@@ -3821,19 +3866,25 @@
 	  int cc, c;
 
 	  PREFETCH;
-	  cc = c = (unsigned char)*d++;
+	  c = (unsigned char)*d++;
 	  if (ismbchar(c)) {
 	    if (d + mbclen(c) - 1 <= dend) {
+	      cc = c;
 	      MBC2WC(c, d);
+	      not = is_in_list_mbc(c, p);
+	      if (!not) {
+		part = not = is_in_list_sbc(cc, p);
+	      }
+	    } else {
+	      not = is_in_list(c, p);
 	    }
 	  }
-	  else if (TRANSLATE_P())
-	    cc = c = (unsigned char)translate[c];
-
-	  not = is_in_list(c, p);
-	  if (!not && cc != c) {
-	      part = not = is_in_list(cc, p);
+	  else {
+	    if (TRANSLATE_P())
+	      c = (unsigned char)translate[c];
+	    not = is_in_list(c, p);
 	  }
+
 	  if (*(p - 1) == (unsigned char)charset_not) {
 	    not = !not;
 	  }
@@ -3855,8 +3906,7 @@
 
       case endline:
 	if (AT_STRINGS_END(d)) {
-	  if (size == 0 || d[-1] != '\n')
-	    break;
+	  break;
 	}
 	else if (*d == '\n')
 	  break;
@@ -3877,8 +3927,7 @@
 	/* Match at the very end of the data. */
       case endbuf2:
 	if (AT_STRINGS_END(d)) {
-	  if (size == 0 || d[-1] != '\n')
-	    break;
+	  break;
 	}
 	/* .. or newline just before the end of the data. */
 	if (*d == '\n' && AT_STRINGS_END(d+1))
@@ -3903,7 +3952,7 @@
 
 	/* Match at the starting position. */
       case begpos:
-	if (d - string == pos)
+	if (d - string == beg)
 	  break;
 	goto fail;
 
@@ -4411,7 +4460,7 @@
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 };
 
 static const unsigned char mbctab_euc[] = { /* 0xA1-0xFE */
@@ -4423,17 +4472,17 @@
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
 };
 
-static const unsigned char mbctab_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */
+static const unsigned char mbctab_sjis[] = { /* 0x81-0x9F,0xE0-0xFC */
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -4442,14 +4491,33 @@
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
+};
+
+static const unsigned char mbctab_sjis_trail[] = { /* 0x40-0x7E,0x80-0xFC */
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
 };
 
 static const unsigned char mbctab_utf8[] = {
@@ -4468,7 +4536,7 @@
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 0, 0
+  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 0, 0,
 };
 
 const unsigned char *re_mbctab = mbctab_ascii;
@@ -4496,3 +4564,85 @@
     break;
   }
 }
+
+#define mbc_isfirst(t, c) (t)[(unsigned char)(c)]
+#define mbc_len(t, c)     ((t)[(unsigned char)(c)]+1)
+
+static unsigned int
+asc_startpos(string, pos)
+     const char *string;
+     unsigned int pos;
+{
+  return pos;
+}
+
+#define euc_islead(c)  ((unsigned char)((c) - 0xa1) > 0xfe - 0xa1)
+#define euc_mbclen(c)  mbc_len(mbctab_euc, (c))
+static unsigned int
+euc_startpos(string, pos)
+     const char *string;
+     unsigned int pos;
+{
+  unsigned int i = pos, w;
+
+  while (i > 0 && !euc_islead(string[i])) {
+    --i;
+  }
+  if (i == pos || i + (w = euc_mbclen(string[i])) > pos) {
+    return i;
+  }
+  i += w;
+  return i + ((pos - i) & ~1);
+}
+
+#define sjis_isfirst(c) mbc_isfirst(mbctab_sjis, (c))
+#define sjis_istrail(c) mbctab_sjis_trail[(unsigned char)(c)]
+#define sjis_mbclen(c)  mbc_len(mbctab_sjis, (c))
+static unsigned int
+sjis_startpos(string, pos)
+     const char *string;
+     unsigned int pos;
+{
+  unsigned int i = pos, w;
+
+  if (i > 0 && sjis_istrail(string[i])) {
+    do {
+      if (!sjis_isfirst(string[--i])) {
+	++i;
+	break;
+      }
+    } while (i > 0);
+  }
+  if (i == pos || i + (w = sjis_mbclen(string[i])) > pos) {
+    return i;
+  }
+  i += w;
+  return i + ((pos - i) & ~1);
+}
+
+#define utf8_islead(c)  ((unsigned char)((c) & 0xc0) != 0x80)
+#define utf8_mbclen(c)  mbc_len(mbctab_utf8, (c))
+static unsigned int
+utf8_startpos(string, pos)
+     const char *string;
+     unsigned int pos;
+{
+  unsigned int i = pos, w;
+
+  while (i > 0 && !utf8_islead(string[i])) {
+    --i;
+  }
+  if (i == pos || i + (w = utf8_mbclen(string[i])) > pos) {
+    return i;
+  }
+  return i + w;
+}
+
+/*
+  vi: sw=2 ts=8
+  Local variables:
+  mode		 : C
+  c-file-style	 : "gnu"
+  tab-width	 : 8
+  End		 :
+*/
--- ruby-1.6.8/regex.h	2001-12-29 01:56:11.000000000 +0900
+++ ruby-1.8.5-p2/regex.h	2003-08-03 18:22:50.000000000 +0900
@@ -23,8 +23,8 @@
    Last change: May 21, 1993 by t^2  */
 /* modified for Ruby by matz@netlab.co.jp */
 
-#ifndef __REGEXP_LIBRARY
-#define __REGEXP_LIBRARY
+#ifndef REGEX_H
+#define REGEX_H
 
 /* symbol mangling for ruby */
 #ifdef RUBY
@@ -73,8 +73,6 @@
 #define RE_OPTION_MULTILINE  (RE_OPTION_EXTENDED<<1)
 /* ^ and $ ignore newline */
 #define RE_OPTION_SINGLELINE (RE_OPTION_MULTILINE<<1)
-/* works line Perl's /s; it's called POSIX for wrong reason */
-#define RE_OPTION_POSIXLINE  (RE_OPTION_MULTILINE|RE_OPTION_SINGLELINE)
 /* search for longest match, in accord with POSIX regexp */
 #define RE_OPTION_LONGEST    (RE_OPTION_SINGLELINE<<1)
 
@@ -90,13 +88,10 @@
 #define MBCTYPE_SJIS 2
 #define MBCTYPE_UTF8 3
 
-#if defined IMPORT || defined USEIMPORTLIB
-extern __declspec(dllimport)
-#elif defined EXPORT
-extern __declspec(dllexport)
-#else
 extern
-#endif
+#if defined _WIN32 && !defined __GNUC__ && !defined RUBY_EXPORT
+__declspec(dllimport)
+# endif
 const unsigned char *re_mbctab;
 #if defined(__STDC__)
 void re_mbcinit (int);
@@ -187,7 +182,6 @@
   regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
 } regmatch_t;
 
-
 #ifdef __STDC__
 
 extern char *re_compile_pattern (const char *, int, struct re_pattern_buffer *);
@@ -224,4 +218,4 @@
 
 #endif /* __STDC__ */
 
-#endif /* !__REGEXP_LIBRARY */
+#endif /* !REGEX_H */

続いて、PHP4.4.4(ext/mbstring/mbregex/mbregex.c)とRuby1.6.8(regex.c)との差分。

--- php-4.4.4/ext/mbstring/mbregex/mbregex.c	2003-10-25 20:58:46.000000000 +0900
+++ ruby-1.6.8/regex.c	2002-11-19 22:36:29.000000000 +0900
@@ -21,23 +21,7 @@
 /* Perl5 extension added by matz <matz@caelum.co.jp> */
 /* UTF-8 extension added Jan 16 1999 by Yoshida Masato  <yoshidam@tau.bekkoame.ne.jp> */
 
-#include "php.h"
-
-#ifdef HAVE_CONFIG_H
 #include "config.h"
-#endif
-
-#if HAVE_MBREGEX
-
-#define re_compile_pattern mbre_compile_pattern
-#define re_free_pattern mbre_free_pattern
-#define re_adjust_startpos mbre_adjust_startpos
-#define re_compile_fastmap mbre_compile_fastmap
-#define re_search mbre_search
-#define re_match mbre_match
-#define re_set_casetable mbre_set_casetable
-#define re_copy_registers mbre_copy_registers
-#define re_free_registers mbre_free_registers
 
 #ifdef HAVE_STRING_H
 # include <string.h>
@@ -100,13 +84,6 @@
 void xfree _((void*));
 #endif
 
-
-#define xmalloc emalloc
-#define xcalloc ecalloc
-#define xrealloc erealloc
-#define xfree efree
-
-
 /* Make alloca work the best possible way.  */
 #ifdef __GNUC__
 # ifndef atarist
@@ -185,19 +162,16 @@
   } while (0)
 
 /* Get the interface, including the syntax bits.  */
-#include "mbregex.h"
+#include "regex.h"
 
 /* Subroutines for re_compile_pattern.  */
 static void store_jump _((char*, int, char*));
 static void insert_jump _((int, char*, char*, char*));
 static void store_jump_n _((char*, int, char*, unsigned));
 static void insert_jump_n _((int, char*, char*, char*, unsigned));
-#if 0
 static void insert_op _((int, char*, char*));
-#endif
 static void insert_op_2 _((int, char*, char*, int, int));
-static int memcmp_translate _((unsigned char*, unsigned char*, int, const unsigned char*));
-static const unsigned char* re_mbctab_get _((int));
+static int memcmp_translate _((unsigned char*, unsigned char*, int));
 
 /* Define the syntax stuff, so we can do the \<, \>, etc.  */
 
@@ -208,137 +182,39 @@
 
 #define SYNTAX(c) re_syntax_table[c]
 
-static const char casetable[] = {
-        '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
-        '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
-        '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
-        '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
-        /* ' '     '!'     '"'     '#'     '$'     '%'     '&'     ''' */
-        '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
-        /* '('     ')'     '*'     '+'     ','     '-'     '.'     '/' */
-        '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
-        /* '0'     '1'     '2'     '3'     '4'     '5'     '6'     '7' */
-        '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
-        /* '8'     '9'     ':'     ';'     '<'     '='     '>'     '?' */
-        '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
-        /* '@'     'A'     'B'     'C'     'D'     'E'     'F'     'G' */
-        '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
-        /* 'H'     'I'     'J'     'K'     'L'     'M'     'N'     'O' */
-        '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
-        /* 'P'     'Q'     'R'     'S'     'T'     'U'     'V'     'W' */
-        '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
-        /* 'X'     'Y'     'Z'     '['     '\'     ']'     '^'     '_' */
-        '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
-        /* '`'     'a'     'b'     'c'     'd'     'e'     'f'     'g' */
-        '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
-        /* 'h'     'i'     'j'     'k'     'l'     'm'     'n'     'o' */
-        '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
-        /* 'p'     'q'     'r'     's'     't'     'u'     'v'     'w' */
-        '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
-        /* 'x'     'y'     'z'     '{'     '|'     '}'     '~' */
-        '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
-        '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
-        '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
-        '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
-        '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
-        '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
-        '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
-        '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
-        '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
-        '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
-        '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
-        '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
-        '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
-        '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
-        '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
-        '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
-        '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
-};
-
 static char re_syntax_table[256];
 static void init_syntax_once _((void));
-static const unsigned char *translate = (const unsigned char*)casetable;
-static void init_regs _((struct mbre_registers*, unsigned int));
+static const unsigned char *translate = 0;
+static void init_regs _((struct re_registers*, unsigned int));
 static void bm_init_skip _((int *, unsigned char*, int, const unsigned char*));
-#if 0
 static int current_mbctype = MBCTYPE_ASCII;
-#endif
 
 #undef P
 
-static unsigned long
-scan_oct(start, len, retlen)
-const char *start;
-int len;
-int *retlen;
-{
-    register const char *s = start;
-    register unsigned long retval = 0;
-
-    while (len-- && *s >= '0' && *s <= '7') {
-	retval <<= 3;
-	retval |= *s++ - '0';
-    }
-    *retlen = s - start;
-    return retval;
-}
-
-static unsigned long
-scan_hex(start, len, retlen)
-const char *start;
-int len;
-int *retlen;
-{
-    static char hexdigit[] = "0123456789abcdef0123456789ABCDEFx";
-    register const char *s = start;
-    register unsigned long retval = 0;
-    char *tmp;
-
-    while (len-- && *s && (tmp = strchr(hexdigit, *s))) {
-	retval <<= 4;
-	retval |= (tmp - hexdigit) & 15;
-	s++;
-    }
-    *retlen = s - start;
-    return retval;
-}
+#ifdef RUBY
+#include "util.h"
+#endif
 
-#define rt re_syntax_table
 static void
 init_syntax_once()
 {
-	register int c;
-	static int done = 0;
+   register int c;
+   static int done = 0;
 
-#ifdef ZTS
-	extern MUTEX_T mbregex_locale_mutex;
-#endif
+   if (done)
+     return;
 
-	if (done) {
-		return;
-	}
-#ifdef ZTS
-	tsrm_mutex_lock( mbregex_locale_mutex );
-#endif
-
-	memset(re_syntax_table, 0, sizeof(re_syntax_table));
-
-	for (c=0; c<=0x7f; c++) {
-		if (isalnum(c)) { 
-			re_syntax_table[c] = Sword;
-		}
-	}
-	re_syntax_table['_'] = Sword;
+   memset(re_syntax_table, 0, sizeof re_syntax_table);
 
-	for (c=0x80; c<=0xff; c++) {
-		if (isalnum(c)) { 
-			re_syntax_table[c] = Sword2;
-		}
-	}
-#ifdef ZTS
-	tsrm_mutex_unlock( mbregex_locale_mutex );
-#endif
-	done = 1;
+   for (c=0; c<=0x7f; c++)
+     if (isalnum(c)) 
+       re_syntax_table[c] = Sword;
+   re_syntax_table['_'] = Sword;
+
+   for (c=0x80; c<=0xff; c++)
+     if (isalnum(c)) 
+       re_syntax_table[c] = Sword2;
+   done = 1;
 }
 
 void
@@ -477,11 +353,11 @@
     start_memory, /* Start remembering the text that is matched, for
 		    storing in a memory register.  Followed by one
                     byte containing the register number.  Register numbers
-                    must be in the range 0 through MBRE_NREGS.  */
+                    must be in the range 0 through RE_NREGS.  */
     stop_memory, /* Stop remembering the text that is matched
 		    and store it in a memory register.  Followed by
                     one byte containing the register number. Register
-                    numbers must be in the range 0 through MBRE_NREGS.  */
+                    numbers must be in the range 0 through RE_NREGS.  */
     start_paren,    /* Place holder at the start of (?:..). */
     stop_paren,    /* Place holder at the end of (?:..). */
     casefold_on,   /* Turn on casefold flag. */
@@ -494,7 +370,6 @@
     duplicate,   /* Match a duplicate of something remembered.
 		    Followed by one byte containing the index of the memory 
                     register.  */
-    fail,        /* always fails. */
     wordchar,    /* Matches any word-constituent character.  */
     notwordchar, /* Matches any char that is not a word-constituent.  */
     wordbeg,	 /* Succeeds if at word beginning.  */
@@ -545,7 +420,7 @@
 
    The argument SYNTAX is a bit-mask comprised of the various bits
    defined in regex.h.  */
-#if 0
+
 long
 re_set_syntax(syntax)
   long syntax;
@@ -553,12 +428,12 @@
     /* obsolete */
     return 0;
 }
-#endif
+
 
 /* Macros for re_compile_pattern, which is found below these definitions.  */
 
-#define TRANSLATE_P() ((options&MBRE_OPTION_IGNORECASE) && translate)
-#define MAY_TRANSLATE() ((bufp->options&(MBRE_OPTION_IGNORECASE|MBRE_MAY_IGNORECASE)) && translate)
+#define TRANSLATE_P() ((options&RE_OPTION_IGNORECASE) && translate)
+#define MAY_TRANSLATE() ((bufp->options&(RE_OPTION_IGNORECASE|RE_MAY_IGNORECASE)) && translate)
 /* Fetch the next character in the uncompiled pattern---translating it 
    if necessary.  Also cast from a signed character in the constant
    string passed to us by the user to an unsigned char that we can use
@@ -583,7 +458,7 @@
   do {									\
     if (current_mbctype == MBCTYPE_UTF8) {				\
       int n = mbclen(c) - 1;						\
-      c &= (1<<(MBRE_BYTEWIDTH-2-n)) - 1;				\
+      c &= (1<<(BYTEWIDTH-2-n)) - 1;					\
       while (n--) {							\
 	c = c << 6 | (*p++ & ((1<<6)-1));				\
       }									\
@@ -601,7 +476,7 @@
   } while(0)
 
 #define WC2MBC1ST(c)							\
- ((c<0x100)?(c):((current_mbctype != MBCTYPE_UTF8)?(((c)>>8)&0xff):utf8_firstbyte(c)))
+ ((current_mbctype != MBCTYPE_UTF8) ? ((c<0x100) ? (c) : (((c)>>8)&0xff)) : utf8_firstbyte(c))
 
 static unsigned int
 utf8_firstbyte(c)
@@ -620,33 +495,36 @@
 #endif
 }
 
-#if 0
 static void
 print_mbc(c)
      unsigned int c;
 {
   if (current_mbctype == MBCTYPE_UTF8) {
     if (c < 0x80)
-      printf("%c", c);
+      printf("%c", (int)c);
     else if (c <= 0x7ff)
-      printf("%c%c", utf8_firstbyte(c), c&0x3f);
+      printf("%c%c", (int)utf8_firstbyte(c), (int)(c & 0x3f));
     else if (c <= 0xffff)
-      printf("%c%c%c", utf8_firstbyte(c), (c>>6)&0x3f, c&0x3f);
+      printf("%c%c%c", (int)utf8_firstbyte(c), (int)((c >> 6) & 0x3f),
+	     (int)(c & 0x3f));
     else if (c <= 0x1fffff) 
-      printf("%c%c%c%c", utf8_firstbyte(c), (c>>12)&0x3f, (c>>6)&0x3f, c&0x3f);
+      printf("%c%c%c%c", (int)utf8_firstbyte(c), (int)((c >> 12) & 0x3f),
+	     (int)((c >> 6) & 0x3f), (int)(c & 0x3f));
     else if (c <= 0x3ffffff)
-      printf("%c%c%c%c%c", utf8_firstbyte(c), (c>>18)&0x3f, (c>>12)&0x3f, (c>>6)&0x3f, c&0x3f);
+      printf("%c%c%c%c%c", (int)utf8_firstbyte(c), (int)((c >> 18) & 0x3f),
+	     (int)((c >> 12) & 0x3f), (int)((c >> 6) & 0x3f), (int)(c & 0x3f));
     else if (c <= 0x7fffffff)
-      printf("%c%c%c%c%c%c", utf8_firstbyte(c), (c>>24)&0x3f, (c>>18)&0x3f, (c>>12)&0x3f, (c>>6)&0x3f, c&0x3f);
+      printf("%c%c%c%c%c%c", (int)utf8_firstbyte(c), (int)((c >> 24) & 0x3f),
+	     (int)((c >> 18) & 0x3f), (int)((c >> 12) & 0x3f),
+	     (int)((c >> 6) & 0x3f), (int)(c & 0x3f));
   }
   else if (c < 0xff) {
-    printf("\\%o", c);
+    printf("\\%o", (int)c);
   }
   else {
-    printf("%c%c", c>>MBRE_BYTEWIDTH, c&0xff);
+    printf("%c%c", (int)(c >> BYTEWIDTH), (int)(c &0xff));
   }
 }
-#endif
 
 /* If the buffer isn't allocated when it comes in, use this.  */
 #define INIT_BUF_SIZE  28
@@ -654,7 +532,7 @@
 /* Make sure we have at least N more bytes of space in buffer.  */
 #define GET_BUFFER_SPACE(n)						\
   do {								        \
-    while (b - bufp->buffer + (size_t)(n) >= (size_t)bufp->allocated)			\
+    while (b - bufp->buffer + (n) >= bufp->allocated)			\
       EXTEND_BUFFER;							\
   } while (0)
 
@@ -690,8 +568,8 @@
 
 /* Set the bit for character C in a character set list.  */
 #define SET_LIST_BIT(c)							\
-  (b[(unsigned char)(c) / MBRE_BYTEWIDTH]					\
-   |= 1 << ((unsigned char)(c) % MBRE_BYTEWIDTH))
+  (b[(unsigned char)(c) / BYTEWIDTH]					\
+   |= 1 << ((unsigned char)(c) % BYTEWIDTH))
 
 /* Get the next unsigned number in the uncompiled pattern.  */
 #define GET_UNSIGNED_NUMBER(num) 					\
@@ -824,7 +702,7 @@
   unsigned short i, j;
 
   size = *b++;
-  if ((int)c / MBRE_BYTEWIDTH < (int)size && b[c / MBRE_BYTEWIDTH] & 1 << c % MBRE_BYTEWIDTH) {
+  if ((int)c / BYTEWIDTH < (int)size && b[c / BYTEWIDTH] & 1 << c % BYTEWIDTH) {
     return 1;
   }
   b += size + 2;
@@ -839,13 +717,11 @@
     else
       j = k;
   }
-  if (i < size && EXTRACT_MBC(&b[i*8]) <= c
-      && ((unsigned char)c != '\n' && (unsigned char)c != '\0'))
+  if (i < size && EXTRACT_MBC(&b[i*8]) <= c)
     return 1;
   return 0;
 }
 
-#if 0
 static void
 print_partial_compiled_pattern(start, end)
     unsigned char *start;
@@ -954,9 +830,9 @@
 
 	  putchar ('/');
 
-	  for (bit = 0; bit < MBRE_BYTEWIDTH; bit++)
+	  for (bit = 0; bit < BYTEWIDTH; bit++)
 	    if (map_byte & (1 << bit))
-	      printf("%c", c * MBRE_BYTEWIDTH + bit);
+	      printf("%c", c * BYTEWIDTH + bit);
 	}
 	p += mcnt;
 	mcnt = EXTRACT_UNSIGNED_AND_INCR(p);
@@ -1094,13 +970,12 @@
 
 static void
 print_compiled_pattern(bufp)
-     struct mbre_pattern_buffer *bufp;
+     struct re_pattern_buffer *bufp;
 {
   unsigned char *buffer = (unsigned char*)bufp->buffer;
 
   print_partial_compiled_pattern(buffer, buffer + bufp->used);
 }
-#endif
 
 static char*
 calculate_must_string(start, end)
@@ -1170,7 +1045,7 @@
       p += mcnt;
       mcnt = EXTRACT_UNSIGNED_AND_INCR(p);
       while (mcnt--) {
-	p += 4;
+	p += 8;
       }
       break;
 
@@ -1292,27 +1167,27 @@
 
    PATTERN   is the address of the pattern string
    SIZE      is the length of it.
-   BUFP	    is a  struct mbre_pattern_buffer *  which points to the info
+   BUFP	    is a  struct re_pattern_buffer *  which points to the info
 	     on where to store the byte commands.
 	     This structure contains a  char *  which points to the
 	     actual space, which should have been obtained with malloc.
 	     re_compile_pattern may use realloc to grow the buffer space.
 
    The number of bytes of commands can be found out by looking in
-   the `struct mbre_pattern_buffer' that bufp pointed to, after
+   the `struct re_pattern_buffer' that bufp pointed to, after
    re_compile_pattern returns. */
 
 char *
 re_compile_pattern(pattern, size, bufp)
      const char *pattern;
      int size;
-     struct mbre_pattern_buffer *bufp;
+     struct re_pattern_buffer *bufp;
 {
   register char *b = bufp->buffer;
   register const char *p = pattern;
   const char *nextp;
   const char *pend = pattern + size;
-  register unsigned int c, c1=0;
+  register unsigned int c, c1 = 0;
   const char *p0;
   int numlen;
 #define ERROR_MSG_MAX_SIZE 200
@@ -1388,13 +1263,9 @@
 
   int options = bufp->options;
 
-  int current_mbctype = bufp->mbctype;
-  const unsigned char *re_mbctab = re_mbctab_get(current_mbctype);
-
   bufp->fastmap_accurate = 0;
   bufp->must = 0;
   bufp->must_skip = 0;
-  bufp->stclass = 0;
 
   /* Initialize the syntax table.  */
   init_syntax_once();
@@ -1416,7 +1287,7 @@
 
     switch (c) {
     case '$':
-      if (bufp->options & MBRE_OPTION_SINGLELINE) {
+      if (bufp->options & RE_OPTION_SINGLELINE) {
 	BUFPUSH(endbuf);
       }
       else {
@@ -1436,7 +1307,7 @@
       break;
 
     case '^':
-      if (bufp->options & MBRE_OPTION_SINGLELINE)
+      if (bufp->options & RE_OPTION_SINGLELINE)
 	BUFPUSH(begbuf);
       else
 	BUFPUSH(begline);
@@ -1450,9 +1321,6 @@
 	snprintf(error_msg, ERROR_MSG_MAX_SIZE, 
 		 "invalid regular expression; there's no previous pattern, to which '%c' would define cardinality at %d", 
 		 c, p-pattern);
-	if (bufp->buffer) {
-		xfree(bufp->buffer);
-	}	
 	FREE_AND_RETURN(stackb, error_msg);
       }
       /* If there is a sequence of repetition chars,
@@ -1536,7 +1404,7 @@
     case '[':
       if (p == pend)
 	FREE_AND_RETURN(stackb, "invalid regular expression; '[' can't be the last character ie. can't start range at the end of pattern");
-      while ((b - bufp->buffer + 9 + (1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH)
+      while ((b - bufp->buffer + 9 + (1 << BYTEWIDTH) / BYTEWIDTH)
 	     > bufp->allocated)
 	EXTEND_BUFFER;
 
@@ -1549,9 +1417,9 @@
 	BUFPUSH(charset);
       p0 = p;
 
-      BUFPUSH((1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH);
+      BUFPUSH((1 << BYTEWIDTH) / BYTEWIDTH);
       /* Clear the whole map */
-      memset(b, 0, (1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH + 2);
+      memset(b, 0, (1 << BYTEWIDTH) / BYTEWIDTH + 2);
 
       had_mbchar = 0;
       had_num_literal = 0;
@@ -1562,11 +1430,11 @@
 	int size;
 	unsigned last = (unsigned)-1;
 
-	if ((size = EXTRACT_UNSIGNED(&b[(1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH]))
+	if ((size = EXTRACT_UNSIGNED(&b[(1 << BYTEWIDTH) / BYTEWIDTH]))
 	    || current_mbctype) {
 	  /* Ensure the space is enough to hold another interval
 	     of multi-byte chars in charset(_not)?.  */
-	  size = (1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH + 2 + size*8 + 8;
+	  size = (1 << BYTEWIDTH) / BYTEWIDTH + 2 + size*8 + 8;
 	  while (b + size + 1 > bufp->buffer + bufp->allocated)
 	    EXTEND_BUFFER;
 	}
@@ -1602,7 +1470,7 @@
 	  PATFETCH_RAW(c);
 	  switch (c) {
 	  case 'w':
-	    for (c = 0; c < (1 << MBRE_BYTEWIDTH); c++) {
+	    for (c = 0; c < (1 << BYTEWIDTH); c++) {
 	      if (SYNTAX(c) == Sword ||
 		  (!current_mbctype && SYNTAX(c) == Sword2))
 		SET_LIST_BIT(c);
@@ -1615,7 +1483,7 @@
 	    continue;
 
 	  case 'W':
-	    for (c = 0; c < (1 << MBRE_BYTEWIDTH); c++) {
+	    for (c = 0; c < (1 << BYTEWIDTH); c++) {
 	      if (SYNTAX(c) != Sword &&
 		  ((current_mbctype && !re_mbctab[c]) ||
 		  (!current_mbctype && SYNTAX(c) != Sword2)))
@@ -1662,6 +1530,7 @@
 
 	  case 'x':
 	    c = scan_hex(p, 2, &numlen);
+	    if (numlen == 0) goto invalid_escape;
 	    p += numlen;
 	    had_num_literal = 1;
 	    break;
@@ -1677,11 +1546,15 @@
 	  case 'M':
 	  case 'C':
 	  case 'c':
-	    p0 = --p;
-	    c = read_special(p, pend, &p0);
-	    if (c > 255) goto invalid_escape;
-	    p = p0;
-	    had_num_literal = 1;
+	    {
+	      char *pp;
+
+	      --p;
+	      c = read_special(p, pend, &pp);
+	      if (c > 255) goto invalid_escape;
+	      p = pp;
+	      had_num_literal = 1;
+	    }
 	    break;
 
 	  default:
@@ -1768,7 +1641,7 @@
 	    if (p == pend) 
 	      FREE_AND_RETURN(stackb, "invalid regular expression; range doesn't have ending ']' after a character class");
 
-	    for (ch = 0; ch < 1 << MBRE_BYTEWIDTH; ch++) {
+	    for (ch = 0; ch < 1 << BYTEWIDTH; ch++) {
 	      if (   (is_alnum  && ISALNUM(ch))
 		  || (is_alpha  && ISALPHA(ch))
 		  || (is_blank  && ISBLANK(ch))
@@ -1806,12 +1679,12 @@
 
       /* Discard any character set/class bitmap bytes that are all
 	 0 at the end of the map. Decrement the map-length byte too.  */
-      while ((int)b[-1] > 0 && b[(int)b[-1] - 1] == 0) 
+      while ((int)b[-1] > 0 && b[b[-1] - 1] == 0) 
 	b[-1]--; 
-      if (b[-1] != (1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH)
-	memmove(&b[(int)b[-1]], &b[(1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH],
-		2 + EXTRACT_UNSIGNED(&b[(1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH])*8);
-      b += b[-1] + 2 + EXTRACT_UNSIGNED(&b[(int)b[-1]])*8;
+      if (b[-1] != (1 << BYTEWIDTH) / BYTEWIDTH)
+	memmove(&b[b[-1]], &b[(1 << BYTEWIDTH) / BYTEWIDTH],
+		2 + EXTRACT_UNSIGNED(&b[(1 << BYTEWIDTH) / BYTEWIDTH])*8);
+      b += b[-1] + 2 + EXTRACT_UNSIGNED(&b[b[-1]])*8;
       break;
 
     case '(':
@@ -1839,43 +1712,43 @@
 
 	    case 'x':
 	      if (negative)
-		options &= ~MBRE_OPTION_EXTENDED;
+		options &= ~RE_OPTION_EXTENDED;
 	      else
-		options |= MBRE_OPTION_EXTENDED;
+		options |= RE_OPTION_EXTENDED;
 	      break;
 
 	    case 'p':
 	      if (negative) {
-		if ((options&MBRE_OPTION_POSIXLINE) == MBRE_OPTION_POSIXLINE) {
-		  options &= ~MBRE_OPTION_POSIXLINE;
+		if ((options&RE_OPTION_POSIXLINE) == RE_OPTION_POSIXLINE) {
+		  options &= ~RE_OPTION_POSIXLINE;
 		}
 	      }
-	      else if ((options&MBRE_OPTION_POSIXLINE) != MBRE_OPTION_POSIXLINE) {
-		options |= MBRE_OPTION_POSIXLINE;
+	      else if ((options&RE_OPTION_POSIXLINE) != RE_OPTION_POSIXLINE) {
+		options |= RE_OPTION_POSIXLINE;
 	      }
 	      push_option = 1;
 	      break;
 
 	    case 'm':
 	      if (negative) {
-		if (options&MBRE_OPTION_MULTILINE) {
-		  options &= ~MBRE_OPTION_MULTILINE;
+		if (options&RE_OPTION_MULTILINE) {
+		  options &= ~RE_OPTION_MULTILINE;
 		}
 	      }
-	      else if (!(options&MBRE_OPTION_MULTILINE)) {
-		options |= MBRE_OPTION_MULTILINE;
+	      else if (!(options&RE_OPTION_MULTILINE)) {
+		options |= RE_OPTION_MULTILINE;
 	      }
 	      push_option = 1;
 	      break;
 
 	    case 'i':
 	      if (negative) {
-		if (options&MBRE_OPTION_IGNORECASE) {
-		  options &= ~MBRE_OPTION_IGNORECASE;
+		if (options&RE_OPTION_IGNORECASE) {
+		  options &= ~RE_OPTION_IGNORECASE;
 		}
 	      }
-	      else if (!(options&MBRE_OPTION_IGNORECASE)) {
-		options |= MBRE_OPTION_IGNORECASE;
+	      else if (!(options&RE_OPTION_IGNORECASE)) {
+		options |= RE_OPTION_IGNORECASE;
 	      }
 		casefold = 1;
 	      break;
@@ -1920,7 +1793,7 @@
 	  BUFPUSH(options);
 	}
 	  if (casefold) {
-	    if (options & MBRE_OPTION_IGNORECASE)
+	    if (options & RE_OPTION_IGNORECASE)
 	      BUFPUSH(casefold_on);
 	    else
 	      BUFPUSH(casefold_off);
@@ -1932,8 +1805,8 @@
       }
 
       /* Laststart should point to the start_memory that we are about
-	 to push (unless the pattern has MBRE_NREGS or more ('s).  */
-      /* obsolete: now MBRE_NREGS is just a default register size. */
+	 to push (unless the pattern has RE_NREGS or more ('s).  */
+      /* obsolete: now RE_NREGS is just a default register size. */
       *stackp++ = b - bufp->buffer;    
       *stackp++ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
       *stackp++ = begalt - bufp->buffer;
@@ -1945,7 +1818,7 @@
 	*stackp++ = b - bufp->buffer;
 	BUFPUSH(0);
 	/* too many ()'s to fit in a byte. (max 254) */
-	if (regnum >= MBRE_REG_MAX) goto too_big;
+	if (regnum >= RE_REG_MAX) goto too_big;
 	break;
 
       case '=':
@@ -1974,7 +1847,7 @@
 	  BUFPUSH(options);
 	}
 	if (casefold) {
-	  if (options & MBRE_OPTION_IGNORECASE)
+	  if (options & RE_OPTION_IGNORECASE)
 	    BUFPUSH(casefold_on);
 	  else
 	    BUFPUSH(casefold_off);
@@ -2004,10 +1877,10 @@
 	store_jump(fixup_alt_jump, jump, b);
       }
       if (options != stackp[-1]) {
-	if ((options ^ stackp[-1]) & MBRE_OPTION_IGNORECASE) {
-	  BUFPUSH((options&MBRE_OPTION_IGNORECASE)?casefold_off:casefold_on);
+	if ((options ^ stackp[-1]) & RE_OPTION_IGNORECASE) {
+	  BUFPUSH((options&RE_OPTION_IGNORECASE)?casefold_off:casefold_on);
 	}
-	if ((options ^ stackp[-1]) != MBRE_OPTION_IGNORECASE) {
+	if ((options ^ stackp[-1]) != RE_OPTION_IGNORECASE) {
 	  BUFPUSH(option_set);
 	  BUFPUSH(stackp[-1]);
 	}
@@ -2127,9 +2000,9 @@
       if (lower_bound < 0 || c != '}')
 	goto unfetch_interval;
 
-      if (lower_bound >= MBRE_DUP_MAX || upper_bound >= MBRE_DUP_MAX)
+      if (lower_bound >= RE_DUP_MAX || upper_bound >= RE_DUP_MAX)
 	FREE_AND_RETURN(stackb, "too big quantifier in {,}");
-      if (upper_bound < 0) upper_bound = MBRE_DUP_MAX;
+      if (upper_bound < 0) upper_bound = RE_DUP_MAX;
       if (lower_bound > upper_bound)
 	FREE_AND_RETURN(stackb, "can't do {n,m} with n > m");
 
@@ -2145,7 +2018,7 @@
 
       if (lower_bound == 0) {
 	zero_times_ok = 1;
-	if (upper_bound == MBRE_DUP_MAX) {
+	if (upper_bound == RE_DUP_MAX) {
 	  many_times_ok = 1;
 	  goto repeat;
 	}
@@ -2159,7 +2032,7 @@
 	  /* No need to repeat */
 	  break;
 	}
-	if (upper_bound == MBRE_DUP_MAX) {
+	if (upper_bound == RE_DUP_MAX) {
 	  many_times_ok = 1;
 	  zero_times_ok = 0;
 	  goto repeat;
@@ -2226,7 +2099,7 @@
 	 `upper_bound' is 1, though.)  */
       { /* If the upper bound is > 1, we need to insert
 	   more at the end of the loop.  */
-	unsigned int nbytes = (unsigned int)upper_bound == 1 ? 10 : 20;
+	unsigned nbytes = upper_bound == 1 ? 10 : 20;
 
 	GET_BUFFER_SPACE(nbytes);
 	/* Initialize lower bound of the `succeed_n', even
@@ -2300,7 +2173,7 @@
       case 'S':
       case 'd':
       case 'D':
-	while (b - bufp->buffer + 9 + (1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH
+	while (b - bufp->buffer + 9 + (1 << BYTEWIDTH) / BYTEWIDTH
 	       > bufp->allocated)
 	  EXTEND_BUFFER;
 
@@ -2312,8 +2185,8 @@
 	  BUFPUSH(charset_not);
 	}
 
-	BUFPUSH((1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH);
-	memset(b, 0, (1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH + 2);
+	BUFPUSH((1 << BYTEWIDTH) / BYTEWIDTH);
+	memset(b, 0, (1 << BYTEWIDTH) / BYTEWIDTH + 2);
 	if (c == 's' || c == 'S') {
 	  SET_LIST_BIT(' ');
 	  SET_LIST_BIT('\t');
@@ -2331,10 +2204,10 @@
 
 	while ((int)b[-1] > 0 && b[b[-1] - 1] == 0) 
 	  b[-1]--; 
-	if (b[-1] != (1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH)
-	  memmove(&b[(int)b[-1]], &b[(1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH],
-		  2 + EXTRACT_UNSIGNED(&b[(1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH])*8);
-	b += b[-1] + 2 + EXTRACT_UNSIGNED(&b[(int)b[-1]])*8;
+	if (b[-1] != (1 << BYTEWIDTH) / BYTEWIDTH)
+	  memmove(&b[b[-1]], &b[(1 << BYTEWIDTH) / BYTEWIDTH],
+		  2 + EXTRACT_UNSIGNED(&b[(1 << BYTEWIDTH) / BYTEWIDTH])*8);
+	b += b[-1] + 2 + EXTRACT_UNSIGNED(&b[b[-1]])*8;
 	break;
 
       case 'w':
@@ -2370,7 +2243,7 @@
 	break;
 
       case 'Z':
-	if ((bufp->options & MBRE_OPTION_SINGLELINE) == 0) {
+	if ((bufp->options & RE_OPTION_SINGLELINE) == 0) {
 	  BUFPUSH(endbuf2);
 	  break;
 	}
@@ -2387,6 +2260,7 @@
       case 'x':
 	had_mbchar = 0;
 	c = scan_hex(p, 2, &numlen);
+	if (numlen == 0) goto invalid_escape;
 	p += numlen;
 	had_num_literal = 1;
 	goto numeric_char;
@@ -2394,7 +2268,7 @@
 	/* octal */
       case '0':
 	had_mbchar = 0;
-	c = scan_oct(p, 3, &numlen);
+	c = scan_oct(p, 2, &numlen);
 	p += numlen;
 	had_num_literal = 1;
 	goto numeric_char;
@@ -2411,7 +2285,7 @@
 	  GET_UNSIGNED_NUMBER(c1);
 	  if (!ISDIGIT(c)) PATUNFETCH;
 
-	if (9 < c1 && c1 >= (unsigned int)regnum) {
+	if (9 < c1 && c1 >= regnum) {
 	    /* need to get octal */
 	  c = scan_oct(p0, 3, &numlen) & 0xff;
 	  p = p0 + numlen;
@@ -2442,7 +2316,7 @@
       break;
 
     case '#':
-      if (options & MBRE_OPTION_EXTENDED) {
+      if (options & RE_OPTION_EXTENDED) {
 	while (p != pend) {
 	  PATFETCH(c);
 	  if (c == '\n') break;
@@ -2456,7 +2330,7 @@
     case '\f':
     case '\r':
     case '\n':
-      if (options & MBRE_OPTION_EXTENDED)
+      if (options & RE_OPTION_EXTENDED)
 	break;
 
     default:
@@ -2509,23 +2383,7 @@
     if (*laststart == dummy_failure_jump) laststart += 3;
     else if (*laststart == try_next) laststart += 3;
     if (*laststart == anychar_repeat) {
-      bufp->options |= MBRE_OPTIMIZE_ANCHOR;
-    }
-    else if (*laststart == on_failure_jump) {
-      int mcnt;
-
-      laststart++;
-      EXTRACT_NUMBER_AND_INCR(mcnt, laststart);
-      if (*laststart == charset || *laststart == charset_not) {
-	p0 = laststart;
-	mcnt = *++p0;
-	p0 += mcnt+1;
-	mcnt = EXTRACT_UNSIGNED_AND_INCR(p0);
-	p0 += 8*mcnt;
-	if (*p0 == maybe_finalize_jump) {
-	  bufp->stclass = laststart;
-	}
-      }
+      bufp->options |= RE_OPTIMIZE_ANCHOR;
     }
   }
 
@@ -2535,14 +2393,14 @@
   if (laststart != b) {
     if (*laststart == start_memory) laststart += 3;
     if (*laststart == exactn) {
-      bufp->options |= MBRE_OPTIMIZE_EXACTN;
+      bufp->options |= RE_OPTIMIZE_EXACTN;
       bufp->must = laststart+1;
     }
   }
   if (!bufp->must) {
     bufp->must = calculate_must_string(bufp->buffer, b);
   }
-  if (current_mbctype == MBCTYPE_SJIS) bufp->options |= MBRE_OPTIMIZE_NO_BM;
+  if (current_mbctype == MBCTYPE_SJIS) bufp->options |= RE_OPTIMIZE_NO_BM;
   else if (bufp->must) {
     int i;
     int len = (unsigned char)bufp->must[0];
@@ -2550,12 +2408,12 @@
     for (i=1; i<len; i++) {
       if ((unsigned char)bufp->must[i] == 0xff ||
 	  (current_mbctype && ismbchar(bufp->must[i]))) {
-	bufp->options |= MBRE_OPTIMIZE_NO_BM;
+	bufp->options |= RE_OPTIMIZE_NO_BM;
 	break;
       }
     }
-    if (!(bufp->options & MBRE_OPTIMIZE_NO_BM)) {
-      bufp->must_skip = (int *) xmalloc((1 << MBRE_BYTEWIDTH)*sizeof(int));
+    if (!(bufp->options & RE_OPTIMIZE_NO_BM)) {
+      bufp->must_skip = (int *) xmalloc((1 << BYTEWIDTH)*sizeof(int));
       bm_init_skip(bufp->must_skip, (unsigned char*)bufp->must+1,
 		   (unsigned char)bufp->must[0],
 		   (unsigned char*)(MAY_TRANSLATE()?translate:0));
@@ -2566,7 +2424,7 @@
   bufp->regend = TMALLOC(regnum, unsigned char*);
   bufp->old_regstart = TMALLOC(regnum, unsigned char*);
   bufp->old_regend = TMALLOC(regnum, unsigned char*);
-  bufp->reg_info = TMALLOC(regnum, mbre_register_info_type);
+  bufp->reg_info = TMALLOC(regnum, register_info_type);
   bufp->best_regstart = TMALLOC(regnum, unsigned char*);
   bufp->best_regend = TMALLOC(regnum, unsigned char*);
   FREE_AND_RETURN(stackb, 0);
@@ -2592,21 +2450,20 @@
 
 void
 re_free_pattern(bufp)
-     struct mbre_pattern_buffer *bufp;
+     struct re_pattern_buffer *bufp;
 {
-  if(bufp){
-    if (bufp->buffer) xfree(bufp->buffer);
-    if (bufp->fastmap) xfree(bufp->fastmap);
-    if (bufp->must_skip) xfree(bufp->must_skip);
-
-    if (bufp->regstart) xfree(bufp->regstart);
-    if (bufp->regend) xfree(bufp->regend);
-    if (bufp->old_regstart) xfree(bufp->old_regstart);
-    if (bufp->old_regend) xfree(bufp->old_regend);
-    if (bufp->best_regstart) xfree(bufp->best_regstart);
-    if (bufp->best_regend) xfree(bufp->best_regend);
-    if (bufp->reg_info) xfree(bufp->reg_info);
-  }
+  xfree(bufp->buffer);
+  xfree(bufp->fastmap);
+  if (bufp->must_skip) xfree(bufp->must_skip);
+
+  xfree(bufp->regstart);
+  xfree(bufp->regend);
+  xfree(bufp->old_regstart);
+  xfree(bufp->old_regend);
+  xfree(bufp->best_regstart);
+  xfree(bufp->best_regend);
+  xfree(bufp->reg_info);
+  xfree(bufp);
 }
 
 /* Store a jump of the form <OPCODE> <relative address>.
@@ -2692,7 +2549,6 @@
 
    If you call this function, you must zero out pending_exact.  */
 
-#if 0
 static void
 insert_op(op, there, current_end)
      int op;
@@ -2706,7 +2562,7 @@
 
   there[0] = (char)op;
 }
-#endif
+
 
 /* Open up space at location THERE, and insert operation OP followed by
    NUM_1 and NUM_2.  CURRENT_END gives the end of the storage in use, so
@@ -2752,13 +2608,12 @@
 }
 
 static int
-slow_search(little, llen, big, blen, translate, re_mbctab)
+slow_search(little, llen, big, blen, translate)
      unsigned char *little;
      int llen;
      unsigned char *big;
      int blen;
      char *translate;
-     const unsigned char *re_mbctab;
 {
   unsigned char *bsave = big;
   unsigned char *bend = big + blen;
@@ -2870,16 +2725,16 @@
 }
 
 /* Given a pattern, compute a fastmap from it.  The fastmap records
-   which of the (1 << MBRE_BYTEWIDTH) possible characters can start a string
+   which of the (1 << BYTEWIDTH) possible characters can start a string
    that matches the pattern.  This fastmap is used by re_search to skip
    quickly over totally implausible text.