ます’s Blog - どうでもいい記事100選

どうでもいい記事100選

make test for 4.4.3RC1(続き)- mb_substr編

あー、忙しくて現実逃避する時間も無い。。_| ̄|○
先日の記事の続きですが。
「mbstring.func_overload」の機能を使って文字列関数をオーバーロードするとmb_substr関数のテストが失敗するようになりました(コレの3番目)。
何故、失敗するようになったかを調べてみました。まずはテスト。

<?php

  declare( encoding="EUC-JP" );

  echo PHP_VERSION."\n";
  var_dump( mb_language( ) );
  var_dump( mb_internal_encoding( ) );

  $euc_jp = "0123この文字列は日本語です。";

  var_dump( substr(    $euc_jp, 100, 10 ) );
  var_dump( mb_substr( $euc_jp, 100, 10 ) );

?>

結果
----
% ./php-4.3.3-cli ./mb_substr.php
4.3.3
string(8) "Japanese"
string(9) "eucJP-win"
bool(false)
string(0) ""

% ./php-4.3.9-cli ./mb_substr.php
4.3.9
string(8) "Japanese"
string(9) "eucJP-win"
bool(false)
string(0) ""

% ./php-4.3.11-cli ./mb_substr.php
4.3.11
string(8) "Japanese"
string(9) "eucJP-win"
bool(false)
string(0) ""

% ./php-4.4.1-cli ./mb_substr.php
4.4.1
string(8) "Japanese"
string(9) "eucJP-win"
bool(false)
string(0) ""

% ./php-4.4.2-cli ./mb_substr.php
4.4.2
string(8) "Japanese"
string(9) "eucJP-win"
bool(false)
string(0) ""

% ./php-4.4.3RC1-cli ./mb_substr.php
4.4.3RC1
string(8) "Japanese"
string(9) "eucJP-win"
bool(false)
string(0) ""

% ./php-4.3.3-cli -d mbstring.func_overload=2 ./mb_substr.php
4.3.3
string(8) "Japanese"
string(9) "eucJP-win"
string(0) ""
string(0) ""

% ./php-4.3.9-cli -d mbstring.func_overload=2 ./mb_substr.php
4.3.9
string(8) "Japanese"
string(9) "eucJP-win"
string(0) ""
string(0) ""

% ./php-4.3.11-cli -d mbstring.func_overload=2 ./mb_substr.php
4.3.11
string(8) "Japanese"
string(9) "eucJP-win"
string(0) ""
string(0) ""

% ./php-4.4.1-cli -d mbstring.func_overload=2 ./mb_substr.php
4.4.1
string(8) "Japanese"
string(9) "eucJP-win"
string(0) ""
string(0) ""

% ./php-4.4.2-cli -d mbstring.func_overload=2 ./mb_substr.php
4.4.2
string(8) "Japanese"
string(9) "eucJP-win"
bool(false)
bool(false)

% ./php-4.4.3RC1-cli -d mbstring.func_overload=2 ./mb_substr.php
4.4.3RC1
string(8) "Japanese"
string(9) "eucJP-win"
bool(false)
bool(false)

テストの結果から、4.4.1〜4.4.2間の変更で挙動が変わった事が分かります。
カンが鋭い人は、もう分かったかもしれませんが。(w
でもって、4.4.1〜4.4.2間の変更点は以下の通りです。長いけど。

--- mbstring.c	2005/09/21 13:19:19	1.142.2.47.2.1
+++ mbstring.c	2005/12/30 05:25:54	1.142.2.47.2.7
@@ -17,7 +17,7 @@
    +----------------------------------------------------------------------+
  */
 
-/* $Id: mbstring.c,v 1.142.2.47.2.1 2005/09/21 13:19:19 iliaa Exp $ */
+/* $Id: mbstring.c,v 1.142.2.47.2.7 2005/12/30 05:25:54 hirokawa Exp $ */
 
 /*
  * PHP4 Multibyte String module "mbstring"
@@ -1555,7 +1555,7 @@
 		from_encoding = mbfl_no_encoding_invalid;
 		identd = mbfl_encoding_detector_new(
 				(enum mbfl_no_encoding *)info->from_encodings,
-				info->num_from_encodings);
+				info->num_from_encodings, 0);
 		if (identd) {
 			n = 0;
 			while (n < num) {
@@ -1902,7 +1902,7 @@
 	} else {
 		/* auto detect */
 		from_encoding = mbfl_no_encoding_invalid;
-		identd = mbfl_encoding_detector_new(elist, elistsz);
+		identd = mbfl_encoding_detector_new(elist, elistsz, 0);
 		if (identd != NULL) {
 			n = 0;
 			while (n < num) {
@@ -2404,6 +2404,11 @@
 		}
 	}
 
+	if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
+		&& (from >= mbfl_strlen(&string))) {
+		RETURN_FALSE;
+	}
+
 	ret = mbfl_substr(&string, &result, from, len);
 	if (ret != NULL) {
 		RETVAL_STRINGL((char *)ret->val, ret->len, 0);		/* the string is already strdup()'ed */
@@ -2485,6 +2490,13 @@
 		}
 	}
 
+	if (from > Z_STRLEN_PP(arg1)) {
+		RETURN_FALSE;
+	}
+	if (((unsigned) from + (unsigned) len) > Z_STRLEN_PP(arg1)) {
+		len = Z_STRLEN_PP(arg1) - from;
+	}
+
 	ret = mbfl_strcut(&string, &result, from, len);
 	if (ret != NULL) {
 		RETVAL_STRINGL(ret->val, ret->len, 0);		/* the string is already strdup()'ed */
@@ -2656,7 +2668,7 @@
 			string.no_encoding = from_encoding;
 		} else if (size > 1) {
 			/* auto detect */
-			from_encoding = mbfl_identify_encoding_no(&string, list, size);
+			from_encoding = mbfl_identify_encoding_no(&string, list, size, 0);
 			if (from_encoding != mbfl_no_encoding_invalid) {
 				string.no_encoding = from_encoding;
 			} else {
@@ -2924,16 +2936,17 @@
 }
 /* }}} */
 
-/* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed]]])
+/* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]])
    Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
 PHP_FUNCTION(mb_encode_mimeheader)
 {
-	pval **argv[4];
+	pval **argv[5];
 	enum mbfl_no_encoding charset, transenc;
 	mbfl_string  string, result, *ret;
 	char *p, *linefeed;
+	int indent;
 
-	if (ZEND_NUM_ARGS() < 1 || ZEND_NUM_ARGS() > 4 || zend_get_parameters_array_ex(ZEND_NUM_ARGS(), argv) == FAILURE) {
+	if (ZEND_NUM_ARGS() < 1 || ZEND_NUM_ARGS() > 5 || zend_get_parameters_array_ex(ZEND_NUM_ARGS(), argv) == FAILURE) {
 		WRONG_PARAM_COUNT;
 	}
 
@@ -2970,6 +2983,12 @@
 		linefeed = Z_STRVAL_PP(argv[3]);
 	}
 
+	indent = 0;
+	if (ZEND_NUM_ARGS() >= 5) {
+		convert_to_long_ex(argv[4]);
+		indent = Z_LVAL_PP(argv[4]);
+	}
+
 	convert_to_string_ex(argv[0]);
 	mbfl_string_init(&string);
 	mbfl_string_init(&result);
@@ -2977,7 +2996,7 @@
 	string.no_encoding = MBSTRG(current_internal_encoding);
 	string.val = Z_STRVAL_PP(argv[0]);
 	string.len = Z_STRLEN_PP(argv[0]);
-	ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, 0);
+	ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
 	if (ret != NULL) {
 		RETVAL_STRINGL((char *)ret->val, ret->len, 0)	/* the string is already strdup()'ed */
 	} else {
@@ -3195,7 +3214,7 @@
 		stack = (pval ***)safe_emalloc(stack_max, sizeof(pval **), 0);
 		if (stack != NULL) {
 			stack_level = 0;
-			identd = mbfl_encoding_detector_new(elist, elistsz);
+			identd = mbfl_encoding_detector_new(elist, elistsz, 0);
 			if (identd != NULL) {
 				n = 2;
 				while (n < argc || stack_level > 0) {
@@ -3460,6 +3479,22 @@
  *  Sends an email message with MIME scheme
  */
 #if HAVE_SENDMAIL
+#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos)						\
+	if (str[pos] == '\r' && str[pos + 1] == '\n' && (str[pos + 2] == ' ' || str[pos + 2] == '\t')) {	\
+		pos += 3;											\
+		while (str[pos] == ' ' || str[pos] == '\t') {		\
+			pos++;											\
+		}                                               \
+		continue;											\
+	}													\
+	else if (str[pos] == '\n' && (str[pos + 1] == ' ' || str[pos + 1] == '\t')) {	\
+		pos += 2;											\
+		while (str[pos] == ' ' || str[pos] == '\t') {		\
+			pos++;											\
+		}												\
+		continue;											\
+	}													\
+
 PHP_FUNCTION(mb_send_mail)
 {
 	int argc, n;
@@ -3474,8 +3509,9 @@
 	    body_enc;	/* body transfar encoding */
 	mbfl_memory_device device;	/* automatic allocateable buffer for additional header */
 	const mbfl_language *lang;
-	char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
 	int err = 0;
+	char *to_r;
+	int to_len, i;
 
 	/* initialize */
 	mbfl_memory_device_init(&device, 0, 0);
@@ -3502,6 +3538,32 @@
 	convert_to_string_ex(argv[0]);
 	if (Z_STRVAL_PP(argv[0])) {
 		to = Z_STRVAL_PP(argv[0]);
+		to_len = Z_STRLEN_PP(argv[0]);
+		if (to_len > 0) {
+			to_r = estrndup(to, to_len);
+			for (; to_len; to_len--) {
+				if (!isspace((unsigned char) to_r[to_len - 1])) {
+					break;
+				}
+				to_r[to_len - 1] = '\0';
+			}
+			for (i = 0; to_r[i]; i++) {
+				if (iscntrl((unsigned char) to_r[i])) {
+						/* According to RFC 822, section 3.1.1 long headers may be
+separated into
+					 * parts using CRLF followed at least one linear-white-space
+character ('\t' or ' ').
+					 * To prevent these separators from being replaced with a space,
+we use the
+					 * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
+					 */
+					SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
+					to_r[i] = ' ';
+				}
+			}
+		} else {
+			to_r = to;
+		}
 	} else {
 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing To: field");
 		err = 1;
@@ -3516,7 +3578,7 @@
 		orig_str.no_encoding = MBSTRG(current_internal_encoding);
 		if (orig_str.no_encoding == mbfl_no_encoding_invalid
 		    || orig_str.no_encoding == mbfl_no_encoding_pass) {
-			orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size));
+			orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), 0);
 		}
 		pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
 		if (pstr != NULL) {
@@ -3539,7 +3601,7 @@
 
 		if (orig_str.no_encoding == mbfl_no_encoding_invalid
 		    || orig_str.no_encoding == mbfl_no_encoding_pass) {
-			orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size));
+			orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), 0);
 		}
 
 		pstr = NULL;
@@ -3596,18 +3658,19 @@
 		extra_cmd = Z_STRVAL_PP(argv[4]);
 	}
 
-	if (force_extra_parameters) {
-		extra_cmd = estrdup(force_extra_parameters);
-	} else if (extra_cmd) {
+	if (extra_cmd) {
 		extra_cmd = php_escape_shell_cmd(extra_cmd);
 	} 
 
-	if (!err && php_mail(to, subject, message, headers, extra_cmd TSRMLS_CC)) {
+	if (!err && php_mail(to_r, subject, message, headers, extra_cmd TSRMLS_CC)) {
 		RETVAL_TRUE;
 	} else {
 		RETVAL_FALSE;
 	}
 
+	if (to_r != to) {
+		efree(to_r);
+	}
 	if (extra_cmd) {
 		efree(extra_cmd);
 	}
@@ -3901,7 +3964,7 @@
 	mbfl_string_init(&string);
 	string.no_language = MBSTRG(current_language);
 
-	identd = mbfl_encoding_detector_new(elist, size);
+	identd = mbfl_encoding_detector_new(elist, size, 0);
 
 	if (identd) {
 		int n = 0;

でもって、原因となった変更点は以下の通り。

--- mbstring.c	2005/12/15 03:37:22	1.142.2.47.2.6
+++ mbstring.c	2005/12/30 05:25:54	1.142.2.47.2.7
@@ -17,7 +17,7 @@
    +----------------------------------------------------------------------+
  */
 
-/* $Id: mbstring.c,v 1.142.2.47.2.6 2005/12/15 03:37:22 iliaa Exp $ */
+/* $Id: mbstring.c,v 1.142.2.47.2.7 2005/12/30 05:25:54 hirokawa Exp $ */
 
 /*
  * PHP4 Multibyte String module "mbstring"
@@ -1555,7 +1555,7 @@
 		from_encoding = mbfl_no_encoding_invalid;
 		identd = mbfl_encoding_detector_new(
 				(enum mbfl_no_encoding *)info->from_encodings,
-				info->num_from_encodings);
+				info->num_from_encodings, 0);
 		if (identd) {
 			n = 0;
 			while (n < num) {
@@ -1902,7 +1902,7 @@
 	} else {
 		/* auto detect */
 		from_encoding = mbfl_no_encoding_invalid;
-		identd = mbfl_encoding_detector_new(elist, elistsz);
+		identd = mbfl_encoding_detector_new(elist, elistsz, 0);
 		if (identd != NULL) {
 			n = 0;
 			while (n < num) {
@@ -2404,6 +2404,11 @@
 		}
 	}
 
+	if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
+		&& (from >= mbfl_strlen(&string))) {
+		RETURN_FALSE;
+	}
+
 	ret = mbfl_substr(&string, &result, from, len);
 	if (ret != NULL) {
 		RETVAL_STRINGL((char *)ret->val, ret->len, 0);		/* the string is already strdup()'ed */
@@ -2663,7 +2668,7 @@
 			string.no_encoding = from_encoding;
 		} else if (size > 1) {
 			/* auto detect */
-			from_encoding = mbfl_identify_encoding_no(&string, list, size);
+			from_encoding = mbfl_identify_encoding_no(&string, list, size, 0);
 			if (from_encoding != mbfl_no_encoding_invalid) {
 				string.no_encoding = from_encoding;
 			} else {
@@ -3209,7 +3214,7 @@
 		stack = (pval ***)safe_emalloc(stack_max, sizeof(pval **), 0);
 		if (stack != NULL) {
 			stack_level = 0;
-			identd = mbfl_encoding_detector_new(elist, elistsz);
+			identd = mbfl_encoding_detector_new(elist, elistsz, 0);
 			if (identd != NULL) {
 				n = 2;
 				while (n < argc || stack_level > 0) {
@@ -3573,7 +3578,7 @@
 		orig_str.no_encoding = MBSTRG(current_internal_encoding);
 		if (orig_str.no_encoding == mbfl_no_encoding_invalid
 		    || orig_str.no_encoding == mbfl_no_encoding_pass) {
-			orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size));
+			orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), 0);
 		}
 		pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
 		if (pstr != NULL) {
@@ -3596,7 +3601,7 @@
 
 		if (orig_str.no_encoding == mbfl_no_encoding_invalid
 		    || orig_str.no_encoding == mbfl_no_encoding_pass) {
-			orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size));
+			orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), 0);
 		}
 
 		pstr = NULL;
@@ -3959,7 +3964,7 @@
 	mbfl_string_init(&string);
 	string.no_language = MBSTRG(current_language);
 
-	identd = mbfl_encoding_detector_new(elist, size);
+	identd = mbfl_encoding_detector_new(elist, size, 0);
 
 	if (identd) {
 		int n = 0;

抜粋すると以下の部分です。思いっきり「RETURN_FALSE」してた。(w

+	if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
+		&& (from >= mbfl_strlen(&string))) {
+		RETURN_FALSE;
+	}
+

(テスト結果から察すると)普段substr関数を使っていた人から「挙動が違う!」って文句がきたから関数をオーバーロードした時*だけ*は仕様をsubstr関数に合わせたのでは。。。という風に推測できますね。その割には(長い事)放置されてたなぁ。
テスト・プログラムは修正しないとな。。。一応、確認してみよっと。
っていうか、長さで判断しろよな。。。とか思ってしまいますが。
「""」や「false」とか使い分けるの激しく面倒な人なので。