ます’s Blog - どうでもいい記事100選

どうでもいい記事100選

mb_encode_mimeheader関数とmb_decode_mimeheader関数(続き)

先日の続き。理由が分かりました。下らなすぎて脱力。。。
コレの不具合の影響を受けていたようです。とりあえず昔(PHP 4.3.11)の状態に戻したところ、動作の確認が取れました。
まだ完成形ではないのですが、ひとまずタコさんパッチを放流だっ!

--- php-4.4.0,orig/ext/mbstring/mbstring.c	2005-07-19 09:31:43.000000000 +0900
+++ php-4.4.0/ext/mbstring/mbstring.c	2005-07-19 17:47:47.000000000 +0900
@@ -2928,7 +2928,7 @@
 PHP_FUNCTION(mb_encode_mimeheader)
 {
 	pval **argv[4];
-	enum mbfl_no_encoding charset, transenc;
+	enum mbfl_no_encoding charset, transenc, target_no_encoding = MBSTRG(current_internal_encoding);
 	mbfl_string  string, result, *ret;
 	char *p, *linefeed;
 
@@ -2945,6 +2945,7 @@
 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", Z_STRVAL_PP(argv[1]));
 			RETURN_FALSE;
 		}
+		target_no_encoding = charset;
 	} else {
 		const mbfl_language *lang = mbfl_no2language(MBSTRG(current_language));
 		if (lang != NULL) {
@@ -2973,7 +2974,7 @@
 	mbfl_string_init(&string);
 	mbfl_string_init(&result);
 	string.no_language = MBSTRG(current_language);
-	string.no_encoding = MBSTRG(current_internal_encoding);
+	string.no_encoding = target_no_encoding;
 	string.val = Z_STRVAL_PP(argv[0]);
 	string.len = Z_STRLEN_PP(argv[0]);
 	ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, 0);
@@ -2985,24 +2986,35 @@
 }
 /* }}} */
 
-/* {{{ proto string mb_decode_mimeheader(string string)
+/* {{{ proto string mb_decode_mimeheader(string string[, string encoding])
    Decodes the MIME "encoded-word" in the string */
 PHP_FUNCTION(mb_decode_mimeheader)
 {
-	pval **arg_str;
+	pval **arg_str, **arg_encoding;
 	mbfl_string string, result, *ret;
+	enum mbfl_no_encoding target_encoding = MBSTRG(current_internal_encoding);
 
-	if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &arg_str) == FAILURE) {
+	if ((ZEND_NUM_ARGS() == 1 && zend_get_parameters_ex(1, &arg_str) == FAILURE) ||
+	    (ZEND_NUM_ARGS() == 2 && zend_get_parameters_ex(2, &arg_str, &arg_encoding) == FAILURE)||
+		ZEND_NUM_ARGS() < 1 || ZEND_NUM_ARGS() > 2) {
 		WRONG_PARAM_COUNT;
 	}
+	if(ZEND_NUM_ARGS() == 2){
+		convert_to_string_ex(arg_encoding);
+		target_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(arg_encoding));
+		if (target_encoding == mbfl_no_encoding_invalid) {
+			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", Z_STRVAL_PP(arg_encoding));
+			RETURN_FALSE;
+		}
+	}
 	convert_to_string_ex(arg_str);
 	mbfl_string_init(&string);
 	mbfl_string_init(&result);
 	string.no_language = MBSTRG(current_language);
-	string.no_encoding = MBSTRG(current_internal_encoding);
+	string.no_encoding = target_encoding;
 	string.val = (unsigned char *)Z_STRVAL_PP(arg_str);
 	string.len = Z_STRLEN_PP(arg_str);
-	ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding));
+	ret = mbfl_mime_header_decode(&string, &result, target_encoding);
 	if (ret != NULL) {
 		RETVAL_STRINGL((char *)ret->val, ret->len, 0)	/* the string is already strdup()'ed */
 	} else {

このタコさんパッチを適用すると、以下のスクリプトが正しく動作することを確認しました。

<?php
declare( encoding="EUC-JP" );
// Round-trip check for mb_encode_mimeheader() / mb_decode_mimeheader().
// Each case appends the charset name to the test string as an ASCII suffix,
// encodes it into a MIME header value, decodes it again, and var_dump()s
// both the encoded and the decoded result.
// $ORIG is treated as EUC-JP by the mb_convert_encoding() calls below.
$ORIG = "表題";

// Case 1: EUC-JP input; charset and transfer-encoding arguments are omitted on
// encode, and no encoding argument is passed on decode.
echo "[EUC-JP][B]--------------------------------------\n";
$A = mb_encode_mimeheader( $ORIG."EUC-JP" );
var_dump( $A );

$B =  mb_decode_mimeheader( $A );
var_dump( $B );

// Case 2: input converted from EUC-JP to SJIS, encoded with charset "SJIS"
// (transfer-encoding argument omitted). Decode is given "SJIS" as its second
// argument; the decoded text is converted back to EUC-JP before being dumped.
echo "[SJIS][B]----------------------------------------\n";
$A = mb_convert_encoding( $ORIG, "SJIS", "EUC-JP" );
$A = mb_encode_mimeheader( $A."SJIS", "SJIS" );
var_dump( $A );

$B = mb_decode_mimeheader( $A, "SJIS" );
var_dump( mb_convert_encoding( $B, "EUC-JP", "SJIS" ) );

// Case 3: same flow as case 2, using UTF-8 instead of SJIS.
echo "[UTF-8][B]---------------------------------------\n";
$A = mb_convert_encoding( $ORIG, "UTF-8", "EUC-JP" );
$A = mb_encode_mimeheader( $A."UTF-8", "UTF-8" );
var_dump( $A );

$B = mb_decode_mimeheader( $A, "UTF-8" );
var_dump( mb_convert_encoding( $B, "EUC-JP", "UTF-8" ) );

// Case 4: EUC-JP input with charset "EUC-JP" and transfer encoding "Q" passed
// explicitly on encode; decode is called without an encoding argument.
echo "[EUC-JP][Q]--------------------------------------\n";
$A = mb_encode_mimeheader( $ORIG."EUC-JP", "EUC-JP", "Q" );
var_dump( $A );

$B =  mb_decode_mimeheader( $A );
var_dump( $B );

// Case 5: same flow as case 2 (SJIS), but with transfer encoding "Q".
echo "[SJIS][Q]----------------------------------------\n";
$A = mb_convert_encoding( $ORIG, "SJIS", "EUC-JP" );
$A = mb_encode_mimeheader( $A."SJIS", "SJIS", "Q" );
var_dump( $A );

$B = mb_decode_mimeheader( $A, "SJIS" );
var_dump( mb_convert_encoding( $B, "EUC-JP", "SJIS" ) );

// Case 6: same flow as case 3 (UTF-8), but with transfer encoding "Q".
echo "[UTF-8][Q]---------------------------------------\n";
$A = mb_convert_encoding( $ORIG, "UTF-8", "EUC-JP" );
$A = mb_encode_mimeheader( $A."UTF-8", "UTF-8", "Q" );
var_dump( $A );

$B = mb_decode_mimeheader( $A, "UTF-8" );
var_dump( mb_convert_encoding( $B, "EUC-JP", "UTF-8" ) );
?>

結果
----
[EUC-JP][B]--------------------------------------
string(42) "=?ISO-2022-JP?B?GyRCST1CahsoQkVVQy1KUA==?="
string(10) "表題EUC-JP"
[SJIS][B]----------------------------------------
string(28) "=?Shift_JIS?B?lVyR6FNKSVM=?="
string(8) "表題SJIS"
[UTF-8][B]---------------------------------------
string(28) "=?UTF-8?B?6KGo6aGMVVRGLTg=?="
string(9) "表題UTF-8"
[EUC-JP][Q]--------------------------------------
string(31) "=?EUC-JP?Q?=C9=BD=C2=EAEUC-JP?="
string(10) "表題EUC-JP"
[SJIS][Q]----------------------------------------
string(32) "=?Shift_JIS?Q?=95=5C=91=E8SJIS?="
string(8) "表題SJIS"
[UTF-8][Q]---------------------------------------
string(37) "=?UTF-8?Q?=E8=A1=A8=E9=A1=8CUTF-=38?="
string(9) "表題UTF-8"

(まだ)mb_decode_mimeheader関数の内部では文字コードの変換処理を行っていないので、完成形ができたら是非適用したい。