mb_encode_mimeheader関数とmb_decode_mimeheader関数(続き)
先日の続き。理由が分かりました。下らなすぎて脱力。。。
コレの不具合の影響を受けていたようです。とりあえず該当部分を昔(PHP 4.3.11)の状態に戻すことで、正しく動作することを確認できました。
まだ完成形ではないのですが、ひとまずタコさんパッチを放流だっ!
--- php-4.4.0,orig/ext/mbstring/mbstring.c 2005-07-19 09:31:43.000000000 +0900 +++ php-4.4.0/ext/mbstring/mbstring.c 2005-07-19 17:47:47.000000000 +0900 @@ -2928,7 +2928,7 @@ PHP_FUNCTION(mb_encode_mimeheader) { pval **argv[4]; - enum mbfl_no_encoding charset, transenc; + enum mbfl_no_encoding charset, transenc, target_no_encoding = MBSTRG(current_internal_encoding); mbfl_string string, result, *ret; char *p, *linefeed; @@ -2945,6 +2945,7 @@ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", Z_STRVAL_PP(argv[1])); RETURN_FALSE; } + target_no_encoding = charset; } else { const mbfl_language *lang = mbfl_no2language(MBSTRG(current_language)); if (lang != NULL) { @@ -2973,7 +2974,7 @@ mbfl_string_init(&string); mbfl_string_init(&result); string.no_language = MBSTRG(current_language); - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = target_no_encoding; string.val = Z_STRVAL_PP(argv[0]); string.len = Z_STRLEN_PP(argv[0]); ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, 0); @@ -2985,24 +2986,35 @@ } /* }}} */ -/* {{{ proto string mb_decode_mimeheader(string string) +/* {{{ proto string mb_decode_mimeheader(string string[, string encoding]) Decodes the MIME "encoded-word" in the string */ PHP_FUNCTION(mb_decode_mimeheader) { - pval **arg_str; + pval **arg_str, **arg_encoding; mbfl_string string, result, *ret; + enum mbfl_no_encoding target_encoding = MBSTRG(current_internal_encoding); - if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &arg_str) == FAILURE) { + if ((ZEND_NUM_ARGS() == 1 && zend_get_parameters_ex(1, &arg_str) == FAILURE) || + (ZEND_NUM_ARGS() == 2 && zend_get_parameters_ex(2, &arg_str, &arg_encoding) == FAILURE)|| + ZEND_NUM_ARGS() < 1 || ZEND_NUM_ARGS() > 2) { WRONG_PARAM_COUNT; } + if(ZEND_NUM_ARGS() == 2){ + convert_to_string_ex(arg_encoding); + target_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(arg_encoding)); + if (target_encoding == mbfl_no_encoding_invalid) { + 
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", Z_STRVAL_PP(arg_encoding)); + RETURN_FALSE; + } + } convert_to_string_ex(arg_str); mbfl_string_init(&string); mbfl_string_init(&result); string.no_language = MBSTRG(current_language); - string.no_encoding = MBSTRG(current_internal_encoding); + string.no_encoding = target_encoding; string.val = (unsigned char *)Z_STRVAL_PP(arg_str); string.len = Z_STRLEN_PP(arg_str); - ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)); + ret = mbfl_mime_header_decode(&string, &result, target_encoding); if (ret != NULL) { RETVAL_STRINGL((char *)ret->val, ret->len, 0) /* the string is already strdup()'ed */ } else {
このタコさんパッチを適用する事で、以下のスクリプトが正しく動作する事を確認。
<?php declare( encoding="EUC-JP" ); $ORIG = "表題"; echo "[EUC-JP][B]--------------------------------------\n"; $A = mb_encode_mimeheader( $ORIG."EUC-JP" ); var_dump( $A ); $B = mb_decode_mimeheader( $A ); var_dump( $B ); echo "[SJIS][B]----------------------------------------\n"; $A = mb_convert_encoding( $ORIG, "SJIS", "EUC-JP" ); $A = mb_encode_mimeheader( $A."SJIS", "SJIS" ); var_dump( $A ); $B = mb_decode_mimeheader( $A, "SJIS" ); var_dump( mb_convert_encoding( $B, "EUC-JP", "SJIS" ) ); echo "[UTF-8][B]---------------------------------------\n"; $A = mb_convert_encoding( $ORIG, "UTF-8", "EUC-JP" ); $A = mb_encode_mimeheader( $A."UTF-8", "UTF-8" ); var_dump( $A ); $B = mb_decode_mimeheader( $A, "UTF-8" ); var_dump( mb_convert_encoding( $B, "EUC-JP", "UTF-8" ) ); echo "[EUC-JP][Q]--------------------------------------\n"; $A = mb_encode_mimeheader( $ORIG."EUC-JP", "EUC-JP", "Q" ); var_dump( $A ); $B = mb_decode_mimeheader( $A ); var_dump( $B ); echo "[SJIS][Q]----------------------------------------\n"; $A = mb_convert_encoding( $ORIG, "SJIS", "EUC-JP" ); $A = mb_encode_mimeheader( $A."SJIS", "SJIS", "Q" ); var_dump( $A ); $B = mb_decode_mimeheader( $A, "SJIS" ); var_dump( mb_convert_encoding( $B, "EUC-JP", "SJIS" ) ); echo "[UTF-8][Q]---------------------------------------\n"; $A = mb_convert_encoding( $ORIG, "UTF-8", "EUC-JP" ); $A = mb_encode_mimeheader( $A."UTF-8", "UTF-8", "Q" ); var_dump( $A ); $B = mb_decode_mimeheader( $A, "UTF-8" ); var_dump( mb_convert_encoding( $B, "EUC-JP", "UTF-8" ) ); ?> 結果 ---- [EUC-JP][B]-------------------------------------- string(42) "=?ISO-2022-JP?B?GyRCST1CahsoQkVVQy1KUA==?=" string(10) "表題EUC-JP" [SJIS][B]---------------------------------------- string(28) "=?Shift_JIS?B?lVyR6FNKSVM=?=" string(8) "表題SJIS" [UTF-8][B]--------------------------------------- string(28) "=?UTF-8?B?6KGo6aGMVVRGLTg=?=" string(9) "表題UTF-8" [EUC-JP][Q]-------------------------------------- string(31) "=?EUC-JP?Q?=C9=BD=C2=EAEUC-JP?=" string(10) 
"表題EUC-JP" [SJIS][Q]---------------------------------------- string(32) "=?Shift_JIS?Q?=95=5C=91=E8SJIS?=" string(8) "表題SJIS" [UTF-8][Q]--------------------------------------- string(37) "=?UTF-8?Q?=E8=A1=A8=E9=A1=8CUTF-=38?=" string(9) "表題UTF-8"
まだmb_decode_mimeheader関数の内部では変換処理を行っていないので、完成形ができたら是非適用したい。