「mb_list_encodings()のパッチ書いてみた」に対する所感
この件ですが、個人的には無理して統合する必要は無いんじゃないのかなぁ……と思う次第です。
エンコーディング名・エイリアス名・MIME名を一つの関数にゴッチャにしてしまうと、どれがどれだか判断できないと思うので。という訳で、関数を分離してみたタコさんパッチです。
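後、mb_get_info関数の「func_overload」が、ありえない値を返していたので修正（func_overloadの設定値を、エンコーディング番号として mbfl_no_encoding2name() に渡してしまっていたため）。修正後は、オーバーロードされている関数の「元の関数名 => mb_関数名」の連想配列を返し、オーバーロードが無効の場合は "no overload" を返します。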
--- php-5.1.2,orig/ext/mbstring/mbstring.h 2006-01-01 21:50:08.000000000 +0900 +++ php-5.1.2/ext/mbstring/mbstring.h 2006-02-24 10:18:42.000000000 +0900 @@ -112,6 +112,8 @@ PHP_FUNCTION(mb_convert_encoding); PHP_FUNCTION(mb_detect_encoding); PHP_FUNCTION(mb_list_encodings); +PHP_FUNCTION(mb_list_encodings_alias_names); +PHP_FUNCTION(mb_list_mime_names); PHP_FUNCTION(mb_convert_kana); PHP_FUNCTION(mb_encode_mimeheader); PHP_FUNCTION(mb_decode_mimeheader); --- php-5.1.2,orig/ext/mbstring/mbstring.c 2006-01-01 21:50:08.000000000 +0900 +++ php-5.1.2/ext/mbstring/mbstring.c 2006-02-24 10:23:35.000000000 +0900 @@ -212,6 +212,8 @@ PHP_FE(mb_convert_encoding, NULL) PHP_FE(mb_detect_encoding, NULL) PHP_FE(mb_list_encodings, NULL) + PHP_FE(mb_list_encodings_alias_names, NULL) + PHP_FE(mb_list_mime_names, NULL) PHP_FE(mb_convert_kana, NULL) PHP_FE(mb_encode_mimeheader, NULL) PHP_FE(mb_decode_mimeheader, NULL) @@ -2323,6 +2325,52 @@ } /* }}} */ +/* {{{ proto array mb_list_encodings_alias_names() + Returns an array of all supported encodings alias names */ +PHP_FUNCTION(mb_list_encodings_alias_names) +{ + const mbfl_encoding **encodings; + const mbfl_encoding *encoding; + int i, j; + zval *row; + + array_init(return_value); + i = 0; + encodings = mbfl_get_supported_encodings(); + while ((encoding = encodings[i++]) != NULL) { + if (encoding->aliases != NULL) { + MAKE_STD_ZVAL(row); + array_init(row); + add_assoc_zval(return_value, (char *) encoding->name, row); + j = 0; + while ((*encoding->aliases)[j] != NULL) { + add_next_index_string(row, (char *)(*encoding->aliases)[j], 1); + j++; + } + } + } +} +/* }}} */ + +/* {{{ proto array mb_list_mime_names() + Returns an array of all supported mime names */ +PHP_FUNCTION(mb_list_mime_names) +{ + const mbfl_encoding **encodings; + const mbfl_encoding *encoding; + int i; + + array_init(return_value); + i = 0; + encodings = mbfl_get_supported_encodings(); + while ((encoding = encodings[i++]) != NULL) { + if(encoding->mime_name != NULL) { 
+ add_assoc_string(return_value, (char *) encoding->name, (char *) encoding->mime_name, 1); + } + } +} +/* }}} */ + /* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]]) Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */ PHP_FUNCTION(mb_encode_mimeheader) @@ -3328,6 +3376,8 @@ char *typ = NULL; int typ_len; char *name; + const struct mb_overload_def *over_func; + zval *row; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) { RETURN_FALSE; @@ -3344,8 +3394,19 @@ if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) { add_assoc_string(return_value, "http_output", name, 1); } - if ((name = (char *)mbfl_no_encoding2name(MBSTRG(func_overload))) != NULL) { - add_assoc_string(return_value, "func_overload", name, 1); + if (MBSTRG(func_overload)){ + over_func = &(mb_ovld[0]); + MAKE_STD_ZVAL(row); + array_init(row); + add_assoc_zval(return_value, "func_overload", row); + while (over_func->type > 0) { + if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) { + add_assoc_string(row, over_func->orig_func, over_func->ovld_func, 1); + } + over_func++; + } + } else { + add_assoc_string(return_value, "func_overload", "no overload", 1); } } else if (!strcasecmp("internal_encoding", typ)) { if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) { @@ -3360,9 +3421,18 @@ RETVAL_STRING(name, 1); } } else if (!strcasecmp("func_overload", typ)) { - if ((name = (char *)mbfl_no_encoding2name(MBSTRG(func_overload))) != NULL) { - RETVAL_STRING(name, 1); - } + if (MBSTRG(func_overload)){ + over_func = &(mb_ovld[0]); + array_init(return_value); + while (over_func->type > 0) { + if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) { + add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func, 1); + } + over_func++; + } + } 
else { + RETVAL_STRING("no overload", 1); + } } else { RETURN_FALSE; }
これを適用すると、以下の様な結果になります。
<?php print_r( mb_get_info( "func_overload" ) ); print_r( mb_get_info( "all" ) ); print_r( mb_list_encodings( ) ); print_r( mb_list_encodings_alias_names( ) ); print_r( mb_list_mime_names( ) ); ?> 結果 --- Array ( [mail] => mb_send_mail [strlen] => mb_strlen [strpos] => mb_strpos [strrpos] => mb_strrpos [substr] => mb_substr [strtolower] => mb_strtolower [strtoupper] => mb_strtoupper [substr_count] => mb_substr_count [ereg] => mb_ereg [eregi] => mb_eregi [ereg_replace] => mb_ereg_replace [eregi_replace] => mb_eregi_replace [split] => mb_split ) Array ( [internal_encoding] => EUC-JP [http_input] => pass [http_output] => pass [func_overload] => Array ( [mail] => mb_send_mail [strlen] => mb_strlen [strpos] => mb_strpos [strrpos] => mb_strrpos [substr] => mb_substr [strtolower] => mb_strtolower [strtoupper] => mb_strtoupper [substr_count] => mb_substr_count [ereg] => mb_ereg [eregi] => mb_eregi [ereg_replace] => mb_ereg_replace [eregi_replace] => mb_eregi_replace [split] => mb_split ) ) Array ( [0] => pass [1] => auto [2] => wchar [3] => byte2be [4] => byte2le [5] => byte4be [6] => byte4le [7] => BASE64 [8] => UUENCODE [9] => HTML-ENTITIES [10] => Quoted-Printable [11] => 7bit [12] => 8bit [13] => UCS-4 [14] => UCS-4BE [15] => UCS-4LE [16] => UCS-2 [17] => UCS-2BE [18] => UCS-2LE [19] => UTF-32 [20] => UTF-32BE [21] => UTF-32LE [22] => UTF-16 [23] => UTF-16BE [24] => UTF-16LE [25] => UTF-8 [26] => UTF-7 [27] => UTF7-IMAP [28] => ASCII [29] => EUC-JP [30] => SJIS [31] => eucJP-win [32] => SJIS-win [33] => JIS [34] => ISO-2022-JP [35] => Windows-1252 [36] => ISO-8859-1 [37] => ISO-8859-2 [38] => ISO-8859-3 [39] => ISO-8859-4 [40] => ISO-8859-5 [41] => ISO-8859-6 [42] => ISO-8859-7 [43] => ISO-8859-8 [44] => ISO-8859-9 [45] => ISO-8859-10 [46] => ISO-8859-13 [47] => ISO-8859-14 [48] => ISO-8859-15 [49] => ISO-8859-16 [50] => EUC-CN [51] => CP936 [52] => HZ [53] => EUC-TW [54] => BIG-5 [55] => EUC-KR [56] => UHC [57] => ISO-2022-KR [58] => Windows-1251 [59] => CP866 [60] => 
KOI8-R [61] => ArmSCII-8 ) Array ( [pass] => Array ( [0] => none ) [auto] => Array ( [0] => unknown ) [HTML-ENTITIES] => Array ( [0] => HTML [1] => html ) [Quoted-Printable] => Array ( [0] => qprint ) [8bit] => Array ( [0] => binary ) [UCS-4] => Array ( [0] => ISO-10646-UCS-4 [1] => UCS4 ) [UCS-2] => Array ( [0] => ISO-10646-UCS-2 [1] => UCS2 [2] => UNICODE ) [UTF-32] => Array ( [0] => utf32 ) [UTF-16] => Array ( [0] => utf16 ) [UTF-8] => Array ( [0] => utf8 ) [UTF-7] => Array ( [0] => utf7 ) [ASCII] => Array ( [0] => ANSI_X3.4-1968 [1] => iso-ir-6 [2] => ANSI_X3.4-1986 [3] => ISO_646.irv:1991 [4] => US-ASCII [5] => ISO646-US [6] => us [7] => IBM367 [8] => cp367 [9] => csASCII ) [EUC-JP] => Array ( [0] => EUC [1] => EUC_JP [2] => eucJP [3] => x-euc-jp ) [SJIS] => Array ( [0] => x-sjis [1] => SHIFT-JIS ) [eucJP-win] => Array ( [0] => eucJP-open ) [SJIS-win] => Array ( [0] => SJIS-open [1] => CP932 [2] => Windows-31J [3] => MS_Kanji ) [Windows-1252] => Array ( [0] => cp1252 ) [ISO-8859-1] => Array ( [0] => ISO_8859-1 [1] => latin1 ) [ISO-8859-2] => Array ( [0] => ISO_8859-2 [1] => latin2 ) [ISO-8859-3] => Array ( [0] => ISO_8859-3 [1] => latin3 ) [ISO-8859-4] => Array ( [0] => ISO_8859-4 [1] => latin4 ) [ISO-8859-5] => Array ( [0] => ISO_8859-5 [1] => cyrillic ) [ISO-8859-6] => Array ( [0] => ISO_8859-6 [1] => arabic ) [ISO-8859-7] => Array ( [0] => ISO_8859-7 [1] => greek ) [ISO-8859-8] => Array ( [0] => ISO_8859-8 [1] => hebrew ) [ISO-8859-9] => Array ( [0] => ISO_8859-9 [1] => latin5 ) [ISO-8859-10] => Array ( [0] => ISO_8859-10 [1] => latin6 ) [ISO-8859-13] => Array ( [0] => ISO_8859-13 ) [ISO-8859-14] => Array ( [0] => ISO_8859-14 [1] => latin8 ) [ISO-8859-15] => Array ( [0] => ISO_8859-15 ) [ISO-8859-16] => Array ( [0] => ISO_8859-16 ) [EUC-CN] => Array ( [0] => CN-GB [1] => EUC_CN [2] => eucCN [3] => x-euc-cn [4] => gb2312 ) [CP936] => Array ( [0] => CP-936 [1] => GBK ) [EUC-TW] => Array ( [0] => EUC_TW [1] => eucTW [2] => x-euc-tw ) [BIG-5] => Array ( [0] => 
CN-BIG5 [1] => BIG-FIVE [2] => BIGFIVE [3] => CP950 ) [EUC-KR] => Array ( [0] => EUC_KR [1] => eucKR [2] => x-euc-kr ) [UHC] => Array ( [0] => CP949 ) [Windows-1251] => Array ( [0] => CP1251 [1] => CP-1251 [2] => WINDOWS-1251 ) [CP866] => Array ( [0] => CP866 [1] => CP-866 [2] => IBM-866 ) [KOI8-R] => Array ( [0] => KOI8-R [1] => KOI8R ) [ArmSCII-8] => Array ( [0] => ArmSCII-8 [1] => ArmSCII8 [2] => ARMSCII-8 [3] => ARMSCII8 ) ) Array ( [BASE64] => BASE64 [UUENCODE] => x-uuencode [HTML-ENTITIES] => HTML-ENTITIES [Quoted-Printable] => Quoted-Printable [7bit] => 7bit [8bit] => 8bit [UCS-4] => UCS-4 [UCS-4BE] => UCS-4BE [UCS-4LE] => UCS-4LE [UCS-2] => UCS-2 [UCS-2BE] => UCS-2BE [UCS-2LE] => UCS-2LE [UTF-32] => UTF-32 [UTF-32BE] => UTF-32BE [UTF-32LE] => UTF-32LE [UTF-16] => UTF-16 [UTF-16BE] => UTF-16BE [UTF-16LE] => UTF-16LE [UTF-8] => UTF-8 [UTF-7] => UTF-7 [ASCII] => US-ASCII [EUC-JP] => EUC-JP [SJIS] => Shift_JIS [eucJP-win] => EUC-JP [SJIS-win] => Shift_JIS [JIS] => ISO-2022-JP [ISO-2022-JP] => ISO-2022-JP [Windows-1252] => Windows-1252 [ISO-8859-1] => ISO-8859-1 [ISO-8859-2] => ISO-8859-2 [ISO-8859-3] => ISO-8859-3 [ISO-8859-4] => ISO-8859-4 [ISO-8859-5] => ISO-8859-5 [ISO-8859-6] => ISO-8859-6 [ISO-8859-7] => ISO-8859-7 [ISO-8859-8] => ISO-8859-8 [ISO-8859-9] => ISO-8859-9 [ISO-8859-10] => ISO-8859-10 [ISO-8859-13] => ISO-8859-13 [ISO-8859-14] => ISO-8859-14 [ISO-8859-15] => ISO-8859-15 [ISO-8859-16] => ISO-8859-16 [EUC-CN] => CN-GB [CP936] => CP936 [HZ] => HZ-GB-2312 [EUC-TW] => EUC-TW [BIG-5] => BIG5 [EUC-KR] => EUC-KR [UHC] => UHC [ISO-2022-KR] => ISO-2022-KR [Windows-1251] => Windows-1251 [CP866] => CP866 [KOI8-R] => KOI8-R [ArmSCII-8] => ArmSCII-8 )
こういう風になれば、PHPスクリプト上で色んな技が応用できますよね。
MIMEエンコード名とエイリアス名は連想配列にしてみました。対比が分かりやすいので。
エイリアス名が無いエンコーディングは mb_list_encodings_alias_names() の結果には出てこないので、悪しからず。
っていうか、忙しいのに現実逃避してどうすんだ。。。_| ̄|○