ます’s Blog - どうでもいい記事100選

どうでもいい記事100選

「mb_list_encodings()のパッチ書いてみた」に対する所感

このパッチですが、個人的には無理して統合する必要は無いんじゃないのかなぁ。。。と思う次第です。
ゴッチャにしてしまうと、どれがどれだか判断できないと思うので。という訳で関数を分離してみたタコさんパッチ。
後、mb_get_info関数の「func_overload」が、ありえない値を返していたので修正。

--- php-5.1.2,orig/ext/mbstring/mbstring.h	2006-01-01 21:50:08.000000000 +0900
+++ php-5.1.2/ext/mbstring/mbstring.h	2006-02-24 10:18:42.000000000 +0900
@@ -112,6 +112,8 @@
 PHP_FUNCTION(mb_convert_encoding);
 PHP_FUNCTION(mb_detect_encoding);
 PHP_FUNCTION(mb_list_encodings);
+PHP_FUNCTION(mb_list_encodings_alias_names);
+PHP_FUNCTION(mb_list_mime_names);
 PHP_FUNCTION(mb_convert_kana);
 PHP_FUNCTION(mb_encode_mimeheader);
 PHP_FUNCTION(mb_decode_mimeheader);
--- php-5.1.2,orig/ext/mbstring/mbstring.c	2006-01-01 21:50:08.000000000 +0900
+++ php-5.1.2/ext/mbstring/mbstring.c	2006-02-24 10:23:35.000000000 +0900
@@ -212,6 +212,8 @@
 	PHP_FE(mb_convert_encoding,		NULL)
 	PHP_FE(mb_detect_encoding,		NULL)
 	PHP_FE(mb_list_encodings,		NULL)
+	PHP_FE(mb_list_encodings_alias_names,		NULL)
+	PHP_FE(mb_list_mime_names,		NULL)
 	PHP_FE(mb_convert_kana,			NULL)
 	PHP_FE(mb_encode_mimeheader,	NULL)
 	PHP_FE(mb_decode_mimeheader,	NULL)
@@ -2323,6 +2325,52 @@
 }
 /* }}} */
 
+/* {{{ proto array mb_list_encodings_alias_names()
+   Returns an associative array mapping each supported encoding name to a list of its alias names; encodings without aliases are omitted */
+PHP_FUNCTION(mb_list_encodings_alias_names)
+{
+	const mbfl_encoding **encodings;
+	const mbfl_encoding *encoding;
+	int i, j;
+	zval *row;
+
+	array_init(return_value);
+	i = 0;
+	encodings = mbfl_get_supported_encodings();
+	while ((encoding = encodings[i++]) != NULL) { /* iterate until the NULL entry that ends the list */
+		if (encoding->aliases != NULL) { /* encodings with no alias table get no entry at all */
+			MAKE_STD_ZVAL(row);
+			array_init(row);
+			add_assoc_zval(return_value, (char *) encoding->name, row); /* keyed by the encoding name */
+			j = 0;
+			while ((*encoding->aliases)[j] != NULL) { /* alias table is read up to its NULL entry */
+				add_next_index_string(row, (char *)(*encoding->aliases)[j], 1);
+				j++;
+			}
+		}
+	}
+}
+/* }}} */
+
+/* {{{ proto array mb_list_mime_names()
+   Returns an associative array mapping each supported encoding name to its MIME name; encodings without a MIME name are omitted */
+PHP_FUNCTION(mb_list_mime_names)
+{
+	const mbfl_encoding **encodings;
+	const mbfl_encoding *encoding;
+	int i;
+
+	array_init(return_value);
+	i = 0;
+	encodings = mbfl_get_supported_encodings();
+	while ((encoding = encodings[i++]) != NULL) { /* iterate until the NULL entry that ends the list */
+		if(encoding->mime_name != NULL) { /* skip encodings that have no MIME name */
+			add_assoc_string(return_value, (char *) encoding->name, (char *) encoding->mime_name, 1); /* encoding name => MIME name */
+		}
+	}
+}
+/* }}} */
+
 /* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]])
    Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
 PHP_FUNCTION(mb_encode_mimeheader)
@@ -3328,6 +3376,8 @@
 	char *typ = NULL;
 	int typ_len;
 	char *name;
+	const struct mb_overload_def *over_func;
+	zval *row;
 
 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
 		RETURN_FALSE;
@@ -3344,8 +3394,19 @@
 		if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) {
 			add_assoc_string(return_value, "http_output", name, 1);
 		}
-		if ((name = (char *)mbfl_no_encoding2name(MBSTRG(func_overload))) != NULL) {
-			add_assoc_string(return_value, "func_overload", name, 1);
+		if (MBSTRG(func_overload)){
+			over_func = &(mb_ovld[0]);
+			MAKE_STD_ZVAL(row);
+			array_init(row);
+			add_assoc_zval(return_value, "func_overload", row);
+			while (over_func->type > 0) {
+				if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
+					add_assoc_string(row, over_func->orig_func, over_func->ovld_func, 1);
+				}
+				over_func++;
+			}
+		} else {
+			add_assoc_string(return_value, "func_overload", "no overload", 1);
 		}
 	} else if (!strcasecmp("internal_encoding", typ)) {
 		if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) {
@@ -3360,9 +3421,18 @@
 			RETVAL_STRING(name, 1);
 		}		
 	} else if (!strcasecmp("func_overload", typ)) {
-		if ((name = (char *)mbfl_no_encoding2name(MBSTRG(func_overload))) != NULL) {
-			RETVAL_STRING(name, 1);
-		}
+			if (MBSTRG(func_overload)){
+				over_func = &(mb_ovld[0]);
+				array_init(return_value);
+				while (over_func->type > 0) {
+					if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
+						add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func, 1);
+					}
+					over_func++;
+				}
+			} else {
+				RETVAL_STRING("no overload", 1);
+			}
 	} else {
 		RETURN_FALSE;
 	}

これを適用すると、以下の様な結果になります。

<?php
// Demo for the patched mbstring build: dump each listing in turn so the
// results can be compared with the output reproduced below.

// Original function name => mb_* replacement for every overloaded function
// (the patch returns the string "no overload" when overloading is off).
print_r( mb_get_info( "func_overload" ) );
// All settings at once; "func_overload" is nested as the same kind of map.
print_r( mb_get_info( "all" ) );
// Numerically indexed list of encoding names (existing function, shown for comparison).
print_r( mb_list_encodings( ) );
// New function: encoding name => list of alias names.
print_r( mb_list_encodings_alias_names( ) );
// New function: encoding name => MIME name.
print_r( mb_list_mime_names( ) );
?>

結果
---
Array
(
    [mail] => mb_send_mail
    [strlen] => mb_strlen
    [strpos] => mb_strpos
    [strrpos] => mb_strrpos
    [substr] => mb_substr
    [strtolower] => mb_strtolower
    [strtoupper] => mb_strtoupper
    [substr_count] => mb_substr_count
    [ereg] => mb_ereg
    [eregi] => mb_eregi
    [ereg_replace] => mb_ereg_replace
    [eregi_replace] => mb_eregi_replace
    [split] => mb_split
)
Array
(
    [internal_encoding] => EUC-JP
    [http_input] => pass
    [http_output] => pass
    [func_overload] => Array
        (
            [mail] => mb_send_mail
            [strlen] => mb_strlen
            [strpos] => mb_strpos
            [strrpos] => mb_strrpos
            [substr] => mb_substr
            [strtolower] => mb_strtolower
            [strtoupper] => mb_strtoupper
            [substr_count] => mb_substr_count
            [ereg] => mb_ereg
            [eregi] => mb_eregi
            [ereg_replace] => mb_ereg_replace
            [eregi_replace] => mb_eregi_replace
            [split] => mb_split
        )

)
Array
(
    [0] => pass
    [1] => auto
    [2] => wchar
    [3] => byte2be
    [4] => byte2le
    [5] => byte4be
    [6] => byte4le
    [7] => BASE64
    [8] => UUENCODE
    [9] => HTML-ENTITIES
    [10] => Quoted-Printable
    [11] => 7bit
    [12] => 8bit
    [13] => UCS-4
    [14] => UCS-4BE
    [15] => UCS-4LE
    [16] => UCS-2
    [17] => UCS-2BE
    [18] => UCS-2LE
    [19] => UTF-32
    [20] => UTF-32BE
    [21] => UTF-32LE
    [22] => UTF-16
    [23] => UTF-16BE
    [24] => UTF-16LE
    [25] => UTF-8
    [26] => UTF-7
    [27] => UTF7-IMAP
    [28] => ASCII
    [29] => EUC-JP
    [30] => SJIS
    [31] => eucJP-win
    [32] => SJIS-win
    [33] => JIS
    [34] => ISO-2022-JP
    [35] => Windows-1252
    [36] => ISO-8859-1
    [37] => ISO-8859-2
    [38] => ISO-8859-3
    [39] => ISO-8859-4
    [40] => ISO-8859-5
    [41] => ISO-8859-6
    [42] => ISO-8859-7
    [43] => ISO-8859-8
    [44] => ISO-8859-9
    [45] => ISO-8859-10
    [46] => ISO-8859-13
    [47] => ISO-8859-14
    [48] => ISO-8859-15
    [49] => ISO-8859-16
    [50] => EUC-CN
    [51] => CP936
    [52] => HZ
    [53] => EUC-TW
    [54] => BIG-5
    [55] => EUC-KR
    [56] => UHC
    [57] => ISO-2022-KR
    [58] => Windows-1251
    [59] => CP866
    [60] => KOI8-R
    [61] => ArmSCII-8
)
Array
(
    [pass] => Array
        (
            [0] => none
        )

    [auto] => Array
        (
            [0] => unknown
        )

    [HTML-ENTITIES] => Array
        (
            [0] => HTML
            [1] => html
        )

    [Quoted-Printable] => Array
        (
            [0] => qprint
        )

    [8bit] => Array
        (
            [0] => binary
        )

    [UCS-4] => Array
        (
            [0] => ISO-10646-UCS-4
            [1] => UCS4
        )

    [UCS-2] => Array
        (
            [0] => ISO-10646-UCS-2
            [1] => UCS2
            [2] => UNICODE
        )

    [UTF-32] => Array
        (
            [0] => utf32
        )

    [UTF-16] => Array
        (
            [0] => utf16
        )

    [UTF-8] => Array
        (
            [0] => utf8
        )

    [UTF-7] => Array
        (
            [0] => utf7
        )

    [ASCII] => Array
        (
            [0] => ANSI_X3.4-1968
            [1] => iso-ir-6
            [2] => ANSI_X3.4-1986
            [3] => ISO_646.irv:1991
            [4] => US-ASCII
            [5] => ISO646-US
            [6] => us
            [7] => IBM367
            [8] => cp367
            [9] => csASCII
        )

    [EUC-JP] => Array
        (
            [0] => EUC
            [1] => EUC_JP
            [2] => eucJP
            [3] => x-euc-jp
        )

    [SJIS] => Array
        (
            [0] => x-sjis
            [1] => SHIFT-JIS
        )

    [eucJP-win] => Array
        (
            [0] => eucJP-open
        )

    [SJIS-win] => Array
        (
            [0] => SJIS-open
            [1] => CP932
            [2] => Windows-31J
            [3] => MS_Kanji
        )

    [Windows-1252] => Array
        (
            [0] => cp1252
        )

    [ISO-8859-1] => Array
        (
            [0] => ISO_8859-1
            [1] => latin1
        )

    [ISO-8859-2] => Array
        (
            [0] => ISO_8859-2
            [1] => latin2
        )

    [ISO-8859-3] => Array
        (
            [0] => ISO_8859-3
            [1] => latin3
        )

    [ISO-8859-4] => Array
        (
            [0] => ISO_8859-4
            [1] => latin4
        )

    [ISO-8859-5] => Array
        (
            [0] => ISO_8859-5
            [1] => cyrillic
        )

    [ISO-8859-6] => Array
        (
            [0] => ISO_8859-6
            [1] => arabic
        )

    [ISO-8859-7] => Array
        (
            [0] => ISO_8859-7
            [1] => greek
        )

    [ISO-8859-8] => Array
        (
            [0] => ISO_8859-8
            [1] => hebrew
        )

    [ISO-8859-9] => Array
        (
            [0] => ISO_8859-9
            [1] => latin5
        )

    [ISO-8859-10] => Array
        (
            [0] => ISO_8859-10
            [1] => latin6
        )

    [ISO-8859-13] => Array
        (
            [0] => ISO_8859-13
        )

    [ISO-8859-14] => Array
        (
            [0] => ISO_8859-14
            [1] => latin8
        )

    [ISO-8859-15] => Array
        (
            [0] => ISO_8859-15
        )

    [ISO-8859-16] => Array
        (
            [0] => ISO_8859-16
        )

    [EUC-CN] => Array
        (
            [0] => CN-GB
            [1] => EUC_CN
            [2] => eucCN
            [3] => x-euc-cn
            [4] => gb2312
        )

    [CP936] => Array
        (
            [0] => CP-936
            [1] => GBK
        )

    [EUC-TW] => Array
        (
            [0] => EUC_TW
            [1] => eucTW
            [2] => x-euc-tw
        )

    [BIG-5] => Array
        (
            [0] => CN-BIG5
            [1] => BIG-FIVE
            [2] => BIGFIVE
            [3] => CP950
        )

    [EUC-KR] => Array
        (
            [0] => EUC_KR
            [1] => eucKR
            [2] => x-euc-kr
        )

    [UHC] => Array
        (
            [0] => CP949
        )

    [Windows-1251] => Array
        (
            [0] => CP1251
            [1] => CP-1251
            [2] => WINDOWS-1251
        )

    [CP866] => Array
        (
            [0] => CP866
            [1] => CP-866
            [2] => IBM-866
        )

    [KOI8-R] => Array
        (
            [0] => KOI8-R
            [1] => KOI8R
        )

    [ArmSCII-8] => Array
        (
            [0] => ArmSCII-8
            [1] => ArmSCII8
            [2] => ARMSCII-8
            [3] => ARMSCII8
        )

)
Array
(
    [BASE64] => BASE64
    [UUENCODE] => x-uuencode
    [HTML-ENTITIES] => HTML-ENTITIES
    [Quoted-Printable] => Quoted-Printable
    [7bit] => 7bit
    [8bit] => 8bit
    [UCS-4] => UCS-4
    [UCS-4BE] => UCS-4BE
    [UCS-4LE] => UCS-4LE
    [UCS-2] => UCS-2
    [UCS-2BE] => UCS-2BE
    [UCS-2LE] => UCS-2LE
    [UTF-32] => UTF-32
    [UTF-32BE] => UTF-32BE
    [UTF-32LE] => UTF-32LE
    [UTF-16] => UTF-16
    [UTF-16BE] => UTF-16BE
    [UTF-16LE] => UTF-16LE
    [UTF-8] => UTF-8
    [UTF-7] => UTF-7
    [ASCII] => US-ASCII
    [EUC-JP] => EUC-JP
    [SJIS] => Shift_JIS
    [eucJP-win] => EUC-JP
    [SJIS-win] => Shift_JIS
    [JIS] => ISO-2022-JP
    [ISO-2022-JP] => ISO-2022-JP
    [Windows-1252] => Windows-1252
    [ISO-8859-1] => ISO-8859-1
    [ISO-8859-2] => ISO-8859-2
    [ISO-8859-3] => ISO-8859-3
    [ISO-8859-4] => ISO-8859-4
    [ISO-8859-5] => ISO-8859-5
    [ISO-8859-6] => ISO-8859-6
    [ISO-8859-7] => ISO-8859-7
    [ISO-8859-8] => ISO-8859-8
    [ISO-8859-9] => ISO-8859-9
    [ISO-8859-10] => ISO-8859-10
    [ISO-8859-13] => ISO-8859-13
    [ISO-8859-14] => ISO-8859-14
    [ISO-8859-15] => ISO-8859-15
    [ISO-8859-16] => ISO-8859-16
    [EUC-CN] => CN-GB
    [CP936] => CP936
    [HZ] => HZ-GB-2312
    [EUC-TW] => EUC-TW
    [BIG-5] => BIG5
    [EUC-KR] => EUC-KR
    [UHC] => UHC
    [ISO-2022-KR] => ISO-2022-KR
    [Windows-1251] => Windows-1251
    [CP866] => CP866
    [KOI8-R] => KOI8-R
    [ArmSCII-8] => ArmSCII-8
)

こういう風になれば、PHPスクリプト上で色んな技が応用できますよね。
MIMEエンコード名とエイリアス名は連想配列にしてみました。対比が分かりやすいので。
エイリアス名が無いエンコーディングはリストには出てこないので、悪しからず。
っていうか、忙しいのに現実逃避してどうすんだ。。。_| ̄|○