PHP中GBK和UTF8编码处理
时间:2008-09-11 来源:bj2008_0201
PHP中GBK和UTF8编码处理
一、编码范围
1. GBK (GB2312/GB18030)
\x00-\xff GBK双字节编码范围
\x20-\x7f ASCII
\xa1-\xff 中文
\x80-\xff 中文
2. UTF-8 (Unicode)
\u4e00-\u9fa5 (中文)
\u3130-\u318F (韩文)
\uAC00-\uD7A3 (韩文)
\u0800-\u4e00 (日文)
ps: 韩文是大于[\u9fa5]的字符
正则例子:
preg_replace("/([\x80-\xff])/","",$str);
preg_replace('/([\x{4e00}-\x{9fa5}])/u', '', $str);
二、代码例子
//判断内容里有没有中文-GBK (PHP)
/**
 * Reports whether a byte string contains a high byte (0x80-0xFF) followed by
 * one more character.
 *
 * Meant for GBK-encoded text, where a double-byte character begins with a
 * byte >= 0x80. This is a byte-level heuristic only: it does not validate GBK
 * and will match any high byte that is followed by a non-newline byte.
 *
 * @param string $s Text to inspect.
 * @return int|false 1 when the pattern matches, 0 when it does not, false on a PCRE error.
 */
function check_is_chinese($s)
{
    // Single-quoted so PCRE (not PHP) interprets \x80-\xff; no /u flag, so matching is byte-wise.
    return preg_match('/[\x80-\xff]./', $s);
}
//获取字符串长度-GBK (PHP)
/**
 * Counts the characters in a GBK-style double-byte string.
 *
 * Every byte >= 0x80 is treated as the lead byte of a two-byte character and
 * the byte after it is skipped; every other byte counts as one character.
 * A dangling high byte at the very end still counts as one character.
 *
 * @param string $str Byte string to measure.
 * @return int Number of characters.
 */
function gb_strlen($str)
{
    $str = (string) $str;
    $byteLength = strlen($str);
    $count = 0;
    for ($i = 0; $i < $byteLength; $i++) {
        // High bit set: lead byte of a double-byte character, so skip its trail byte.
        if (ord($str[$i]) >= 0x80) {
            ++$i;
        }
        ++$count;
    }
    return $count;
}
//截取字符串字串-GBK (PHP)
/**
 * Returns the first $len characters of a GBK-style double-byte string.
 *
 * Every byte >= 0x80 is treated as the lead byte of a two-byte character, so
 * the cut never falls between a lead byte and its trail byte. If the string
 * holds fewer than $len characters it is returned whole. A zero or negative
 * $len yields an empty string.
 *
 * @param string $str Byte string to cut.
 * @param int    $len Maximum number of characters to keep.
 * @return string Leading part of $str.
 */
function gb_substr($str, $len)
{
    $str = (string) $str;
    $byteLength = strlen($str);
    $count = 0;
    for ($i = 0; $i < $byteLength; $i++) {
        // >= rather than == so a negative length stops immediately instead of never matching.
        if ($count >= $len) {
            break;
        }
        // High bit set: lead byte of a double-byte character, so keep its trail byte too.
        if (ord($str[$i]) >= 0x80) {
            ++$i;
        }
        ++$count;
    }
    // $i may exceed the byte length after a dangling lead byte; substr() clamps it.
    return substr($str, 0, $i);
}
//统计字符串长度-UTF8 (PHP)
/**
 * Measures a UTF-8 string in width units: 1 per ASCII byte, 2 per multi-byte
 * character.
 *
 * Note that this is not a code-point count: every 2-, 3- or 4-byte sequence
 * adds 2 to the result (presumably to approximate display width or the GBK
 * byte length; confirm against callers). Only the lead byte of each sequence
 * is inspected; continuation bytes are skipped without validation.
 *
 * Terminates the script via die() when a byte > 127 is not a valid lead byte
 * (0xC0-0xF7), e.g. a stray continuation byte.
 *
 * @param string $str UTF-8 encoded text.
 * @return int Width of the string in the units described above.
 */
function utf8_strlen($str)
{
    $str = (string) $str;
    $byteLength = strlen($str);
    $count = 0;
    for ($i = 0; $i < $byteLength; $i++) {
        $value = ord($str[$i]);
        if ($value > 127) {
            // Extra unit for a multi-byte character (total of 2 with the increment below).
            $count++;
            // Skip the continuation bytes that follow the lead byte.
            if ($value >= 192 && $value <= 223) {
                $i++;
            } elseif ($value >= 224 && $value <= 239) {
                $i = $i + 2;
            } elseif ($value >= 240 && $value <= 247) {
                $i = $i + 3;
            } else {
                die('Not a UTF-8 compatible string');
            }
        }
        $count++;
    }
    return $count;
}
//截取字符串-UTF8(PHP)
/**
 * Extracts part of a UTF-8 string, with offset and length given in width
 * units: 1 per ASCII byte, 2 per multi-byte character.
 *
 * The result never splits a multi-byte sequence. It starts at the first
 * character boundary whose accumulated width is >= $position and ends at the
 * first boundary where at least $length units have been collected, so a wide
 * character straddling either limit is included whole. Returns an empty
 * string when $position lies beyond the end of the string.
 *
 * Only lead bytes are inspected; continuation bytes are skipped without
 * validation. Terminates the script via die() when a byte > 127 is not a
 * valid lead byte (0xC0-0xF7).
 *
 * @param string $str      UTF-8 encoded text.
 * @param int    $position Start offset in width units.
 * @param int    $length   Number of width units to extract.
 * @return string The extracted substring.
 */
function utf8_substr($str, $position, $length)
{
    $str = (string) $str;
    $byteLength = strlen($str);
    $startOffset = null;        // byte offset where the result begins; null until reached
    $startWidth = 0;            // accumulated width at $startOffset
    $endOffset = $byteLength;   // byte offset where the result ends (exclusive)
    $width = 0;

    for ($i = 0; $i < $byteLength; $i++) {
        if ($startOffset === null && $width >= $position) {
            $startOffset = $i;
            $startWidth = $width;
        }
        // Only measure the collected length once the start has been found;
        // testing earlier would end the scan before reaching $position.
        if ($startOffset !== null && ($width - $startWidth) >= $length) {
            $endOffset = $i;
            break;
        }
        $value = ord($str[$i]);
        if ($value > 127) {
            // Extra unit for a multi-byte character (total of 2 with the increment below).
            $width++;
            // Skip the continuation bytes that follow the lead byte.
            if ($value >= 192 && $value <= 223) {
                $i++;
            } elseif ($value >= 224 && $value <= 239) {
                $i = $i + 2;
            } elseif ($value >= 240 && $value <= 247) {
                $i = $i + 3;
            } else {
                die('Not a UTF-8 compatible string');
            }
        }
        $width++;
    }

    if ($startOffset === null) {
        // $position is at or past the end of the string.
        return '';
    }
    return substr($str, $startOffset, $endOffset - $startOffset);
}
一、编码范围
1. GBK (GB2312/GB18030)
\x00-\xff GBK双字节编码范围
\x20-\x7f ASCII
\xa1-\xff 中文
\x80-\xff 中文
2. UTF-8 (Unicode)
\u4e00-\u9fa5 (中文)
\u3130-\u318F (韩文)
\uAC00-\uD7A3 (韩文)
\u0800-\u4e00 (日文)
ps: 韩文是大于[\u9fa5]的字符
正则例子:
preg_replace("/([\x80-\xff])/","",$str);
preg_replace('/([\x{4e00}-\x{9fa5}])/u', '', $str);
二、代码例子
//判断内容里有没有中文-GBK (PHP)
/**
 * Reports whether a byte string contains a high byte (0x80-0xFF) followed by
 * one more character.
 *
 * Meant for GBK-encoded text, where a double-byte character begins with a
 * byte >= 0x80. This is a byte-level heuristic only: it does not validate GBK
 * and will match any high byte that is followed by a non-newline byte.
 *
 * @param string $s Text to inspect.
 * @return int|false 1 when the pattern matches, 0 when it does not, false on a PCRE error.
 */
function check_is_chinese($s)
{
    // Single-quoted so PCRE (not PHP) interprets \x80-\xff; no /u flag, so matching is byte-wise.
    return preg_match('/[\x80-\xff]./', $s);
}
//获取字符串长度-GBK (PHP)
/**
 * Counts the characters in a GBK-style double-byte string.
 *
 * Every byte >= 0x80 is treated as the lead byte of a two-byte character and
 * the byte after it is skipped; every other byte counts as one character.
 * A dangling high byte at the very end still counts as one character.
 *
 * @param string $str Byte string to measure.
 * @return int Number of characters.
 */
function gb_strlen($str)
{
    $str = (string) $str;
    $byteLength = strlen($str);
    $count = 0;
    for ($i = 0; $i < $byteLength; $i++) {
        // High bit set: lead byte of a double-byte character, so skip its trail byte.
        if (ord($str[$i]) >= 0x80) {
            ++$i;
        }
        ++$count;
    }
    return $count;
}
//截取字符串字串-GBK (PHP)
/**
 * Returns the first $len characters of a GBK-style double-byte string.
 *
 * Every byte >= 0x80 is treated as the lead byte of a two-byte character, so
 * the cut never falls between a lead byte and its trail byte. If the string
 * holds fewer than $len characters it is returned whole. A zero or negative
 * $len yields an empty string.
 *
 * @param string $str Byte string to cut.
 * @param int    $len Maximum number of characters to keep.
 * @return string Leading part of $str.
 */
function gb_substr($str, $len)
{
    $str = (string) $str;
    $byteLength = strlen($str);
    $count = 0;
    for ($i = 0; $i < $byteLength; $i++) {
        // >= rather than == so a negative length stops immediately instead of never matching.
        if ($count >= $len) {
            break;
        }
        // High bit set: lead byte of a double-byte character, so keep its trail byte too.
        if (ord($str[$i]) >= 0x80) {
            ++$i;
        }
        ++$count;
    }
    // $i may exceed the byte length after a dangling lead byte; substr() clamps it.
    return substr($str, 0, $i);
}
//统计字符串长度-UTF8 (PHP)
/**
 * Measures a UTF-8 string in width units: 1 per ASCII byte, 2 per multi-byte
 * character.
 *
 * Note that this is not a code-point count: every 2-, 3- or 4-byte sequence
 * adds 2 to the result (presumably to approximate display width or the GBK
 * byte length; confirm against callers). Only the lead byte of each sequence
 * is inspected; continuation bytes are skipped without validation.
 *
 * Terminates the script via die() when a byte > 127 is not a valid lead byte
 * (0xC0-0xF7), e.g. a stray continuation byte.
 *
 * @param string $str UTF-8 encoded text.
 * @return int Width of the string in the units described above.
 */
function utf8_strlen($str)
{
    $str = (string) $str;
    $byteLength = strlen($str);
    $count = 0;
    for ($i = 0; $i < $byteLength; $i++) {
        $value = ord($str[$i]);
        if ($value > 127) {
            // Extra unit for a multi-byte character (total of 2 with the increment below).
            $count++;
            // Skip the continuation bytes that follow the lead byte.
            if ($value >= 192 && $value <= 223) {
                $i++;
            } elseif ($value >= 224 && $value <= 239) {
                $i = $i + 2;
            } elseif ($value >= 240 && $value <= 247) {
                $i = $i + 3;
            } else {
                die('Not a UTF-8 compatible string');
            }
        }
        $count++;
    }
    return $count;
}
//截取字符串-UTF8(PHP)
/**
 * Extracts part of a UTF-8 string, with offset and length given in width
 * units: 1 per ASCII byte, 2 per multi-byte character.
 *
 * The result never splits a multi-byte sequence. It starts at the first
 * character boundary whose accumulated width is >= $position and ends at the
 * first boundary where at least $length units have been collected, so a wide
 * character straddling either limit is included whole. Returns an empty
 * string when $position lies beyond the end of the string.
 *
 * Only lead bytes are inspected; continuation bytes are skipped without
 * validation. Terminates the script via die() when a byte > 127 is not a
 * valid lead byte (0xC0-0xF7).
 *
 * @param string $str      UTF-8 encoded text.
 * @param int    $position Start offset in width units.
 * @param int    $length   Number of width units to extract.
 * @return string The extracted substring.
 */
function utf8_substr($str, $position, $length)
{
    $str = (string) $str;
    $byteLength = strlen($str);
    $startOffset = null;        // byte offset where the result begins; null until reached
    $startWidth = 0;            // accumulated width at $startOffset
    $endOffset = $byteLength;   // byte offset where the result ends (exclusive)
    $width = 0;

    for ($i = 0; $i < $byteLength; $i++) {
        if ($startOffset === null && $width >= $position) {
            $startOffset = $i;
            $startWidth = $width;
        }
        // Only measure the collected length once the start has been found;
        // testing earlier would end the scan before reaching $position.
        if ($startOffset !== null && ($width - $startWidth) >= $length) {
            $endOffset = $i;
            break;
        }
        $value = ord($str[$i]);
        if ($value > 127) {
            // Extra unit for a multi-byte character (total of 2 with the increment below).
            $width++;
            // Skip the continuation bytes that follow the lead byte.
            if ($value >= 192 && $value <= 223) {
                $i++;
            } elseif ($value >= 224 && $value <= 239) {
                $i = $i + 2;
            } elseif ($value >= 240 && $value <= 247) {
                $i = $i + 3;
            } else {
                die('Not a UTF-8 compatible string');
            }
        }
        $width++;
    }

    if ($startOffset === null) {
        // $position is at or past the end of the string.
        return '';
    }
    return substr($str, $startOffset, $endOffset - $startOffset);
}
相关阅读 更多 +