2012-12-19补充:
-------------------------------------------------------------------------------------------------------------
写了一个函数并测试了一下,效果很不错!
/**
 * Extract the host and the "top" host from a string containing an http(s) URL.
 *
 * The input is trimmed and lower-cased, then matched first against a list of
 * two-level suffixes (com.cn, co.jp, ...) and, if that fails, against a list
 * of single-level suffixes (com, cn, org, ...). Only suffixes in those two
 * lists are recognised.
 *
 * @param string $httpurl URL to inspect; it must contain an explicit
 *                        http:// or https:// scheme to be recognised.
 *
 * @return array|null null when the trimmed input is empty according to
 *                    empty() (so '' and '0'); otherwise array($host, $tophost):
 *                    - $host    is the full host, including ":port" if present;
 *                    - $tophost is $host itself when the only prefix before
 *                      the last label is "www.", otherwise the last label plus
 *                      the suffix plus the optional port.
 *                    Both elements are '' when the URL is not recognised.
 */
function parseHost($httpurl)
{
    $httpurl = strtolower(trim($httpurl));
    if (empty($httpurl)) {
        return null;
    }

    // Group 1: whole host (with port). Group 2: optional sub-domain prefix.
    // Group 3: the label directly in front of the suffix. The label class is
    // written as [^\/\?#\.] on purpose: the previous "#-\." form was a range
    // from "#" to "." and therefore rejected "-" inside domain labels.
    $head = '/https?:\/\/(([^\/\?#]+\.)?([^\/\?#\.]+\.)';
    // Group 5: optional port. The look-ahead requires the host to end right
    // after the suffix/port, so "foo.cnn.io" is no longer read as "foo.cn" and
    // "www.com.cn.evil.org" is no longer read as "www.com.cn".
    $tail = '(\:[0-9]+)?)(?=[\/\?#]|$)/i';

    $regx1 = $head . '(com\.cn|org\.cn|net\.cn|com\.jp|co\.jp|com\.kr|com\.tw)' . $tail;
    $regx2 = $head . '(cn|com|org|net|cc|biz|hk|jp|kr|name|me|tw|la)' . $tail;

    $host = $tophost = '';
    $matches = array();

    // Two-level suffixes are tried first so that "x.com.cn" is not split as
    // label "com." + suffix "cn".
    if (preg_match($regx1, $httpurl, $matches) || preg_match($regx2, $httpurl, $matches)) {
        $host = $matches[1];

        // preg_match() drops trailing groups that did not participate in the
        // match, so index 5 is absent whenever the URL carries no port.
        $port = isset($matches[5]) ? $matches[5] : '';

        if ($matches[2] === 'www.') {
            $tophost = $host;
        } else {
            $tophost = $matches[3] . $matches[4] . $port;
        }
    }

    return array($host, $tophost);
}
测试代码:
// Demo: run parseHost() over a handful of sample inputs and print each result.
// The list mixes full URLs with ports, paths, queries and fragments, plus two
// inputs without a scheme that parseHost() is expected not to recognise.
$arr = array(
    'http://www.35dalu.com.cn:8080/ABCDEFGH.PHP?A=1&B=2#0000!',
    'http://www.a.b.c.35dalu.com.cn:8080/ABCDEFGH.PHP?A=1&B=2#0000!',
    'http://www.a.b.c.35dalu.com.cn:8080/ABCDEFGH.PHP?A=1&B=2#0000!',
    'http://www.35dalu.com/#asdfasdfasd',
    'www.35dalu.com',
    '35dalu',
    'http://a.b.c.d.com.cn:3306/asdfasdfadsfasd.asp?3adf=&adsfa#dead!dfadf^gadgad',
);
echo "
";
// Iterate by value: the loop never modifies the elements, and a by-reference
// loop variable would stay bound to the last array element after the loop.
foreach ($arr as $value) {
    echo $value . "\n";
    print_r(parseHost($value));
    echo "\n\n";
}
echo "
";

输出如下:
http://www.35dalu.com.cn:8080/ABCDEFGH.PHP?A=1&B=2#0000!
Array
(
[0] => www.35dalu.com.cn:8080
[1] => www.35dalu.com.cn:8080
)
http://www.a.b.c.35dalu.com.cn:8080/ABCDEFGH.PHP?A=1&B=2#0000!
Array
(
[0] => www.a.b.c.35dalu.com.cn:8080
[1] => 35dalu.com.cn:8080
)
http://www.a.b.c.35dalu.com.cn:8080/ABCDEFGH.PHP?A=1&B=2#0000!
Array
(
[0] => www.a.b.c.35dalu.com.cn:8080
[1] => 35dalu.com.cn:8080
)
http://www.35dalu.com/#asdfasdfasd
Array
(
[0] => www.35dalu.com
[1] => www.35dalu.com
)
www.35dalu.com
Array
(
[0] =>
[1] =>
)
35dalu
Array
(
[0] =>
[1] =>
)
http://a.b.c.d.com.cn:3306/asdfasdfadsfasd.asp?3adf=&adsfa#dead!dfadf^gadgad
Array
(
[0] => a.b.c.d.com.cn:3306
[1] => d.com.cn:3306
)