卷曲 file_get_contents/get_meta_tags 编码

发布于 2024-10-20 17:56:59 字数 1824 浏览 3 评论 0原文

所以我使用 CURL 来替换 PHP 中的 file_get_contents 和 get_meta_tags 功能:

<?php

class CURL{


    public static function file_get_contents($url){

        $ch = curl_init();

        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);

        $data = curl_exec($ch);
        curl_close($ch);

        iconv("Windows-1252","UTF-8",$text);

        return $data;


    }


    public static function get_meta_tags($url){

        $html = self::file_get_contents($url);
        self::get_meta_tags_html($html);



    }

    public static function get_meta_tags_html($html){

        //parsing begins here:
        $doc = new DOMDocument();
        @$doc->loadHTML($html);
        //$nodes = $doc->getElementsByTagName('title');

        //get and display what you need:
        //$title = $nodes->item(0)->nodeValue;

        $metas = $doc->getElementsByTagName('meta');

        $return = array();

        for ($i = 0; $i < $metas->length; $i++)
        {
            $meta = $metas->item($i);
            if($meta->getAttribute('name') == 'title')
               $return["title"] = $meta->getAttribute('content');
            if($meta->getAttribute('name') == 'description')
                $return['description'] = $meta->getAttribute('content');
            if($meta->getAttribute('name') == 'keywords')
                $return['keywords'] = $meta->getAttribute('content');
        }

        return $return;

    }


}


?>

但是当我调用 CURL::get_meta_tags 时,在包含外文字母(例如日语)的网站上,它将返回奇怪的字符而不是日语字母,而如果我使用内置的 php get_meta_tags,它将返回正确的字符...

我应该如何修改此代码,以便 CURL::get_meta_tags 也正确返回外来字符,就像内置的 php get_meta_tags 一样

so I'm using CURL to replace the file_get_contents and get_meta_tags functionality in PHP:

<?php

class CURL{


    public static function file_get_contents($url){

        $ch = curl_init();

        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);

        $data = curl_exec($ch);
        curl_close($ch);

        iconv("Windows-1252","UTF-8",$text);

        return $data;


    }


    public static function get_meta_tags($url){

        $html = self::file_get_contents($url);
        self::get_meta_tags_html($html);



    }

    public static function get_meta_tags_html($html){

        //parsing begins here:
        $doc = new DOMDocument();
        @$doc->loadHTML($html);
        //$nodes = $doc->getElementsByTagName('title');

        //get and display what you need:
        //$title = $nodes->item(0)->nodeValue;

        $metas = $doc->getElementsByTagName('meta');

        $return = array();

        for ($i = 0; $i < $metas->length; $i++)
        {
            $meta = $metas->item($i);
            if($meta->getAttribute('name') == 'title')
               $return["title"] = $meta->getAttribute('content');
            if($meta->getAttribute('name') == 'description')
                $return['description'] = $meta->getAttribute('content');
            if($meta->getAttribute('name') == 'keywords')
                $return['keywords'] = $meta->getAttribute('content');
        }

        return $return;

    }


}


?>

but then when I call CURL::get_meta_tags, on a site that has foreign letters in it such as Japanese, it will return weird characters instead of the Japanese letters whereas if I use the built in php get_meta_tags, it will return the correct character...

how should I modify this code such that CURL::get_meta_tags also return foreign characters properly just like the built in php get_meta_tags

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1

风吹过旳痕迹 2024-10-27 17:56:59

您更有可能只是尝试使用错误的编码显示文本。

如果您使用标头函数设置字符集,它看起来应该是正确的。

header('Content-Type: text/html; charset=utf-8');

您可以检查收到的元标记中的字符集(如果已设置)并使用它。

It is more likely that you are just trying to display the text with the wrong encoding.

If you set the character set using the header function it should look correct.

header('Content-Type: text/html; charset=utf-8');

You could check what the character-set is in the meta tag you receive if it was set, and use that.

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文