如何解析 Google Alerts 中的数据?

发布于 2024-07-21 14:12:09 字数 134 浏览 7 评论 0原文

首先,除了解析 Google 发送给您的电子邮件文本之外,您如何将 Google Alerts 信息存入数据库?

似乎没有 Google Alerts API。

如果您必须解析文本,您将如何解析电子邮件的相关部分?

Firstly, How would you get Google Alerts information into a database other than to parse the text of the email message that Google sends you?

It seems that there is no Google Alerts API.

If you must parse text, how would you go about parsing out the relevant pieces of the email message?

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(3

欢烬 2024-07-28 14:12:09

创建警报时,将“传送至”设置为“Feed”,然后您可以像使用任何其他 Feed 一样使用该 Feed XML。 这更容易解析并消化到数据库中。

When you create the alert, set the "Deliver To" to "Feed" and then you can consume the feed XML as you would any other feed. This is much easier to parse and digest into a database.

峩卟喜欢 2024-07-28 14:12:09
class googleAlerts{
    public function createAlert($alert){
        $USERNAME = '[email protected]';
        $PASSWORD = 'YYYYYY';
        $COOKIEFILE = 'cookies.txt';

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 30);
        curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)");
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($ch, CURLOPT_COOKIEJAR, $COOKIEFILE);
        curl_setopt($ch, CURLOPT_COOKIEFILE, $COOKIEFILE);
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER,1);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 120);
        curl_setopt($ch, CURLOPT_TIMEOUT, 120);

        curl_setopt($ch, CURLOPT_URL,
            'https://accounts.google.com/ServiceLogin?hl=en&service=alerts&continue=http://www.google.com/alerts/manage');
        $data = curl_exec($ch);

        $formFields = $this->getFormFields($data);

        $formFields['Email']  = $USERNAME;
        $formFields['Passwd'] = $PASSWORD;
        unset($formFields['PersistentCookie']);

        $post_string = '';
        foreach($formFields as $key => $value) {
            $post_string .= $key . '=' . urlencode($value) . '&';
        }

        $post_string = substr($post_string, 0, -1);

        curl_setopt($ch, CURLOPT_URL, 'https://accounts.google.com/ServiceLoginAuth');
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $post_string);

        $result = curl_exec($ch);

        if (strpos($result, '<title>') === false) {
            return false;

        } else {
            curl_setopt($ch, CURLOPT_URL, 'http://www.google.com/alerts');
            curl_setopt($ch, CURLOPT_POST, 0);
            curl_setopt($ch, CURLOPT_POSTFIELDS, null);

            $result = curl_exec($ch);

            curl_setopt($ch, CURLOPT_URL, 'http://www.google.com/alerts/create');
            curl_setopt($ch, CURLOPT_POST, 0);
            $result = curl_exec($ch);
            //var_dump($result);
            $result = $this->getFormFieldsCreate($result);
            $result['q'] = $alert;
            $result['t'] = '7';
            $result['f'] = '1';
            $result['l'] = '0';
            $result['e'] = 'feed';
            unset($result['PersistentCookie']);

            $post_string = '';
            foreach($result as $key => $value) {
                $post_string .= $key . '=' . urlencode($value) . '&';
            }

            $post_string = substr($post_string, 0, -1);
            curl_setopt($ch, CURLOPT_POSTFIELDS, $post_string);
            $result = curl_exec($ch);
            curl_setopt($ch, CURLOPT_URL, 'http://www.google.com/alerts/manage');
            $result = curl_exec($ch);
            if (preg_match_all('%'.$alert.'(?=</a>).*?<a href=[\'"]http://www.google.com/alerts/feeds/([^\'"]+)%i', $result, $matches)) {
                return ('http://www.google.com/alerts/feeds/'.$matches[1][0]);
            } else {
                return false;
            }


        }
    }

    private function getFormFields($data)
    {
        if (preg_match('/(<form.*?id=.?gaia_loginform.*?<\/form>)/is', $data, $matches)) {
            $inputs = $this->getInputs($matches[1]);

            return $inputs;
        } else {
            die('didnt find login form');
        }
    }
    private function getFormFieldsCreate($data)
    {
        if (preg_match('/(<form.*?name=.?.*?<\/form>)/is', $data, $matches)) {
            $inputs = $this->getInputs($matches[1]);

            return $inputs;
        } else {
            die('didnt find login form1');
        }
    }


    private function getInputs($form)
    {
        $inputs = array();

        $elements = preg_match_all('/(<input[^>]+>)/is', $form, $matches);

        if ($elements > 0) {
            for($i = 0; $i < $elements; $i++) {
                $el = preg_replace('/\s{2,}/', ' ', $matches[1][$i]);

                if (preg_match('/name=(?:["\'])?([^"\'\s]*)/i', $el, $name)) {
                    $name  = $name[1];
                    $value = '';

                    if (preg_match('/value=(?:["\'])?([^"\'\s]*)/i', $el, $value)) {
                        $value = $value[1];
                    }

                    $inputs[$name] = $value;
                }
            }
        }

        return $inputs;
    }
}
$alert = new googleAlerts;
echo $alert->createAlert('YOUR ALERT');

它将返回您新创建的警报的 rss feed 链接

class googleAlerts{
    public function createAlert($alert){
        $USERNAME = '[email protected]';
        $PASSWORD = 'YYYYYY';
        $COOKIEFILE = 'cookies.txt';

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 30);
        curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)");
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($ch, CURLOPT_COOKIEJAR, $COOKIEFILE);
        curl_setopt($ch, CURLOPT_COOKIEFILE, $COOKIEFILE);
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER,1);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 120);
        curl_setopt($ch, CURLOPT_TIMEOUT, 120);

        curl_setopt($ch, CURLOPT_URL,
            'https://accounts.google.com/ServiceLogin?hl=en&service=alerts&continue=http://www.google.com/alerts/manage');
        $data = curl_exec($ch);

        $formFields = $this->getFormFields($data);

        $formFields['Email']  = $USERNAME;
        $formFields['Passwd'] = $PASSWORD;
        unset($formFields['PersistentCookie']);

        $post_string = '';
        foreach($formFields as $key => $value) {
            $post_string .= $key . '=' . urlencode($value) . '&';
        }

        $post_string = substr($post_string, 0, -1);

        curl_setopt($ch, CURLOPT_URL, 'https://accounts.google.com/ServiceLoginAuth');
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $post_string);

        $result = curl_exec($ch);

        if (strpos($result, '<title>') === false) {
            return false;

        } else {
            curl_setopt($ch, CURLOPT_URL, 'http://www.google.com/alerts');
            curl_setopt($ch, CURLOPT_POST, 0);
            curl_setopt($ch, CURLOPT_POSTFIELDS, null);

            $result = curl_exec($ch);

            curl_setopt($ch, CURLOPT_URL, 'http://www.google.com/alerts/create');
            curl_setopt($ch, CURLOPT_POST, 0);
            $result = curl_exec($ch);
            //var_dump($result);
            $result = $this->getFormFieldsCreate($result);
            $result['q'] = $alert;
            $result['t'] = '7';
            $result['f'] = '1';
            $result['l'] = '0';
            $result['e'] = 'feed';
            unset($result['PersistentCookie']);

            $post_string = '';
            foreach($result as $key => $value) {
                $post_string .= $key . '=' . urlencode($value) . '&';
            }

            $post_string = substr($post_string, 0, -1);
            curl_setopt($ch, CURLOPT_POSTFIELDS, $post_string);
            $result = curl_exec($ch);
            curl_setopt($ch, CURLOPT_URL, 'http://www.google.com/alerts/manage');
            $result = curl_exec($ch);
            if (preg_match_all('%'.$alert.'(?=</a>).*?<a href=[\'"]http://www.google.com/alerts/feeds/([^\'"]+)%i', $result, $matches)) {
                return ('http://www.google.com/alerts/feeds/'.$matches[1][0]);
            } else {
                return false;
            }


        }
    }

    private function getFormFields($data)
    {
        if (preg_match('/(<form.*?id=.?gaia_loginform.*?<\/form>)/is', $data, $matches)) {
            $inputs = $this->getInputs($matches[1]);

            return $inputs;
        } else {
            die('didnt find login form');
        }
    }
    private function getFormFieldsCreate($data)
    {
        if (preg_match('/(<form.*?name=.?.*?<\/form>)/is', $data, $matches)) {
            $inputs = $this->getInputs($matches[1]);

            return $inputs;
        } else {
            die('didnt find login form1');
        }
    }


    private function getInputs($form)
    {
        $inputs = array();

        $elements = preg_match_all('/(<input[^>]+>)/is', $form, $matches);

        if ($elements > 0) {
            for($i = 0; $i < $elements; $i++) {
                $el = preg_replace('/\s{2,}/', ' ', $matches[1][$i]);

                if (preg_match('/name=(?:["\'])?([^"\'\s]*)/i', $el, $name)) {
                    $name  = $name[1];
                    $value = '';

                    if (preg_match('/value=(?:["\'])?([^"\'\s]*)/i', $el, $value)) {
                        $value = $value[1];
                    }

                    $inputs[$name] = $value;
                }
            }
        }

        return $inputs;
    }
}
$alert = new googleAlerts;
echo $alert->createAlert('YOUR ALERT');

It will return link to rss feed of your newly created alert

り繁华旳梦境 2024-07-28 14:12:09

我在此处找到了 Google Alerts API。 它非常小,我还没有测试过。

I found a Google Alerts API here. It's pretty minimal and I haven't tested it.

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文