如何用 C 在 PCRE 中编写适当的模式

发布于 2024-12-12 05:12:04 字数 2383 浏览 0 评论 0原文

现在我有一个字符串,其中包含许多形如 href="http://www.AAA.com" 的子字符串以及其他字符。我的问题是,在我的 C 代码中我写了:

// NOTE(review): pcre_compile() takes the bare pattern text; the "/" delimiters and the
// trailing "g" written here are treated as literal pattern characters, not as syntax.
char pattern[] = "/^href.*>$/g";

我想提取这个长字符串中的所有网址,但这段代码不起作用。有人可以帮助我吗?非常感谢您的帮助。这是代码:

#define PCRE_STATIC //
#include <stdio.h>  
#include <string.h>  
#include <pcre.h>  
#define OVECCOUNT 30 /* should be a multiple of 3 */  
#define EBUFLEN 128  
#define BUFLEN 1024  

/*
 * Compiles `pattern`, runs it once against the HTML fragment in `src`, and
 * prints every substring pair reported in `ovector`.
 * Returns 0 when pcre_exec() reports a match, 1 on compile failure, no match,
 * or a matching error.
 */
int main()  
{  
    pcre  *re;  
    const char *error;  
    int  erroffset;  
    int  ovector[OVECCOUNT];  
    int  rc, i;  
    char  src[] =  "<a href=\"http://union.elong.com/r/hotel/2000000000855850825\" target=\"_blank\">ss</a></td></tr><tr><td><a href=\"http://123.sogou.com/sub/fanyi.html\" targedd</a></td><td><a href=\"http://123.sogou.com/sub/fantizi.html\" target=\"_blank\">繁 体 字</a></td><td><a href=\"http://123.sogou.com/sub/kuaidi.htm>快递查询</a></td></tr><tr><td><a href=\"http://q.stock.sohu.com/index.shtm>股票行情</a></td><td><a href=\"http://www.chinamobile.com/service/billservice/>话费查询</a></td><td><a href=\"http://auto.sohu.com/s2004/weizhangchaxun.shtml>交通违章</a></td></tr><tr><td>";
    /* NOTE(review): the leading "/" and trailing "/g" are passed to PCRE as
     * literal characters. A literal "/" in front of "^" means the anchor can
     * never be satisfied, so this pattern cannot match `src`. */
    char  pattern[] = "/^href.*>$/g";

    /* Compile with no options; on failure `error` and `erroffset` describe the problem. */
    re = pcre_compile(pattern,
                      0,
                      &error,
                      &erroffset,
                      NULL);

    if (re == NULL) {
        printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
        return 1;  
    }  
    /* Single match attempt starting at offset 0; there is no loop over later matches.
     * NOTE(review): PCRE_MULTILINE is documented as a pcre_compile() option, not a
     * pcre_exec() option -- passing it here presumably yields a bad-option error;
     * confirm against the pcreapi documentation. */
    rc = pcre_exec(re,
                   NULL,
                   src,
                   strlen(src),
                   0,
                   PCRE_MULTILINE,
                   ovector,
                   OVECCOUNT);

    /* Negative return values are errors; PCRE_ERROR_NOMATCH is reported separately. */
    if (rc < 0) {
        if (rc == PCRE_ERROR_NOMATCH) printf("Sorry, no match ...\n");
        else printf("Matching error %d\n", rc);
        pcre_free(re);  
        return 1;  
    }  
    printf("\nOK, %d has matched ...\n\n",rc);
    /* ovector is read as (start, end) offset pairs into `src`; pair i is printed as "$ i". */
    for (i = 0; i < rc; i++) {
        char *substring_start = src + ovector[2*i];  
        int substring_length = ovector[2*i+1] - ovector[2*i];  
        printf("$%2d: %.*s\n", i, substring_length, substring_start);
    }  
    pcre_free(re);
    return 0;  
}  

Now I have a string that contains many substrings like `href="http://www.AAA.com"` among other characters.
Here is my question. In my C code I wrote:

// NOTE(review): pcre_compile() takes the bare pattern text; the "/" delimiters and the
// trailing "g" written here are treated as literal pattern characters, not as syntax.
char pattern[] = "/^href.*>$/g";

I want to extract all the URLs from the long string, but it doesn't work. Can somebody help me? Your help will be appreciated.
Here is the code:

#define PCRE_STATIC //
#include <stdio.h>  
#include <string.h>  
#include <pcre.h>  
#define OVECCOUNT 30 /* should be a multiple of 3 */  
#define EBUFLEN 128  
#define BUFLEN 1024  

/*
 * Compiles `pattern`, runs it once against the HTML fragment in `src`, and
 * prints every substring pair reported in `ovector`.
 * Returns 0 when pcre_exec() reports a match, 1 on compile failure, no match,
 * or a matching error.
 */
int main()  
{  
    pcre  *re;  
    const char *error;  
    int  erroffset;  
    int  ovector[OVECCOUNT];  
    int  rc, i;  
    char  src[] =  "<a href=\"http://union.elong.com/r/hotel/2000000000855850825\" target=\"_blank\">ss</a></td></tr><tr><td><a href=\"http://123.sogou.com/sub/fanyi.html\" targedd</a></td><td><a href=\"http://123.sogou.com/sub/fantizi.html\" target=\"_blank\">繁 体 字</a></td><td><a href=\"http://123.sogou.com/sub/kuaidi.htm>快递查询</a></td></tr><tr><td><a href=\"http://q.stock.sohu.com/index.shtm>股票行情</a></td><td><a href=\"http://www.chinamobile.com/service/billservice/>话费查询</a></td><td><a href=\"http://auto.sohu.com/s2004/weizhangchaxun.shtml>交通违章</a></td></tr><tr><td>";
    /* NOTE(review): the leading "/" and trailing "/g" are passed to PCRE as
     * literal characters. A literal "/" in front of "^" means the anchor can
     * never be satisfied, so this pattern cannot match `src`. */
    char  pattern[] = "/^href.*>$/g";

    /* Compile with no options; on failure `error` and `erroffset` describe the problem. */
    re = pcre_compile(pattern,
                      0,
                      &error,
                      &erroffset,
                      NULL);

    if (re == NULL) {
        printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
        return 1;  
    }  
    /* Single match attempt starting at offset 0; there is no loop over later matches.
     * NOTE(review): PCRE_MULTILINE is documented as a pcre_compile() option, not a
     * pcre_exec() option -- passing it here presumably yields a bad-option error;
     * confirm against the pcreapi documentation. */
    rc = pcre_exec(re,
                   NULL,
                   src,
                   strlen(src),
                   0,
                   PCRE_MULTILINE,
                   ovector,
                   OVECCOUNT);

    /* Negative return values are errors; PCRE_ERROR_NOMATCH is reported separately. */
    if (rc < 0) {
        if (rc == PCRE_ERROR_NOMATCH) printf("Sorry, no match ...\n");
        else printf("Matching error %d\n", rc);
        pcre_free(re);  
        return 1;  
    }  
    printf("\nOK, %d has matched ...\n\n",rc);
    /* ovector is read as (start, end) offset pairs into `src`; pair i is printed as "$ i". */
    for (i = 0; i < rc; i++) {
        char *substring_start = src + ovector[2*i];  
        int substring_length = ovector[2*i+1] - ovector[2*i];  
        printf("$%2d: %.*s\n", i, substring_length, substring_start);
    }  
    pcre_free(re);
    return 0;  
}  

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1)

煞人兵器 2024-12-19 05:12:04

试试这个正则表达式。

// Matches href=<quote>URL<quote> with optional whitespace around '=': group 1 captures the
// quote character (' or "), group 2 lazily captures the URL, and \1 requires the same closing quote.
myregexp = pcre_compile("href\\s*=\\s*(['\"])(.*?)\\1", 0, &error, &erroroffset, NULL);

示例代码:

pcre *myregexp;
const char *error;
int erroroffset;
int offsetcount;
int offsets[(2+1)*3]; // (max_capturing_groups+1)*3
myregexp = pcre_compile("href\\s*=\\s*(['\"])(.*?)\\1", 0, &error, &erroroffset, NULL);
if (myregexp != NULL) {
    offsetcount = pcre_exec(myregexp, NULL, subject, strlen(subject), 0, 0, offsets, (2+1)*3);
    while (offsetcount > 0) {
        // match offset = offsets[0];
        // match length = offsets[1] - offsets[0];
        if (pcre_get_substring(subject, &offsets, offsetcount, 0, &result) >= 0) {
            // Do something with match we just stored into result
        }
        offsetcount = pcre_exec(myregexp, NULL, subject, strlen(subject), 0, offsets[1], offsets, (2+1)*3);
    } 
} else {
    // Syntax error in the regular expression at erroroffset
}

Try this regex.

// Matches href=<quote>URL<quote> with optional whitespace around '=': group 1 captures the
// quote character (' or "), group 2 lazily captures the URL, and \1 requires the same closing quote.
myregexp = pcre_compile("href\\s*=\\s*(['\"])(.*?)\\1", 0, &error, &erroroffset, NULL);

Sample code :

pcre *myregexp;
const char *error;
int erroroffset;
int offsetcount;
int offsets[(2+1)*3]; // (max_capturing_groups+1)*3
myregexp = pcre_compile("href\\s*=\\s*(['\"])(.*?)\\1", 0, &error, &erroroffset, NULL);
if (myregexp != NULL) {
    offsetcount = pcre_exec(myregexp, NULL, subject, strlen(subject), 0, 0, offsets, (2+1)*3);
    while (offsetcount > 0) {
        // match offset = offsets[0];
        // match length = offsets[1] - offsets[0];
        if (pcre_get_substring(subject, &offsets, offsetcount, 0, &result) >= 0) {
            // Do something with match we just stored into result
        }
        offsetcount = pcre_exec(myregexp, NULL, subject, strlen(subject), 0, offsets[1], offsets, (2+1)*3);
    } 
} else {
    // Syntax error in the regular expression at erroroffset
}
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文