iframe 内访问

发布于 2024-11-03 09:11:29 字数 2350 浏览 1 评论 0原文

我正在 Firefox 扩展中使用一小段源自 linkify 的代码来访问网页内的所有文本节点。代码如下，没有什么特别之处：

// Tags whose descendant text nodes are excluded from the search.
var notInTags = [
    'a',
    'head',
    'noscript',
    'option',
    'script',
    'style',
    'title',
    'textarea'
];

// Build one "not(ancestor::tag)" predicate per excluded tag and AND them together.
var xpath = './/text()[' + notInTags.map(function (tag) {
    return 'not(ancestor::' + tag + ')';
}).join(' and ') + ']';

// Snapshot of every text node in the content document that passes the filter.
var candidates = window.content.document.evaluate(
    xpath,
    window.content.document,
    null,
    XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE,
    null
);

有没有人能建议一下，同时获取页面中所有 iframe 内文本节点的最佳方法？具体来说，使用

iframe.window.content.document

是否可行（似乎不行），还是我找错了方向？

干杯:)

编辑 2：这是完整更新的功能（再次）

    /**
     * Appends the text nodes found inside the content page's frames to
     * `candidates`.
     *
     * Only <frame>/<iframe> elements whose `src` is an http(s) URL are
     * searched, and text located inside any of the tags listed in
     * `notInTags` is skipped.
     *
     * @param {Array} candidates - Array that receives the matching text nodes.
     */
    rsfindmod.searchiframes = function(candidates) {
        // This fixes cases where a redirecting page uses frames (primarily search engines etc.)

        // No `g` flag: a global regex remembers `lastIndex` between test()
        // calls, which makes it skip matches when reused for several frames.
        const urlRegex = /\b(https?:\/\/[^\s+\"\<\>]+)/i;
        const notInTags = [
            'a', 'head', 'noscript', 'option', 'script', 'style', 'title', 'textarea'
        ];
        const xpath = ".//text()[not(ancestor::" + notInTags.join(') and not(ancestor::') + ")]";

        // getElementsByTagName() only honours its first argument, so use a
        // selector list to pick up both kinds of frame element.
        var frameElements = window.content.document.querySelectorAll('frame, iframe');

        for (var i = 0; i < frameElements.length; i++) {
            var frameElement = frameElements[i];

            // Test the frame's URL, not the element object itself.
            if (!urlRegex.test(frameElement.src)) {
                continue;
            }

            var frameDocument = frameElement.contentDocument;
            if (!frameDocument) {
                // No document available for this frame; nothing to search.
                continue;
            }

            var textNodes = frameDocument.evaluate(xpath, frameDocument, null, XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE, null);

            // A snapshot XPathResult is not an array: read it through
            // snapshotLength/snapshotItem(). The index is distinct from `i`
            // so the outer loop is not clobbered.
            for (var j = 0; j < textNodes.snapshotLength; j++) {
                candidates.push(textNodes.snapshotItem(j));
            }
        }
    };

编辑 3：一个稍微好一点的功能？

/**
 * Appends the text nodes found in each direct child frame of `frame` to
 * `candidates`. Text located inside any of the tags listed in `notInTags`
 * is skipped.
 *
 * @param {Array} candidates - Array that receives the matching text nodes.
 * @param {Object} frame - Object exposing a `frames` collection (e.g. a window).
 * @param {*} [documentList] - Not used by this function; kept so existing
 *     callers that pass it keep working.
 */
rsfindmod.searchiframes = function(candidates, frame, documentList) {
    // This fixes cases where a redirecting page uses frames (primarily search engines etc.)
    const framesets = frame.frames;
    const notInTags = [
        'a', 'head', 'noscript', 'option', 'script', 'style', 'title', 'textarea'
    ];
    const xpath = ".//text()[not(ancestor::" + notInTags.join(') and not(ancestor::') + ")]";

    for (var i = 0; i < framesets.length; i++) {
        // Entries of window.frames are window objects, which expose
        // `document` directly; fall back to `contentWindow` only when the
        // entry is a frame element.
        var frameWindow = framesets[i].contentWindow || framesets[i];
        var frameDocument = frameWindow.document;
        if (!frameDocument) {
            // No document available for this frame; nothing to search.
            continue;
        }

        var textNodes = frameDocument.evaluate(xpath, frameDocument, null, XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE, null);

        // A snapshot XPathResult is not an array: read it through
        // snapshotLength/snapshotItem(). The index is distinct from `i` so
        // the outer loop is not clobbered.
        for (var j = 0; j < textNodes.snapshotLength; j++) {
            candidates.push(textNodes.snapshotItem(j));
        }
    }
};

原文

I'm using a short snippet of linkify-derived code to access all the text nodes within a webpage from a Firefox extension. It looks like this, so nothing particularly interesting:

// Tags whose descendant text nodes are excluded from the search.
var notInTags = [
    'a',
    'head',
    'noscript',
    'option',
    'script',
    'style',
    'title',
    'textarea'
];

// Build one "not(ancestor::tag)" predicate per excluded tag and AND them together.
var xpath = './/text()[' + notInTags.map(function (tag) {
    return 'not(ancestor::' + tag + ')';
}).join(' and ') + ']';

// Snapshot of every text node in the content document that passes the filter.
var candidates = window.content.document.evaluate(
    xpath,
    window.content.document,
    null,
    XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE,
    null
);

Does anyone have a suggestion on the best way to grab the text nodes from any iframes present as well please?
Specifically, is using

iframe.window.content.document

likely to get me anywhere (it doesn't seem to), or am I barking up the wrong tree?

Cheers :)

Edit 2: This is the complete updated function (again)

    /**
     * Appends the text nodes found inside the content page's frames to
     * `candidates`.
     *
     * Only <frame>/<iframe> elements whose `src` is an http(s) URL are
     * searched, and text located inside any of the tags listed in
     * `notInTags` is skipped.
     *
     * @param {Array} candidates - Array that receives the matching text nodes.
     */
    rsfindmod.searchiframes = function(candidates) {
        // This fixes cases where a redirecting page uses frames (primarily search engines etc.)

        // No `g` flag: a global regex remembers `lastIndex` between test()
        // calls, which makes it skip matches when reused for several frames.
        const urlRegex = /\b(https?:\/\/[^\s+\"\<\>]+)/i;
        const notInTags = [
            'a', 'head', 'noscript', 'option', 'script', 'style', 'title', 'textarea'
        ];
        const xpath = ".//text()[not(ancestor::" + notInTags.join(') and not(ancestor::') + ")]";

        // getElementsByTagName() only honours its first argument, so use a
        // selector list to pick up both kinds of frame element.
        var frameElements = window.content.document.querySelectorAll('frame, iframe');

        for (var i = 0; i < frameElements.length; i++) {
            var frameElement = frameElements[i];

            // Test the frame's URL, not the element object itself.
            if (!urlRegex.test(frameElement.src)) {
                continue;
            }

            var frameDocument = frameElement.contentDocument;
            if (!frameDocument) {
                // No document available for this frame; nothing to search.
                continue;
            }

            var textNodes = frameDocument.evaluate(xpath, frameDocument, null, XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE, null);

            // A snapshot XPathResult is not an array: read it through
            // snapshotLength/snapshotItem(). The index is distinct from `i`
            // so the outer loop is not clobbered.
            for (var j = 0; j < textNodes.snapshotLength; j++) {
                candidates.push(textNodes.snapshotItem(j));
            }
        }
    };

Edit 3: A slightly better function?

/**
 * Appends the text nodes found in each direct child frame of `frame` to
 * `candidates`. Text located inside any of the tags listed in `notInTags`
 * is skipped.
 *
 * @param {Array} candidates - Array that receives the matching text nodes.
 * @param {Object} frame - Object exposing a `frames` collection (e.g. a window).
 * @param {*} [documentList] - Not used by this function; kept so existing
 *     callers that pass it keep working.
 */
rsfindmod.searchiframes = function(candidates, frame, documentList) {
    // This fixes cases where a redirecting page uses frames (primarily search engines etc.)
    const framesets = frame.frames;
    const notInTags = [
        'a', 'head', 'noscript', 'option', 'script', 'style', 'title', 'textarea'
    ];
    const xpath = ".//text()[not(ancestor::" + notInTags.join(') and not(ancestor::') + ")]";

    for (var i = 0; i < framesets.length; i++) {
        // Entries of window.frames are window objects, which expose
        // `document` directly; fall back to `contentWindow` only when the
        // entry is a frame element.
        var frameWindow = framesets[i].contentWindow || framesets[i];
        var frameDocument = frameWindow.document;
        if (!frameDocument) {
            // No document available for this frame; nothing to search.
            continue;
        }

        var textNodes = frameDocument.evaluate(xpath, frameDocument, null, XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE, null);

        // A snapshot XPathResult is not an array: read it through
        // snapshotLength/snapshotItem(). The index is distinct from `i` so
        // the outer loop is not clobbered.
        for (var j = 0; j < textNodes.snapshotLength; j++) {
            candidates.push(textNodes.snapshotItem(j));
        }
    }
};

分享到QQ

分享到微博