在node.js中使用wappalyzer和puppeteer

发布于 2025-02-13 07:25:43 字数 1223 浏览 1 评论 0原文

我正在尝试构建一个抓取工具（scraper）来自动监控 Web 项目。

到目前为止一切顺利，脚本可以运行，但现在我想添加一个功能，自动分析我在这些项目中使用了哪些库。最适合这项工作的脚本是 Wappalyzer。他们提供了一个 Node 软件包（ https://www.npmjs.com/package/wappalyzer ），并且文档中写明可以将其与 Puppeteer 结合使用。

我已经能够运行 Puppeteer 并在控制台中输出网站的源代码，但我不知道如何正确地将源代码传递给 Wappalyzer 的 analyze 函数。

大家能给我一点提示吗？

我尝试了下面的代码，但得到了错误 TypeError: url.split is not a function

/**
 * Opens `url` in a headless Puppeteer browser and then tries to analyse it
 * with Wappalyzer, logging the results as JSON.
 *
 * The work runs inside an async IIFE whose promise is neither awaited nor
 * returned, so this function itself returns nothing.
 * `puppeteer` and `Wappalyzer` are not defined here and must already be in scope.
 *
 * @param {string} url - Address passed to `page.goto`.
 */
function getLibarys(url) {

  (async () => {
    const browser = await puppeteer.launch({ headless: true });
    const page = await browser.newPage();
    await page.goto(url);

// Get the page's source with Puppeteer. NOTE: `html` is not used anywhere below.
const html = await page.content();

const wappalyzer = new Wappalyzer();

// This inner async function is invoked without `await`, so the
// `browser.close()` call after it does not wait for the analysis to finish.
(async function () {
  try {
    await wappalyzer.init()

    // Optionally set additional request headers
    const headers = {}

    // NOTE(review): the Puppeteer `page` object is passed here, whereas the
    // working answer passes the URL string -- presumably the source of the
    // reported "url.split is not a function" TypeError.
    const site = await wappalyzer.open(page, headers)

    // Optionally capture and output errors
    site.on('error', console.error)

    const results = await site.analyze()

    console.log(JSON.stringify(results, null, 2))
  } catch (error) {
    // Errors from init/open/analyze are only logged, not rethrown.
    console.error(error)
  }

  // Runs after the try/catch, i.e. on both the success and the error path.
  await wappalyzer.destroy()
})()
await browser.close()
  })()
}

原文

I am trying to build a scraper to monitor web projects automatically.

So far so good, the script is running, but now I want to add a feature that automatically analyses which libraries I used in the projects. The most powerful script for this job is Wappalyzer. They have a Node package (https://www.npmjs.com/package/wappalyzer) and the documentation says that you can use it in combination with Puppeteer.

I managed to run Puppeteer and to log the source code of the sites in the console, but I can't figure out the right way to pass the source code to the Wappalyzer analyze function.

Do you guys have a hint for me?

I tried this code, but I am getting a TypeError: url.split is not a function

/**
 * Opens `url` in a headless Puppeteer browser and then tries to analyse it
 * with Wappalyzer, logging the results as JSON.
 *
 * The work runs inside an async IIFE whose promise is neither awaited nor
 * returned, so this function itself returns nothing.
 * `puppeteer` and `Wappalyzer` are not defined here and must already be in scope.
 *
 * @param {string} url - Address passed to `page.goto`.
 */
function getLibarys(url) {

  (async () => {
    const browser = await puppeteer.launch({ headless: true });
    const page = await browser.newPage();
    await page.goto(url);

// Get the page's source with Puppeteer. NOTE: `html` is not used anywhere below.
const html = await page.content();

const wappalyzer = new Wappalyzer();

// This inner async function is invoked without `await`, so the
// `browser.close()` call after it does not wait for the analysis to finish.
(async function () {
  try {
    await wappalyzer.init()

    // Optionally set additional request headers
    const headers = {}

    // NOTE(review): the Puppeteer `page` object is passed here, whereas the
    // working answer passes the URL string -- presumably the source of the
    // reported "url.split is not a function" TypeError.
    const site = await wappalyzer.open(page, headers)

    // Optionally capture and output errors
    site.on('error', console.error)

    const results = await site.analyze()

    console.log(JSON.stringify(results, null, 2))
  } catch (error) {
    // Errors from init/open/analyze are only logged, not rethrown.
    console.error(error)
  }

  // Runs after the try/catch, i.e. on both the success and the error path.
  await wappalyzer.destroy()
})()
await browser.close()
  })()
}

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

狼亦尘 2025-02-20 07:25:43

通过使用Wappalyzer的示例代码修复了它。

/**
 * Detects the technologies used by the site at `url` with Wappalyzer and logs
 * the result as pretty-printed JSON.
 *
 * `url` is handed straight to `wappalyzer.open`, so it must be the URL itself
 * rather than a Puppeteer page object.
 *
 * @param {string} url - Address of the site to analyse.
 * @returns {Promise<*>} Resolves with the value returned by `site.analyze()`
 *   after Wappalyzer has been destroyed, or with `undefined` when the analysis
 *   failed (the error is logged, not rethrown).
 */
function getLibarys(url) {
  const Wappalyzer = require('wappalyzer');

  const options = {
    debug: false,
    delay: 500,
    headers: {},
    maxDepth: 3,
    maxUrls: 10,
    maxWait: 5000,
    recursive: true,
    probe: true,
    proxy: false,
    userAgent: 'Wappalyzer',
    htmlMaxCols: 2000,
    htmlMaxRows: 2000,
    noScripts: false,
    noRedirect: false,
  };

  const wappalyzer = new Wappalyzer(options);

  // Return the promise instead of leaving it floating, so callers can await
  // completion and use the results; callers that ignore it behave as before.
  return (async () => {
    try {
      await wappalyzer.init();

      // Optionally set additional request headers
      const headers = {};

      const site = await wappalyzer.open(url, headers);

      // Optionally capture and output errors
      site.on('error', console.error);

      const results = await site.analyze();

      console.log(JSON.stringify(results, null, 2));

      return results;
    } catch (error) {
      // Best-effort: report the failure and resolve with undefined.
      console.error(error);
      return undefined;
    } finally {
      // Always release Wappalyzer's resources, on success and on failure.
      await wappalyzer.destroy();
    }
  })();
}

Fixed it by using the sample code from wappalyzer.

/**
 * Detects the technologies used by the site at `url` with Wappalyzer and logs
 * the result as pretty-printed JSON.
 *
 * `url` is handed straight to `wappalyzer.open`, so it must be the URL itself
 * rather than a Puppeteer page object.
 *
 * @param {string} url - Address of the site to analyse.
 * @returns {Promise<*>} Resolves with the value returned by `site.analyze()`
 *   after Wappalyzer has been destroyed, or with `undefined` when the analysis
 *   failed (the error is logged, not rethrown).
 */
function getLibarys(url) {
  const Wappalyzer = require('wappalyzer');

  const options = {
    debug: false,
    delay: 500,
    headers: {},
    maxDepth: 3,
    maxUrls: 10,
    maxWait: 5000,
    recursive: true,
    probe: true,
    proxy: false,
    userAgent: 'Wappalyzer',
    htmlMaxCols: 2000,
    htmlMaxRows: 2000,
    noScripts: false,
    noRedirect: false,
  };

  const wappalyzer = new Wappalyzer(options);

  // Return the promise instead of leaving it floating, so callers can await
  // completion and use the results; callers that ignore it behave as before.
  return (async () => {
    try {
      await wappalyzer.init();

      // Optionally set additional request headers
      const headers = {};

      const site = await wappalyzer.open(url, headers);

      // Optionally capture and output errors
      site.on('error', console.error);

      const results = await site.analyze();

      console.log(JSON.stringify(results, null, 2));

      return results;
    } catch (error) {
      // Best-effort: report the failure and resolve with undefined.
      console.error(error);
      return undefined;
    } finally {
      // Always release Wappalyzer's resources, on success and on failure.
      await wappalyzer.destroy();
    }
  })();
}

回复收藏 0 原文

深居我梦 2025-02-20 07:25:43

我不知道您是否仍然需要答案。但这是Wappalyzer合作者告诉我的：

通常，您会这样运行Wappalyzer：

// Default usage: Wappalyzer starts its own browser via init().
// NOTE: this is a fragment -- `url` must already be defined, and the `await`
// expressions need an enclosing async function (or a module that supports
// top-level await).
const Wappalyzer = require('wappalyzer')

const wappalyzer = new Wappalyzer()

await wappalyzer.init() // Launches a Puppeteer instance

// Pass the URL itself; the returned `site` is what analyze() is called on
// in the fuller examples.
const site = await wappalyzer.open(url)

如果您想使用自己的浏览器实例，则可以跳过 wappalyzer.init() 并将该实例赋值给 wappalyzer.browser：

// Variant that supplies your own browser: wappalyzer.init() is deliberately
// not called; the browser is assigned to `wappalyzer.browser` instead.
// NOTE: this is a fragment -- `puppeteer` and `url` are not defined here and
// must already be in scope, and the `await` expressions need an enclosing
// async function (or a module that supports top-level await).
const Wappalyzer = require('wappalyzer')

const wappalyzer = new Wappalyzer()

wappalyzer.browser = await puppeteer.launch() // Use your own Puppeteer launch logic

// Pass the URL itself, exactly as in the default usage.
const site = await wappalyzer.open(url)

您可以在这里找到相关讨论。
希望这会有所帮助。

I do not know if you still need an answer to this. But this is what a wappalyzer collaborator told me:

Normally you'd run Wappalyzer like this:

// Default usage: Wappalyzer starts its own browser via init().
// NOTE: this is a fragment -- `url` must already be defined, and the `await`
// expressions need an enclosing async function (or a module that supports
// top-level await).
const Wappalyzer = require('wappalyzer')

const wappalyzer = new Wappalyzer()

await wappalyzer.init() // Launches a Puppeteer instance

// Pass the URL itself; the returned `site` is what analyze() is called on
// in the fuller examples.
const site = await wappalyzer.open(url)

If you want to use your own browser instance, you can skip wappalyzer.init() and assign the instance to wappalyzer.browser:

// Variant that supplies your own browser: wappalyzer.init() is deliberately
// not called; the browser is assigned to `wappalyzer.browser` instead.
// NOTE: this is a fragment -- `puppeteer` and `url` are not defined here and
// must already be in scope, and the `await` expressions need an enclosing
// async function (or a module that supports top-level await).
const Wappalyzer = require('wappalyzer')

const wappalyzer = new Wappalyzer()

wappalyzer.browser = await puppeteer.launch() // Use your own Puppeteer launch logic

// Pass the URL itself, exactly as in the default usage.
const site = await wappalyzer.open(url)

You can find the discussion here.
Hope this helps.

回复收藏 0 原文

~没有更多了~