滚动在使用Puppeteer刮擦Google地图时不起作用
我正在抓取Google Maps的数据,但是发生的事情是,它仅返回了我的前10个结果,而不是在此之后返回。我认为滚动功能存在一些问题。
const puppeteer = require('puppeteer');
function extractItems() {
const extractedElements = document.querySelectorAll('.MyEned span.wiI7pd');
const items = [];
for (let element of extractedElements) {
items.push(element.innerText);
}
return items;
}
async function scrapeItems(
page,
extractItems,
itemCount,
scrollDelay = 2000,
) {
let items = [];
try {
let previousHeight;
while (items.length < itemCount) {
items = await page.evaluate(extractItems);
previousHeight = await page.evaluate('div.m6QErb.DxyBCb.scrollHeight');//selector for scroller
await page.evaluate('window.scrollTo(0, div.m6QErb.DxyBCb.scrollHeight)');
await page.waitForFunction(`div.m6QErb.DxyBCb.scrollHeight > ${previousHeight}`);
await page.waitForTimeout(scrollDelay);
}
} catch(e) { }
return items;
}
(async () => {
let browser = await puppeteer.connect();
browser = await puppeteer.launch({
headless: false,
args: ['--no-sandbox', '--disable-setuid-sandbox'],
});
const [page] = await browser.pages();
page.setViewport({ width: 1280, height: 926 });
await page.goto('https://www.google.com/maps/place/Ace+Florist+%26+Flower+Delivery/@40.8265438,-73.5011026,15z/data=!4m7!3m6!1s0x0:0x9062074cae10c10f!8m2!3d40.8265438!4d-73.5011026!9m1!1b1');
// Auto-scroll and extract desired items from the page. Currently set to extract eight items.
const items = await scrapeItems(page, extractItems, 30);
console.log(items)
await browser.close();
})();
I am scraping google maps places data, but what is happening is that it only returns me the first 10 results of the user reviews, not after that. I think there is some problem with scroll functionality.
const puppeteer = require('puppeteer');
function extractItems() {
const extractedElements = document.querySelectorAll('.MyEned span.wiI7pd');
const items = [];
for (let element of extractedElements) {
items.push(element.innerText);
}
return items;
}
async function scrapeItems(
page,
extractItems,
itemCount,
scrollDelay = 2000,
) {
let items = [];
try {
let previousHeight;
while (items.length < itemCount) {
items = await page.evaluate(extractItems);
previousHeight = await page.evaluate('div.m6QErb.DxyBCb.scrollHeight');//selector for scroller
await page.evaluate('window.scrollTo(0, div.m6QErb.DxyBCb.scrollHeight)');
await page.waitForFunction(`div.m6QErb.DxyBCb.scrollHeight > ${previousHeight}`);
await page.waitForTimeout(scrollDelay);
}
} catch(e) { }
return items;
}
(async () => {
let browser = await puppeteer.connect();
browser = await puppeteer.launch({
headless: false,
args: ['--no-sandbox', '--disable-setuid-sandbox'],
});
const [page] = await browser.pages();
page.setViewport({ width: 1280, height: 926 });
await page.goto('https://www.google.com/maps/place/Ace+Florist+%26+Flower+Delivery/@40.8265438,-73.5011026,15z/data=!4m7!3m6!1s0x0:0x9062074cae10c10f!8m2!3d40.8265438!4d-73.5011026!9m1!1b1');
// Auto-scroll and extract desired items from the page. Currently set to extract eight items.
const items = await scrapeItems(page, extractItems, 30);
console.log(items)
await browser.close();
})();
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
data:image/s3,"s3://crabby-images/d5906/d59060df4059a6cc364216c4d63ceec29ef7fe66" alt="扫码二维码加入Web技术交流群"
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(3)
此代码正常运行:
This code is working ok:
因此,我只是发现我在评估时必须添加
document.queryselector
滚动高度以及检查滚动高度大于以前的高度。
So I just found out that I have to add
document.querySelector
while evaluatingscroll height and also when checking the scroll height is greater than previous Height.
您可以使用以下解决方案之一在Google Maps上使用
Puppeteer
滚动评论页面。解决方案#1:
where
scrollContainer
是带有滚动的元素(在评论页面上,它是.dxybcb
)和page> page
是Puppeteer的页面。解决方案2:
在这种情况下,
卷轴
必须是滚动容器中的项目选择器(.jftief
)。您可以从我的博客文章“ nofollow noreferrer”> Web用nodejs刮擦Google Maps评论。
You can scroll the reviews page on Google Maps with
Puppeteer
using one of the solutions below.Solution #1:
Where
scrollContainer
is an element with scroll (on the reviews page it is.DxyBCb
) andpage
is the Puppeteer's page.Solution #2:
In this case,
scrollElements
must be the items selector in the scroll container (.jftiEf
).You can read more about scraping Google Maps Reviews from my blog post Web Scraping Google Maps Reviews with Nodejs.