mirror of
https://github.com/sbrow/nix.git
synced 2026-02-27 21:31:45 -05:00
feat: Added crawler template.
This commit is contained in:
28
templates/crawler/index.js
Normal file
28
templates/crawler/index.js
Normal file
@@ -0,0 +1,28 @@
|
||||
import { PlaywrightCrawler, Dataset } from 'crawlee';
|
||||
|
||||
/**
 * Entry point: runs the module-level `crawler` over the seed URL list.
 *
 * NOTE: `crawler` is a `const` declared further down in this module, so this
 * function must only be invoked after that declaration has been evaluated.
 *
 * @returns {Promise<void>} Settles once `crawler.run` has settled.
 */
async function main() {
  // Seed list for the crawl; add more URLs here to visit additional pages.
  const startUrls = [
    'https://ipchicken.com',
  ];

  await crawler.run(startUrls);
}
|
||||
|
||||
/**
 * Crawler instance used by `main`.
 *
 * For every visited page the request handler reads the inner text of the
 * element matching `p[align=center] b`, keeps its first line and logs it as
 * the caller's IP address.
 * NOTE(review): the selector presumably targets the IP banner on
 * ipchicken.com — confirm if the start URLs change.
 */
const crawler = new PlaywrightCrawler({
  requestHandler: async ({ page, log }) => {
    const basic = await page.locator('p[align=center] b').innerText();

    // Only the first line is wanted; trim so stray whitespace or a trailing
    // `\r` never ends up inside the quoted log message.
    const [firstLine] = basic.split('\n');
    const ip = firstLine.trim();

    log.info(`Your ip is: '${ip}'`);
  },
  // Uncomment to cap the number of requests handled in a single run.
  // maxRequestsPerCrawl: 50,
  launchContext: {
    launchOptions: {
      // Browser binary is taken from the environment. When the variable is
      // unset this is `undefined` — presumably Playwright then falls back to
      // its own default browser; TODO confirm.
      executablePath: process.env.PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH,
    },
  },
  headless: true,
});
|
||||
|
||||
// Start the crawl. This is a top-level `await`, so the file has to be loaded
// as an ES module; it sits after the `crawler` declaration that `main` uses.
await main();
|
||||
Reference in New Issue
Block a user