Crawler randomly gets stuck with no timeout when queueing ~1.8k URLs
I have a list of ~1.8k URLs, but after `await crawler.queue(urls)` the crawl seems to get stuck at random points and never times out. Minimal repro below (a diagnostic sketch follows it):
```js
const fs = require('fs')
const writeJsonFile = require('write-json-file')
const HCCrawler = require('headless-chrome-crawler')
const RedisCache = require('headless-chrome-crawler/cache/redis')

const cache = new RedisCache({ host: '127.0.0.1', port: 6379 })

// getUrls() returns the ~1.8k URL list (definition omitted)
let urls = getUrls()
let count = urls.length

async function p1() {
  const crawler = await HCCrawler.launch({
    cache,
    persistCache: true,
    // jQuery is injected into the page by default, so $ is available here
    evaluatePage: () => ({
      title: $('#litZQMC').text(),
      html: $('#divScroll').html()
    }),
    onSuccess: async resp => {
      const { result: { title, html } } = resp
      if (fs.existsSync(`files/${title}.txt`)) {
        console.log('skip', count--, title)
      } else {
        await writeJsonFile(`files/${title}.txt`, html)
        console.log('done', count--, title)
      }
    },
    onError: err => {
      console.log(err)
    }
  })
  await crawler.queue(urls)
  await crawler.onIdle()
  await crawler.close()
}

async function queue() {
  await p1()
}

queue()
```
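
What I'd try next to narrow this down: a minimal diagnostic sketch, assuming the lifecycle event names (`requeststarted`, `requestretried`, `requestfailed`) and the per-request `timeout`/`retryCount` queue options from the library's API docs (the values here are examples, not recommendations):

```js
// Same launch options as the repro above; `cache` and `urls` are reused.
const crawler = await HCCrawler.launch({
  cache,
  persistCache: true,
  // evaluatePage / onSuccess / onError as in the repro above
})

// Log lifecycle events so the last URL seen before the stall is visible.
crawler.on('requeststarted', options => console.log('started:', options.url))
crawler.on('requestretried', options => console.log('retried:', options.url))
crawler.on('requestfailed', error => console.log('failed:', error.message))

// Queue with an explicit navigation timeout and retry cap so a hung
// navigation fails loudly instead of blocking silently.
await crawler.queue(urls.map(url => ({
  url,
  timeout: 30000,   // navigation timeout in ms; 0 would disable it
  retryCount: 3
})))

await crawler.onIdle()
await crawler.close()
```

If the `started`/`retried` logging stops while `onIdle()` never resolves, the stall is inside the crawler (a hung navigation or the Redis cache); if events keep flowing but nothing gets written, the stall is more likely in the `onSuccess` handler. Note that, if I'm reading the docs right, navigation already has a 30-second default timeout, so a truly silent stall points away from navigation itself.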
- Version: 1.8.0
- Platform / OS version: macOS
- Node.js version: v8.11.3