鍍金池/ 問答/HTML/ Node爬蟲異步流程難控制,怎么修改成Promise或者Async/Await的

Node爬蟲異步流程難控制,怎么修改成Promise或者Async/Await的形式

如下代碼,我想打印出最終的結果ans數組,但一直很難打印出來:因為打印操作是同步的,而請求是異步的,所以老是打印出空數組。要對異步流程進行控制的話,應該怎么修改成Promise或者Async/Await的形式?

const cheerio = require('cheerio');
const http = require('http');
const iconv = require('iconv-lite');

// Listing-page URL prefix; the page number and ".html" are appended per request.
const baseUrl = "http://www.ygdy8.net/html/gndy/dyzz/list_23_";
// Site root that the hrefs found on listing pages are resolved against.
const Host = "http://www.ygdy8.net/";

const totalPage = 2; // number of listing pages to crawl
// Download links collected so far; getLink appends to it as detail pages finish loading.
const ans = [];
/**
 * Fetch one listing page and hand every movie detail link on it to getLink.
 *
 * The page at `url + page + ".html"` is downloaded, decoded as gb2312, and the
 * href of each `.co_content8 .ulink` element is passed to getLink as a
 * one-element array. Whatever getLink reports back is printed to the console.
 *
 * Non-200 responses and request errors are logged and the page is skipped,
 * so one bad page cannot crash the whole crawl.
 *
 * @param {string} url  Listing URL prefix; the page number is appended to it.
 * @param {number} page Page number to fetch.
 */
function getTitleHref(url, page) {
  const startUrl = url + page + ".html";
  http.get(startUrl, function (res) {
    const { statusCode } = res;
    if (statusCode !== 200) {
      console.error(`請求失敗: ${startUrl}\n狀態碼: ${statusCode}`);
      // Drain the response so its memory is released.
      res.resume();
      return;
    }

    const chunks = [];
    res.on('data', function (chunk) {
      chunks.push(chunk);
    });
    res.on('end', function () {
      // Decode the raw bytes as gb2312 before handing the markup to cheerio.
      const html = iconv.decode(Buffer.concat(chunks), 'gb2312');
      const $ = cheerio.load(html, {decodeEntities: false});
      $('.co_content8 .ulink').each(function (i, d) {
        const titleHref = [{ href: $(d).attr('href') }];
        getLink(titleHref, (links) => { console.log(links); });
      });
    });
  }).on('error', (e) => {
    // Without this listener a failed request raises an unhandled 'error' event.
    console.error(`錯誤: ${e.message}`);
  });
}


// /*
/**
 * Fetch movie detail pages and collect each page's download (torrent) link.
 *
 * For every entry of `titleHref` the detail page is requested, decoded as
 * gb2312, and the href of the anchor under `#Zoom td` is appended to the
 * module-level `ans` array, after which `cb(ans)` is called. Failed requests
 * and non-200 responses are logged and skipped.
 *
 * @param {Array<{href: string}>} titleHref Detail-page links, relative to `Host`.
 * @param {function(Array): void} cb Called with `ans` after each successful page.
 * @returns {Promise<Array>} Resolves with `ans` once every request started by
 *   this call has finished, successfully or not, so callers can `await` it
 *   (or combine several calls with `Promise.all`) before printing results.
 */
function getLink(titleHref, cb) {
  console.log('進(jìn)入getLink');
  console.log(titleHref);
  if (!titleHref) {
    return Promise.resolve(ans);
  }

  const pending = [];
  // forEach rather than map, so any collection exposing forEach keeps working.
  titleHref.forEach(function (v) {
    pending.push(requestBtLink(v, cb));
  });
  return Promise.all(pending).then(function () {
    return ans;
  });
}

// Requests one detail page; the returned promise settles when that request is over.
function requestBtLink(item, cb) {
  console.log('~~~~~~~~~~~~~~~~~~~~');

  return new Promise(function (resolve) {
    // Errors are logged, not propagated: the crawl is best-effort per page.
    const logAndFinish = function (e) {
      console.error(`錯(cuò)誤: ${e.message}`);
      resolve();
    };

    function handleResponse(res) {
      const { statusCode } = res;
      if (statusCode !== 200) {
        console.error('請(qǐng)求失敗。\n' +
                      `狀態(tài)碼: ${statusCode}`);
        // Drain the response so its memory is released.
        res.resume();
        resolve();
        return;
      }
      console.log('進(jìn)入getlink http');

      const chunks = [];
      res.on('data', function (chunk) {
        chunks.push(chunk);
      });
      res.on('error', logAndFinish);
      res.on('end', function () {
        try {
          const html = iconv.decode(Buffer.concat(chunks), 'gb2312');
          const $ = cheerio.load(html, {decodeEntities: false});
          const bt = $('#Zoom td').children('a').attr('href');
          ans.push(bt);
          cb(ans);
        } catch (e) {
          console.error('bt', e.message);
        } finally {
          resolve();
        }
      });
    }

    let request;
    try {
      // Resolve against Host instead of concatenating: Host ends with "/" and
      // hrefs may start with "/", which used to yield "//" in the request path.
      const infoUrl = new URL(item.href, Host).href;
      request = http.get(infoUrl, handleResponse);
    } catch (e) {
      logAndFinish(e);
      return;
    }
    request.on('error', logAndFinish);
  });
}
// */
// Start one crawl per listing page, numbered 1 through totalPage.
let pageNo = 1;
while (pageNo <= totalPage) {
  getTitleHref(baseUrl, pageNo);
  pageNo += 1;
}



回答
編輯回答
傻叼

對http.get進行封裝,返回一個Promise實例:在end事件中resolve,在error事件中reject。
將forEach改成Promise.all+map即可
await Promise.all就是你想要的結(jié)果

2018年1月29日 09:11