Node.js 爬电影天堂种子

封装爬虫模块index.js

const puppeteer = require("puppeteer");
/**
 * Returns a promise that fulfils (with no value) after a fixed two-second
 * pause. The promise never rejects.
 */
function timeout() {
  const pauseMs = 2000;
  return new Promise((done) => {
    setTimeout(done, pauseMs);
  });
}
module.exports = async (url,callback) => {
    //打开浏览器chrome 路由
    const browser = await puppeteer.launch({
      executablePath: "/Applications/Google\ Chrome.app/"+
        "Contents/MacOS/Google\ Chrome",
        headless: "new" // 添加这一行
    });
    // 创建自页面
    const page = await browser.newPage();
    // 打开一个网页
    await page.goto(url, {
        waitUtil: "networkidle2"
    });
    await timeout();
    // 在页面中执行js
    let results = await page.evaluate(callback);
    // 关闭Chrome
    await browser.close();
    // 返货js执行结构
    return results;
}

爬取数据

const puppeteer = require("./index")
const { SingleBar, Presets } = require('cli-progress');
const fs = require('fs');
/**
 * Produces a promise that settles successfully, carrying no value, once a
 * fixed three-second timer fires.
 */
function timeout() {
   const scheduleWake = (wake) => {
      setTimeout(wake, 3000);
   };
   return new Promise(scheduleWake);
}
// Listing page whose links are scraped.
const url = 'https://www.ygdy8.net/html/gndy/china/index.html';

/**
 * Runs inside the listing page: collects the text and target of every
 * `a.ulink` anchor, in document order.
 * Must stay self-contained because it is evaluated in the browser, not in
 * Node.js.
 *
 * @returns {{name: string, url: string}[]} One entry per matching anchor.
 */
function collectLinks() {
   return Array.from(
      document.querySelectorAll("a[class='ulink']"),
      (anchor) => ({ name: anchor.text, url: anchor.href })
   );
}

/**
 * Runs inside a movie detail page: reads the first magnet link and the first
 * image under `#Zoom`. Either field is an empty string when the element or
 * its attribute is missing, instead of throwing on a null element.
 * Must stay self-contained because it is evaluated in the browser.
 *
 * @returns {{bt: string, img: string}} Magnet link and image address.
 */
function scrapeDetail() {
   const magnet = document.querySelector("a[href^='magnet:']");
   const poster = document.querySelector('#Zoom img');
   return {
      bt: (magnet && magnet.getAttribute('href')) || '',
      img: (poster && poster.getAttribute('src')) || '',
   };
}

/**
 * Returns the first capture group of `pattern` in `text`, or the trimmed
 * text itself when the pattern does not match (so an unexpected title format
 * no longer aborts the whole run).
 *
 * @param {string} text - Link text to search; nullish is treated as ''.
 * @param {RegExp} pattern - Non-global pattern with one capture group.
 * @returns {string} The extracted fragment.
 */
function extractFirst(text, pattern) {
   const source = text || '';
   const found = source.match(pattern);
   return found ? found[1] : source.trim();
}

/**
 * Groups the flat link list into movies. The list is read as consecutive
 * pairs: a category link followed by the movie link it belongs to.
 * A trailing unpaired link is ignored.
 *
 * @param {{name: string, url: string}[]} links - Output of `collectLinks`.
 * @returns {{categorize: string, categorizeUrl: string, movieName: string,
 *   movieUrl: string}[]} One record per complete pair.
 */
function pairLinks(links) {
   const movies = [];
   // `i + 1 < length` yields exactly floor(length / 2) pairs.
   for (let i = 0; i + 1 < links.length; i += 2) {
      const category = links[i];
      const movie = links[i + 1];
      movies.push({
         categorize: extractFirst(category.name, /\[(.*?)\]/),
         categorizeUrl: category.url || '',
         movieName: extractFirst(movie.name, /《(.*?)》/),
         movieUrl: movie.url || '',
      });
   }
   return movies;
}

/**
 * Scrapes the listing page, visits every movie page for its magnet link and
 * image, and writes the combined records to ./data.json.
 *
 * @returns {Promise<object[]>} The records that were written.
 */
async function main() {
   // Fetch the names and links from the listing page.
   const links = await puppeteer(url, collectLinks);
   const movies = pairLinks(links || []);

   if (movies.length === 0) {
      console.log('没有获取到列表');
      process.exit();
   }

   // Visit each detail page in turn to pick up the magnet link and image.
   const progressBar = new SingleBar({}, Presets.shades_classic);
   progressBar.start(movies.length, 0);
   try {
      for (let i = 0; i < movies.length; i++) {
         // Three-second pause before each detail-page request.
         await timeout();
         const detail = await puppeteer(movies[i].movieUrl, scrapeDetail);
         movies[i].bt = detail.bt;
         movies[i].img = detail.img;
         progressBar.update(i + 1);
      }
   } finally {
      // Stop the bar even when a page fails, so the terminal is left clean.
      progressBar.stop();
   }

   await fs.promises.writeFile(
      './data.json',
      JSON.stringify(movies, null, 4)
   );
   console.log('Data written to file');
   return movies;
}

// Kick off the scrape; report any failure instead of leaving an unhandled
// rejection, and signal it through the exit code.
const result = main().catch((err) => {
   console.error(err);
   process.exitCode = 1;
});
This entry was posted in node.js. Bookmark the permalink.

发表回复