How Do Node.js and Puppeteer Assist in Web Scraping of New York Times News Articles?

# Create a project directory, enter it, and install Puppeteer as a dependency.
mkdir puppeteer
cd puppeteer
npm install puppeteer --save
const puppeteer = require('puppeteer');

// Skeleton scraper: opens the New York Times home page in headless Chromium,
// runs an (as yet empty) extraction function inside the page, and logs whatever
// that function returns.
puppeteer
  .launch({
    headless: true,
    // No quotes around the value: launch args are handed to the browser as-is
    // (no shell strips them), so embedded quotes would become part of the
    // User-Agent string itself.
    args: [
      '--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3312.0 Safari/537.36',
    ],
  })
  .then(async (browser) => {
    try {
      const page = await browser.newPage();
      await page.goto('https://www.nytimes.com/');
      await page.waitForSelector('body');

      // The callback runs in the page context; it is empty here, so the
      // result is undefined until extraction logic is added.
      const rposts = await page.evaluate(() => {});

      console.log(rposts);
    } finally {
      // Always shut the browser down, even when navigation or evaluation
      // throws, so no Chromium process is left running.
      await browser.close();
    }
  })
  .catch((error) => {
    console.error(error);
  });
const puppeteer = require('puppeteer');

// Scrapes the New York Times home page: for every `.assetWrapper` element it
// collects the headline (<h2>), summary (<p>) and link (<a>), then logs the
// result as `{ posts: [{ title, link, summary }, ...] }`.
puppeteer
  .launch({
    headless: true,
    // No quotes around the value: launch args are handed to the browser as-is
    // (no shell strips them), so embedded quotes would become part of the
    // User-Agent string itself.
    args: [
      '--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3312.0 Safari/537.36',
    ],
  })
  .then(async (browser) => {
    try {
      const page = await browser.newPage();
      await page.goto('https://www.nytimes.com/');
      await page.waitForSelector('body');

      // This callback runs inside the page, so it can only use DOM APIs and
      // must return serializable data.
      const rposts = await page.evaluate(() => {
        const postItems = [];

        document.body.querySelectorAll('.assetWrapper').forEach((item) => {
          const heading = item.querySelector('h2');
          const paragraph = item.querySelector('p');
          const anchor = item.querySelector('a');

          // A card without a headline is skipped outright.
          if (!heading) {
            return;
          }
          const title = heading.innerText;
          if (title === '') {
            return;
          }

          // Cards lacking a summary or a link are skipped as well; these
          // explicit checks replace an empty catch block that hid the
          // resulting TypeError.
          if (!paragraph || !anchor) {
            return;
          }

          postItems.push({
            title: title,
            link: anchor.href,
            summary: paragraph.innerText,
          });
        });

        return { posts: postItems };
      });

      console.log(rposts);
    } finally {
      // Always shut the browser down, even when navigation or evaluation
      // throws, so no Chromium process is left running.
      await browser.close();
    }
  })
  .catch((error) => {
    console.error(error);
  });
// Read the rendered text of the first <p> inside `item`; this throws a TypeError when `item` has no <p>, because querySelector then returns null.
summary = item.querySelector('p').innerText;
  • Thousands of high-speed rotating proxies located around the globe.
  • Automatic IP rotation.
  • Automatic User-Agent string rotation (which simulates requests from different, valid web browsers and browser versions).
  • Automatic CAPTCHA-solving technology.
# Quote the URL so the shell does not treat "&" as the background operator.
curl "https://xbyte.io/?key=API_KEY&url=https://example.com"

--

--

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store