Web pages are getting more and more complicated these days and, more often than not, take more than a few seconds to fully load.
One example that I wanted for my Node-RED dashboard is a screenshot of the high altitude balloon tracking website.
When I took a screenshot of it, this is what I saw:
Pretty much just the opening splash screen and not much else.
Not exactly useful.
Digging into why, I found that the screenshot node uses Puppeteer (headless Chrome) to take the screenshot.
The problem is that the node does not wait for the website to load; it just pauses a second or so and then takes the shot.
While that is OK for very simple websites, I have found that a lot of websites need different amounts of time to fully load, so hard-coding a fixed delay in seconds is not an ideal solution.
Over the past few weeks I dug into Stack Overflow and found several fragments of sample code that encourage Puppeteer to wait for all scripts, images and so on to load before it takes the screenshot.
I have grafted them all together into the JavaScript for the node and have tested the updated code many times. Here is the code:
module.exports = function (RED) {
function ScreenshotNode(config) {
RED.nodes.createNode(this, config);
let node = this;
let path = config.path;
let puppeteer = require('puppeteer');
let option = {};
/**
 * Waits until the page's serialized HTML stops changing in size.
 *
 * The page content is sampled once per interval. The page is treated as
 * rendered once the size has been non-zero and identical to the previous
 * sample for three consecutive checks. If that never happens, the function
 * simply returns after roughly `timeout` milliseconds' worth of checks.
 *
 * @param {object} page - Puppeteer page; only `page.content()` is used.
 * @param {number} [timeout=120000] - Upper bound on total polling time, in ms.
 * @param {number} [checkDurationMsecs=1000] - Delay between samples, in ms.
 * @returns {Promise<void>} Resolves when the size is stable or checks run out.
 */
const waitTillHTMLRendered = async (page, timeout = 120000, checkDurationMsecs = 1000) => {
  const minStableSizeIterations = 3;
  // Guard against a zero/negative/NaN interval, which would make the number
  // of checks unbounded (division by zero) or meaningless.
  const intervalMsecs = checkDurationMsecs > 0 ? checkDurationMsecs : 1000;
  const maxChecks = timeout / intervalMsecs;

  let lastHTMLSize = 0;
  let countStableSizeIterations = 0;

  for (let checkCount = 1; checkCount <= maxChecks; checkCount++) {
    const html = await page.content();
    const currentHTMLSize = html.length;

    // The very first sample (lastHTMLSize === 0) can never count as stable.
    if (lastHTMLSize !== 0 && currentHTMLSize === lastHTMLSize) {
      countStableSizeIterations++;
    } else {
      countStableSizeIterations = 0; // size changed: start counting again
    }

    if (countStableSizeIterations >= minStableSizeIterations) {
      break;
    }

    lastHTMLSize = currentHTMLSize;
    // Plain timer instead of page.waitFor(): that helper was deprecated and
    // later removed from Puppeteer, so relying on it breaks on newer versions.
    await new Promise((resolve) => setTimeout(resolve, intervalMsecs));
  }
};
if (path) {
option.executablePath = path;
}
// Handle each incoming message: load the target URL in a fresh headless
// browser, wait for the page to settle, and send the message on with
// msg.payload set to a base64-encoded full-page PNG screenshot.
node.on('input', function (msg) {
  // URL precedence: msg.url, then the node's configured URL, then a default.
  const url = msg.url || config.url || 'http://www.example.com/';

  // Named distinctly from the outer launch `option` to avoid shadowing it.
  const screenshotOptions = {
    type: 'png',
    fullPage: true,
    encoding: 'base64'
  };

  puppeteer.launch(option)
    .then(async (browser) => {
      let base64String;
      try {
        const page = await browser.newPage();
        await page.goto(url, { timeout: 100000, waitUntil: 'load' });
        await waitTillHTMLRendered(page);
        base64String = await page.screenshot(screenshotOptions);
      } finally {
        // Always close the browser, even when navigation or the screenshot
        // fails, so a failed request does not leave a Chrome process behind.
        await browser.close();
      }
      msg.payload = base64String;
      node.send(msg);
    })
    .catch((err) => {
      // Report the failure through Node-RED instead of leaving an
      // unhandled promise rejection.
      node.error(err, msg);
    });
});
}
RED.nodes.registerType("screenshot", ScreenshotNode);
}
Just so we are clear, I am not a programmer, and you should not use this code, I just display it as an example of what is possible.
Copy and paste that code into a text file, name it screenshot.js, and save it to your computer.
BEFORE you copy it over to your Node-RED, IT IS CRITICAL that you first stop Node-RED from running.
Update the file by overwriting the one that is there now. The location will depend on where and how you installed Node-RED, but look for a directory like .node-red/node_modules/node-red-contrib-web-page-screenshot and copy the screenshot.js file over the top of the one in there. (Be sure to use the web-page-screenshot directory, NOT the contrib-screenshot directory, if you have that node installed as well.)
Then start Node-RED again the usual way you do.
Use the node in exactly the same way you have been.
The difference now is that the node will wait whatever time the website requires before it takes the screenshot (up to about two minutes, given the 120000 ms timeout in the code above).
So now we get an image like this:
A much more helpful screenshot.
I don't know how to contact the node author to ask if they would consider taking a look at the issue. The .js file does not get changed on Node-RED updates or restarts, so I have only needed to copy the file over once in a few months. That seems a small price to pay for a big improvement in output.