Hello,
I need to extract just two values from the page at the RCP web site. A sample sub-string is shown below. For the keyword Approve, I need to retrieve the corresponding value 44.0:
<div class="value">
<span style="background: #000000;">44.0</span>
</div>
<div class="desc">Approve </div>
I understand that cheerio would be the best utility for this purpose. I've entered the basic "load" instruction in the Function node but don't quite understand the next steps. Is there some interactive helper tool (à la JSONata) to drill down into the object model?
I've wired up the HTML node in the flow for parallel evaluation (and self-learning). How could I use cheerio here?
All comments welcome. I tried reading the manual (RTxM) for cheerio but obviously didn't get past the Selector control.
Kind regards.
[{"id":"d1e9b9ad.1c8d88","type":"comment","z":"e9169fe6.11ecc8","name":"Data Analysis Exercises","info":"","x":130,"y":80,"wires":[]},{"id":"e5fd9a77.835468","type":"inject","z":"e9169fe6.11ecc8","name":"POTUS Job Approval","topic":"JobApproval","payload":"https://www.realclearpolitics.com/epolls/other/president_trump_job_approval-6179.html","payloadType":"str","repeat":"60","crontab":"","once":false,"onceDelay":0.1,"x":150,"y":140,"wires":[["72d99cfb.0c2804"]]},{"id":"72d99cfb.0c2804","type":"http request","z":"e9169fe6.11ecc8","name":"RCP POTUS Poll","method":"GET","ret":"txt","paytoqs":false,"url":"https://www.realclearpolitics.com/epolls/other/president_trump_job_approval-6179.html","tls":"","proxy":"","authType":"","x":390,"y":140,"wires":[["abaf1307.7c2df8","20650b1a.78eaac"]]},{"id":"1e786658.63578a","type":"debug","z":"e9169fe6.11ecc8","name":"RCP POTUS debug","active":true,"tosidebar":true,"console":false,"tostatus":false,"complete":"payload","targetType":"msg","x":880,"y":140,"wires":[]},{"id":"20650b1a.78eaac","type":"html","z":"e9169fe6.11ecc8","name":"RCP POTUS filter","property":"payload","outproperty":"payload","tag":"candidate","ret":"html","as":"single","x":610,"y":220,"wires":[["69ac9252.e90754"]],"info":"https://www.realclearpolitics.com/epolls/other/president_trump_job_approval-6179.html\n\n<tbody>\n<tr>\n <td class=\"candidate\">\n <div class=\"value\">\n <span style=\"background: #000000;\">44.0</span>\n </div>\n <div class=\"desc\">Approve </div>\n </td>\n</tr>\n<tr>\n <td class=\"candidate\">\n <div class=\"value\">\n <span style=\"background: #ff0000;\">53.4</span>\n </div>\n <div class=\"desc\">Disapprove \n <span style=\"color: #ff0000;\">+9.4</span>\n </div>\n </td>\n</tr>\n</tbody>"},{"id":"abaf1307.7c2df8","type":"function","z":"e9169fe6.11ecc8","name":"RCP page scrape","func":"const cheerio = global.get('cheerio')\nconst $ = cheerio.load(msg.payload)\nreturn msg;","outputs":1,"noerr":0,"x":610,"y":140,"wires":[["1e786658.63578a"]],"info":"https://www.realclearpolitics.com/epolls/other/president_trump_job_approval-6179.html\n\n<tbody>\n<tr>\n <td class=\"candidate\">\n <div class=\"value\">\n <span style=\"background: #000000;\">44.0</span>\n </div>\n <div class=\"desc\">Approve </div>\n </td>\n</tr>\n<tr>\n <td class=\"candidate\">\n <div class=\"value\">\n <span style=\"background: #ff0000;\">53.4</span>\n </div>\n <div class=\"desc\">Disapprove \n <span style=\"color: #ff0000;\">+9.4</span>\n </div>\n </td>\n</tr>\n</tbody>"},{"id":"69ac9252.e90754","type":"debug","z":"e9169fe6.11ecc8","name":" RCP http request","active":true,"tosidebar":true,"console":false,"tostatus":false,"complete":"payload","targetType":"msg","x":870,"y":220,"wires":[]}]