Parsing HTML: filtering out two values

Hi! I would like to filter out two values from a web page: the article number and the price.

I've been trying for a few hours now but it doesn't work.

Maybe someone can help me with this.

I have circled the two values in the picture.

I tried this for you:

[
    {
        "id": "9eee6b55a7fac9c9",
        "type": "http request",
        "z": "ebe8134f6b5af26c",
        "name": "",
        "method": "GET",
        "ret": "txt",
        "paytoqs": "ignore",
        "url": "https://www.ruddog.eu/Hiro-Seiko-MTC1-Titan-Alum-Hex-Socket-Screw-Set-S-Black",
        "tls": "",
        "persist": false,
        "proxy": "",
        "authType": "",
        "senderr": false,
        "x": 550,
        "y": 480,
        "wires": [
            [
                "bc3f4457783188a5",
                "13fea09c0c74a327"
            ]
        ]
    },
    {
        "id": "719173114ca09950",
        "type": "inject",
        "z": "ebe8134f6b5af26c",
        "name": "",
        "props": [
            {
                "p": "payload"
            }
        ],
        "repeat": "",
        "crontab": "",
        "once": false,
        "onceDelay": 0.1,
        "topic": "",
        "payload": "true",
        "payloadType": "bool",
        "x": 390,
        "y": 480,
        "wires": [
            [
                "9eee6b55a7fac9c9"
            ]
        ]
    },
    {
        "id": "bc3f4457783188a5",
        "type": "html",
        "z": "ebe8134f6b5af26c",
        "name": "",
        "property": "payload",
        "outproperty": "payload",
        "tag": "p[class=\"text-muted product-sku\"]",
        "ret": "text",
        "as": "single",
        "x": 800,
        "y": 480,
        "wires": [
            [
                "96af5751210385d5"
            ]
        ]
    },
    {
        "id": "13fea09c0c74a327",
        "type": "html",
        "z": "ebe8134f6b5af26c",
        "name": "",
        "property": "payload",
        "outproperty": "payload",
        "tag": "span[class=\"price text-nowrap\"]",
        "ret": "text",
        "as": "single",
        "x": 790,
        "y": 520,
        "wires": [
            [
                "8031ed2a8ea40d02"
            ]
        ]
    },
    {
        "id": "8f44b9ee281e742b",
        "type": "debug",
        "z": "ebe8134f6b5af26c",
        "name": "",
        "active": true,
        "tosidebar": true,
        "console": false,
        "tostatus": false,
        "complete": "payload",
        "targetType": "msg",
        "statusVal": "",
        "statusType": "auto",
        "x": 1430,
        "y": 500,
        "wires": []
    },
    {
        "id": "96af5751210385d5",
        "type": "change",
        "z": "ebe8134f6b5af26c",
        "name": "",
        "rules": [
            {
                "t": "set",
                "p": "topic",
                "pt": "msg",
                "to": "Artikel",
                "tot": "str"
            },
            {
                "t": "set",
                "p": "payload",
                "pt": "msg",
                "to": "payload[0]",
                "tot": "msg"
            }
        ],
        "action": "",
        "property": "",
        "from": "",
        "to": "",
        "reg": false,
        "x": 1070,
        "y": 480,
        "wires": [
            [
                "87eb375f7b15276f"
            ]
        ]
    },
    {
        "id": "8031ed2a8ea40d02",
        "type": "change",
        "z": "ebe8134f6b5af26c",
        "name": "",
        "rules": [
            {
                "t": "set",
                "p": "topic",
                "pt": "msg",
                "to": "Preis",
                "tot": "str"
            },
            {
                "t": "set",
                "p": "payload",
                "pt": "msg",
                "to": "payload[0]",
                "tot": "msg"
            },
            {
                "t": "set",
                "p": "payload",
                "pt": "msg",
                "to": "$trim(payload)",
                "tot": "jsonata"
            }
        ],
        "action": "",
        "property": "",
        "from": "",
        "to": "",
        "reg": false,
        "x": 1070,
        "y": 520,
        "wires": [
            [
                "87eb375f7b15276f"
            ]
        ]
    },
    {
        "id": "87eb375f7b15276f",
        "type": "join",
        "z": "ebe8134f6b5af26c",
        "name": "",
        "mode": "custom",
        "build": "object",
        "property": "payload",
        "propertyType": "msg",
        "key": "topic",
        "joiner": "\\n",
        "joinerType": "str",
        "accumulate": false,
        "timeout": "",
        "count": "2",
        "reduceRight": false,
        "reduceExp": "",
        "reduceInit": "",
        "reduceInitType": "",
        "reduceFixup": "",
        "x": 1270,
        "y": 500,
        "wires": [
            [
                "8f44b9ee281e742b"
            ]
        ]
    }
]

The values can then be extracted further.

Hi! Thanks a lot, that will definitely help me.

Hi! I remembered something else. Is it possible to log in to the homepage and then read out the data?

Hi,

Once you go a bit past the basic HTTP Request needs (for example: logging in and navigating between a set of web pages) you might need to start using WebDriver IO (WDIO): npm install node-red-contrib-wdio

This (in conjunction with a shell session running 'selenium-standalone start') can let you automate even very complex web page interactions including 'scraping' web sites or pushing data to them.

Cheers,

Paul

This topic was automatically closed 60 days after the last reply. New replies are no longer allowed.