I build a “simple” subflow that can parse jsgf grammars to output all possible permutations defined by the grammar.
https://flows.nodered.org/flow/c24c2981ed955374c0eef6083d0e5f7c
This can be used to quickly create a corpus of possible sentences to for example train a domain specific language model to be used with speech recognition systems like deepspeech, coqui, Kaldi or pocketsphinx.
It mostly parses the JSpeech Grammar Format for this. This is done in pure JavaScript with no dependencies needed. The subflow uses only function nodes for this.
Be aware that you can probably break the subflow pretty easily by feeding grammars with errors in them. It also doesn’t support all features of the jsgf syntax. A list of limitations can be found in the readme.
It can also be used to easily rig a intent recognition for text input in conjunction with node-red-contrib-fuzzywuzzy and tags in the grammar. I build in options to correctly format the output for this to work out of the box.
This could be used to for example extract intents from a telegram bot.
An example flow is here (needs node-red-contrib-fuzzywuzzy):
[{"id":"a71fc6a3.13964","type":"subflow","name":"jsgf permutator","info":"","category":"","in":[{"x":60,"y":120,"wires":[{"id":"64935c66.d7f52c"},{"id":"a981f80e.432d8"}]}],"out":[{"x":1160,"y":120,"wires":[{"id":"4d6f9aa6.a03db4","port":0}]}],"env":[{"name":"tags","type":"str","value":"remove","ui":{"icon":"font-awesome/fa-bookmark-o","label":{"en-US":"Tags"},"type":"select","opts":{"opts":[{"l":{"en-US":"remove"},"v":"remove"},{"l":{"en-US":"leave"},"v":"leave"},{"l":{"en-US":"move to left"},"v":"left"}]}}},{"name":"outputType","type":"str","value":"array","ui":{"label":{"en-US":"output as"},"type":"select","opts":{"opts":[{"l":{"en-US":"Array"},"v":"array"},{"l":{"en-US":"String"},"v":"string"}]}}}],"color":"#E7E7AE","icon":"node-red/split.svg","status":{"x":1160,"y":220,"wires":[{"id":"a981f80e.432d8","port":0},{"id":"be781da.d15a96","port":0}]}},{"id":"64935c66.d7f52c","type":"function","z":"a71fc6a3.13964","name":"ruleTokenizer","func":"function ruleTokenizer (rule) {\n let ruleArrRaw = rule.split(\"\");\n let ruleArr = [];\n let outputToken = \"\";\n ruleArrRaw.forEach(token => {\n if (token.match(/[a-zA-Z0-9\\ä\\ö\\ü\\ß\\-\\_\\{\\}\\<\\>\\/]/) !== null) {\n outputToken += token;\n } else if (token.match(/[\\[\\]\\|\\(\\)]/) !== null) {\n if (outputToken.length !== 0) {\n ruleArr.push(outputToken);\n outputToken = \"\";\n }\n ruleArr.push(token);\n } else if (token.match(/\\s/) !== null) {\n if (outputToken.length !== 0) {\n ruleArr.push(outputToken);\n outputToken = \"\";\n }\n }\n });\n if (outputToken.length !== 0) {\n ruleArr.push(outputToken);\n }\n return ruleArr;\n}\nlet rawRules = msg.payload.replace(/\\/\\/(.*)\\r?\\n|\\r/g, \" \").replace(/\\r?\\n|\\r/g, \" \").replace(/\\s{2,}/g, \" \").replace(/\\/[0-9]?[\\.0-9]*\\//g, \"\");\nrawRules = rawRules.split(\";\").map(rule => rule.trim()).filter(rule => rule.length > 0);\nlet ruleName = false;\ntry {\n ruleName = rawRules.filter(rule => rule.match(/^grammar\\s/g))[0].replace(/^grammar\\s/g, \"\");\n} catch (error) {\n node.warn(\"no grammar name\");\n}\n(ruleName) ? flow.set(\"ruleName\", ruleName) : flow.set(\"ruleName\", false);\nrawRules = rawRules.filter(rule => rule.match(/\\=/g));\nlet rulesObj = {};\nrawRules.forEach(rule => {\n rule = rule.split(\"=\").map(rule => rule.trim());\n let isPublic =(rule[0].match(/^public/g) !== null) ? true : false;\n let name = rule[0].match(/\\<(.+)\\>/g).toString().replace(/\\<|\\>/g, \"\");\n rulesObj[name] = {\n rule: ruleTokenizer(rule[1]),\n public: isPublic\n }\n});\nmsg.payload = rulesObj;\nreturn msg;","outputs":1,"noerr":0,"initialize":"","finalize":"","x":250,"y":120,"wires":[["6ba0d6a0.b6ada"]]},{"id":"77f7190d.b0eef","type":"function","z":"a71fc6a3.13964","name":"ruleOptionals","func":"let finalResults = [];\nlet iterateOptionalsRule = 0;\nlet toDo = [];\nlet iterations = 0;\nfunction optionalsCreator (tokens) {\n if (optionalsChecker(tokens)) {\n let check = tokens.indexOf(\"[\");\n let level = 1;\n let withIt = [];\n let withoutIt = [];\n let results = [];\n for (i = check+1; i < tokens.length; i++) {\n if (tokens[i] === \"]\") {\n level -= 1;\n } else if (tokens[i] === \"[\") {\n level += 1;\n }\n if (level === 0) {\n withIt = tokens.filter((token,index) => index !== check && index !== i);\n withoutIt = tokens.filter((token,index) => index < check || index > i);\n break;\n }\n }\n results.push(withIt, withoutIt);\n results.forEach(result => {\n if (optionalsChecker(result)) {\n toDo.push(result);\n } else {\n finalResults.push(result);\n }\n });\n if (toDo.length === 0) {\n if (iterateOptionalsRule < optionalsIterationLength - 1) {\n iterateOptionalsRule += 1;\n toDo.push(ruleArr[iterateOptionalsRule]);\n nextOne();\n } else {\n node.send({payload:finalResults});\n }\n } else {\n nextOne();\n }\n } else {\n finalResults.push(tokens);\n if (iterateOptionalsRule < optionalsIterationLength - 1) {\n iterateOptionalsRule += 1;\n toDo.push(ruleArr[iterateOptionalsRule]);\n nextOne();\n } else {\n node.send({payload:finalResults});\n }\n }\n return;\n}\nfunction optionalsChecker (inputArray) {\n if (inputArray.includes(\"[\") && inputArray.includes(\"]\")) {\n return true;\n }\n return false;\n}\nfunction nextOne () {\n iterations += 1;\n let next = toDo.shift();\n if (Array.isArray(next)) {\n if (iterations >= 1000) {\n iterations = 0;\n setTimeout(()=>{\n optionalsCreator(next);\n },0);\n } else {\n optionalsCreator(next);\n }\n }\n return;\n}\nlet ruleArr = msg.payload;\nlet optionalsIterationLength = ruleArr.length;\noptionalsCreator(ruleArr[iterateOptionalsRule]);\nreturn;","outputs":1,"noerr":0,"x":810,"y":120,"wires":[["4d6f9aa6.a03db4"]]},{"id":"6ba0d6a0.b6ada","type":"function","z":"a71fc6a3.13964","name":"ruleExpander","func":"let currentRule = 0;\nlet ruleObj = msg.payload;\nconst ruleKeys = Object.keys(ruleObj);\nfunction iterateRules (rule) {\n if (ruleObj[ruleKeys[currentRule]].public) {\n rule.unshift(\"(\");\n rule.push(\")\");\n for (i=0;i<rule.length;i++) {\n if (rule[i].match(/\\<(.*)\\>/) !== null) {\n let sub = rule[i].replace(\"<\",\"\").replace(\">\",\"\");\n rule[i] = ruleObj[sub].rule;\n rule[i].unshift(\"(\");\n rule[i].push(\")\");\n }\n }\n rule = rule.flat();\n let test = false;\n rule.forEach(token => {\n if (token.match(/\\<(.*)\\>/) !== null) {\n test = true;\n }\n });\n ruleObj[ruleKeys[currentRule]].rule = rule;\n if (test) {\n iterateRules(ruleObj[ruleKeys[currentRule]].rule);\n } else if (currentRule < ruleKeys.length - 1) {\n currentRule += 1;\n iterateRules(ruleObj[ruleKeys[currentRule]].rule);\n } else {\n makeIntermediateArr(ruleObj);\n }\n } else if (currentRule < ruleKeys.length - 1) {\n currentRule += 1;\n iterateRules(ruleObj[ruleKeys[currentRule]].rule);\n } else {\n makeIntermediateArr(ruleObj);\n }\n}\nfunction makeIntermediateArr (inputObj) {\n let intermediateArr = [];\n for (var rule in inputObj) {\n if (inputObj[rule].public) {\n if (!Array.isArray(inputObj[rule].rule[0])) {\n intermediateArr.push([inputObj[rule].rule]);\n } else {\n intermediateArr.push(inputObj[rule].rule);\n }\n }\n }\n intermediateArr = intermediateArr.flat(1);\n node.send({payload:intermediateArr});\n}\niterateRules(ruleObj[ruleKeys[currentRule]].rule);\nreturn;","outputs":1,"noerr":0,"x":430,"y":120,"wires":[["e49fdb1e.c06768"]]},{"id":"4d6f9aa6.a03db4","type":"function","z":"a71fc6a3.13964","name":"unTokenizer","func":"let outputArr = [];\nlet combine = \"\";\nlet tags = \"\";\nconst addTags = env.get(\"tags\");\nmsg.payload.forEach(sentence => {\n if (Array.isArray(sentence)) {\n sentence.forEach(token => {\n if (token !== \"(\" && token !== \")\") {\n if (token.match(/\\{(.*)\\}/g) !== null) {\n switch (addTags) {\n case \"remove\":\n break;\n case \"leave\":\n combine = combine + token + \" \";\n break;\n case \"left\":\n tags = tags + token + \";\";\n break;\n }\n } else {\n combine = combine + token + \" \";\n }\n }\n });\n if (tags !== \"\") {\n tags = tags.slice(0,-1).replace(/[\\{\\}]/g,\"\");\n combine = tags + \":\" + combine;\n }\n if (combine !== \"\") {\n combine = combine.trim();\n if (!outputArr.includes(combine)) { outputArr.push(combine); }\n }\n combine = \"\";\n tags = \"\";\n }\n});\nif (env.get(\"outputType\") === \"string\") {\n let outputString = \"\";\n outputArr.forEach(sentence => {\n outputString = outputString + sentence + \"\\n\";\n });\n msg.payload = outputString;\n} else {\n msg.payload = outputArr;\n}\nconst ruleName = flow.get(\"ruleName\");\nif (ruleName) {\n msg.topic = ruleName;\n}\nreturn msg;","outputs":1,"noerr":0,"x":990,"y":120,"wires":[["be781da.d15a96"]]},{"id":"e49fdb1e.c06768","type":"function","z":"a71fc6a3.13964","name":"ruleAlternatives","func":"let iterateAlternativesRule = 0;\nlet toDo = [];\nlet output = [];\nlet iterations = 0;\nfunction makeAlternatives (inputTokens) {\n if (alternativesChecker(inputTokens)) {\n let firstSeparator = 0;\n let start = 0;\n let end = inputTokens.length;\n let levelParentheses = 1;\n for (i=0;i<inputTokens.length;i++) {\n if (inputTokens[i] === \"|\") {\n firstSeparator = i;\n break;\n }\n }\n for (i=firstSeparator-1;i>=0;i--) {\n if (inputTokens[i] === \")\") {\n levelParentheses += 1;\n }\n if (inputTokens[i] === \"(\") {\n levelParentheses -= 1;\n }\n if (levelParentheses === 0) {\n start = i;\n break;\n } \n }\n levelParentheses = 1;\n for (i=firstSeparator+1;i<inputTokens.length;i++) {\n if (inputTokens[i] === \"(\") {\n levelParentheses += 1;\n }\n if (inputTokens[i] === \")\") {\n levelParentheses -= 1;\n }\n if (levelParentheses === 0) {\n end = i;\n break;\n }\n }\n levelParentheses = 0;\n let previousSeparator = start;\n let alternativesArr = [];\n for (i=start;i<=end;i++) {\n if (inputTokens[i] === \"(\") {\n levelParentheses += 1;\n }\n if (inputTokens[i] === \")\") {\n levelParentheses -= 1;\n }\n if ((inputTokens[i] === \"|\" && levelParentheses === 1) || levelParentheses === 0) {\n let thisAlternative = inputTokens.slice(previousSeparator + 1, i);\n alternativesArr.push(thisAlternative);\n previousSeparator = i;\n }\n }\n let alternativeBlockLength = end - start + 1;\n let outputAlternatives = [];\n for (i=0;i<alternativesArr.length;i++) {\n let intermediate = [...inputTokens];\n intermediate.splice(start, alternativeBlockLength,alternativesArr[i]);\n intermediate = intermediate.flat();\n outputAlternatives.push(intermediate);\n }\n outputAlternatives.forEach(alternative => {\n if (alternativesChecker(alternative)) {\n toDo.push(alternative);\n } else {\n output.push(alternative);\n }\n });\n if (toDo.length === 0) {\n if (iterateAlternativesRule < alternativesIterationLength - 1) {\n iterateAlternativesRule += 1;\n toDo.push(ruleArr[iterateAlternativesRule]);\n nextOne();\n } else {\n node.send({payload:output});\n }\n } else {\n nextOne();\n }\n } else {\n output.push(inputTokens);\n if (iterateAlternativesRule < alternativesIterationLength - 1) {\n iterateAlternativesRule += 1;\n toDo.push(ruleArr[iterateAlternativesRule]);\n nextOne();\n } else {\n node.send({payload:output});\n }\n }\n return;\n}\nfunction alternativesChecker (inputArr) {\n if (inputArr.includes(\"|\")) {\n return true;\n }\n return false;\n}\nfunction nextOne () {\n iterations += 1;\n let next = toDo.shift();\n if (Array.isArray(next)) {\n if (iterations >= 100) {\n iterations = 0;\n setTimeout(()=>{\n makeAlternatives(next);\n },0);\n } else {\n makeAlternatives(next);\n }\n }\n return;\n}\nlet ruleArr = msg.payload\nlet alternativesIterationLength = ruleArr.length;\nmakeAlternatives(ruleArr[iterateAlternativesRule]);\nreturn;","outputs":1,"noerr":0,"x":620,"y":120,"wires":[["77f7190d.b0eef"]]},{"id":"a981f80e.432d8","type":"change","z":"a71fc6a3.13964","name":"","rules":[{"t":"set","p":"payload","pt":"msg","to":"processing...","tot":"str"}],"action":"","property":"","from":"","to":"","reg":false,"x":260,"y":180,"wires":[[]]},{"id":"be781da.d15a96","type":"change","z":"a71fc6a3.13964","name":"","rules":[{"t":"set","p":"payload","pt":"msg","to":"","tot":"str"}],"action":"","property":"","from":"","to":"","reg":false,"x":1000,"y":180,"wires":[[]]},{"id":"2a0a51eb.f50a76","type":"fuzzy-match","z":"cff51aa6.4b8cd","name":"","inputOptions":"message","choices":"","scorer":"ratio","limit":"1","cutoff":"50","x":950,"y":300,"wires":[["1a7440d4.c5c9b7"],[]]},{"id":"e0bc0762.853d6","type":"debug","z":"cff51aa6.4b8cd","name":"","active":true,"tosidebar":true,"console":false,"tostatus":false,"complete":"payload","targetType":"msg","statusVal":"","statusType":"auto","x":730,"y":440,"wires":[]},{"id":"9e4f3a7f.c0016","type":"change","z":"cff51aa6.4b8cd","name":"","rules":[{"t":"move","p":"payload","pt":"msg","to":"choices","tot":"msg"}],"action":"","property":"","from":"","to":"","reg":false,"x":750,"y":380,"wires":[["2a0a51eb.f50a76"]]},{"id":"1a7440d4.c5c9b7","type":"debug","z":"cff51aa6.4b8cd","name":"","active":true,"tosidebar":true,"console":false,"tostatus":false,"complete":"true","targetType":"full","statusVal":"","statusType":"auto","x":1110,"y":300,"wires":[]},{"id":"60d70bbe.6a83dc","type":"inject","z":"cff51aa6.4b8cd","name":"turn on the television","props":[{"p":"payload"}],"repeat":"","crontab":"","once":false,"onceDelay":0.1,"topic":"","payload":"turn on the television","payloadType":"str","x":600,"y":200,"wires":[["99faf22b.4bff68"]]},{"id":"5798bd03.beb7cc","type":"template","z":"cff51aa6.4b8cd","name":"MediaDeviceGrammar","field":"payload","fieldType":"msg","format":"text","syntax":"plain","template":"<media_options> = on {ON} | off {OFF} | volume (up {volumeUp} | down {volumeDown} ){command};\n<media_devices> = television {tv} | radio {radio} | stereo {hifi};\npublic <media_rules> = [turn the] <media_devices> <media_options>;","output":"str","x":320,"y":380,"wires":[["bf749bab.913408"]]},{"id":"5c5d5df9.1b4814","type":"inject","z":"cff51aa6.4b8cd","name":"","props":[{"p":"payload"},{"p":"topic","vt":"str"}],"repeat":"","crontab":"","once":false,"onceDelay":0.1,"topic":"","payload":"","payloadType":"date","x":120,"y":380,"wires":[["5798bd03.beb7cc"]]},{"id":"c3da846.a50e878","type":"inject","z":"cff51aa6.4b8cd","name":"turn the radio volume down","props":[{"p":"payload"}],"repeat":"","crontab":"","once":false,"onceDelay":0.1,"topic":"","payload":"turn the radio volume down","payloadType":"str","x":620,"y":260,"wires":[["99faf22b.4bff68"]]},{"id":"2863e0cb.4bf55","type":"debug","z":"cff51aa6.4b8cd","name":"","active":true,"tosidebar":true,"console":false,"tostatus":false,"complete":"false","statusVal":"","statusType":"auto","x":950,"y":200,"wires":[]},{"id":"fda3201e.8f275","type":"inject","z":"cff51aa6.4b8cd","name":"stereo off","props":[{"p":"payload"}],"repeat":"","crontab":"","once":false,"onceDelay":0.1,"topic":"","payload":"stereo off","payloadType":"str","x":560,"y":320,"wires":[["99faf22b.4bff68"]]},{"id":"99faf22b.4bff68","type":"change","z":"cff51aa6.4b8cd","name":"","rules":[],"action":"","property":"","from":"","to":"","reg":false,"x":815,"y":260,"wires":[["2a0a51eb.f50a76","2863e0cb.4bf55"]],"l":false},{"id":"77cf5950.11233","type":"comment","z":"cff51aa6.4b8cd","name":"inject first !!!","info":"","x":110,"y":320,"wires":[]},{"id":"60fd4298.47e78c","type":"comment","z":"cff51aa6.4b8cd","name":"than try it out :)","info":"","x":560,"y":140,"wires":[]},{"id":"3f2bff4e.397d1","type":"comment","z":"cff51aa6.4b8cd","name":"have a look at the simple jsgf grammar example","info":"","x":400,"y":440,"wires":[]},{"id":"bf749bab.913408","type":"subflow:a71fc6a3.13964","z":"cff51aa6.4b8cd","name":"","env":[{"name":"tags","value":"left","type":"str"},{"name":"outputType","value":"string","type":"str"}],"x":540,"y":380,"wires":[["9e4f3a7f.c0016","e0bc0762.853d6"]]}]
Have fun with it but be aware it work in progress.
Johannes