I'm iterating through a large CSV file in Node-RED.
For each row, I do some data remodelling and convert the result to XML.
Finally, I write all of it out to a single XML file.
In my actual implementation, the remodelled XML data is roughly 10x larger than the source CSV data.
I have a use case where the source CSV file is 150 MB in size.
Processing it crashes the flow with the following error:
31 Oct 14:48:02 - [info] Starting flows
31 Oct 14:48:02 - [info] Started flows
<--- Last few GCs --->
[21128:0000019B4DC35950] 2735663 ms: Scavenge 1394.8 (1418.9) -> 1394.2 (1419.4) MB, 1.6 / 0.0 ms (average mu = 0.108, current mu = 0.058) allocation failure
[21128:0000019B4DC35950] 2735666 ms: Scavenge 1395.0 (1419.4) -> 1394.4 (1420.4) MB, 1.6 / 0.0 ms (average mu = 0.108, current mu = 0.058) allocation failure
<--- JS stacktrace --->
==== JS stack trace =========================================
0: ExitFrame [pc: 000000CDE7EDC5C1]
1: StubFrame [pc: 000000CDE7E93770]
Security context: 0x00578631e6e9 <JSObject>
2: /* anonymous */ [000003C4FB451A51] [C:\Users\Oliverio\AppData\Roaming\npm\node_modules\node-red\node_modules\@node-red\nodes\core\parsers\70-CSV.js:~53] [pc=000000CDE7F554C6](this=0x03c4fb450741 <CSVNode map = 000000F6EAE53351>,msg=0x03b986e6de81 <Object map = 000000AC18062FF9>)
3: arguments adaptor frame:...
FATAL ERROR: Ineffective mark-compacts near heap limit Allocation failed - JavaScript heap out of memory
1: 00007FF67D6EDD8A v8::internal::GCIdleTimeHandler::GCIdleTimeHandler+4506
2: 00007FF67D6C8886 node::MakeCallback+4534
3: 00007FF67D6C9200 node_module_register+2032
4: 00007FF67D9E30DE v8::internal::FatalProcessOutOfMemory+846
5: 00007FF67D9E300F v8::internal::FatalProcessOutOfMemory+639
6: 00007FF67DBC9804 v8::internal::Heap::MaxHeapGrowingFactor+9620
7: 00007FF67DBC07E6 v8::internal::ScavengeJob::operator=+24550
8: 00007FF67DBBEE3C v8::internal::ScavengeJob::operator=+17980
9: 00007FF67DBC7B87 v8::internal::Heap::MaxHeapGrowingFactor+2327
10: 00007FF67DBC7C06 v8::internal::Heap::MaxHeapGrowingFactor+2454
11: 00007FF67DCF1EA7 v8::internal::Factory::NewFillerObject+55
12: 00007FF67DD6F096 v8::internal::operator<<+73494
13: 000000CDE7EDC5C1
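If I'm reading this right, the process is hitting V8's default heap limit. I know the limit can be raised when launching Node-RED, for example something like this (assuming a global npm install, where red.js is the Node-RED entry script):

node --max-old-space-size=4096 "C:\Users\Oliverio\AppData\Roaming\npm\node_modules\node-red\red.js"

But that only postpones the crash until an even larger file comes along, so I'd rather fix the flow itself.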
Below is a simplified but usable flow that reproduces the error:
[{"id":"8246d16e.1031","type":"csv","z":"e3115784.470e78","name":"","sep":",","hdrin":true,"hdrout":"","multi":"one","ret":"\\n","temp":"","skip":"0","strings":true,"x":230,"y":800,"wires":[["dfbc2170.6be13"]]},{"id":"dfbc2170.6be13","type":"function","z":"e3115784.470e78","name":"... processing ...","func":"// do stuff using objects from CSV data\n\nvar payload = {\n 'data' : msg.payload\n}\nmsg.payload = payload;\n\nreturn msg;","outputs":1,"noerr":0,"x":420,"y":800,"wires":[["f239ac82.05c6b","44cd37d.17bd5c8"]]},{"id":"f239ac82.05c6b","type":"join","z":"e3115784.470e78","name":"","mode":"custom","build":"array","property":"payload","propertyType":"msg","key":"topic","joiner":"\\n","joinerType":"str","accumulate":false,"timeout":"","count":"","reduceRight":false,"reduceExp":"","reduceInit":"","reduceInitType":"","reduceFixup":"","x":610,"y":800,"wires":[["a6d95c7e.77529"]]},{"id":"63a572a3.46641c","type":"file in","z":"e3115784.470e78","name":"","filename":"D:\\\\input.csv","format":"utf8","chunk":false,"sendError":false,"encoding":"none","x":350,"y":720,"wires":[["8246d16e.1031"]]},{"id":"b5ae8844.cda118","type":"inject","z":"e3115784.470e78","name":"","topic":"","payload":"","payloadType":"str","repeat":"","crontab":"","once":false,"onceDelay":0.1,"x":170,"y":720,"wires":[["63a572a3.46641c"]]},{"id":"a6d95c7e.77529","type":"xml","z":"e3115784.470e78","name":"","property":"payload","attr":"","chr":"","x":770,"y":800,"wires":[["e3c09f42.6669a"]]},{"id":"e3c09f42.6669a","type":"file","z":"e3115784.470e78","name":"","filename":"D:\\\\output.xml","appendNewline":false,"createDir":false,"overwriteFile":"true","encoding":"none","x":320,"y":880,"wires":[[]]},{"id":"44cd37d.17bd5c8","type":"debug","z":"e3115784.470e78","name":"","active":true,"tosidebar":true,"console":false,"tostatus":false,"complete":"payload","targetType":"msg","x":630,"y":760,"wires":[]}]
My suspicion is that the error is caused by the Join node holding all of that data in memory.
So I tried appending the XML data to the output file on each iteration, and deleting msg.payload before feeding each message to the Join node so it doesn't accumulate the data.
Even so, I get the same memory error. Here is that flow:
[{"id":"ff6dfcd9.3b485","type":"csv","z":"e3115784.470e78","name":"","sep":",","hdrin":true,"hdrout":"","multi":"one","ret":"\\n","temp":"","skip":"0","strings":true,"x":250,"y":1360,"wires":[["49eb118.9c789f"]]},{"id":"91d66d0a.47f51","type":"file in","z":"e3115784.470e78","name":"","filename":"D:\\\\input.csv","format":"utf8","chunk":false,"sendError":false,"encoding":"none","x":570,"y":1280,"wires":[["ff6dfcd9.3b485"]]},{"id":"16a1faf8.0c8f35","type":"inject","z":"e3115784.470e78","name":"","topic":"","payload":"","payloadType":"str","repeat":"","crontab":"","once":false,"onceDelay":0.1,"x":130,"y":1280,"wires":[["9a0195c4.e912a8"]]},{"id":"b160866c.7d43c8","type":"xml","z":"e3115784.470e78","name":"","property":"payload","attr":"","chr":"","x":590,"y":1360,"wires":[["33168309.80579c"]]},{"id":"cb8460d8.bf1ab","type":"file","z":"e3115784.470e78","name":"","filename":"D:\\\\output.xml","appendNewline":false,"createDir":false,"overwriteFile":"false","encoding":"none","x":560,"y":1440,"wires":[["f169c443.e88388"]]},{"id":"abdd4fbf.9f166","type":"function","z":"e3115784.470e78","name":"add XML header","func":"var xmlheader = '<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>';\nmsg.payload = xmlheader + '<root>' + msg.payload + '</root>';\n\nreturn msg;","outputs":1,"noerr":0,"x":740,"y":1600,"wires":[["6d74331b.7d57fc"]]},{"id":"f3fd4235.24a64","type":"join","z":"e3115784.470e78","name":"","mode":"auto","build":"array","property":"payload","propertyType":"msg","key":"topic","joiner":"\\n","joinerType":"str","accumulate":false,"timeout":"","count":"","reduceRight":false,"reduceExp":"","reduceInit":"","reduceInitType":"","reduceFixup":"","x":590,"y":1520,"wires":[["c3abd32c.97362"]]},{"id":"6d74331b.7d57fc","type":"file","z":"e3115784.470e78","name":"","filename":"D:\\\\output.xml","appendNewline":false,"createDir":false,"overwriteFile":"true","encoding":"none","x":960,"y":1600,"wires":[[]]},{"id":"f169c443.e88388","type":"change","z":"e3115784.470e78","name":"","rules":[{"t":"delete","p":"payload","pt":"msg"}],"action":"","property":"","from":"","to":"","reg":false,"x":410,"y":1520,"wires":[["f3fd4235.24a64"]]},{"id":"c3abd32c.97362","type":"file in","z":"e3115784.470e78","name":"","filename":"D:\\\\output.xml","format":"utf8","chunk":false,"sendError":false,"encoding":"none","x":520,"y":1600,"wires":[["abdd4fbf.9f166"]]},{"id":"9a0195c4.e912a8","type":"file","z":"e3115784.470e78","name":"Delete D:\\\\output.xml","filename":"D:\\\\output.xml","appendNewline":false,"createDir":false,"overwriteFile":"delete","encoding":"none","x":340,"y":1280,"wires":[["91d66d0a.47f51"]]},{"id":"49eb118.9c789f","type":"function","z":"e3115784.470e78","name":"... 
processing ...","func":"// do stuff using objects from CSV data\n\nvar payload = {\n 'data' : msg.payload\n}\nmsg.payload = payload;\n\nreturn msg;","outputs":1,"noerr":0,"x":420,"y":1360,"wires":[["b160866c.7d43c8"]]},{"id":"33168309.80579c","type":"function","z":"e3115784.470e78","name":"remove XML header","func":"var xmlheader = '<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>';\nmsg.payload = msg.payload.replace(xmlheader, '');\n\nreturn msg;","outputs":1,"noerr":0,"x":340,"y":1440,"wires":[["cb8460d8.bf1ab"]]},{"id":"bb4ce364.d43c8","type":"comment","z":"e3115784.470e78","name":"get CSV data","info":"","x":770,"y":1280,"wires":[]},{"id":"1a7927c6.b817c8","type":"comment","z":"e3115784.470e78","name":"send message per CSV row data","info":"","x":830,"y":1360,"wires":[]},{"id":"b6e446bf.0fa5b8","type":"comment","z":"e3115784.470e78","name":"append each XML data to output file","info":"","x":840,"y":1440,"wires":[]},{"id":"ab51f488.fa9d88","type":"comment","z":"e3115784.470e78","name":"delete msg.payload to avoid storing huge data in Join node","info":"","x":910,"y":1520,"wires":[]},{"id":"f15bd66.af1e928","type":"comment","z":"e3115784.470e78","name":"put the XML header back in to the file","info":"","x":230,"y":1600,"wires":[]}]
I've read a separate post describing a similar issue with Node's memory limits, and I think that may well be what's happening here.
If so, how should I process large files like this in Node-RED?