Memory issues processing and writing a large file

I'm trying to iterate over a large CSV file.
For each line, I do some data remodelling and convert the result to XML.
Lastly, I save all of it into a single XML file.

In my actual implementation, the remodelled XML data ends up about 10x larger than the source CSV data.
In my use case, the source CSV file is 150 MB in size.
Processing it crashes the flow with the following error:

31 Oct 14:48:02 - [info] Starting flows
31 Oct 14:48:02 - [info] Started flows

<--- Last few GCs --->

[21128:0000019B4DC35950]  2735663 ms: Scavenge 1394.8 (1418.9) -> 1394.2 (1419.4) MB, 1.6 / 0.0 ms  (average mu = 0.108, current mu = 0.058) allocation failure
[21128:0000019B4DC35950]  2735666 ms: Scavenge 1395.0 (1419.4) -> 1394.4 (1420.4) MB, 1.6 / 0.0 ms  (average mu = 0.108, current mu = 0.058) allocation failure


<--- JS stacktrace --->

==== JS stack trace =========================================

    0: ExitFrame [pc: 000000CDE7EDC5C1]
    1: StubFrame [pc: 000000CDE7E93770]
Security context: 0x00578631e6e9 <JSObject>
    2: /* anonymous */ [000003C4FB451A51] [C:\Users\Oliverio\AppData\Roaming\npm\node_modules\node-red\node_modules\@node-red\nodes\core\parsers\70-CSV.js:~53] [pc=000000CDE7F554C6](this=0x03c4fb450741 <CSVNode map = 000000F6EAE53351>,msg=0x03b986e6de81 <Object map = 000000AC18062FF9>)
    3: arguments adaptor frame:...

FATAL ERROR: Ineffective mark-compacts near heap limit Allocation failed - JavaScript heap out of memory
 1: 00007FF67D6EDD8A v8::internal::GCIdleTimeHandler::GCIdleTimeHandler+4506
 2: 00007FF67D6C8886 node::MakeCallback+4534
 3: 00007FF67D6C9200 node_module_register+2032
 4: 00007FF67D9E30DE v8::internal::FatalProcessOutOfMemory+846
 5: 00007FF67D9E300F v8::internal::FatalProcessOutOfMemory+639
 6: 00007FF67DBC9804 v8::internal::Heap::MaxHeapGrowingFactor+9620
 7: 00007FF67DBC07E6 v8::internal::ScavengeJob::operator=+24550
 8: 00007FF67DBBEE3C v8::internal::ScavengeJob::operator=+17980
 9: 00007FF67DBC7B87 v8::internal::Heap::MaxHeapGrowingFactor+2327
10: 00007FF67DBC7C06 v8::internal::Heap::MaxHeapGrowingFactor+2454
11: 00007FF67DCF1EA7 v8::internal::Factory::NewFillerObject+55
12: 00007FF67DD6F096 v8::internal::operator<<+73494
13: 000000CDE7EDC5C1

Below is a simplified, usable version of the flow.


[{"id":"8246d16e.1031","type":"csv","z":"e3115784.470e78","name":"","sep":",","hdrin":true,"hdrout":"","multi":"one","ret":"\\n","temp":"","skip":"0","strings":true,"x":230,"y":800,"wires":[["dfbc2170.6be13"]]},{"id":"dfbc2170.6be13","type":"function","z":"e3115784.470e78","name":"... processing ...","func":"// do stuff using objects from CSV data\n\nvar payload = {\n    'data' : msg.payload\n}\nmsg.payload = payload;\n\nreturn msg;","outputs":1,"noerr":0,"x":420,"y":800,"wires":[["f239ac82.05c6b","44cd37d.17bd5c8"]]},{"id":"f239ac82.05c6b","type":"join","z":"e3115784.470e78","name":"","mode":"custom","build":"array","property":"payload","propertyType":"msg","key":"topic","joiner":"\\n","joinerType":"str","accumulate":false,"timeout":"","count":"","reduceRight":false,"reduceExp":"","reduceInit":"","reduceInitType":"","reduceFixup":"","x":610,"y":800,"wires":[["a6d95c7e.77529"]]},{"id":"63a572a3.46641c","type":"file in","z":"e3115784.470e78","name":"","filename":"D:\\\\input.csv","format":"utf8","chunk":false,"sendError":false,"encoding":"none","x":350,"y":720,"wires":[["8246d16e.1031"]]},{"id":"b5ae8844.cda118","type":"inject","z":"e3115784.470e78","name":"","topic":"","payload":"","payloadType":"str","repeat":"","crontab":"","once":false,"onceDelay":0.1,"x":170,"y":720,"wires":[["63a572a3.46641c"]]},{"id":"a6d95c7e.77529","type":"xml","z":"e3115784.470e78","name":"","property":"payload","attr":"","chr":"","x":770,"y":800,"wires":[["e3c09f42.6669a"]]},{"id":"e3c09f42.6669a","type":"file","z":"e3115784.470e78","name":"","filename":"D:\\\\output.xml","appendNewline":false,"createDir":false,"overwriteFile":"true","encoding":"none","x":320,"y":880,"wires":[[]]},{"id":"44cd37d.17bd5c8","type":"debug","z":"e3115784.470e78","name":"","active":true,"tosidebar":true,"console":false,"tostatus":false,"complete":"payload","targetType":"msg","x":630,"y":760,"wires":[]}]

My thought is that this error is caused by the flow holding so much data in the Join node.
So I tried appending the XML data to the output file on each iteration and deleting msg.payload before feeding the message to the Join node, so it doesn't accumulate data.

Even so, I get the same memory error.

[{"id":"ff6dfcd9.3b485","type":"csv","z":"e3115784.470e78","name":"","sep":",","hdrin":true,"hdrout":"","multi":"one","ret":"\\n","temp":"","skip":"0","strings":true,"x":250,"y":1360,"wires":[["49eb118.9c789f"]]},{"id":"91d66d0a.47f51","type":"file in","z":"e3115784.470e78","name":"","filename":"D:\\\\input.csv","format":"utf8","chunk":false,"sendError":false,"encoding":"none","x":570,"y":1280,"wires":[["ff6dfcd9.3b485"]]},{"id":"16a1faf8.0c8f35","type":"inject","z":"e3115784.470e78","name":"","topic":"","payload":"","payloadType":"str","repeat":"","crontab":"","once":false,"onceDelay":0.1,"x":130,"y":1280,"wires":[["9a0195c4.e912a8"]]},{"id":"b160866c.7d43c8","type":"xml","z":"e3115784.470e78","name":"","property":"payload","attr":"","chr":"","x":590,"y":1360,"wires":[["33168309.80579c"]]},{"id":"cb8460d8.bf1ab","type":"file","z":"e3115784.470e78","name":"","filename":"D:\\\\output.xml","appendNewline":false,"createDir":false,"overwriteFile":"false","encoding":"none","x":560,"y":1440,"wires":[["f169c443.e88388"]]},{"id":"abdd4fbf.9f166","type":"function","z":"e3115784.470e78","name":"add XML header","func":"var xmlheader = '<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>';\nmsg.payload = xmlheader + '<root>' + msg.payload + '</root>';\n\nreturn msg;","outputs":1,"noerr":0,"x":740,"y":1600,"wires":[["6d74331b.7d57fc"]]},{"id":"f3fd4235.24a64","type":"join","z":"e3115784.470e78","name":"","mode":"auto","build":"array","property":"payload","propertyType":"msg","key":"topic","joiner":"\\n","joinerType":"str","accumulate":false,"timeout":"","count":"","reduceRight":false,"reduceExp":"","reduceInit":"","reduceInitType":"","reduceFixup":"","x":590,"y":1520,"wires":[["c3abd32c.97362"]]},{"id":"6d74331b.7d57fc","type":"file","z":"e3115784.470e78","name":"","filename":"D:\\\\output.xml","appendNewline":false,"createDir":false,"overwriteFile":"true","encoding":"none","x":960,"y":1600,"wires":[[]]},{"id":"f169c443.e88388","type":"change","z":"e3115784.470e78","name":"","rules":[{"t":"delete","p":"payload","pt":"msg"}],"action":"","property":"","from":"","to":"","reg":false,"x":410,"y":1520,"wires":[["f3fd4235.24a64"]]},{"id":"c3abd32c.97362","type":"file in","z":"e3115784.470e78","name":"","filename":"D:\\\\output.xml","format":"utf8","chunk":false,"sendError":false,"encoding":"none","x":520,"y":1600,"wires":[["abdd4fbf.9f166"]]},{"id":"9a0195c4.e912a8","type":"file","z":"e3115784.470e78","name":"Delete D:\\\\output.xml","filename":"D:\\\\output.xml","appendNewline":false,"createDir":false,"overwriteFile":"delete","encoding":"none","x":340,"y":1280,"wires":[["91d66d0a.47f51"]]},{"id":"49eb118.9c789f","type":"function","z":"e3115784.470e78","name":"... 
processing ...","func":"// do stuff using objects from CSV data\n\nvar payload = {\n    'data' : msg.payload\n}\nmsg.payload = payload;\n\nreturn msg;","outputs":1,"noerr":0,"x":420,"y":1360,"wires":[["b160866c.7d43c8"]]},{"id":"33168309.80579c","type":"function","z":"e3115784.470e78","name":"remove XML header","func":"var xmlheader = '<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>';\nmsg.payload = msg.payload.replace(xmlheader, '');\n\nreturn msg;","outputs":1,"noerr":0,"x":340,"y":1440,"wires":[["cb8460d8.bf1ab"]]},{"id":"bb4ce364.d43c8","type":"comment","z":"e3115784.470e78","name":"get CSV data","info":"","x":770,"y":1280,"wires":[]},{"id":"1a7927c6.b817c8","type":"comment","z":"e3115784.470e78","name":"send message per CSV row data","info":"","x":830,"y":1360,"wires":[]},{"id":"b6e446bf.0fa5b8","type":"comment","z":"e3115784.470e78","name":"append each XML data to output file","info":"","x":840,"y":1440,"wires":[]},{"id":"ab51f488.fa9d88","type":"comment","z":"e3115784.470e78","name":"delete msg.payload to avoid storing huge data in Join node","info":"","x":910,"y":1520,"wires":[]},{"id":"f15bd66.af1e928","type":"comment","z":"e3115784.470e78","name":"put the XML header back in to the file","info":"","x":230,"y":1600,"wires":[]}]
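For reference, the "remove XML header" function in that export exists because the XML node emits a complete document, declaration included, for every message; the declaration has to be stripped before each fragment is appended:

```javascript
// each message through the XML node gets its own <?xml ...?> declaration;
// strip it so the appended fragments can form one well-formed document
var xmlheader = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>';
msg.payload = msg.payload.replace(xmlheader, '');

return msg;
```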

I've read a separate post that describes a similar issue with node memory:

I think that this might just be the case.
If so, how should I process large files like this in Node-RED?

So I tried appending the XML data to the output file on each iteration and deleting msg.payload before feeding the message to the Join node, so it doesn't accumulate data.

If you want to iterate, you need to set the file-in node to output a msg per line.
It's still going to have a heavy impact, though.
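As a minimal sketch of what that looks like, assuming the core file-in node is set to "a msg per line" (it then attaches a msg.parts object to each message), the downstream function node only ever sees one row at a time:

```javascript
// function node after a per-line file-in node: called once per CSV line,
// so only one row is in flight at any moment
// msg.payload      - a single line of the file
// msg.parts.index  - zero-based line number of that line

// ...remodel just this one row into its XML fragment here...
msg.payload = { data: msg.payload };   // placeholder remodelling from the flows above

return msg;
```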

I don't think Node-RED is the right tool for performing these actions on such a large CSV. A CSV that size belongs in a database, where you can do all kinds of operations on the data and iterate more efficiently (if the data is indexed).


Yes. Using msg per line on the input should allow this to work. Hopefully you shouldn't need the Join node, as you can also output a line at a time.
I have been processing video files and extracting metadata with no problem.


Yes, I agree with that.
But for some reason, I am being asked to do this in Node-RED. :sweat_smile:

I was sure I had tried this approach as well and still hit the error.
But now I've got it working perfectly.

Thanks for the advice, @bakman2 and @dceejay!

On the off chance someone needs the working flow, here it is:

[{"id":"df914794.c9f6e8","type":"file in","z":"2f489ad1.76f266","name":"","filename":"D:\\\\input.csv","format":"lines","chunk":false,"sendError":false,"encoding":"utf8","x":250,"y":460,"wires":[["328ac413.acc99c"]]},{"id":"8df35fa2.dbc47","type":"inject","z":"2f489ad1.76f266","name":"","topic":"","payload":"","payloadType":"str","repeat":"","crontab":"","once":false,"onceDelay":0.1,"x":190,"y":380,"wires":[["66f0794c.570df8"]]},{"id":"570fcc0b.84ea84","type":"xml","z":"2f489ad1.76f266","name":"","property":"payload","attr":"","chr":"","x":510,"y":640,"wires":[["6c9c1e41.ae2d6"]]},{"id":"799a9147.53447","type":"file","z":"2f489ad1.76f266","name":"","filename":"D:\\\\output.xml","appendNewline":true,"createDir":false,"overwriteFile":"false","encoding":"none","x":920,"y":640,"wires":[["7108c578.37529c"]]},{"id":"6c9c1e41.ae2d6","type":"function","z":"2f489ad1.76f266","name":"remove XML header","func":"var xmlheader = '<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>';\nmsg.payload = msg.payload.replace(xmlheader, '');\n\nreturn msg;","outputs":1,"noerr":0,"x":700,"y":640,"wires":[["799a9147.53447"]]},{"id":"a36b9cf8.344aa","type":"function","z":"2f489ad1.76f266","name":"... processing ...","func":"// do stuff using objects from CSV data\n\nvar payload = {\n    'data' : msg.payload\n}\nmsg.payload = payload;\n\nreturn msg;","outputs":1,"noerr":0,"x":340,"y":640,"wires":[["570fcc0b.84ea84"]]},{"id":"328ac413.acc99c","type":"function","z":"2f489ad1.76f266","name":"prepend columns","func":"if (msg.parts.index === 0) {\n    flow.set('columns', msg.payload);\n    return;\n}\n\nmsg.payload = flow.get('columns') + '\\r\\n' + msg.payload;\n\nreturn msg;","outputs":1,"noerr":0,"x":450,"y":460,"wires":[["74db8414.0aee8c"]]},{"id":"73a1bfc5.0dbdf","type":"change","z":"2f489ad1.76f266","name":"parts -> temp","rules":[{"t":"set","p":"temp","pt":"msg","to":"parts","tot":"msg"},{"t":"delete","p":"parts","pt":"msg"}],"action":"","property":"","from":"","to":"","reg":false,"x":290,"y":560,"wires":[["51b5ec4f.137694"]]},{"id":"526fe89.1624a18","type":"change","z":"2f489ad1.76f266","name":"temp -> parts","rules":[{"t":"set","p":"parts","pt":"msg","to":"temp","tot":"msg"},{"t":"delete","p":"temp","pt":"msg"}],"action":"","property":"","from":"","to":"","reg":false,"x":610,"y":560,"wires":[["a36b9cf8.344aa"]]},{"id":"51b5ec4f.137694","type":"csv","z":"2f489ad1.76f266","name":"","sep":",","hdrin":true,"hdrout":"","multi":"one","ret":"\\n","temp":"","skip":"0","strings":false,"x":450,"y":560,"wires":[["526fe89.1624a18"]]},{"id":"66f0794c.570df8","type":"change","z":"2f489ad1.76f266","name":"add XML header","rules":[{"t":"set","p":"payload","pt":"msg","to":"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?><root>","tot":"str"}],"action":"","property":"","from":"","to":"","reg":false,"x":360,"y":380,"wires":[["a8f14a7e.c64508"]]},{"id":"a8f14a7e.c64508","type":"file","z":"2f489ad1.76f266","name":"","filename":"D:\\\\output.xml","appendNewline":true,"createDir":false,"overwriteFile":"true","encoding":"none","x":560,"y":380,"wires":[["df914794.c9f6e8"]]},{"id":"cd5bad53.87abc","type":"file","z":"2f489ad1.76f266","name":"","filename":"D:\\\\output.xml","appendNewline":false,"createDir":false,"overwriteFile":"false","encoding":"utf8","x":840,"y":720,"wires":[[]]},{"id":"2bb5a351.28925c","type":"change","z":"2f489ad1.76f266","name":"add XML 
footer","rules":[{"t":"set","p":"payload","pt":"msg","to":"</root>","tot":"str"}],"action":"","property":"","from":"","to":"","reg":false,"x":640,"y":720,"wires":[["cd5bad53.87abc"]]},{"id":"7108c578.37529c","type":"switch","z":"2f489ad1.76f266","name":"msg.complete is not null","property":"complete","propertyType":"msg","rules":[{"t":"nnull"}],"checkall":"true","repair":false,"outputs":1,"x":410,"y":720,"wires":[["2bb5a351.28925c"]]},{"id":"450cfa3.d6f2a04","type":"comment","z":"2f489ad1.76f266","name":"save and restore msg.parts because CSV node uses it","info":"","x":420,"y":520,"wires":[]},{"id":"74db8414.0aee8c","type":"function","z":"2f489ad1.76f266","name":"set msg.complete to last item","func":"if (msg.parts.index + 1 === msg.parts.count) {\n    msg.complete = true;\n}\n\nreturn msg;","outputs":1,"noerr":0,"x":700,"y":460,"wires":[["73a1bfc5.0dbdf"]]}]

When you use the file-in node in per-line mode it adds a msg.parts property that includes the number of the line - and, when complete, the total number of lines - so you could use those a) to trigger adding the header and b) to append the footer.
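In the flow above that is what the "msg.complete is not null" switch does for the footer; condensed into a single hypothetical function node, that branch would be roughly:

```javascript
// sketch only: when the flagged last message arrives, replace the payload
// with the closing tag and let a file node in append mode write it out;
// all other messages are dropped here (their fragments are already written)
if (msg.complete) {
    msg.payload = '</root>';
    return msg;
}
return null;
```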
