onnx-mlir: onnx-mlir killed while generating .so file for onnx.CategoryMapper with 65537+ string constants

In my environment, onnx-mlir is killed while generating the .so file for an onnx.CategoryMapper with 100K string constants.

$ ./test_categorymapper.sh
**** Generating test_categorymapper_100000-onnxbasic.mlir
**** Compiling test_categorymapper_100000-onnxbasic.mlir
time /home1/negishi/src/dlc.git/onnx-mlir.opt/build/Debug/bin/onnx-mlir -v -mcpu=z14 --EmitLib --preserveBitcode --preserveLLVMIR test_categorymapper_100000-onnxbasic.mlir
./test_categorymapper.sh: line 28: 4051033 Killed                  /home1/negishi/src/dlc.git/onnx-mlir.opt/build/Debug/bin/onnx-mlir -v -mcpu=z14 --EmitLib --preserveBitcode --preserveLLVMIR test_categorymapper_100000-onnxbasic.mlir

real    11m39.898s
user    7m16.200s
sys     1m33.217s

Please reproduce the issue with the following sh script, which generates the input file and compiles it with onnx-mlir. (Please edit the line beginning with “ONNXMLIR=” so that it points to the onnx-mlir command in your environment.)


# Reproduction script: writes an MLIR module containing a single
# onnx.CategoryMapper op with a large category table, then compiles it
# with onnx-mlir.

# Upper bound of the generated category range. The loops below emit the
# entries 1..ELEMENTNUM after an initial entry 0, so each attribute list ends
# up with ELEMENTNUM + 1 elements.
ELEMENTNUM=100000
# Name of the generated MLIR input file.
INPUTFILE="test_categorymapper_$ELEMENTNUM-onnxbasic.mlir"

# Prefix placed in front of the compile command to measure its run time.
TIME="time"
# Path of the onnx-mlir binary. The second assignment overrides the first;
# adjust it for the local environment.
ONNXMLIR="./build/Debug/bin/onnx-mlir"
ONNXMLIR="/home/negishi/src/dlc.git/onnx-mlir.opt/build/Debug/bin/onnx-mlir"
# Options passed to onnx-mlir.
COMPILEOPTION="-v -mcpu=z14 --EmitLib --preserveBitcode --preserveLLVMIR"

echo "**** Generating $INPUTFILE"
# Everything printed inside the parenthesized group is redirected into
# $INPUTFILE.
(
echo "module attributes {} {"
echo "  func.func @main_graph(%arg0: tensor<?x!onnx.String>) -> (tensor<*xi64>) {"
# cats_int64s: the integers 0..ELEMENTNUM, printed on one line.
echo -n "    %0 = \"onnx.CategoryMapper\"(%arg0) {cats_int64s = [0"
for a in `seq 1 $ELEMENTNUM`; do echo -n ", $a"; done
# cats_strings: the same numbers as quoted strings "0".."ELEMENTNUM".
echo -n "], cats_strings = [\"0\""
for a in `seq 1 $ELEMENTNUM`; do echo -n ", \"$a\""; done
echo "], default_int64 = 74353 : si64, default_string = \"_Unused\", onnx_node_name = \"CategoryMapper_4\"} : (tensor<?x!onnx.String>) -> tensor<*xi64>"
echo "   return %0 : tensor<*xi64>"
echo "  }"
echo "  \"onnx.EntryPoint\"() {func = @main_graph} : () -> ()"
echo "}"
) > $INPUTFILE

echo "**** Compiling $INPUTFILE"
# Build the full command line, print it, then run it through eval so that the
# "time" prefix is interpreted by the shell.
CMD="$TIME $ONNXMLIR $COMPILEOPTION $INPUTFILE"
echo "$CMD"
eval "$CMD"

About this issue

Most upvoted comments

Created a patch in LLVM to fix this issue: https://reviews.llvm.org/D148487

Good news: I can fix the memory issue. Bad news: we need to modify LLVM.

It is related to constant folding of InsertValue. If an array has N strings, there will be N InsertValue instructions to insert the constant addresses into the array. Each time an InsertValue is folded, a new constant array of N elements is created and never freed. This is similar to the memory consumption issue in our compiler when doing constant propagation.

I temporarily fixed this issue with the following patch:

diff --git a/llvm/include/llvm/IR/ConstantFold.h b/llvm/include/llvm/IR/ConstantFold.h
index 77f5f0eb174a..713b5056991f 100644
--- a/llvm/include/llvm/IR/ConstantFold.h
+++ b/llvm/include/llvm/IR/ConstantFold.h
@@ -47,6 +47,9 @@ namespace llvm {
                                                 ArrayRef<unsigned> Idxs);
   Constant *ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val,
                                                ArrayRef<unsigned> Idxs);
+  Constant *ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val,
+                                               ArrayRef<unsigned> Idxs,
+                                               bool gc);
   Constant *ConstantFoldUnaryInstruction(unsigned Opcode, Constant *V);
   Constant *ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1,
                                           Constant *V2);
diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index 59131a4264fc..a61ecb03e809 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -775,15 +775,17 @@ Constant *llvm::ConstantFoldExtractValueInstruction(Constant *Agg,

 Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
                                                    Constant *Val,
-                                                   ArrayRef<unsigned> Idxs) {
+                                                   ArrayRef<unsigned> Idxs,
+                                                   bool gc) {
   // Base case: no indices, so replace the entire value.
   if (Idxs.empty())
     return Val;

   unsigned NumElts;
-  if (StructType *ST = dyn_cast<StructType>(Agg->getType()))
+  if (StructType *ST = dyn_cast<StructType>(Agg->getType())) {
+    gc = false;
     NumElts = ST->getNumElements();
-  else
+  } else
     NumElts = cast<ArrayType>(Agg->getType())->getNumElements();

   SmallVector<Constant*, 32> Result;
@@ -792,16 +794,24 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
     if (!C) return nullptr;

     if (Idxs[0] == i)
-      C = ConstantFoldInsertValueInstruction(C, Val, Idxs.slice(1));
+      C = ConstantFoldInsertValueInstruction(C, Val, Idxs.slice(1), gc);

     Result.push_back(C);
   }

   if (StructType *ST = dyn_cast<StructType>(Agg->getType()))
     return ConstantStruct::get(ST, Result);
-  return ConstantArray::get(cast<ArrayType>(Agg->getType()), Result);
+
+  Constant *res = ConstantArray::get(cast<ArrayType>(Agg->getType()), Result);
+  if (gc && Agg->hasZeroLiveUses())
+    Agg->destroyConstant();
+  return res;
 }

+Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val,
+                                                   ArrayRef<unsigned> Idxs) {
+  return ConstantFoldInsertValueInstruction(Agg, Val, Idxs, true);
+}
 Constant *llvm::ConstantFoldUnaryInstruction(unsigned Opcode, Constant *C) {
   assert(Instruction::isUnaryOp(Opcode) && "Non-unary instruction detected");

With this patch, we can compile @negiyas’s example in 9 minutes with around 1.6 GB of memory consumption:

tungld@53315fb59bc7:~/dl/string-array-llvm$ time ./fail.sh
**** Generating test_categorymapper_100000-onnxbasic.mlir
**** Compiling test_categorymapper_100000-onnxbasic.mlir
time /home/tungld/dl/onnx-mlir/build/Debug/bin/onnx-mlir -v -mcpu=z14 --EmitLib --preserveBitcode --preserveLLVMIR test_categorymapper_100000-onnxbasic.mlir
[/home/tungld/dl/string-array-llvm/]/home/tungld/dl/llvm-project/build/bin/opt: opt -O0 --mtriple=s390x-ibm-linux --mcpu=z14 -o test_categorymapper_100000-onnxbasic.bc test_categorymapper_100000-onnxbasic.unoptimized.bc
[/home/tungld/dl/string-array-llvm/]/home/tungld/dl/llvm-project/build/bin/llc: llc -O0 --mtriple=s390x-ibm-linux --mcpu=z14 -filetype=obj -relocation-model=pic -o test_categorymapper_100000-onnxbasic.o test_categorymapper_100000-onnxbasic.bc
[/home/tungld/dl/string-array-llvm/]/usr/bin/c++: c++ test_categorymapper_100000-onnxbasic.o -o test_categorymapper_100000-onnxbasic.so -shared -fPIC -L/home/tungld/dl/onnx-mlir/build/Debug/lib -lcruntime
Shared library test_categorymapper_100000-onnxbasic.so has been compiled.

real    9m28.690s
user    9m24.713s
sys     0m2.406s

real    9m29.545s
user    9m25.246s
sys     0m2.726s
Screenshot 2023-04-13 at 17 31 54

Will prepare a patch to LLVM.