velox: Example similar to ParquetTpchTest.cpp is giving unresolved function error

Below is the C++ code that sets up the test fixture, followed by the manually written query plan for TPC-H query 1. It is adapted from the existing example at https://github.com/facebookincubator/velox/blob/main/velox/dwio/parquet/tests/ParquetTpchTest.cpp. Since the code is quite long, let me share the error message first.

C++ exception with description "Cannot resolve function call: lte(DATE, DATE)" thrown in the test body.

I believe something is wrong with the setup, since the message says that something as simple as a date comparison function is not available. @mbasmanova @majetideepak Any hints on what I might be doing wrong here? Thank you!

/// Test fixture that runs hand-written TPC-H query plans over Parquet files
/// read through the Hive connector.
class TPCHBench : public testing::Test {
 protected:
  /// Registers the Presto SQL functions once for the whole test suite.
  ///
  /// GoogleTest only invokes this hook when it is spelled `SetUpTestSuite`
  /// (capital "U"). Under the spelling `SetupTestSuite` the framework never
  /// calls it, no functions get registered, and filters such as
  /// `l_shipdate <= cast('1998-09-02' as DATE)` fail with
  /// "Cannot resolve function call: lte(DATE, DATE)".
  ///
  /// NOTE(review): GoogleTest releases before 1.10 name this hook
  /// `SetUpTestCase` -- confirm against the GoogleTest version in use.
  /// NOTE(review): the query plans also use aggregates (sum/avg/count);
  /// confirm that aggregate functions are registered somewhere as well.
  static void SetUpTestSuite() {
    functions::prestosql::registerAllFunctions();
  }

  /// Per-test setup: registers the local file system, the Parquet reader
  /// factory, and a Hive connector under `kHiveConnectorId`.
  void SetUp() override {
    filesystems::registerLocalFileSystem();
    parquet::registerParquetReaderFactory();
    auto hiveConnector =
        connector::getConnectorFactory(
            connector::hive::HiveConnectorFactory::kHiveConnectorName)
            ->newConnector(kHiveConnectorId, nullptr);
    connector::registerConnector(hiveConnector);
  }

  /// Suite-level cleanup.
  ///
  /// NOTE(review): the connector and the Parquet reader factory are already
  /// unregistered by the per-test TearDown(), and this fixture never
  /// registers a DWRF reader factory -- confirm these calls are harmless
  /// no-ops or drop them.
  static void TearDownTestSuite() {
    connector::unregisterConnector(kHiveConnectorId);
    facebook::velox::dwrf::unregisterDwrfReaderFactory();
    parquet::unregisterParquetReaderFactory();
  }

  /// Per-test cleanup: undoes the registrations made in SetUp().
  void TearDown() override {
    connector::unregisterConnector(kHiveConnectorId);
    parquet::unregisterParquetReaderFactory();
  }

  /// Adds one split per `.parquet` file for every table referenced by the
  /// query, then signals that no more splits will follow for that table.
  ///
  /// @param cursor Cursor whose task receives the splits.
  /// @param query_idx Index passed to getTPCHQuery() to look up the tables.
  ///
  /// For each `tables_referenced` entry, `first` is used as the id passed to
  /// addSplit()/noMoreSplits() and `second` is the directory name under
  /// FLAGS_data that is scanned for files.
  void AddTPCHSplits(std::shared_ptr<TaskCursor> cursor, size_t query_idx) {
    const fs::path root_path(FLAGS_data);

    for (const auto& table : getTPCHQuery(query_idx).tables_referenced) {
      for (const auto& dir_entry :
           fs::directory_iterator{root_path / table.second}) {
        // Only Parquet files are turned into splits.
        if (dir_entry.path().extension() != ".parquet") {
          continue;
        }
        // The trailing arguments are 0 and the maximum uint64_t value;
        // presumably start offset and length, i.e. the whole file -- verify
        // against the HiveConnectorSplit constructor.
        auto split = std::make_shared<HiveConnectorSplit>(
            kHiveConnectorId,
            std::string(dir_entry.path()),
            facebook::velox::dwio::common::FileFormat::PARQUET,
            0,
            std::numeric_limits<uint64_t>::max());
        cursor->task()->addSplit(table.first, exec::Split(folly::copy(split)));
      }
      cursor->task()->noMoreSplits(table.first);
    }
  }

 public:
  /// Builds the plan for TPC-H query `idx`, feeds it the Parquet splits,
  /// drains the cursor, and prints the output type plus up to the first four
  /// rows of the first result batch.
  ///
  /// @param idx Index passed to getTPCHQuery().
  void RunQuery(size_t idx) {
    auto query = getTPCHQuery(idx);

    // FLAGS_nthreads == 0 means "use the hardware concurrency".
    int nthreads = FLAGS_nthreads;
    if (nthreads == 0) {
      nthreads = std::thread::hardware_concurrency();
    }
    // hardware_concurrency() may itself report 0 when the value is not
    // computable; never ask for fewer than one driver.
    if (nthreads <= 0) {
      nthreads = 1;
    }

    auto plan = query.plan_builder();

    CursorParameters params;
    params.maxDrivers = nthreads;
    params.numResultDrivers = 1;
    params.planNode = plan;

    auto cursor = std::make_shared<TaskCursor>(params);
    AddTPCHSplits(cursor, idx);

    // Collect every batch the cursor produces.
    std::vector<VectorPtr> results;
    while (cursor->moveNext()) {
      results.push_back(cursor->current());
    }

#if 1
    std::cout << "TYPE: " << plan->outputType()->toString() << std::endl;
    std::cout << "Results: \n";
    if (!results.empty()) {
      // Print at most four rows, and never index past the end of the batch.
      constexpr int32_t kMaxPrintedRows = 4;
      const auto& firstBatch = results[0];
      for (int32_t row = 0;
           row < kMaxPrintedRows && row < firstBatch->size();
           ++row) {
        std::cout << firstBatch->toString(row) << std::endl;
      }
    }
    std::cout << std::endl;
#endif
  }
};

Given below is the query I have

// Hand-written plan description for TPC-H query 1 over the "lineitem" table.
const TPCHQuery q1 = {
    idx : 1,
    // Maps the id used when adding splits ("0") to the table directory name.
    // The id generator below also starts at 0; presumably "0" is therefore
    // the id of the table scan node -- verify against PlanNodeIdGenerator.
    tables_referenced : {{"0", "lineitem"}},
    // Columns of each table that the plan reads.
    columns_referenced : {
        {"lineitem",
         {"l_shipdate",
          "l_returnflag",
          "l_linestatus",
          "l_quantity",
          "l_extendedprice",
          "l_discount",
          "l_tax"}}},
    plan_builder : []() {
      auto idGenerator =
          std::make_shared<facebook::velox::exec::test::PlanNodeIdGenerator>(
              0);

      // Stage 1: scan, filter by ship date, project the derived price
      // columns, then aggregate partially. Channels 0 and 1 are
      // l_returnflag and l_linestatus per the projection order.
      auto partialStage =
          PlanBuilder(idGenerator)
              .tableScan(getRowTypeForQuery("lineitem", getTPCHQuery(1)))
              .filter("l_shipdate <= cast('1998-09-02' as DATE)")
              .project(
                  {"l_returnflag",
                   "l_linestatus",
                   "l_quantity",
                   "l_extendedprice",
                   "l_extendedprice * (cast(1 as DOUBLE) - l_discount) AS disc_price",
                   "l_extendedprice * (cast(1 as DOUBLE) - l_discount) * (cast(1 as DOUBLE) + l_tax) AS charge",
                   "l_discount"})
              .partialAggregation(
                  {0, 1},
                  {"sum(l_quantity)",
                   "sum(l_extendedprice)",
                   "sum(disc_price)",
                   "sum(charge)",
                   "avg(l_quantity)",
                   "avg(l_extendedprice)",
                   "avg(l_discount)",
                   "count(1)"})
              // Trailing `true` is presumably the "partial" flag of
              // orderBy -- verify against PlanBuilder.
              .orderBy({0, 1}, {kAscNullsLast, kAscNullsLast}, true)
              .planNode();

      // Stage 2: gather the partial results, finish the aggregation with the
      // listed result types, and apply the final ordering.
      return PlanBuilder(idGenerator)
          .localPartition({}, {partialStage})
          .finalAggregation(
              {0, 1},
              {"sum(a0) as sum_qty",
               "sum(a1) AS sum_base_price",
               "sum(a2) AS sum_disc_price",
               "sum(a3) AS sum_charge",
               "avg(a4) AS avg_qty",
               "avg(a5) AS avg_price",
               "avg(a6) AS avg_disc",
               "sum(a7) AS count_order"},
              {BIGINT(),
               DOUBLE(),
               DOUBLE(),
               DOUBLE(),
               DOUBLE(),
               DOUBLE(),
               DOUBLE(),
               BIGINT()})
          .orderBy({0, 1}, {kAscNullsLast, kAscNullsLast}, false)
          .planNode();
    }
};

About this issue

  • Original URL
  • State: closed
  • Created 2 years ago
  • Comments: 17 (15 by maintainers)

Commits related to this issue

Most upvoted comments

@9prady9 Happy to hear the problem is solved.

@mbasmanova Yes, that worked. Thank you!

I added .filter("l_shipdate <= cast('1998-09-02' as DATE)") to the ParquetTpchTest.cpp and it works. Standalone will help.