velox: Example similar to ParquetTpchTest.cpp is giving unresolved function error
Below is the C++ code that sets up the test fixture, followed by the manually written query plan for TPC-H query 1. It is adapted from the existing example at https://github.com/facebookincubator/velox/blob/main/velox/dwio/parquet/tests/ParquetTpchTest.cpp. Since the code is quite long, let me share the error message first.
C++ exception with description "Cannot resolve function call: lte(DATE, DATE)" thrown in the test body.
I believe something is wrong with the setup, since the message says that something as simple as a date comparison function is not available. @mbasmanova @majetideepak Any hints on what I might be doing wrong here? Thank you!
/// Test fixture that executes hand-written TPC-H query plans over Parquet
/// files read through the Hive connector.
///
/// Suite-level hooks register the Presto SQL functions; per-test hooks
/// register the local file system, the Parquet reader factory and the Hive
/// connector, and undo the latter two afterwards.
class TPCHBench : public testing::Test {
 protected:
  /// Registers the Presto SQL functions once for the whole test suite.
  ///
  /// GoogleTest only invokes this hook when it is spelled exactly
  /// `SetUpTestSuite`. The previous spelling (`SetupTestSuite`) was never
  /// called, so no functions were registered and expression compilation
  /// failed with "Cannot resolve function call: lte(DATE, DATE)".
  static void SetUpTestSuite() {
    functions::prestosql::registerAllFunctions();
  }

  /// Per-test setup: registers the local file system, the Parquet reader
  /// factory and a Hive connector under `kHiveConnectorId`.
  void SetUp() override {
    filesystems::registerLocalFileSystem();
    parquet::registerParquetReaderFactory();
    auto hiveConnector =
        connector::getConnectorFactory(
            connector::hive::HiveConnectorFactory::kHiveConnectorName)
            ->newConnector(kHiveConnectorId, nullptr);
    connector::registerConnector(hiveConnector);
  }

  /// Suite-level cleanup.
  ///
  /// NOTE(review): the connector and the Parquet reader factory are already
  /// unregistered by TearDown() after every test, and this fixture never
  /// registers a DWRF reader factory. These calls look redundant; they are
  /// kept unchanged until it is confirmed that dropping them is safe.
  static void TearDownTestSuite() {
    connector::unregisterConnector(kHiveConnectorId);
    facebook::velox::dwrf::unregisterDwrfReaderFactory();
    parquet::unregisterParquetReaderFactory();
  }

  /// Per-test cleanup: undoes the connector and Parquet reader registrations
  /// performed in SetUp().
  void TearDown() override {
    connector::unregisterConnector(kHiveConnectorId);
    parquet::unregisterParquetReaderFactory();
  }

  /// Adds one split per `.parquet` file for every table referenced by the
  /// query, then signals that no more splits will follow for that table.
  ///
  /// @param cursor    Cursor whose task receives the splits.
  /// @param query_idx Index passed to getTPCHQuery() to look up the query.
  ///
  /// Each entry of `tables_referenced` is used as (plan node id, directory
  /// name under FLAGS_data).
  void AddTPCHSplits(std::shared_ptr<TaskCursor> cursor, size_t query_idx) {
    const fs::path root_path(FLAGS_data);
    const auto& query = getTPCHQuery(query_idx);
    for (const auto& [plan_node_id, table_dir] : query.tables_referenced) {
      for (const auto& dir_entry :
           fs::directory_iterator{root_path / table_dir}) {
        // Only Parquet data files are turned into splits.
        if (dir_entry.path().extension() != ".parquet") {
          continue;
        }
        // A split covering the whole file: offset 0, maximum length.
        auto split = std::make_shared<HiveConnectorSplit>(
            kHiveConnectorId,
            dir_entry.path().string(),
            facebook::velox::dwio::common::FileFormat::PARQUET,
            0,
            std::numeric_limits<uint64_t>::max());
        cursor->task()->addSplit(plan_node_id, exec::Split(folly::copy(split)));
      }
      cursor->task()->noMoreSplits(plan_node_id);
    }
  }

 public:
  /// Builds the plan for query `idx`, runs it to completion and prints the
  /// output type followed by up to the first four rows of the first result
  /// batch.
  ///
  /// @param idx Index passed to getTPCHQuery() to look up the query.
  void RunQuery(size_t idx) {
    const auto& query = getTPCHQuery(idx);

    int nthreads = FLAGS_nthreads;
    if (nthreads == 0) {
      // hardware_concurrency() may return 0 when the value cannot be
      // determined; fall back to a single driver in that case.
      const unsigned detected = std::thread::hardware_concurrency();
      nthreads = detected == 0 ? 1 : static_cast<int>(detected);
    }

    auto plan = query.plan_builder();

    CursorParameters params;
    params.maxDrivers = nthreads;
    params.numResultDrivers = 1;
    params.planNode = plan;

    auto cursor = std::make_shared<TaskCursor>(params);
    AddTPCHSplits(cursor, idx);

    // Keep every batch alive until printing is done.
    std::vector<VectorPtr> results;
    while (cursor->moveNext()) {
      results.push_back(cursor->current());
    }

    std::cout << "TYPE: " << plan->outputType()->toString() << std::endl;
    std::cout << "Results: \n";
    if (results.empty()) {
      // Previously results[0] was dereferenced unconditionally.
      std::cout << "(no rows)" << std::endl;
    } else {
      const auto& first_batch = results.front();
      const auto num_rows = first_batch->size();
      // Print at most four rows, never past the end of the batch.
      for (decltype(first_batch->size()) row = 0; row < num_rows && row < 4;
           ++row) {
        std::cout << first_batch->toString(row) << std::endl;
      }
    }
    std::cout << std::endl;
  }
};
Below is the query definition I am using:
// Descriptor for TPC-H query 1: the tables and columns it reads plus a
// factory that builds its Velox plan.
//
// The `field : value` initializers below are the GNU designator form used
// throughout this definition.
const TPCHQuery q1 = {
  idx : 1,
  // Pairs of (plan node id, table name). "0" is presumably the id handed to
  // the table scan, since the id generator below starts at 0 and the scan is
  // the first node built.
  tables_referenced : {{"0", "lineitem"}},
  columns_referenced : {
      {"lineitem",
       {"l_shipdate",
        "l_returnflag",
        "l_linestatus",
        "l_quantity",
        "l_extendedprice",
        "l_discount",
        "l_tax"}}},
  plan_builder : []() {
    auto idGenerator =
        std::make_shared<facebook::velox::exec::test::PlanNodeIdGenerator>(0);

    // Leaf pipeline: scan -> filter -> project -> partial aggregation ->
    // partial order-by. It is built before the outer pipeline adds any node,
    // so its nodes draw their ids from the generator first.
    //
    // NOTE(review): the partial orderBy feeds a local partition followed by
    // a final aggregation; confirm the ordering at this stage is needed.
    auto leafPlan =
        PlanBuilder(idGenerator)
            .tableScan(getRowTypeForQuery("lineitem", getTPCHQuery(1)))
            .filter("l_shipdate <= cast('1998-09-02' as DATE)")
            .project(
                {"l_returnflag",
                 "l_linestatus",
                 "l_quantity",
                 "l_extendedprice",
                 "l_extendedprice * (cast(1 as DOUBLE) - l_discount) AS disc_price",
                 "l_extendedprice * (cast(1 as DOUBLE) - l_discount) * (cast(1 as DOUBLE) + l_tax) AS charge",
                 "l_discount"})
            .partialAggregation(
                {0, 1},
                {"sum(l_quantity)",
                 "sum(l_extendedprice)",
                 "sum(disc_price)",
                 "sum(charge)",
                 "avg(l_quantity)",
                 "avg(l_extendedprice)",
                 "avg(l_discount)",
                 "count(1)"})
            .orderBy({0, 1}, {kAscNullsLast, kAscNullsLast}, true)
            .planNode();

    // Outer pipeline: gather the leaf output, finish the aggregation over
    // the partial results (referenced as a0..a7) and sort by the two
    // grouping keys.
    //
    // NOTE(review): the partial count(1) is finalized with sum(a7); confirm
    // this is intended rather than count(a7).
    return PlanBuilder(idGenerator)
        .localPartition({}, {leafPlan})
        .finalAggregation(
            {0, 1},
            {"sum(a0) as sum_qty",
             "sum(a1) AS sum_base_price",
             "sum(a2) AS sum_disc_price",
             "sum(a3) AS sum_charge",
             "avg(a4) AS avg_qty",
             "avg(a5) AS avg_price",
             "avg(a6) AS avg_disc",
             "sum(a7) AS count_order"},
            {BIGINT(),
             DOUBLE(),
             DOUBLE(),
             DOUBLE(),
             DOUBLE(),
             DOUBLE(),
             DOUBLE(),
             BIGINT()})
        .orderBy({0, 1}, {kAscNullsLast, kAscNullsLast}, false)
        .planNode();
  }
};
About this issue
- Original URL
- State: closed
- Created 2 years ago
- Comments: 17 (15 by maintainers)
@9prady9 Happy to hear the problem is solved.
@mbasmanova Yes, that worked. Thank you!
I added
.filter("l_shipdate <= cast('1998-09-02' as DATE)")
to ParquetTpchTest.cpp and it works. A standalone reproducer would help.