Appearance
1. 简介
Velox 是由 Meta 开源的一款基于 C++ 实现的通用向量执行引擎,旨在为各种数据系统(如查询引擎、分析数据库、ETL 工具等)提供高性能的底层执行能力。
Velox 的核心设计目标是提升大规模数据分析的性能和效率,同时通过模块化设计促进代码复用,避免各大系统重复“造轮子”。
文档参考:https://facebookincubator.github.io/velox/
2. 开发环境搭建
按照项目 README 的介绍直接在宿主机上构建,很容易遇到各种问题。目前尝试下来,最方便的方式是在容器环境中构建和使用,官方也提供了多种环境的镜像。
我所使用的机器环境:
- 零刻 SER8
- CPU: AMD 锐龙 R7-8745HS (8核16线程)
- RAM: 32G
Dockerfile 和构建命令参考如下:
Dockerfile
# Development image with Velox built from source and installed via
# `cmake --install`.
FROM ghcr.io/facebookincubator/velox-dev:ubuntu-22.04

# Optional proxy settings passed with --build-arg; they are exported so the
# clone and dependency downloads in the RUN step below can use them.
ARG HTTP_PROXY
ARG HTTPS_PROXY
ENV HTTP_PROXY=${HTTP_PROXY}
ENV HTTPS_PROXY=${HTTPS_PROXY}

# Clone, build and install Velox in a single layer, then drop the build tree
# and downloaded dependencies to keep the image small.
# - `apt-get update` runs in the same layer as the install so the install never
#   depends on a missing or stale package index.
# - The utf8proc headers are copied by hand next to the installed Velox
#   headers (NOTE(review): presumably because the install step does not ship
#   them -- confirm against the Velox install rules).
RUN apt-get update && \
    apt-get install -y vim && \
    cd / && \
    rm -rf velox && \
    git clone https://github.com/facebookincubator/velox.git && \
    cd velox && \
    ./scripts/setup-ubuntu.sh && \
    make && \
    cmake --install _build/release && \
    cp -r /velox/velox/external/utf8proc /usr/local/include/velox/external/ && \
    rm -rf /velox/_build && \
    rm -rf /velox/deps-download

# Clear the proxy variables so containers started from this image do not
# inherit the build-time proxy.
ENV HTTP_PROXY=""
ENV HTTPS_PROXY=""
shell
#!/bin/bash
# Builds the Velox development image from the Dockerfile in the current
# directory and tags it with today's date (velox-dev:YYYYMMDD).

# Stop at the first failing command, unset variable, or failed pipeline stage.
set -euo pipefail

# Proxy forwarded to the Dockerfile as the HTTP_PROXY / HTTPS_PROXY build args.
proxy="http://192.168.31.2:1087"
tag="$(date +"%Y%m%d")"

# Expansions are quoted so values are always passed as single arguments.
docker build \
    --build-arg HTTP_PROXY="${proxy}" \
    --build-arg HTTPS_PROXY="${proxy}" \
    -t "velox-dev:${tag}" .
TIP
- 因为众所周知的网络原因,拉代码或者安装依赖的时候会比较慢,此时可以设置 http 代理。如果不需要可不设置。
- 编译 Velox 比较耗资源,默认 `make -j` 的并行数是 CPU 线程数(`getconf _NPROCESSORS_CONF`),内存可能不够,此时可以临时调大 swap(在我自己的环境中调到 32G 后可以正常编译,不会卡住):
shell
# Replaces the active swap with a newly created swap file.
# Size and location can be overridden from the environment; the defaults match
# the values used in this guide.
SWAP_SIZE="${SWAP_SIZE:-32G}"
SWAP_FILE="${SWAP_FILE:-/swapfile}"

# Turn off all currently active swap
sudo swapoff -a
# The pre-existing swap file was /swap.img; allocate a new file instead
sudo fallocate -l "${SWAP_SIZE}" "${SWAP_FILE}"
# Restrict access to root only
sudo chmod 600 "${SWAP_FILE}"
# Format the file as swap space
sudo mkswap "${SWAP_FILE}"
# Enable the new swap file
sudo swapon "${SWAP_FILE}"
# Verify
sudo swapon --show
# or
free -h
3. 测试
- 相关代码放在 GitHub 上
- 也可以直接使用构建好的镜像:`docker pull ghcr.io/syaning/velox-dev:20251018`
cpp
#include <iostream>
#include "velox/common/memory/Memory.h"
#include "velox/vector/BaseVector.h"
#include "velox/vector/ComplexVector.h"
#include "velox/vector/ConstantVector.h"
#include "velox/vector/FlatVector.h"
using namespace facebook::velox;
int main()
{
// init memory pool
memory::MemoryManager::initialize({});
auto memoryManager = memory::MemoryManager::getInstance();
auto rootPool = memoryManager->addRootPool("root");
auto leafPool = rootPool->addLeafChild("leaf");
// create id vector with values 1 to 7
auto id = BaseVector::create(INTEGER(), 7, leafPool.get());
auto *idValues = id->asFlatVector<int32_t>()->mutableRawValues();
for (vector_size_t i = 0; i < id->size(); ++i)
{
idValues[i] = static_cast<int32_t>(i + 1);
}
// create constant vector with value 42
auto con =
BaseVector::createConstant(INTEGER(), Variant(42), 7, leafPool.get());
// create day_of_week vector with string values
auto dow = BaseVector::create(VARCHAR(), 7, leafPool.get());
auto *dowValues = dow->asFlatVector<StringView>()->mutableRawValues();
std::vector<std::string> days = {
"monday",
"tuesday",
"wednesday",
"thursday",
"friday",
"saturday",
"sunday"};
for (vector_size_t i = 0; i < dow->size(); ++i)
{
dowValues[i] = StringView(days[i]);
}
// create row vector with the above three vectors as children
std::vector<std::string> names = {"id", "constant_42", "day_of_week"};
std::vector<TypePtr> types = {INTEGER(), INTEGER(), VARCHAR()};
auto rowType = ROW(std::move(names), std::move(types));
std::vector<VectorPtr> children = {id, con, dow};
auto rowVector = std::make_shared<RowVector>(
leafPool.get(), rowType, nullptr, id->size(), std::move(children));
std::cout << rowVector->toString() << std::endl;
std::cout << rowVector->toString(0, rowVector->size()) << std::endl;
return 0;
}
cmake
# Build script for the Velox demo: locates the installed Velox headers and
# library, wraps them in an imported target, and links the demo executable.
cmake_minimum_required(VERSION 3.18)
project(velox_demo LANGUAGES CXX)

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# NOTE(review): CMAKE_FIND_LIBRARY_PREFERENCE does not appear to be a documented
# CMake variable, so this line likely has no effect; kept for compatibility.
set(CMAKE_FIND_LIBRARY_PREFERENCE "STATIC")
# Search order for find_library(): static archives first, then shared objects.
# Written as two whitespace-separated arguments; the resulting list is ".a;.so".
set(CMAKE_FIND_LIBRARY_SUFFIXES ".a" ".so")

# Directory that contains the top-level `velox` header directory.
find_path(VELOX_INCLUDE_DIR velox
  HINTS /usr/local/include)
find_library(VELOX_LIBRARY velox
  HINTS /usr/local/lib/velox
        /usr/local/lib)

include(FindPackageHandleStandardArgs)
# Use the keyword signature: in the legacy signature the second argument is
# the failure message, not a "required" switch.
find_package_handle_standard_args(Velox
  REQUIRED_VARS VELOX_LIBRARY VELOX_INCLUDE_DIR)

# Outside of a find_package() call a failed lookup is only reported, not fatal,
# so stop explicitly instead of falling back to a bare `-lvelox` link.
if(NOT Velox_FOUND)
  message(FATAL_ERROR
    "Velox was not found; checked /usr/local/include and /usr/local/lib[/velox]")
endif()

add_library(velox UNKNOWN IMPORTED)
set_target_properties(velox PROPERTIES
  IMPORTED_LOCATION "${VELOX_LIBRARY}"
  INTERFACE_INCLUDE_DIRECTORIES "${VELOX_INCLUDE_DIR}"
)

add_executable(demo demo.cpp)
# Velox plus the third-party libraries it depends on, linked by name.
target_link_libraries(demo PRIVATE
  velox
  folly
  boost_context
  double-conversion
  event
  fmt
  gflags
  glog
  re2
  unwind
)
shell
#!/bin/bash
# Configures and builds the demo in a fresh ./build directory, then runs it.

# Stop at the first failure so a broken configure or build step is not
# followed by further steps.
set -euo pipefail

# Always start from a clean build tree.
rm -rf build
mkdir build
cd build

cmake ..
cmake --build .
./demo