Skip to content

Velox 开发环境搭建

1. 简介

Velox 是由 Meta 开源的一款基于 C++ 实现的通用向量执行引擎,旨在为各种数据系统(如查询引擎、分析数据库、ETL 工具等)提供高性能的底层执行能力。

Velox 的核心设计目标是提升大规模数据分析的性能和效率,同时通过模块化设计促进代码复用,避免各大系统重复“造轮子”。

文档参考:Velox 官方文档 https://facebookincubator.github.io/velox/

2. 开发环境搭建

按照项目 README 的说明直接在本机上构建,很容易遇到各种问题。目前尝试下来,最方便的方式是在容器环境中构建和使用,官方也提供了多种环境的镜像。

我所使用的机器环境:

  • 零刻 SER8
  • CPU: AMD 锐龙 R7-8745HS (8核16线程)
  • RAM: 32G

Dockerfile 和构建命令参考如下:

Dockerfile
# Velox development image: starts from the official velox-dev Ubuntu 22.04
# image, then clones, builds and installs Velox from the repository's default
# branch.
FROM ghcr.io/facebookincubator/velox-dev:ubuntu-22.04

# Optional proxy settings, passed in with --build-arg; empty when not supplied.
ARG HTTP_PROXY
ARG HTTPS_PROXY

# Export the proxy as environment variables for the build step below.
ENV HTTP_PROXY=${HTTP_PROXY}
ENV HTTPS_PROXY=${HTTPS_PROXY}

# Build steps, in order:
#   1. Refresh the package index before installing anything; without it the
#      install fails whenever the base image ships stale or no apt lists.
#      apt-get is used because apt is the interactive front end.
#   2. Replace any existing /velox directory with a fresh clone.
#   3. Install build dependencies, build, and install the release build.
#   4. Copy the utf8proc headers next to the installed Velox headers
#      (presumably the install step does not ship them - TODO confirm).
#   5. Remove the build tree and the deps-download directory to shrink the layer.
RUN apt-get update && \
    apt-get install -y vim && \
    cd / && \
    rm -rf velox && \
    git clone https://github.com/facebookincubator/velox.git && \
    cd velox && \
    ./scripts/setup-ubuntu.sh && \
    make && \
    cmake --install _build/release && \
    cp -r /velox/velox/external/utf8proc /usr/local/include/velox/external/ && \
    rm -rf /velox/_build && \
    rm -rf /velox/deps-download

# Clear the proxy again so containers started from this image do not inherit it.
ENV HTTP_PROXY=""
ENV HTTPS_PROXY=""
shell
#!/bin/bash

# Proxy forwarded to the Dockerfile through the HTTP_PROXY / HTTPS_PROXY build args.
build_proxy="http://192.168.31.2:1087"

# Image name with a date stamp as the tag (YYYYMMDD).
image_ref="velox-dev:$(date +"%Y%m%d")"

# Build from the Dockerfile in the current directory.
docker build \
  --build-arg "HTTP_PROXY=${build_proxy}" \
  --build-arg "HTTPS_PROXY=${build_proxy}" \
  -t "${image_ref}" \
  .

TIP

  • 因为众所周知的网络原因,拉代码或者安装依赖的时候会比较慢,此时可以设置 http 代理。如果不需要可不设置。
  • 编译 Velox 比较耗资源:make 的默认并行度 (-j) 等于机器线程数 (getconf _NPROCESSORS_CONF),内存可能不够用。此时可以暂时调大 swap (在我的环境中调到 32G 后可以正常编译、不再卡顿),操作如下:
shell
# Location and size of the new swap file.
# The pre-existing swap file is /swap.img; the new one is created separately.
swap_file=/swapfile
swap_size=32G

# Turn off all swap that is currently active
sudo swapoff -a

# Allocate the new swap file
sudo fallocate -l "${swap_size}" "${swap_file}"

# Restrict permissions to the owner
sudo chmod 600 "${swap_file}"

# Format the file as swap space
sudo mkswap "${swap_file}"

# Enable the new swap
sudo swapon "${swap_file}"

# Verify the result
sudo swapon --show
# or
free -h

3. 测试

  • 相关代码放在 GitHub 上。
  • 也可以直接使用构建好的镜像 docker pull ghcr.io/syaning/velox-dev:20251018
cpp
#include <iostream>
#include "velox/common/memory/Memory.h"
#include "velox/vector/BaseVector.h"
#include "velox/vector/ComplexVector.h"
#include "velox/vector/ConstantVector.h"
#include "velox/vector/FlatVector.h"

using namespace facebook::velox;

int main()
{
    // init memory pool
    memory::MemoryManager::initialize({});
    auto memoryManager = memory::MemoryManager::getInstance();
    auto rootPool = memoryManager->addRootPool("root");
    auto leafPool = rootPool->addLeafChild("leaf");

    // create id vector with values 1 to 7
    auto id = BaseVector::create(INTEGER(), 7, leafPool.get());
    auto *idValues = id->asFlatVector<int32_t>()->mutableRawValues();
    for (vector_size_t i = 0; i < id->size(); ++i)
    {
        idValues[i] = static_cast<int32_t>(i + 1);
    }

    // create constant vector with value 42
    auto con =
        BaseVector::createConstant(INTEGER(), Variant(42), 7, leafPool.get());

    // create day_of_week vector with string values
    auto dow = BaseVector::create(VARCHAR(), 7, leafPool.get());
    auto *dowValues = dow->asFlatVector<StringView>()->mutableRawValues();
    std::vector<std::string> days = {
        "monday",
        "tuesday",
        "wednesday",
        "thursday",
        "friday",
        "saturday",
        "sunday"};
    for (vector_size_t i = 0; i < dow->size(); ++i)
    {
        dowValues[i] = StringView(days[i]);
    }

    // create row vector with the above three vectors as children
    std::vector<std::string> names = {"id", "constant_42", "day_of_week"};
    std::vector<TypePtr> types = {INTEGER(), INTEGER(), VARCHAR()};
    auto rowType = ROW(std::move(names), std::move(types));
    std::vector<VectorPtr> children = {id, con, dow};
    auto rowVector = std::make_shared<RowVector>(
        leafPool.get(), rowType, nullptr, id->size(), std::move(children));

    std::cout << rowVector->toString() << std::endl;
    std::cout << rowVector->toString(0, rowVector->size()) << std::endl;

    return 0;
}
cmake
# Build script for the Velox demo: locates an installed Velox (headers and
# library) and links demo.cpp against it plus the listed dependencies.
cmake_minimum_required(VERSION 3.18)
project(velox_demo LANGUAGES CXX)

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Prefer static archives over shared objects: find_library() tries the
# suffixes in list order. Each suffix is its own argument so the list is
# formed unambiguously.
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .so)

# REQUIRED (available since CMake 3.18) stops configuration with a clear error
# when Velox is not installed, instead of failing later at link time.
find_path(VELOX_INCLUDE_DIR
          NAMES velox
          HINTS /usr/local/include
          REQUIRED)
find_library(VELOX_LIBRARY
             NAMES velox
             HINTS /usr/local/lib/velox
                   /usr/local/lib
             REQUIRED)

# Imported target wrapping the library file and include directory found above.
add_library(velox UNKNOWN IMPORTED)
set_target_properties(velox PROPERTIES
    IMPORTED_LOCATION "${VELOX_LIBRARY}"
    INTERFACE_INCLUDE_DIRECTORIES "${VELOX_INCLUDE_DIR}"
)

add_executable(demo demo.cpp)
# Libraries other than velox are passed by bare name and resolved by the linker.
target_link_libraries(demo PRIVATE
    velox
    folly
    boost_context
    double-conversion
    event
    fmt
    gflags
    glog
    re2
    unwind
)
shell
#!/bin/bash
# Configure, build and run the demo from a clean build directory.

# Abort on the first failing command, unset variable or failed pipeline stage,
# so a failed mkdir/cd never lets cmake run in the wrong directory and a failed
# build never falls through to running the binary.
set -euo pipefail

# Always start from an empty build tree.
rm -rf build
mkdir build

cd build
cmake ..
cmake --build .

./demo