Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
285 commits
Select commit Hold shift + click to select a range
b9587c9
abs
cpegeric Jan 8, 2026
0b9cc62
bug fix
cpegeric Jan 8, 2026
a2beaf7
inner product
cpegeric Jan 8, 2026
13c9b58
negative dot product
cpegeric Jan 8, 2026
aed8a33
cosine distance
cpegeric Jan 8, 2026
3ee83a6
bug fix
cpegeric Jan 8, 2026
098f41f
cosine similarity
cpegeric Jan 8, 2026
cdf2d48
bug fix
cpegeric Jan 8, 2026
b6e3335
spherical distance
cpegeric Jan 8, 2026
94fc9da
Merge branch 'main' into archsimd
cpegeric Jan 8, 2026
c7bc47a
check AVX
cpegeric Jan 8, 2026
107ba73
cleanup
cpegeric Jan 8, 2026
45560a3
bug fix
cpegeric Jan 8, 2026
c5043c9
Merge branch 'main' into archsimd
mergify[bot] Jan 8, 2026
7174862
enable simd only with go1.26+
cpegeric Jan 9, 2026
94fea4c
Merge branch 'archsimd' of github.com:cpegeric/matrixone into archsimd
cpegeric Jan 9, 2026
7cc576f
goexperiment.simd
cpegeric Jan 9, 2026
757606d
add benchmark
cpegeric Jan 9, 2026
3192a12
update benchmark
cpegeric Jan 9, 2026
5ffcdc3
abs function
cpegeric Jan 9, 2026
ab2d574
gofmt
cpegeric Jan 9, 2026
9425af1
add float32 test
cpegeric Jan 9, 2026
dc2a6e8
float64 test
cpegeric Jan 9, 2026
a3e7972
disable goexperiment=simd
cpegeric Jan 15, 2026
9b0752f
fix include path for usearch
cpegeric Feb 5, 2026
0c2f15b
fix ut
cpegeric Feb 5, 2026
7da35f9
add stream
cpegeric Feb 6, 2026
6818845
add worker
cpegeric Feb 7, 2026
ba0b62e
gofmt
cpegeric Feb 7, 2026
f6e3cdc
worker pool
cpegeric Feb 7, 2026
863c93c
remove init()
cpegeric Feb 7, 2026
1d9b72e
close channel in Stop
cpegeric Feb 7, 2026
75c8ae6
sigterm and sigint
cpegeric Feb 7, 2026
2d42d79
bug fix sigterm thread not stop
cpegeric Feb 7, 2026
4c25e19
sigterm test case
cpegeric Feb 7, 2026
e2f98ef
keepalive
cpegeric Feb 7, 2026
a29b0d3
cleanup
cpegeric Feb 7, 2026
acd31bd
stopfn
cpegeric Feb 7, 2026
2f0faef
brute-force with cuvs worker
cpegeric Feb 7, 2026
0d0c771
two ivf index will crash
cpegeric Feb 7, 2026
2ad2267
better error handling
cpegeric Feb 9, 2026
a7e3bce
bug fix check error
cpegeric Feb 9, 2026
7766829
better error handling
cpegeric Feb 9, 2026
a3bdddd
error handling
cpegeric Feb 9, 2026
9bef5ba
task result store use per-job channel to wait
cpegeric Feb 10, 2026
0cba40a
bug fix test
cpegeric Feb 10, 2026
87abd66
setting nthread to brute-force search
cpegeric Feb 10, 2026
1ae4c33
Merge branch 'main' into gpu_cuvsworker
cpegeric Feb 10, 2026
ee4cff7
always return result first even stopped
cpegeric Feb 10, 2026
be5a74e
cleanup
cpegeric Feb 10, 2026
3e172c3
cuvs must be LockOSThread with go routine
cpegeric Feb 11, 2026
2ceac55
gpu clusterer with cuvsworker
cpegeric Feb 11, 2026
3c39129
update
cpegeric Feb 11, 2026
9ca3046
disable gpu brute force index
cpegeric Feb 11, 2026
4e00002
bug fix
cpegeric Feb 12, 2026
1941d88
bug fix
cpegeric Feb 12, 2026
27b20d6
add cuvs cpp
cpegeric Feb 13, 2026
d2af75d
relocation
cpegeric Feb 13, 2026
51ecdb3
destructor
cpegeric Feb 13, 2026
6f2e395
change namespace
cpegeric Feb 13, 2026
4041cb1
change namespace
cpegeric Feb 13, 2026
973cc2a
change namespace
cpegeric Feb 13, 2026
5849207
suppress compiler warning
cpegeric Feb 13, 2026
3b47589
cleanup
cpegeric Feb 13, 2026
3a85f56
flatten vector
cpegeric Feb 13, 2026
38885d0
search with flattened vector
cpegeric Feb 16, 2026
2ee37e8
flattened vector in hostdataset
cpegeric Feb 16, 2026
be5efd7
shared mutex
cpegeric Feb 16, 2026
66a4f34
go and c interface
cpegeric Feb 16, 2026
267e516
bug fix shared mutex in Submit
cpegeric Feb 16, 2026
f149703
generate .a and .so
cpegeric Feb 16, 2026
dbf7501
Merge branch 'main' into archsimd
Feb 27, 2026
600d236
able to compile
Feb 27, 2026
349e415
merge fix
cpegeric Mar 2, 2026
c75c1ec
brute force index
cpegeric Mar 2, 2026
ddb2da8
refactor with flattened array
cpegeric Mar 2, 2026
a8d62a3
refactor cusv_worker
cpegeric Mar 2, 2026
39abb25
errmsg
cpegeric Mar 2, 2026
0cda120
errmsg
cpegeric Mar 2, 2026
aef6a37
ivfflat
cpegeric Mar 2, 2026
f355f6f
sync
cpegeric Mar 2, 2026
810607e
sharded ivfflat index
cpegeric Mar 2, 2026
348a87a
bug fix raft resource
cpegeric Mar 2, 2026
ab05746
sharded ivfflat index
cpegeric Mar 2, 2026
90abc79
add tests
cpegeric Mar 2, 2026
05610d0
helper
cpegeric Mar 2, 2026
74d6203
cagra
cpegeric Mar 2, 2026
3aacb04
support multiple data type
cpegeric Mar 2, 2026
6401eab
cleanup
cpegeric Mar 2, 2026
ac422f6
cleanup
cpegeric Mar 2, 2026
44b0a31
convert float32 to float16
cpegeric Mar 2, 2026
9096492
better float32 to float16 convsersion
cpegeric Mar 2, 2026
dac3337
extend and merge for cagra
cpegeric Mar 2, 2026
6808488
change package cuvs to mocuvs
cpegeric Mar 2, 2026
fbf0840
runtime.KeepAlive
cpegeric Mar 2, 2026
9945fcb
rename function to lowercase
cpegeric Mar 2, 2026
34eddc3
rename function to lowercase
cpegeric Mar 2, 2026
e14eac2
merge sharded and single gpu index
cpegeric Mar 2, 2026
1ba6f93
better checking snmg_handle
cpegeric Mar 2, 2026
1f02e69
rename gpu_ivf_flat_index to gpu_ivf_flat
cpegeric Mar 2, 2026
01d7e1e
rename
cpegeric Mar 2, 2026
24250b9
kmeans
cpegeric Mar 3, 2026
4879e9a
balanced kmeans
cpegeric Mar 3, 2026
4b48a60
build_params and search_params
cpegeric Mar 3, 2026
417e6bd
cpp cuvs_types
cpegeric Mar 3, 2026
34e1a98
add params
cpegeric Mar 3, 2026
f6e9616
include cpp for header
cpegeric Mar 3, 2026
cb171d3
remove ../cpp
cpegeric Mar 3, 2026
471f3f3
relocate
cpegeric Mar 3, 2026
1540501
fix test error
cpegeric Mar 3, 2026
77052b3
integrate to use cgo cuvs index
cpegeric Mar 3, 2026
54894e9
add tests
cpegeric Mar 3, 2026
8b68171
compile
cpegeric Mar 3, 2026
7ebe95a
copy .so
cpegeric Mar 4, 2026
c09ee19
rename to libmo_c
cpegeric Mar 4, 2026
8823616
fix linker in darwin
cpegeric Mar 4, 2026
50a2266
bug fix save the dataset pointer and only delete at the end. index o…
cpegeric Mar 4, 2026
ae27f31
Merge branch 'main' into gpu_cuvsworker
cpegeric Mar 4, 2026
e53a3a6
update distance type
cpegeric Mar 4, 2026
7815430
use moerr
cpegeric Mar 4, 2026
260ae1a
benchmark for bruteforce index
cpegeric Mar 4, 2026
3994578
enable gpu brute force index
cpegeric Mar 4, 2026
3adb0de
fix Makefile
cpegeric Mar 4, 2026
1ec5323
default params
cpegeric Mar 4, 2026
5f0cf17
default params in test
cpegeric Mar 4, 2026
ef0c7cd
update README
cpegeric Mar 4, 2026
1163c48
add license and comment
cpegeric Mar 4, 2026
b43fe58
Merge branch 'main' into archsimd
cpegeric Mar 4, 2026
d7dadaf
Merge branch 'gpu_cuvsworker' into archsimd
cpegeric Mar 4, 2026
da2dfce
license
cpegeric Mar 4, 2026
974cbdf
bug fix revert to lmo
cpegeric Mar 4, 2026
8db7109
Merge branch 'gpu_cuvsworker' into archsimd
cpegeric Mar 5, 2026
13a370b
remove test
cpegeric Mar 5, 2026
29e2e3f
remove test
cpegeric Mar 5, 2026
1bd9a37
Merge branch 'gpu_cuvsworker' into archsimd
cpegeric Mar 5, 2026
45c498c
ld library path
cpegeric Mar 5, 2026
1132bc5
add rapids_logger
cpegeric Mar 5, 2026
bc5d868
Merge branch 'gpu_cuvsworker' into archsimd
cpegeric Mar 5, 2026
0238107
loop unrolling
cpegeric Mar 5, 2026
0f47209
loop unrolling
cpegeric Mar 5, 2026
f874029
more loop unrolling
cpegeric Mar 5, 2026
c106b55
more loop unrolling
cpegeric Mar 5, 2026
06b00c6
more loop unrolling
cpegeric Mar 5, 2026
25c6245
optimize for zen 2
cpegeric Mar 5, 2026
49e1b3f
optimize for zen 2 inline and unsafe
cpegeric Mar 5, 2026
647c06c
return to 4x loop unrolling
cpegeric Mar 5, 2026
9ccf911
remove cuvs from async worker pool
cpegeric Mar 6, 2026
a402856
async worker pool
cpegeric Mar 6, 2026
bab3b88
check nil callback function
cpegeric Mar 6, 2026
365f968
Merge branch 'gpu_cuvsworker' into archsimd
cpegeric Mar 6, 2026
2e40c5b
darwin support
cpegeric Mar 6, 2026
24f7048
remove cuvs
cpegeric Mar 6, 2026
222b56b
remove cuvs
cpegeric Mar 6, 2026
5c1e7cc
bug fix ivfflat search slow table scan
cpegeric Mar 6, 2026
abc7929
Merge branch 'gpu_cuvsworker' into archsimd
cpegeric Mar 6, 2026
6327006
sample
Mar 6, 2026
3bfa022
Merge branch 'gpu_cuvsworker' into archsimd
Mar 6, 2026
02cbd0b
balanced kmeans
cpegeric Mar 6, 2026
a19ba58
C.malloc for kmeans
cpegeric Mar 7, 2026
f8869a0
Merge branch 'balanced_kmeans' into archsimd
Mar 7, 2026
da975c5
sample
Mar 7, 2026
cfed566
sync.Pool Product l2
cpegeric Mar 7, 2026
938d4c6
bug fix cap < n pool.Put back the memory
cpegeric Mar 7, 2026
045d688
bug fix cap < n pool.Put back the memory
cpegeric Mar 7, 2026
43fb190
bug fix cap < n pool.Put back the memory
cpegeric Mar 7, 2026
f875146
sync.Pool for brute-force index
cpegeric Mar 7, 2026
3417216
sync pool the dataset
cpegeric Mar 7, 2026
bbd0477
gpu brute force index use sync.Pool
cpegeric Mar 8, 2026
f4d4d01
sync.Pool and C.malloc
cpegeric Mar 9, 2026
46bb110
remove merge small centroids
cpegeric Mar 9, 2026
c724dee
lmo
cpegeric Mar 9, 2026
8335446
Merge branch 'gpu_cuvsworker' into balanced_kmeans
cpegeric Mar 9, 2026
7d9fb3a
cherry pick
cpegeric Mar 9, 2026
09d02c5
Merge branch 'main' into ivf_escapeheap
mergify[bot] Mar 9, 2026
b035516
merge fix
cpegeric Mar 9, 2026
228afca
Merge branch 'gpu_cuvsworker' into balanced_kmeans
cpegeric Mar 9, 2026
efe3a34
Merge branch 'balanced_kmeans' into archsimd
cpegeric Mar 9, 2026
18a65ff
update gpu
cpegeric Mar 9, 2026
b49ea80
bug fix kmeans
cpegeric Mar 9, 2026
e47845e
fix select count with version
cpegeric Mar 9, 2026
5f23108
merge fix
cpegeric Mar 9, 2026
f79efb3
zero out the memory before put to sync.Pool
cpegeric Mar 9, 2026
53f8e7a
balanced kmeans
cpegeric Mar 9, 2026
6f10ae6
fix sca
cpegeric Mar 9, 2026
3b7254f
sca test
cpegeric Mar 9, 2026
3675de6
Merge branch 'ivf_escapeheap' into gpu_cuvsworker
cpegeric Mar 9, 2026
9af53b8
bug fix u16 pool
cpegeric Mar 9, 2026
843ba67
limit sample percent between 0 and 100
cpegeric Mar 9, 2026
226e2c5
Merge branch 'gpu_cuvsworker' of github.com:cpegeric/matrixone into g…
cpegeric Mar 9, 2026
1af76c5
limit sample percent between 0 and 100
cpegeric Mar 9, 2026
7cc5bb3
go fmt
cpegeric Mar 9, 2026
d4630e2
Merge branch 'ivf_escapeheap' into gpu_cuvsworker
cpegeric Mar 9, 2026
b69d512
go fmt
cpegeric Mar 9, 2026
28469b7
go fmt
cpegeric Mar 9, 2026
caa06ba
sca
cpegeric Mar 9, 2026
deb4202
revise test
cpegeric Mar 9, 2026
0a4574e
Merge branch 'ivf_escapeheap' into gpu_cuvsworker
cpegeric Mar 9, 2026
13bd93a
Merge branch 'ivf_escapeheap' into balanced_kmeans_v3
cpegeric Mar 9, 2026
773eea3
sca
cpegeric Mar 9, 2026
c15c4db
update tests
cpegeric Mar 9, 2026
60dc973
fix make ut
cpegeric Mar 9, 2026
5a280ba
bvt tests
cpegeric Mar 9, 2026
81b05bd
ld library path
cpegeric Mar 9, 2026
d0d4e4f
fix seed
cpegeric Mar 9, 2026
5079e67
async worker pool race condition
cpegeric Mar 9, 2026
97479ee
check context
cpegeric Mar 9, 2026
1d8325f
bvt test
cpegeric Mar 9, 2026
55402f3
run_ut.sh
cpegeric Mar 10, 2026
dec4d7f
use CAllocator
cpegeric Mar 10, 2026
6bc92e6
merge fix
cpegeric Mar 10, 2026
3dd3b13
merge balanced kmeans
cpegeric Mar 10, 2026
473642f
default to use go brute force index
cpegeric Mar 10, 2026
22ea33a
Merge branch 'ivf_escapeheap' into gpu_cuvsworker
cpegeric Mar 10, 2026
2c3f367
gpu remove sync.pool
cpegeric Mar 10, 2026
84ceb5f
remove partial
cpegeric Mar 10, 2026
ae54552
merge fix
cpegeric Mar 10, 2026
0583fec
Merge branch 'gpu_cuvsworker' into archsimd
cpegeric Mar 10, 2026
e09b50f
go fmt
cpegeric Mar 10, 2026
64cdb1a
Merge branch 'ivf_escapeheap' into gpu_cuvsworker
cpegeric Mar 10, 2026
82c0d89
merge
cpegeric Mar 10, 2026
cff0eeb
Merge branch 'gpu_cuvsworker' into archsimd
cpegeric Mar 10, 2026
772f9d6
Merge branch 'ivf_escapeheap' into balanced_kmeans_v3
cpegeric Mar 10, 2026
9316375
normalized centroid
cpegeric Mar 10, 2026
4b8cc4c
cleanup malloc
cpegeric Mar 10, 2026
f43965f
Merge branch 'ivf_escapeheap' into gpu_cuvsworker
cpegeric Mar 10, 2026
f6e2b60
remove signal handler from C++
cpegeric Mar 10, 2026
88fc7d1
fast max heap
cpegeric Mar 10, 2026
96d8387
Merge branch 'ivf_escapeheap' into gpu_cuvsworker
cpegeric Mar 10, 2026
6887b7f
go fmt
cpegeric Mar 10, 2026
ff291b9
Merge branch 'gpu_cuvsworker' into archsimd
cpegeric Mar 10, 2026
c13d604
Merge branch 'balanced_kmeans_v3' into archsimd
cpegeric Mar 10, 2026
cce3bd0
go fmt
cpegeric Mar 10, 2026
877ff41
Merge branch 'ivf_escapeheap' into gpu_cuvsworker
cpegeric Mar 10, 2026
7d3de29
go fmt
cpegeric Mar 11, 2026
1c9151b
Merge branch 'ivf_escapeheap' into gpu_cuvsworker
cpegeric Mar 11, 2026
43751b2
Merge branch 'main' into ivf_escapeheap
cpegeric Mar 11, 2026
bb50776
Merge branch 'main' into gpu_cuvsworker
cpegeric Mar 11, 2026
ea0557f
Merge branch 'main' into balanced_kmeans_v3
cpegeric Mar 11, 2026
106f745
Merge branch 'gpu_cuvsworker' into archsimd
cpegeric Mar 11, 2026
d64c36a
Merge branch 'balanced_kmeans_v3' into archsimd
cpegeric Mar 11, 2026
53f968e
only use archsimd when AVX512 available otherwise fallback to loop un…
cpegeric Mar 11, 2026
50564fb
split by dataset
cpegeric Mar 11, 2026
ee7a75d
Merge branch 'ivf_escapeheap' into gpu_cuvsworker
cpegeric Mar 11, 2026
0afa220
Merge branch 'gpu_cuvsworker' into archsimd
cpegeric Mar 11, 2026
f996a50
Revert "split by dataset"
cpegeric Mar 11, 2026
227e82f
Merge branch 'ivf_escapeheap' into gpu_cuvsworker
cpegeric Mar 11, 2026
e39b77d
Merge branch 'gpu_cuvsworker' into archsimd
cpegeric Mar 11, 2026
a6022c8
single thread run in current thread
cpegeric Mar 11, 2026
ea90d7c
add centroid search test
cpegeric Mar 11, 2026
ff14d84
Merge branch 'gpu_cuvsworker' into archsimd
cpegeric Mar 11, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 45 additions & 20 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -49,17 +49,23 @@
# % MO_CL_CUDA=1 make

# where am I
ifeq ($(GO),)
GO=go
endif

ROOT_DIR = $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
BIN_NAME := mo-service
UNAME_S := $(shell uname -s | tr A-Z a-z)
UNAME_M := $(shell uname -m)
GOPATH := $(shell go env GOPATH)
GO_VERSION=$(shell go version)
GOPATH := $(shell $(GO) env GOPATH)
GO_VERSION=$(shell $(GO) version)
BRANCH_NAME=$(shell git rev-parse --abbrev-ref HEAD)
LAST_COMMIT_ID=$(shell git rev-parse --short HEAD)
BUILD_TIME=$(shell date +%s)
MO_VERSION=$(shell git symbolic-ref -q --short HEAD || git describe --tags --exact-match)
GO_MODULE=$(shell go list -m)
GO_MODULE=$(shell $(GO) list -m)
GO_MAJOR_VERSION = $(shell $(GO) version | cut -c 14- | cut -d' ' -f1 | cut -d'.' -f1)
GO_MINOR_VERSION = $(shell $(GO) version | cut -c 14- | cut -d' ' -f1 | cut -d'.' -f2)

# check the MUSL_TARGET from https://musl.cc
# make MUSL_TARGET=aarch64-linux musl to cross make the aarch64 linux executable
Expand All @@ -78,6 +84,7 @@ ifneq ($(GOARCH)$(TARGET_ARCH)$(GOOS)$(TARGET_OS),)
$(error cross compilation has been disabled)
endif


###############################################################################
# default target
###############################################################################
Expand Down Expand Up @@ -151,8 +158,8 @@ help:

.PHONY: vendor-build
vendor-build:
$(info [go mod vendor])
@go mod vendor
$(info [$(GO) mod vendor])
@$(GO) mod vendor

###############################################################################
# code generation
Expand All @@ -161,7 +168,7 @@ vendor-build:
.PHONY: config
config:
$(info [Create build config])
@go mod tidy
@$(GO) mod tidy

.PHONY: generate-pb
generate-pb:
Expand All @@ -178,37 +185,55 @@ pb: vendor-build generate-pb fmt

VERSION_INFO :=-X '$(GO_MODULE)/pkg/version.GoVersion=$(GO_VERSION)' -X '$(GO_MODULE)/pkg/version.BranchName=$(BRANCH_NAME)' -X '$(GO_MODULE)/pkg/version.CommitID=$(LAST_COMMIT_ID)' -X '$(GO_MODULE)/pkg/version.BuildTime=$(BUILD_TIME)' -X '$(GO_MODULE)/pkg/version.Version=$(MO_VERSION)'
THIRDPARTIES_INSTALL_DIR=$(ROOT_DIR)/thirdparties/install
CGO_DIR=$(ROOT_DIR)/cgo
RACE_OPT :=
DEBUG_OPT :=
CGO_DEBUG_OPT :=
TAGS :=
GOTAGS :=
GOEXPERIMENT_OPT :=

ifeq ("$(UNAME_M)", "x86_64")
ifeq ($(shell expr $(GO_MAJOR_VERSION) \>= 1), 1)
ifeq ($(shell expr $(GO_MINOR_VERSION) \>= 26), 1)
#GOEXPERIMENT_OPT=GOEXPERIMENT=simd
endif
endif
ifneq ($(GOAMD64),)
GOEXPERIMENT_OPT+=GOAMD64=$(GOAMD64)
endif
endif

ifeq ($(MO_CL_CUDA),1)
ifeq ($(CONDA_PREFIX),)
$(error CONDA_PREFIX env variable not found.)
endif
CUVS_CFLAGS := -I$(CONDA_PREFIX)/include
CUVS_LDFLAGS := -L$(CONDA_PREFIX)/envs/go/lib -lcuvs -lcuvs_c
CUVS_LDFLAGS := -L$(CONDA_PREFIX)/lib -lcuvs -lcuvs_c
CUDA_CFLAGS := -I/usr/local/cuda/include $(CUVS_CFLAGS)
CUDA_LDFLAGS := -L/usr/local/cuda/lib64/stubs -lcuda -L/usr/local/cuda/lib64 -lcudart $(CUVS_LDFLAGS) -lstdc++
TAGS += -tags "gpu"
TAGS += gpu
endif

ifeq ($(TYPECHECK),1)
TAGS += -tags "typecheck"
TAGS += typecheck
endif

CGO_OPTS :=CGO_CFLAGS="-I$(THIRDPARTIES_INSTALL_DIR)/include $(CUDA_CFLAGS)"
GOLDFLAGS=-ldflags="-extldflags '$(CUDA_LDFLAGS) -L$(THIRDPARTIES_INSTALL_DIR)/lib -Wl,-rpath,\$${ORIGIN}/lib -fopenmp' $(VERSION_INFO)"
CGO_OPTS :=CGO_CFLAGS="-I$(CGO_DIR) -I$(THIRDPARTIES_INSTALL_DIR)/include $(CUDA_CFLAGS)"
GOLDFLAGS=-ldflags="-extldflags '$(CUDA_LDFLAGS) -L$(CGO_DIR) -lmo -L$(THIRDPARTIES_INSTALL_DIR)/lib -Wl,-rpath,\$${ORIGIN}/lib -fopenmp' $(VERSION_INFO)"

ifeq ("$(UNAME_S)","darwin")
GOLDFLAGS:=-ldflags="-extldflags '-L$(THIRDPARTIES_INSTALL_DIR)/lib -Wl,-rpath,@executable_path/lib' $(VERSION_INFO)"
GOLDFLAGS:=-ldflags="-extldflags '-L$(CGO_DIR) -lmo -L$(THIRDPARTIES_INSTALL_DIR)/lib -Wl,-rpath,@executable_path/lib' $(VERSION_INFO)"
endif

ifeq ($(GOBUILD_OPT),)
GOBUILD_OPT :=
endif

ifneq ($(TAGS),)
GOTAGS := -tags "$(TAGS)"
endif

.PHONY: cgo
cgo: thirdparties
@(cd cgo; ${MAKE} ${CGO_DEBUG_OPT})
Expand All @@ -222,7 +247,7 @@ thirdparties:
.PHONY: build
build: config cgo thirdparties
$(info [Build binary])
$(CGO_OPTS) go build $(TAGS) $(RACE_OPT) $(GOLDFLAGS) $(DEBUG_OPT) $(GOBUILD_OPT) -o $(BIN_NAME) ./cmd/mo-service
$(GOEXPERIMENT_OPT) $(CGO_OPTS) $(GO) build $(GOTAGS) $(RACE_OPT) $(GOLDFLAGS) $(DEBUG_OPT) $(GOBUILD_OPT) -o $(BIN_NAME) ./cmd/mo-service

# https://wiki.musl-libc.org/getting-started.html
# https://musl.cc/
Expand All @@ -248,17 +273,17 @@ musl-thirdparties: musl-install
.PHONY: musl
musl: override CGO_OPTS += CC=$(MUSL_CC)
musl: override GOLDFLAGS:=-ldflags="--linkmode 'external' --extldflags '-static -L$(THIRDPARTIES_INSTALL_DIR)/lib -lstdc++ -Wl,-rpath,\$${ORIGIN}/lib' $(VERSION_INFO)"
musl: override TAGS := -tags musl
musl: override GOTAGS := -tags musl
musl: musl-install musl-cgo config musl-thirdparties
musl:
$(info [Build binary(musl)])
$(CGO_OPTS) go build $(TAGS) $(RACE_OPT) $(GOLDFLAGS) $(DEBUG_OPT) $(GOBUILD_OPT) -o $(BIN_NAME) ./cmd/mo-service
$(CGO_OPTS) $(GO) build $(GOTAGS) $(RACE_OPT) $(GOLDFLAGS) $(DEBUG_OPT) $(GOBUILD_OPT) -o $(BIN_NAME) ./cmd/mo-service

# build mo-tool
.PHONY: mo-tool
mo-tool: config cgo thirdparties
$(info [Build mo-tool tool])
$(CGO_OPTS) go build $(GOLDFLAGS) -o mo-tool ./cmd/mo-tool
$(CGO_OPTS) $(GO) build $(GOLDFLAGS) -o mo-tool ./cmd/mo-tool

# build mo-service binary for debugging with go's race detector enabled
# produced executable is 10x slower and consumes much more memory
Expand Down Expand Up @@ -1007,7 +1032,7 @@ launch-minio-debug: debug dev-up-minio-local
clean:
$(info [Clean up])
$(info Clean go test cache)
@go clean -testcache
@$(GO) clean -testcache
rm -f $(BIN_NAME)
rm -rf $(ROOT_DIR)/vendor
rm -rf $(MUSL_DIR)
Expand All @@ -1027,12 +1052,12 @@ fmt:
.PHONY: install-static-check-tools
install-static-check-tools:
@curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | bash -s -- -b $(GOPATH)/bin v2.6.2
@go install github.com/matrixorigin/linter/cmd/molint@latest
@go install github.com/apache/skywalking-eyes/cmd/license-eye@v0.4.0
@$(GO) install github.com/matrixorigin/linter/cmd/molint@latest
@$(GO) install github.com/apache/skywalking-eyes/cmd/license-eye@v0.4.0

.PHONY: static-check
static-check: config err-check
$(CGO_OPTS) go vet -vettool=`which molint` ./...
$(CGO_OPTS) $(GO) vet -vettool=`which molint` ./...
$(CGO_OPTS) license-eye -c .licenserc.yml header check
$(CGO_OPTS) license-eye -c .licenserc.yml dep check
$(CGO_OPTS) golangci-lint run -v -c .golangci.yml ./...
Expand Down
65 changes: 47 additions & 18 deletions cgo/Makefile
Original file line number Diff line number Diff line change
@@ -1,48 +1,77 @@
DEBUG_OPT :=
UNAME_M := $(shell uname -m)
UNAME_S := $(shell uname -s)
CC ?= gcc

# Yeah, fast math. We want it to be fast, for all xcall,
# IEEE compliance should not be an issue.
OPT_LV := -O3 -ffast-math -ftree-vectorize -funroll-loops
CFLAGS=-std=c99 -g ${OPT_LV} -Wall -Werror -I../thirdparties/install/include
OBJS=mo.o arith.o compare.o logic.o xcall.o usearchex.o bloom.o
CUDA_OBJS=
COMMON_CFLAGS := -g $(OPT_LV) -Wall -Werror -fPIC -I../thirdparties/install/include
CFLAGS := -std=c99 $(COMMON_CFLAGS)
OBJS := mo.o arith.o compare.o logic.o xcall.o usearchex.o bloom.o
CUDA_OBJS :=
LDFLAGS := -L../thirdparties/install/lib -lusearch_c
TARGET_LIB := libmo.so

ifeq ($(UNAME_S),Darwin)
TARGET_LIB := libmo.dylib
LDFLAGS += -dynamiclib -undefined dynamic_lookup -install_name @rpath/$(TARGET_LIB)
else
LDFLAGS += -shared
endif

ifeq ($(UNAME_M), x86_64)
CFLAGS+= -march=haswell
CFLAGS += -march=haswell
endif

ifeq ($(MO_CL_CUDA),1)
ifeq ($(CONDA_PREFIX),)
$(error CONDA_PREFIX env variable not found. Please activate your conda environment.)
endif
CC = /usr/local/cuda/bin/nvcc
CFLAGS = -ccbin g++ -m64 --shared -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_89,code=sm_89 -gencode arch=compute_90,code=sm_90 -gencode arch=compute_90,code=compute_90
CFLAGS = -ccbin g++ -m64 -Xcompiler -fPIC -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_89,code=sm_89 -gencode arch=compute_90,code=sm_90 -gencode arch=compute_90,code=compute_90
CFLAGS += -I../thirdparties/install/include -DMO_CL_CUDA
CUDA_OBJS += cuda/cuda.o
CUDA_LDFLAGS := -L/usr/local/cuda/lib64/stubs -lcuda -L/usr/local/cuda/lib64 -lcudart -lstdc++
# Explicitly include all needed libraries for shared library linking
CUDA_LDFLAGS := -L/usr/local/cuda/lib64/stubs -lcuda -L/usr/local/cuda/lib64 -lcudart -L$(CONDA_PREFIX)/lib -lcuvs -lcuvs_c -ldl -lrmm -lstdc++
LDFLAGS += $(CUDA_LDFLAGS)
endif

all: libmo.a
.PHONY: all clean test debug

all: $(TARGET_LIB) libmo.a

libmo.a: $(OBJS)
$(TARGET_LIB): $(OBJS)
ifeq ($(MO_CL_CUDA),1)
make -C cuda
$(MAKE) -C cuda
$(MAKE) -C cuvs
$(CC) $(LDFLAGS) -o $@ $(OBJS) $(CUDA_OBJS) cuvs/*.o
else
$(CC) $(LDFLAGS) -o $@ $(OBJS)
endif
ar -rcs libmo.a $(OBJS) $(CUDA_OBJS)

#
# $(CC) -o libmo.a $(OBJS) $(CUDA_OBJS) $(CUDA_LDFLAGS)
libmo.a: $(OBJS)
ifeq ($(MO_CL_CUDA),1)
$(MAKE) -C cuda
$(MAKE) -C cuvs
ar -rcs $@ $(OBJS) $(CUDA_OBJS) cuvs/*.o
else
ar -rcs $@ $(OBJS)
endif

%.o: %.c
$(CC) $(CFLAGS) -c $< -o $@

test: libmo.a
make -C test
test: $(TARGET_LIB)
$(MAKE) -C test

.PHONY: debug
debug: override OPT_LV := -O0
debug: override DEBUG_OPT := debug
debug: all

.PHONY: clean
clean:
rm -f *.o *.a *.so
rm -f *.o *.a *.so *.dylib
ifeq ($(MO_CL_CUDA),1)
make -C cuda clean
$(MAKE) -C cuda clean
$(MAKE) -C cuvs clean
endif
33 changes: 18 additions & 15 deletions cgo/README.md
Original file line number Diff line number Diff line change
@@ -1,25 +1,28 @@
MatrixOne CGO Kernel
===============================

This directory contains cgo source code for MO. Running
make should produce two files to be used by go code.
On go side, go will `include "mo.h"` and `-lmo`.
This directory contains CGO source code for MatrixOne. Running `make` produces the core library files used by Go code.

On the Go side, the integration typically uses `mo.h` and links against the generated libraries:
```
mo.h
libmo.a
libmo.a / libmo.so
```

`mo.h` should be pristine, meaning it only contains C function
prototype used by go. The only datatypes that can be passed
between go and c code are int and float/double and pointer.
Always explicitly specify int size such as `int32_t`, `uint64_t`.
Do not use `int`, `long`, etc.
`mo.h` should remain pristine, containing only C function prototypes for Go to consume. Data passed between Go and C should be limited to standard types (int, float, double, pointers). Always specify explicit integer sizes (e.g., `int32_t`, `uint64_t`) and avoid platform-dependent types like `int` or `long`.

GPU Support (CUDA & cuVS)
-------------------------
The kernel supports GPU acceleration for certain operations (e.g., vector search) via NVIDIA CUDA and the cuVS library.

- **Build Flag:** GPU support is enabled by setting `MO_CL_CUDA=1` during the build.
- **Environment:** Requires a working CUDA installation and a Conda environment with `cuvs` and `rmm` installed.
- **Source Code:** GPU-specific code resides in the `cuda/` and `cuvs/` subdirectories.

Implementation Notes
--------------------------------
--------------------

1. Pure C.
2. Use memory passed from go. Try not allocate memory in C code.
3. Only depends on libc and libm.
4. If 3rd party lib is absolutely necessary, import source code
and build from source. If 3rd party lib is C++, wrap it completely in C.
1. **Language:** Core kernel is Pure C. GPU extensions use C++ and CUDA, wrapped in a C-compatible interface.
2. **Memory Management:** Prefer using memory allocated and passed from Go. Minimize internal allocations in C/C++ code.
3. **Dependencies:** The base kernel depends only on `libc`, `libm`, and `libusearch`. GPU builds introduce dependencies on CUDA, `cuvs`, and `rmm`.
4. **Third-party Libraries:** If a third-party library is necessary, it should be built from source (see `thirdparties/` directory). C++ libraries must be fully wrapped in C before being exposed to Go.
2 changes: 1 addition & 1 deletion cgo/cuda/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ $(FATBIN_FILE): mocl.cu
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -fatbin $<

cuda.o: cuda.cpp
$(EXEC) $(NVCC) $(INCLUDES) -O3 --shared $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
$(EXEC) $(NVCC) $(INCLUDES) -O3 --shared -Xcompiler -fPIC $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<

mytest.o: cuda.cpp $(FATBIN_FILE)
$(EXEC) $(NVCC) $(INCLUDES) -DTEST_RUN -g -O0 $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
Expand Down
71 changes: 71 additions & 0 deletions cgo/cuvs/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Makefile for MatrixOne cuVS C Wrapper

# Host architecture as reported by `uname -m`.
# NOTE(review): not referenced anywhere else in this Makefile as far as visible
# here - confirm before removing.
UNAME_M := $(shell uname -m)

# CUDA toolkit root; override from the environment or the command line when the
# toolkit is installed somewhere else.
CUDA_PATH ?= /usr/local/cuda
NVCC := $(CUDA_PATH)/bin/nvcc

# Include and library paths below are built from CONDA_PREFIX, so it is
# required for every goal that compiles or links. `make clean` only deletes
# files, so it is allowed to run without an active conda environment.
ifeq ($(CONDA_PREFIX),)
ifneq ($(MAKECMDGOALS),clean)
$(error CONDA_PREFIX env variable not found. Please activate your conda environment.)
endif
endif

# Compilation flags.
# --extended-lambda is present because raft/core/copy.cuh requires it for some
# internal headers.
# Keep "-x cu" as two adjacent words: the test rules derive their own flags by
# removing exactly that text.
NVCC_FLAGS := -std=c++17 -x cu \
              -Xcompiler "-Wall -Wextra -fPIC -O2" \
              --extended-lambda --expt-relaxed-constexpr \
              -I. \
              -I$(CUDA_PATH)/include \
              -I$(CONDA_PREFIX)/include \
              -I$(CONDA_PREFIX)/include/rapids \
              -I$(CONDA_PREFIX)/include/raft \
              -I$(CONDA_PREFIX)/include/cuvs \
              -DLIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE \
              -DRAFT_SYSTEM_LITTLE_ENDIAN=1

# Linking flags.
# "-shared" comes first and is removed again when the test executable is linked.
LDFLAGS := -shared \
           -L$(CUDA_PATH)/lib64/stubs -lcuda \
           -L$(CUDA_PATH)/lib64 -lcudart \
           -L$(CONDA_PREFIX)/lib -lcuvs -lcuvs_c -ldl -lrmm -lrapids_logger \
           -Xlinker -lpthread -Xlinker -lm

# Name of the optional standalone shared library.
TARGET := libmocuvs.so

# C wrapper sources; each one is compiled to an object of the same base name.
SRCS := brute_force_c.cpp \
        ivf_flat_c.cpp \
        cagra_c.cpp \
        kmeans_c.cpp \
        helper.cpp
OBJS := $(patsubst %.cpp,%.o,$(SRCS))

# Test layout: sources live in $(TESTDIR), their objects in $(OBJDIR)/test.
TESTDIR := test
OBJDIR := obj
TEST_EXE := test_cuvs_worker
TEST_SRCS := $(addprefix $(TESTDIR)/, \
               main_test.cu \
               brute_force_test.cu \
               ivf_flat_test.cu \
               cagra_test.cu \
               kmeans_test.cu)

TEST_OBJS := $(patsubst $(TESTDIR)/%.cu,$(OBJDIR)/test/%.o,$(TEST_SRCS))

# Goals that do not name files.
.PHONY: all
.PHONY: clean test

# Default goal: compile the wrapper objects only. $(TARGET) is not part of
# `all`; request it explicitly to get the standalone shared library.
all: $(OBJS)

# Standalone shared library built from the wrapper objects.
$(TARGET): $(OBJS)
	@echo "Linking shared library $@"
	$(NVCC) $(LDFLAGS) $^ -o $@

# Wrapper sources are .cpp files compiled by nvcc with $(NVCC_FLAGS).
%.o: %.cpp
	@echo "Compiling $< with NVCC"
	$(NVCC) $(NVCC_FLAGS) -c $< -o $@

# Test targets
#
# Both test flag sets are derived from the library flags so the two can never
# drift apart:
#   - TEST_NVCC_FLAGS is NVCC_FLAGS without "-x cu" (the test sources already
#     carry the .cu extension).
#   - TEST_LDFLAGS is LDFLAGS without the "-shared" word, because the test
#     binary is an executable. filter-out works on whole words, so a library
#     path that merely contains "-shared" is left untouched.
TEST_NVCC_FLAGS := $(subst -x cu,,$(NVCC_FLAGS))
TEST_LDFLAGS := $(filter-out -shared,$(LDFLAGS))

# Build the test executable, then run it; a non-zero exit fails the goal.
test: $(TEST_EXE)
	@echo "Running tests..."
	./$(TEST_EXE)

# Link the test executable; libraries follow the objects on the command line.
$(TEST_EXE): $(TEST_OBJS)
	@echo "NVCCLD $@"
	$(NVCC) $(TEST_NVCC_FLAGS) $^ $(TEST_LDFLAGS) -o $@

# Compile one test source; the object directory is created on demand.
$(OBJDIR)/test/%.o: $(TESTDIR)/%.cu
	@mkdir -p $(@D)
	@echo "NVCC $<"
	$(NVCC) $(TEST_NVCC_FLAGS) -c $< -o $@

# Remove build products: the shared library, objects in this directory, the
# test executable, and the test object tree.
clean:
	@echo "Cleaning up..."
	$(RM) $(TARGET) *.o $(TEST_EXE)
	$(RM) -r $(OBJDIR)
Loading
Loading