* use hipblas based on cublas
* Update Makefile for the Cuda kernels
* Expand arch list and make it overrideable
* Fix multi GPU on multiple amd architectures with rocblas_initialize() (#5)
* add hipBLAS to README
* new build arg LLAMA_CUDA_MMQ_Y
* fix half2 decomposition
* Add intrinsics polyfills for AMD
* AMD assembly optimized __dp4a
* Allow overriding CC_TURING
* use "ROCm" instead of "CUDA"
* ignore all build dirs
* Add Dockerfiles
* fix llama-bench
* fix -nommq help for non CUDA/HIP

---------

Co-authored-by: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com>
Co-authored-by: ardfork <134447697+ardfork@users.noreply.github.com>
Co-authored-by: funnbot <22226942+funnbot@users.noreply.github.com>
Co-authored-by: Engininja2 <139037756+Engininja2@users.noreply.github.com>
Co-authored-by: Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com>
Co-authored-by: jammm <2500920+jammm@users.noreply.github.com>
Co-authored-by: jdecourval <7315817+jdecourval@users.noreply.github.com>
This commit is contained in:
Henri Vasserman 2023-08-25 12:09:42 +03:00 committed by GitHub
parent 3f460a2b72
commit 6bbc598a63
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 335 additions and 59 deletions

View file

@ -280,6 +280,30 @@ ggml-opencl.o: ggml-opencl.cpp ggml-opencl.h
$(CXX) $(CXXFLAGS) -c $< -o $@
endif # LLAMA_CLBLAST
ifdef LLAMA_HIPBLAS
ROCM_PATH ?= /opt/rocm
HIPCC ?= $(ROCM_PATH)/bin/hipcc
GPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
LLAMA_CUDA_DMMV_X ?= 32
LLAMA_CUDA_MMV_Y ?= 1
LLAMA_CUDA_KQUANTS_ITER ?= 2
CFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS
CXXFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS
LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
LDFLAGS += -lhipblas -lamdhip64 -lrocblas
HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS))
HIPFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
HIPFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
HIPFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
HIPFLAGS += -DCC_TURING=1000000000
ifdef LLAMA_CUDA_FORCE_DMMV
HIPFLAGS += -DGGML_CUDA_FORCE_DMMV
endif # LLAMA_CUDA_FORCE_DMMV
OBJS += ggml-cuda.o
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
endif # LLAMA_HIPBLAS
ifdef LLAMA_METAL
CFLAGS += -DGGML_USE_METAL -DGGML_METAL_NDEBUG
CXXFLAGS += -DGGML_USE_METAL