Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ endif()

project(lightgbm LANGUAGES C CXX)

if(USE_CUDA)
if(USE_CUDA OR USE_ROCM)
set(CMAKE_CXX_STANDARD 17)
elseif(BUILD_CPP_TEST)
set(CMAKE_CXX_STANDARD 14)
Expand Down Expand Up @@ -480,10 +480,21 @@ set(
src/cuda/cuda_algorithms.cu
)

if(USE_CUDA)
if(USE_CUDA OR USE_ROCM)
list(APPEND LGBM_SOURCES ${LGBM_CUDA_SOURCES})
endif()

if(USE_ROCM)
set(CU_FILES "")
foreach(file IN LISTS LGBM_CUDA_SOURCES)
string(REGEX MATCH "\\.cu$" is_cu_file "${file}")
if(is_cu_file)
list(APPEND CU_FILES "${file}")
endif()
endforeach()
set_source_files_properties(${CU_FILES} PROPERTIES LANGUAGE HIP)
endif()

add_library(lightgbm_objs OBJECT ${LGBM_SOURCES})

if(BUILD_CLI)
Expand Down Expand Up @@ -632,6 +643,10 @@ if(USE_CUDA)
endif()
endif()

if(USE_ROCM)
target_link_libraries(lightgbm_objs PUBLIC hip::host)
endif()

if(WIN32)
if(MINGW OR CYGWIN)
target_link_libraries(lightgbm_objs PUBLIC ws2_32 iphlpapi)
Expand Down
4 changes: 2 additions & 2 deletions include/LightGBM/bin.h
Original file line number Diff line number Diff line change
Expand Up @@ -600,13 +600,13 @@ class MultiValBin {

virtual MultiValBin* Clone() = 0;

#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
virtual const void* GetRowWiseData(uint8_t* bit_type,
size_t* total_size,
bool* is_sparse,
const void** out_data_ptr,
uint8_t* data_ptr_bit_type) const = 0;
#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
};

inline uint32_t BinMapper::ValueToBin(double value) const {
Expand Down
6 changes: 4 additions & 2 deletions include/LightGBM/cuda/cuda_algorithms.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@
#ifndef LIGHTGBM_CUDA_CUDA_ALGORITHMS_HPP_
#define LIGHTGBM_CUDA_CUDA_ALGORITHMS_HPP_

#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)

#if defined(USE_CUDA)
#include <cuda.h>
#include <cuda_runtime.h>
#endif
#include <stdio.h>

#include <LightGBM/bin.h>
Expand Down Expand Up @@ -619,5 +621,5 @@ __device__ VAL_T PercentileDevice(const VAL_T* values,

} // namespace LightGBM

#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
#endif // LIGHTGBM_CUDA_CUDA_ALGORITHMS_HPP_
4 changes: 2 additions & 2 deletions include/LightGBM/cuda/cuda_column_data.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/

#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)

#ifndef LIGHTGBM_CUDA_CUDA_COLUMN_DATA_HPP_
#define LIGHTGBM_CUDA_CUDA_COLUMN_DATA_HPP_
Expand Down Expand Up @@ -139,4 +139,4 @@ class CUDAColumnData {

#endif // LIGHTGBM_CUDA_CUDA_COLUMN_DATA_HPP_

#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
4 changes: 2 additions & 2 deletions include/LightGBM/cuda/cuda_metadata.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/

#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)

#ifndef LIGHTGBM_CUDA_CUDA_METADATA_HPP_
#define LIGHTGBM_CUDA_CUDA_METADATA_HPP_
Expand Down Expand Up @@ -55,4 +55,4 @@ class CUDAMetadata {

#endif // LIGHTGBM_CUDA_CUDA_METADATA_HPP_

#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
4 changes: 2 additions & 2 deletions include/LightGBM/cuda/cuda_metric.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#ifndef LIGHTGBM_CUDA_CUDA_METRIC_HPP_
#define LIGHTGBM_CUDA_CUDA_METRIC_HPP_

#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)

#include <LightGBM/cuda/cuda_utils.hu>
#include <LightGBM/metric.h>
Expand Down Expand Up @@ -39,6 +39,6 @@ class CUDAMetricInterface: public HOST_METRIC {

} // namespace LightGBM

#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM

#endif // LIGHTGBM_CUDA_CUDA_METRIC_HPP_
4 changes: 2 additions & 2 deletions include/LightGBM/cuda/cuda_objective_function.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#ifndef LIGHTGBM_CUDA_CUDA_OBJECTIVE_FUNCTION_HPP_
#define LIGHTGBM_CUDA_CUDA_OBJECTIVE_FUNCTION_HPP_

#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)

#include <LightGBM/cuda/cuda_utils.hu>
#include <LightGBM/objective_function.h>
Expand Down Expand Up @@ -81,6 +81,6 @@ class CUDAObjectiveInterface: public HOST_OBJECTIVE {

} // namespace LightGBM

#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM

#endif // LIGHTGBM_CUDA_CUDA_OBJECTIVE_FUNCTION_HPP_
6 changes: 4 additions & 2 deletions include/LightGBM/cuda/cuda_random.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
#ifndef LIGHTGBM_CUDA_CUDA_RANDOM_HPP_
#define LIGHTGBM_CUDA_CUDA_RANDOM_HPP_

#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)

#if defined(USE_CUDA)
#include <cuda.h>
#include <cuda_runtime.h>
#endif

namespace LightGBM {

Expand Down Expand Up @@ -69,6 +71,6 @@ class CUDARandom {

} // namespace LightGBM

#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM

#endif // LIGHTGBM_CUDA_CUDA_RANDOM_HPP_
57 changes: 51 additions & 6 deletions include/LightGBM/cuda/cuda_rocm_interop.h
Original file line number Diff line number Diff line change
@@ -1,20 +1,65 @@
/*!
* Copyright(C) 2023 Advanced Micro Devices, Inc. All rights reserved.
*/
#ifdef USE_CUDA
#pragma once

#if defined(USE_CUDA) || defined(USE_ROCM)

#if defined(__HIP_PLATFORM_AMD__)

#if defined(__HIP_PLATFORM_AMD__) || defined(__HIP__)
// ROCm doesn't have __shfl_down_sync, only __shfl_down without mask.
// Since mask is full 0xffffffff, we can use __shfl_down instead.
#define __shfl_down_sync(mask, val, offset) __shfl_down(val, offset)
#define __shfl_up_sync(mask, val, offset) __shfl_up(val, offset)
// ROCm warpSize is constexpr and is either 32 or 64 depending on gfx arch.
#define WARPSIZE warpSize

// ROCm doesn't have atomicAdd_block, but it should be semantically the same as atomicAdd
#define atomicAdd_block atomicAdd
#else

// hipify
#include <hip/hip_runtime.h>
#define cudaDeviceProp hipDeviceProp_t
#define cudaDeviceSynchronize hipDeviceSynchronize
#define cudaError_t hipError_t
#define cudaFree hipFree
#define cudaFreeHost hipFreeHost
#define cudaGetDevice hipGetDevice
#define cudaGetDeviceProperties hipGetDeviceProperties
#define cudaGetErrorName hipGetErrorName
#define cudaGetErrorString hipGetErrorString
#define cudaGetLastError hipGetLastError
#define cudaHostAlloc hipHostAlloc
#define cudaHostAllocPortable hipHostAllocPortable
#define cudaMalloc hipMalloc
#define cudaMemcpy hipMemcpy
#define cudaMemcpyAsync hipMemcpyAsync
#define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice
#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost
#define cudaMemcpyHostToDevice hipMemcpyHostToDevice
#define cudaMemoryTypeHost hipMemoryTypeHost
#define cudaMemset hipMemset
#define cudaPointerAttributes hipPointerAttribute_t
#define cudaPointerGetAttributes hipPointerGetAttributes
#define cudaSetDevice hipSetDevice
#define cudaStreamCreate hipStreamCreate
#define cudaStreamDestroy hipStreamDestroy
#define cudaStream_t hipStream_t
#define cudaSuccess hipSuccess

// warpSize is only allowed for device code.
// HIP header used to define warpSize as a constexpr that was either 32 or 64
// depending on the target device, and then always set it to 64 for host code.
static inline constexpr int WARP_SIZE_INTERNAL() {
#if defined(__GFX9__)
return 64;
#else // __GFX9__
return 32;
#endif // __GFX9__
}
#define WARPSIZE (WARP_SIZE_INTERNAL())

#else // __HIP_PLATFORM_AMD__
// CUDA warpSize is not a constexpr, but always 32
#define WARPSIZE 32
#endif

#endif
#endif // USE_CUDA || USE_ROCM
4 changes: 2 additions & 2 deletions include/LightGBM/cuda/cuda_row_data.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/

#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)

#ifndef LIGHTGBM_CUDA_CUDA_ROW_DATA_HPP_
#define LIGHTGBM_CUDA_CUDA_ROW_DATA_HPP_
Expand Down Expand Up @@ -177,4 +177,4 @@ class CUDARowData {
} // namespace LightGBM
#endif // LIGHTGBM_CUDA_CUDA_ROW_DATA_HPP_

#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
4 changes: 2 additions & 2 deletions include/LightGBM/cuda/cuda_split_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
* Modifications Copyright(C) 2023 Advanced Micro Devices, Inc. All rights reserved.
*/

#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)

#ifndef LIGHTGBM_CUDA_CUDA_SPLIT_INFO_HPP_
#define LIGHTGBM_CUDA_CUDA_SPLIT_INFO_HPP_
Expand Down Expand Up @@ -105,4 +105,4 @@ class CUDASplitInfo {

#endif // LIGHTGBM_CUDA_CUDA_SPLIT_INFO_HPP_

#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
4 changes: 2 additions & 2 deletions include/LightGBM/cuda/cuda_tree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/

#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)

#ifndef LIGHTGBM_CUDA_CUDA_TREE_HPP_
#define LIGHTGBM_CUDA_CUDA_TREE_HPP_
Expand Down Expand Up @@ -170,4 +170,4 @@ class CUDATree : public Tree {

#endif // LIGHTGBM_CUDA_CUDA_TREE_HPP_

#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM
8 changes: 6 additions & 2 deletions include/LightGBM/cuda/cuda_utils.hu
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,14 @@
#ifndef LIGHTGBM_CUDA_CUDA_UTILS_H_
#define LIGHTGBM_CUDA_CUDA_UTILS_H_

#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)

#if defined(USE_CUDA)
#include <cuda.h>
#include <cuda_runtime.h>
#else
#include <LightGBM/cuda/cuda_rocm_interop.h>
#endif
#include <stdio.h>

#include <LightGBM/utils/log.h>
Expand Down Expand Up @@ -207,6 +211,6 @@ static __device__ T SafeLog(T x) {

} // namespace LightGBM

#endif // USE_CUDA
#endif // USE_CUDA || USE_ROCM

#endif // LIGHTGBM_CUDA_CUDA_UTILS_H_
19 changes: 11 additions & 8 deletions include/LightGBM/cuda/vector_cudahost.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,13 @@

#include <LightGBM/utils/common.h>

#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
#if defined(USE_CUDA)
#include <cuda.h>
#include <cuda_runtime.h>
#endif
#endif // USE_CUDA
#include <LightGBM/cuda/cuda_utils.hu>
#endif // USE_CUDA || USE_ROCM
#include <stdio.h>

enum LGBM_Device {
Expand Down Expand Up @@ -44,7 +47,7 @@ struct CHAllocator {
T* ptr;
if (n == 0) return NULL;
n = SIZE_ALIGNED(n);
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
if (LGBM_config_::current_device == lgbm_device_cuda) {
cudaError_t ret = cudaHostAlloc(reinterpret_cast<void**>(&ptr), n*sizeof(T), cudaHostAllocPortable);
if (ret != cudaSuccess) {
Expand All @@ -63,17 +66,17 @@ struct CHAllocator {
void deallocate(T* p, std::size_t n) {
(void)n; // UNUSED
if (p == NULL) return;
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
if (LGBM_config_::current_device == lgbm_device_cuda) {
cudaPointerAttributes attributes;
cudaPointerGetAttributes(&attributes, p);
#if CUDA_VERSION >= 10000
CUDASUCCESS_OR_FATAL(cudaPointerGetAttributes(&attributes, p));
#if CUDA_VERSION >= 10000 || defined(USE_ROCM)
if ((attributes.type == cudaMemoryTypeHost) && (attributes.devicePointer != NULL)) {
cudaFreeHost(p);
CUDASUCCESS_OR_FATAL(cudaFreeHost(p));
}
#else
if ((attributes.memoryType == cudaMemoryTypeHost) && (attributes.devicePointer != NULL)) {
cudaFreeHost(p);
CUDASUCCESS_OR_FATAL(cudaFreeHost(p));
}
#endif
} else {
Expand Down
Loading
Loading