Skip to content

Commit b117ff4

Browse files
committed
wip
1 parent 23f7081 commit b117ff4

File tree

4 files changed

+210
-20
lines changed

4 files changed

+210
-20
lines changed

compiler/rustc_codegen_llvm/src/back/write.rs

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -773,11 +773,36 @@ pub(crate) unsafe fn llvm_optimize(
773773
};
774774

775775
if cgcx.target_is_like_gpu && config.offload.contains(&config::Offload::Enable) {
776+
let lib_bc_c = CString::new("/p/lustre1/drehwald1/prog/offload/r/lib.bc").unwrap();
777+
let host_out_c = CString::new("/p/lustre1/drehwald1/prog/offload/r/host.out").unwrap();
778+
let out_obj_c = CString::new("/p/lustre1/drehwald1/prog/offload/r/host.o").unwrap();
779+
776780
unsafe {
777-
llvm::LLVMRustBundleImages(module.module_llvm.llmod(), module.module_llvm.tm.raw());
781+
llvm::LLVMRustBundleImages(
782+
module.module_llvm.llmod(),
783+
module.module_llvm.tm.raw(),
784+
host_out_c.as_ptr(),
785+
);
778786
}
779-
}
787+
unsafe {
788+
// 1) Bundle device module into offload image host.out (device TM)
789+
let ok = llvm::LLVMRustBundleImages(
790+
module.module_llvm.llmod(),
791+
module.module_llvm.tm.raw(),
792+
host_out_c.as_ptr(),
793+
);
794+
assert!(ok, "LLVMRustBundleImages (device -> host.out) failed");
780795

796+
// 2) Finalize host: lib.bc + host.out -> host.offload.o (host TM created in C++)
797+
let ok = llvm::LLVMRustFinalizeOffload(
798+
lib_bc_c.as_ptr(),
799+
host_out_c.as_ptr(),
800+
out_obj_c.as_ptr(),
801+
);
802+
assert!(ok, "LLVMRustFinalizeOffload (host finalize) failed");
803+
}
804+
dbg!("done");
805+
}
781806
result.into_result().unwrap_or_else(|()| llvm_err(dcx, LlvmError::RunLlvmPasses))
782807
}
783808

compiler/rustc_codegen_llvm/src/llvm/ffi.rs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1726,7 +1726,16 @@ mod Offload {
17261726
use super::*;
17271727
unsafe extern "C" {
17281728
/// Processes the module and writes it in an offload compatible way into a "host.out" file.
1729-
pub(crate) fn LLVMRustBundleImages<'a>(M: &'a Module, TM: &'a TargetMachine) -> bool;
1729+
pub(crate) fn LLVMRustBundleImages<'a>(
1730+
M: &'a Module,
1731+
TM: &'a TargetMachine,
1732+
host_out: *const c_char,
1733+
) -> bool;
1734+
pub(crate) fn LLVMRustFinalizeOffload(
1735+
lib_bc_path: *const c_char,
1736+
host_out_path: *const c_char,
1737+
out_obj_path: *const c_char,
1738+
) -> bool;
17301739
pub(crate) fn LLVMRustOffloadMapper<'a>(OldFn: &'a Value, NewFn: &'a Value);
17311740
}
17321741
}
@@ -1740,7 +1749,11 @@ mod Offload_fallback {
17401749
/// Processes the module and writes it in an offload compatible way into a "host.out" file.
17411750
/// Marked as unsafe to match the real offload wrapper which is unsafe due to FFI.
17421751
#[allow(unused_unsafe)]
1743-
pub(crate) unsafe fn LLVMRustBundleImages<'a>(_M: &'a Module, _TM: &'a TargetMachine) -> bool {
1752+
pub(crate) unsafe fn LLVMRustBundleImages<'a>(
1753+
_M: &'a Module,
1754+
_TM: &'a TargetMachine,
1755+
_host_out: *const c_char,
1756+
) -> bool {
17441757
unimplemented!("This rustc version was not built with LLVM Offload support!");
17451758
}
17461759
#[allow(unused_unsafe)]

compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp

Lines changed: 160 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,18 @@
4545
#ifdef OFFLOAD
4646
#include "llvm/Object/OffloadBinary.h"
4747
#include "llvm/Target/TargetMachine.h"
48+
#include "llvm/Transforms/Utils/ModuleUtils.h"
49+
#include "llvm/Analysis/VectorUtils.h"
50+
#include "llvm/ADT/SmallString.h"
51+
#include "llvm/IR/DerivedTypes.h"
52+
#include "llvm/IR/Function.h"
53+
#include "llvm/IR/IRBuilder.h"
54+
#include "llvm/IR/MDBuilder.h"
55+
#include "llvm/IR/Module.h"
56+
#include "llvm/Support/Casting.h"
57+
#include "llvm/Support/MD5.h"
58+
#include "llvm/Support/raw_ostream.h"
59+
#include "llvm/Support/xxhash.h"
4860
#endif
4961

5062
// for raw `write` in the bad-alloc handler
@@ -174,12 +186,12 @@ static Error writeFile(StringRef Filename, StringRef Data) {
174186
// --image=file=device.bc,triple=amdgcn-amd-amdhsa,arch=gfx90a,kind=openmp
175187
// The input module is the rust code compiled for a gpu target like amdgpu.
176188
// Based on clang/tools/clang-offload-packager/ClangOffloadPackager.cpp
177-
extern "C" bool LLVMRustBundleImages(LLVMModuleRef M, TargetMachine &TM) {
189+
extern "C" bool LLVMRustBundleImages(LLVMModuleRef M, TargetMachine &TM, const char *HostOutPath) {
178190
std::string Storage;
179191
llvm::raw_string_ostream OS1(Storage);
180192
llvm::WriteBitcodeToFile(*unwrap(M), OS1);
181193
OS1.flush();
182-
auto MB = llvm::MemoryBuffer::getMemBufferCopy(Storage, "module.bc");
194+
auto MB = llvm::MemoryBuffer::getMemBufferCopy(Storage, "device.bc");
183195

184196
SmallVector<char, 1024> BinaryData;
185197
raw_svector_ostream OS2(BinaryData);
@@ -188,19 +200,103 @@ extern "C" bool LLVMRustBundleImages(LLVMModuleRef M, TargetMachine &TM) {
188200
ImageBinary.TheImageKind = object::IMG_Bitcode;
189201
ImageBinary.Image = std::move(MB);
190202
ImageBinary.TheOffloadKind = object::OFK_OpenMP;
191-
ImageBinary.StringData["triple"] = TM.getTargetTriple().str();
192-
ImageBinary.StringData["arch"] = TM.getTargetCPU();
203+
204+
205+
std::string TripleStr = TM.getTargetTriple().str();
206+
llvm::StringRef CPURef = TM.getTargetCPU();
207+
ImageBinary.StringData["triple"] = TripleStr;
208+
ImageBinary.StringData["arch"] = CPURef;
193209
llvm::SmallString<0> Buffer = OffloadBinary::write(ImageBinary);
194210
if (Buffer.size() % OffloadBinary::getAlignment() != 0)
195211
// Offload binary has invalid size alignment
196212
return false;
197213
OS2 << Buffer;
198-
if (Error E = writeFile("host.out",
214+
if (Error E = writeFile(HostOutPath,
199215
StringRef(BinaryData.begin(), BinaryData.size())))
200216
return false;
201217
return true;
202218
}
203219

220+
#include "llvm/Bitcode/BitcodeReader.h"
221+
Expected<std::unique_ptr<Module>>
222+
loadHostModuleFromBitcode(LLVMContext &Ctx, StringRef LibBCPath) {
223+
auto MBOrErr = MemoryBuffer::getFile(LibBCPath);
224+
if (!MBOrErr)
225+
return errorCodeToError(MBOrErr.getError());
226+
227+
MemoryBufferRef Ref = (*MBOrErr)->getMemBufferRef();
228+
return parseBitcodeFile(Ref, Ctx);
229+
}
230+
231+
extern "C" void embedBufferInModule(Module &M, MemoryBufferRef Buf) {
232+
StringRef SectionName = ".llvm.offloading";
233+
Align Alignment = Align(8);
234+
// Embed the memory buffer into the module.
235+
Constant *ModuleConstant = ConstantDataArray::get(
236+
M.getContext(), ArrayRef(Buf.getBufferStart(), Buf.getBufferSize()));
237+
GlobalVariable *GV = new GlobalVariable(
238+
M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage,
239+
ModuleConstant, "llvm.embedded.object");
240+
GV->setSection(SectionName);
241+
GV->setAlignment(Alignment);
242+
243+
LLVMContext &Ctx = M.getContext();
244+
NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.embedded.objects");
245+
Metadata *MDVals[] = {ConstantAsMetadata::get(GV),
246+
MDString::get(Ctx, SectionName)};
247+
248+
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
249+
GV->setMetadata(LLVMContext::MD_exclude, llvm::MDNode::get(Ctx, {}));
250+
251+
appendToCompilerUsed(M, GV);
252+
}
253+
254+
Error embedHostOutIntoHostModule(Module &HostM, StringRef HostOutPath) {
255+
llvm::errs() << "embedHostOutIntoHostModule step 1:\n";
256+
auto MBOrErr = MemoryBuffer::getFile(HostOutPath);
257+
llvm::errs() << "embedHostOutIntoHostModule step 2:\n";
258+
if (!MBOrErr)
259+
return errorCodeToError(MBOrErr.getError());
260+
261+
llvm::errs() << "embedHostOutIntoHostModule step 3:\n";
262+
MemoryBufferRef Buf = (*MBOrErr)->getMemBufferRef();
263+
llvm::errs() << "embedHostOutIntoHostModule step 4:\n";
264+
embedBufferInModule(HostM, Buf);
265+
return Error::success();
266+
}
267+
268+
#include "llvm/Support/TargetSelect.h"
269+
#include "llvm/Target/TargetMachine.h"
270+
#include "llvm/Target/TargetOptions.h"
271+
#include "llvm/IR/LegacyPassManager.h"
272+
//#include "llvm/Support/Host.h"
273+
//#include "llvm/Support/TargetRegistry.h"
274+
#include "llvm/MC/TargetRegistry.h"
275+
#include "llvm/Support/raw_ostream.h"
276+
#include "llvm/Support/FileSystem.h"
277+
#include "llvm/Support/CodeGen.h" // <-- new
278+
279+
Error emitHostObjectWithTM(Module &HostM,
280+
TargetMachine &TM,
281+
StringRef OutObjPath) {
282+
// Make sure module matches the TM
283+
//HostM.setDataLayout(TM.createDataLayout());
284+
//HostM.setTargetTriple(TM.getTargetTriple().str());
285+
286+
legacy::PassManager PM;
287+
std::error_code EC;
288+
raw_fd_ostream OS(OutObjPath, EC, sys::fs::OF_None);
289+
if (EC)
290+
return errorCodeToError(EC);
291+
292+
if (TM.addPassesToEmitFile(PM, OS, nullptr, llvm::CodeGenFileType::ObjectFile))
293+
return createStringError(inconvertibleErrorCode(),
294+
"TargetMachine can't emit a file of this type");
295+
296+
PM.run(HostM);
297+
return Error::success();
298+
}
299+
204300
extern "C" void LLVMRustOffloadMapper(LLVMValueRef OldFn, LLVMValueRef NewFn) {
205301
llvm::Function *oldFn = llvm::unwrap<llvm::Function>(OldFn);
206302
llvm::Function *newFn = llvm::unwrap<llvm::Function>(NewFn);
@@ -222,6 +318,65 @@ extern "C" void LLVMRustOffloadMapper(LLVMValueRef OldFn, LLVMValueRef NewFn) {
222318
}
223319
#endif
224320

321+
// Create a host TargetMachine with HARDCODED triple/CPU
322+
static std::unique_ptr<TargetMachine> createHostTargetMachine() {
323+
static bool Initialized = false;
324+
if (!Initialized) {
325+
InitializeAllTargets();
326+
InitializeAllTargetMCs();
327+
InitializeAllAsmPrinters();
328+
InitializeAllAsmParsers();
329+
Initialized = true;
330+
}
331+
332+
// Hardcoded host triple + CPU (adapt if your CI/host differs)
333+
std::string TripleStr = "x86_64-unknown-linux-gnu";
334+
std::string CPU = "x86-64"; // OK for X86
335+
336+
std::string Err;
337+
const Target *T = TargetRegistry::lookupTarget(TripleStr, Err);
338+
if (!T) {
339+
// Could log Err here
340+
return nullptr;
341+
}
342+
343+
TargetOptions Opts;
344+
auto RM = std::optional<Reloc::Model>(Reloc::PIC_);
345+
346+
std::unique_ptr<TargetMachine> TM(
347+
T->createTargetMachine(TripleStr, CPU, /*Features*/"", Opts, RM));
348+
349+
return TM;
350+
}
351+
352+
// Top-level entry: host finalize in second rustc invocation
353+
// lib.bc (from first rustc) + host.out (from LLVMRustBundleImages) => host.offload.o
354+
extern "C" bool LLVMRustFinalizeOffload(const char *LibBCPath,
355+
const char *HostOutPath,
356+
const char *OutObjPath) {
357+
LLVMContext Ctx;
358+
359+
// 1. Load host lib.bc
360+
auto ModOrErr = loadHostModuleFromBitcode(Ctx, LibBCPath);
361+
if (!ModOrErr)
362+
return !errorToBool(ModOrErr.takeError());
363+
std::unique_ptr<Module> HostM = std::move(*ModOrErr);
364+
365+
// 2. Embed host.out
366+
if (Error E = embedHostOutIntoHostModule(*HostM, HostOutPath))
367+
return !errorToBool(std::move(E));
368+
369+
// 3. Create host TM and emit host object
370+
auto HostTM = createHostTargetMachine();
371+
if (!HostTM)
372+
return false;
373+
374+
if (Error E = emitHostObjectWithTM(*HostM, *HostTM, OutObjPath))
375+
return !errorToBool(std::move(E));
376+
377+
return true;
378+
}
379+
225380
extern "C" LLVMValueRef LLVMRustGetNamedValue(LLVMModuleRef M, const char *Name,
226381
size_t NameLen) {
227382
return wrap(unwrap(M)->getNamedValue(StringRef(Name, NameLen)));

src/doc/rustc-dev-guide/src/offload/usage.md

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -68,28 +68,25 @@ pub extern "gpu-kernel" fn kernel_1(x: *mut [f64; 256]) {
6868
## Compile instructions
6969
It is important to use a clang compiler built on the same LLVM as rustc. Just calling clang without the full path will likely use your system clang, which will probably be incompatible. So either substitute the clang/lld invocations below with absolute paths, or set your `PATH` accordingly.
7070

71-
First we generate the host (cpu) code. The first build is just to compile libc, take note of the hashed path. Then we call rustc directly to build our host code, while providing the libc artifact to rustc.
71+
First we generate the host (cpu) code.
7272
```
73-
cargo +offload build -r -v
74-
rustc +offload --edition 2024 src/lib.rs -g --crate-type cdylib -C opt-level=3 -C panic=abort -C lto=fat -L dependency=/absolute_path_to/target/release/deps --extern libc=/absolute_path_to/target/release/deps/liblibc-<HASH>.rlib --emit=llvm-bc,llvm-ir -Zoffload=Enable -Zunstable-options
73+
RUSTFLAGS="--emit=llvm-bc -Zoffload=Enable -Zunstable-options" cargo +offload build -r
7574
```
75+
Afterwards, you might need to copy your `target/release/deps/<lib_name>.bc` to `lib.bc` before the next step. This manual copy is only needed for now.
7676

7777
Now we generate the device code. Replace the target-cpu with the right code for your gpu.
7878
```
79-
RUSTFLAGS="-Ctarget-cpu=gfx90a --emit=llvm-bc,llvm-ir -Zoffload=Enable -Zunstable-options" cargo +offload build -Zunstable-options -r -v --target amdgcn-amd-amdhsa -Zbuild-std=core
79+
RUSTFLAGS="-Ctarget-cpu=gfx90a -Zoffload=Enable -Zunstable-options" cargo +offload build -Zunstable-options -r --target amdgcn-amd-amdhsa -Zbuild-std=core
8080
```
81-
81+
This call also does a lot of work and generates multiple intermediate files for llvm offload.
82+
While we have integrated most offload steps into rustc, one binary invocation still remains for now:
8283

8384
```
84-
"clang-21" "-cc1" "-triple" "x86_64-unknown-linux-gnu" "-S" "-save-temps=cwd" "-disable-free" "-clear-ast-before-backend" "-main-file-name" "lib.rs" "-mrelocation-model" "pic" "-pic-level" "2" "-pic-is-pie" "-mframe-pointer=all" "-fmath-errno" "-ffp-contract=on" "-fno-rounding-math" "-mconstructor-aliases" "-funwind-tables=2" "-target-cpu" "x86-64" "-tune-cpu" "generic" "-resource-dir" "/<ABSOLUTE_PATH_TO>/rust/build/x86_64-unknown-linux-gnu/llvm/lib/clang/21" "-ferror-limit" "19" "-fopenmp" "-fopenmp-offload-mandatory" "-fgnuc-version=4.2.1" "-fskip-odr-check-in-gmf" "-fembed-offload-object=host.out" "-fopenmp-targets=amdgcn-amd-amdhsa" "-faddrsig" "-D__GCC_HAVE_DWARF2_CFI_ASM=1" "-o" "host.s" "-x" "ir" "lib.bc"
85-
86-
"clang-21" "-cc1as" "-triple" "x86_64-unknown-linux-gnu" "-filetype" "obj" "-main-file-name" "lib.rs" "-target-cpu" "x86-64" "-mrelocation-model" "pic" "-o" "host.o" "host.s"
87-
8885
"clang-linker-wrapper" "--should-extract=gfx90a" "--device-compiler=amdgcn-amd-amdhsa=-g" "--device-compiler=amdgcn-amd-amdhsa=-save-temps=cwd" "--device-linker=amdgcn-amd-amdhsa=-lompdevice" "--host-triple=x86_64-unknown-linux-gnu" "--save-temps" "--linker-path=/ABSOlUTE_PATH_TO/rust/build/x86_64-unknown-linux-gnu/lld/bin/ld.lld" "--hash-style=gnu" "--eh-frame-hdr" "-m" "elf_x86_64" "-pie" "-dynamic-linker" "/lib64/ld-linux-x86-64.so.2" "-o" "bare" "/lib/../lib64/Scrt1.o" "/lib/../lib64/crti.o" "/ABSOLUTE_PATH_TO/crtbeginS.o" "-L/ABSOLUTE_PATH_TO/rust/build/x86_64-unknown-linux-gnu/llvm/bin/../lib/x86_64-unknown-linux-gnu" "-L/ABSOLUTE_PATH_TO/rust/build/x86_64-unknown-linux-gnu/llvm/lib/clang/21/lib/x86_64-unknown-linux-gnu" "-L/lib/../lib64" "-L/usr/lib64" "-L/lib" "-L/usr/lib" "host.o" "-lstdc++" "-lm" "-lomp" "-lomptarget" "-L/ABSOLUTE_PATH_TO/rust/build/x86_64-unknown-linux-gnu/llvm/lib" "-lgcc_s" "-lgcc" "-lpthread" "-lc" "-lgcc_s" "-lgcc" "/ABSOLUTE_PATH_TO/crtendS.o" "/lib/../lib64/crtn.o"
8986
```
9087

91-
Especially for the last three commands I recommend to not fix the paths, but rather just re-generate them by copying a bare-mode openmp example and compiling it with your clang. By adding `-###` to your clang invocation, you can see the invidual steps.
92-
You can ignore other steps, e.g. the invocation of a "clang-offload-packager".
88+
You can try to find the paths to those files on your system. However, I recommend not fixing the paths by hand, but rather just re-generating them by copying a bare-mode OpenMP example and compiling it with your clang. By adding `-###` to your clang invocation, you can see the individual steps.
89+
It will show multiple steps; just look for the clang-linker-wrapper invocation. Make sure to still include the `host.o` file, and not whatever temporary file you got when compiling your C++ example with the following call.
9390
```
9491
myclang++ -fuse-ld=lld -O3 -fopenmp -fopenmp-offload-mandatory --offload-arch=gfx90a omp_bare.cpp -o main -###
9592
```

0 commit comments

Comments
 (0)