Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Go bridge #45

Merged
merged 3 commits into from
Jul 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
module github.com/sppark/sppark

go 1.18

require github.com/supranational/blst v0.3.13
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
github.com/supranational/blst v0.3.13 h1:AYeSxdOMacwu7FBmpfloBz5pbFXDmJL33RuwnKtmTjk=
github.com/supranational/blst v0.3.13/go.mod h1:jZJtfjgudtNl4en1tzwPIV3KjUnQUvG3/j+w+fVonLw=
32 changes: 32 additions & 0 deletions go/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Go bridge for sppark

The goal is to make it possible to reuse modules targeting Rust with Go. The suggestion is to complement the CUDA source module with a Go bridge that would look something like the following. Assuming that `poc.cu` implements ~`extern "C" __attribute__((visibility("default")))`~ `SPPARK_FFI RustError::by_value cuda_func(void*)`:

```go
package poc_cu

// #include "cgo_sppark.h"
// WRAP_ERR(Error, cuda_func, void *ptr)
// { toGoError(go_err, (*cuda_func.call)(ptr)); }
import "C"

import (
sppark "github.com/supranational/sppark/go"
)

func init() {
sppark.Load("poc.cu", "-arch=native")
}

func CudaFunc() {
var err C.GoError
C.go_cuda_func(&err, nil)
if err.code != 0 {
panic(err.message)
}
}
```

In the presented case `sppark.Load()` attempts to load `poc.so`, a shared library with the name derived from the first argument to the method, that is expected to reside next to the **current** executable. If not found, the method will attempt to compile `poc.cu` with `nvcc` and retry to load it. There may be any number of wrappers implemented in the bridge module. And one needs a copy of [`cgo_sppark.h`](cgo_sppark.h) in the same directory. If so desired, the CUDA module and the Go bridge can be packaged into a Go module for the target application to `import`.

The nature of this Go module is such that if a user wants to compile the shared object prior to the application being executed for the first time, it's on the user. Because that's where the actual CUDA code is. One way is to implement a test that won't even have to make any CUDA calls, it's sufficient to copy the generated shared object to a directory of your choice. Consider [`poc_test.go`](../poc/go/poc_test.go) as a template.
37 changes: 37 additions & 0 deletions go/cgo_sppark.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#ifndef __CGO_SPPARK_H__
#define __CGO_SPPARK_H__

// C-side error value. WRAP_ERR declares the wrapped function pointer as
// returning this struct by value. |message| may be NULL; when it is not,
// toGoError() as defined in sppark.go converts it and then free()-s it.
typedef struct {
int code;
char *message;
} Error;

// Go-side counterpart of Error: same |code|, with |message| held as a Go
// string (_GoString_) so that Go code can use it directly.
typedef struct {
int code;
_GoString_ message;
} GoError;

// Converts a C-side Error into a GoError. Only declared here; a definition
// lives in the cgo preamble of sppark.go.
__attribute__((weak)) // required with go1.18 and earlier
void toGoError(GoError *go_err, Error c_err);

// WRAP_ERR(ret_t, func, args...) emits a static {call, name} record named
// |func| into the "_sppark" section, with |call| initially NULL and |name|
// set to the stringified |func|, and then opens the definition of a static
// go_##func(GoError *go_err, args...) wrapper whose body is expected to
// follow the macro invocation.
// Note that |ret_t| is not expanded anywhere: the function pointer is always
// declared as returning Error.
#define WRAP_ERR(ret_t, func, ...) __attribute__((section("_sppark"), used)) \
static struct { Error (*call)(__VA_ARGS__); const char *name; } \
func = { NULL, #func }; \
static void go_##func(GoError *go_err, __VA_ARGS__)
#if 0
// For example in the import "C" section:
//
// #include "cgo_sppark.h"
// WRAP_ERR(Error, cuda_func, type1 arg1, type2 arg2)
// { toGoError(go_err, (*cuda_func.call)(arg1, arg2)); }
//
// and then on the Go side:
...
var err C.GoError
C.go_cuda_func(&err, arg1, arg2)
if err.code != 0 {
panic(err.message)
}
...
#endif
#endif
284 changes: 284 additions & 0 deletions go/sppark.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,284 @@
package sppark

// #cgo linux LDFLAGS: -ldl -Wl,-rpath,"$ORIGIN"
//
// #ifndef GO_CGO_EXPORT_PROLOGUE_H
// #ifdef _WIN32
// # include <windows.h>
// # include <stdio.h>
// #else
// # include <dlfcn.h>
// # include <errno.h>
// #endif
// #include <string.h>
// #include <stdlib.h>
// #ifdef __SPPARK_CGO_DEBUG__
// # include <stdio.h>
// #endif
//
// #include "cgo_sppark.h"
//
// void toGoString(_GoString_ *, char *);
//
// // toGoError copies c_err.code into |go_err| and, when c_err.message is
// // not NULL, converts it to a Go string through toGoString() and then
// // free()-s the C string. go_err->message is left untouched when there
// // is no message.
// void toGoError(GoError *go_err, Error c_err)
// {
// go_err->code = c_err.code;
// if (c_err.message != NULL) {
// toGoString(&go_err->message, c_err.message);
// free(c_err.message);
// c_err.message = NULL;
// }
// }
//
// // Single-pointer wrapper handed to clone_gpu_ptr_t() and drop_gpu_ptr_t();
// // exposed to Go as GpuPtrT.
// typedef struct {
// void *ptr;
// } gpu_ptr_t;
//
// // WRAP(ret_t, func, args...) emits a static {call, name} record named
// // |func| into the "_sppark" section, with |call| initially NULL and
// // |name| set to the stringified |func|, and then opens the definition of
// // a static go_##func(args...) wrapper returning |ret_t|; the wrapper body
// // follows the macro invocation.
// #define WRAP(ret_t, func, ...) __attribute__((section("_sppark"), used)) \
// static struct { ret_t (*call)(__VA_ARGS__); const char *name; } \
// func = { NULL, #func }; \
// static ret_t go_##func(__VA_ARGS__)
//
// // The wrappers below forward to the function pointer stored in the
// // matching "_sppark" record. The pointer starts out as NULL and is filled
// // in by go_load(), so calling a wrapper before a successful go_load()
// // dereferences NULL.
// //
// // go_clone_gpu_ptr_t: forwards to clone_gpu_ptr_t and returns its result.
// WRAP(gpu_ptr_t, clone_gpu_ptr_t, gpu_ptr_t *ptr)
// { return (*clone_gpu_ptr_t.call)(ptr); }
//
// // go_drop_gpu_ptr_t: forwards to drop_gpu_ptr_t.
// WRAP(void, drop_gpu_ptr_t, gpu_ptr_t *ptr)
// { (*drop_gpu_ptr_t.call)(ptr); }
//
// // go_cuda_available: forwards to cuda_available and returns its result.
// WRAP(_Bool, cuda_available, void)
// { return (*cuda_available.call)(); }
//
// // Generic view of one "_sppark" section record as go_load() walks them:
// // |name| is the symbol to look up and |value| receives the resolved
// // address. The field order mirrors the {call, name} records emitted by
// // WRAP and WRAP_ERR.
// typedef struct {
// void *value;
// const char *name;
// } dlsym_t;
//
// // go_load loads the shared object |so_name| (LoadLibraryA on Windows,
// // dlopen elsewhere) unless a handle is already cached in the static
// // |hmod|, in which case it returns true without touching anything. After
// // a fresh load it resolves every record between __start__sppark and
// // __stop__sppark by name. Returns true on success. On failure it stores
// // an error description in |*err|, releases the handle if one was
// // obtained, clears the cache and returns false.
// static _Bool go_load(_GoString_ *err, _GoString_ so_name)
// {
// static void *hmod = NULL;
// void *h;
//
// if ((h = hmod) == NULL) {
// // Make a NUL-terminated copy of |so_name| for the loader call.
// size_t len = _GoStringLen(so_name);
// char fname[len + 1];
//
// memcpy(fname, _GoStringPtr(so_name), len);
// fname[len] = '\0';
// #ifdef _WIN32
// h = LoadLibraryA(fname);
// #else
// h = dlopen(fname, RTLD_NOW|RTLD_GLOBAL);
// #endif
// if ((hmod = h) != NULL) {
// extern dlsym_t __start__sppark, __stop__sppark;
// dlsym_t *sym;
//
// // Fill in every wrapper's function pointer; a single missing
// // symbol fails the whole load.
// for (sym = &__start__sppark; sym < &__stop__sppark; sym++) {
// #ifdef _WIN32
// sym->value = GetProcAddress(h, sym->name);
// #else
// sym->value = dlsym(h, sym->name);
// #endif
// if (sym->value == NULL) {
// // |hmod| still holds the handle so it can be released below.
// h = NULL;
// break;
// }
// #ifdef __SPPARK_CGO_DEBUG__
// printf("%p %s\n", sym->value, sym->name);
// #endif
// }
// }
// }
//
// if (h == NULL) {
// // Report the loader's error, then drop the handle (if any) and reset
// // the cache so that a later call attempts the load again.
// #ifdef _WIN32
// static char buf[24];
// snprintf(buf, sizeof(buf), "WIN32 Error #0x%x", GetLastError());
// toGoString(err, buf);
// if (hmod) FreeLibrary(hmod);
// #else
// toGoString(err, dlerror());
// if (hmod) dlclose(hmod);
// #endif
// hmod = h;
// }
//
// return h != NULL;
// }
// #endif
import "C"

import (
blst "github.com/supranational/blst/build"
"io"
"log"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
)

// toGoString is called from the C side to turn a C string into a Go string:
// it converts c_str with C.GoString and stores the result in *go_str.
//
//export toGoString
func toGoString(go_str *string, c_str *C.char) {
	converted := C.GoString(c_str)
	*go_str = converted
}

// SrcRoot is the parent of the directory that holds this source file, as
// reported by runtime.Caller at package initialization. It stays empty if
// the caller information is unavailable.
var SrcRoot string

// init records the location of the source tree in SrcRoot.
func init() {
	_, thisFile, _, found := runtime.Caller(0)
	if !found {
		return
	}
	pkgDir := filepath.Dir(thisFile)
	SrcRoot = filepath.Dir(pkgDir)
}

// Load makes the shared object that corresponds to baseName available to
// the cgo wrappers.
//
// The extension of baseName is dropped. The shared object is expected next
// to the current executable under the base name with ".so" (".dll" on
// Windows) appended, and the CUDA source is expected next to the source file
// of Load's caller as baseName + ".cu".
//
// If the source can be opened for writing and is newer than the shared
// object (or the shared object is missing), or if loading the shared object
// fails, the source is compiled with build(), passing options through, and
// the load is retried.
//
// Load panics if the executable or the caller can't be determined, if the
// build fails, or if the freshly built shared object fails to load.
func Load(baseName string, options ...string) {
	baseName = strings.TrimSuffix(baseName, filepath.Ext(baseName))

	exe, err := os.Executable()
	if err != nil {
		log.Panic(err)
	}
	dst := filepath.Join(filepath.Dir(exe), filepath.Base(baseName))
	if runtime.GOOS == "windows" {
		dst += ".dll"
	} else {
		dst += ".so"
	}

	// Caller(1) has to be evaluated in Load itself: it identifies the file
	// that called Load, which is where the .cu source is looked up.
	_, caller, _, ok := runtime.Caller(1)
	if !ok {
		log.Panic("passed the event horizon")
	}
	src := filepath.Join(filepath.Dir(caller), baseName+".cu")

	// To facilitate the edit-compile-run turnaround check if the source
	// .cu file is writable and see if it's newer than the destination
	// shared object...
	rebuild := false
	if fd, err := os.OpenFile(src, os.O_RDWR, 0); err == nil {
		srcStat, statErr := fd.Stat()
		fd.Close()
		// A failed Stat leaves srcStat nil; skip the freshness check
		// instead of dereferencing it.
		if statErr == nil {
			dstStat, dstErr := os.Stat(dst)
			rebuild = dstErr != nil || srcStat.ModTime().After(dstStat.ModTime())
		}
	}

	var goErr string

	if rebuild || !bool(C.go_load(&goErr, dst)) {
		if !build(dst, src, options...) {
			log.Panic("failed to build the shared object")
		}
		// The error from the first load attempt, if any, is no longer
		// relevant once the object has been rebuilt.
		goErr = ""
		if !bool(C.go_load(&goErr, dst)) {
			log.Panic(goErr)
		}
	}
}

// build compiles src into the shared object dst with nvcc and reports
// whether the compilation succeeded.
//
// Besides src the command line includes util/all_gpus.cpp from SrcRoot, the
// blst assembly.S and cpuid.c files, the matching include directories, and
// platform-specific flags. Each entry of custom_args that starts with "-" is
// passed through verbatim; any other entry is resolved relative to the
// directory of src, unless no such file exists, in which case it is passed
// as is. If sccache is found in PATH, nvcc is run through it.
//
// On failure the command line, the error and the combined compiler output
// are logged and false is returned.
func build(dst string, src string, custom_args ...string) bool {
	args := []string{"-shared", "-o", dst, src}
	args = append(args, "-I"+SrcRoot)
	args = append(args, filepath.Join(SrcRoot, "util", "all_gpus.cpp"))
	args = append(args, "-I"+filepath.Join(blst.SrcRoot, "src"))
	args = append(args, filepath.Join(blst.SrcRoot, "build", "assembly.S"))
	args = append(args, filepath.Join(blst.SrcRoot, "src", "cpuid.c"))
	args = append(args, "-DTAKE_RESPONSIBILITY_FOR_ERROR_MESSAGE")
	if runtime.GOOS == "windows" {
		args = append(args, "-ccbin=clang-cl")
	} else {
		args = append(args, "-Xcompiler", "-fPIC,-fvisibility=hidden")
		args = append(args, "-Xlinker", "-Bsymbolic")
	}
	args = append(args, "-cudart=shared")

	srcDir := filepath.Dir(src)
	for _, arg := range custom_args {
		if strings.HasPrefix(arg, "-") {
			args = append(args, arg)
			continue
		}
		// Prefer a file next to the source; fall back to the argument as
		// given when there is no such file.
		file := filepath.Join(srcDir, arg)
		if _, err := os.Stat(file); os.IsNotExist(err) {
			args = append(args, arg)
		} else {
			args = append(args, file)
		}
	}

	nvcc := "nvcc"

	if sccache, err := exec.LookPath("sccache"); err == nil {
		// sccache takes the real compiler as its first argument.
		args = append([]string{nvcc}, args...)
		nvcc = sccache
	}

	cmd := exec.Command(nvcc, args...)

	if out, err := cmd.CombinedOutput(); err != nil {
		// Not log.Fatal: that would exit the process right here, making
		// the false return unreachable and denying the caller a chance to
		// react to the failure.
		log.Print(cmd.String(), ": ", err, "\n", string(out))
		return false
	}

	return true
}

// Exfiltrate copies every shared object ("*.so", or "*.dll" on Windows)
// found next to the current executable into the directory given as the
// first optional argument, or into the current working directory when no
// argument is given. A file that already is its own destination is left
// alone. An existing destination file is overwritten in full.
//
// It returns the first error encountered while locating, opening or copying
// the files.
func Exfiltrate(optional ...string) error {
	exe, err := os.Executable()
	if err != nil {
		return err
	}

	glob := "*.so"
	if runtime.GOOS == "windows" {
		glob = "*.dll"
	}
	files, err := filepath.Glob(filepath.Join(filepath.Dir(exe), glob))
	if err != nil {
		return err
	}

	var dir string
	if len(optional) > 0 {
		dir = optional[0]
	}

	for _, file := range files {
		dst := filepath.Join(dir, filepath.Base(file))
		if err := exfiltrateFile(file, dst); err != nil {
			return err
		}
	}

	return nil
}

// exfiltrateFile copies src over dst unless both name the same file.
func exfiltrateFile(src, dst string) (err error) {
	finp, err := os.Open(src)
	if err != nil {
		return err
	}
	defer finp.Close()

	// Deliberately no O_TRUNC: dst may turn out to be src itself, and
	// truncating on open would destroy it before that can be detected.
	fout, err := os.OpenFile(dst, os.O_WRONLY|os.O_CREATE, 0644)
	if err != nil {
		return err
	}
	defer func() {
		// A failed Close can mean lost data; report it unless an earlier
		// error is already being returned.
		if cerr := fout.Close(); err == nil {
			err = cerr
		}
	}()

	finpStat, err := finp.Stat()
	if err != nil {
		return err
	}
	foutStat, err := fout.Stat()
	if err != nil {
		return err
	}
	if os.SameFile(finpStat, foutStat) {
		return nil
	}

	log.Print("copying ", src)
	// Drop previous contents so that a longer stale file doesn't leave
	// trailing bytes behind the fresh copy.
	if err = fout.Truncate(0); err != nil {
		return err
	}
	_, err = io.Copy(fout, finp)
	return err
}

// GpuPtrT is the Go name for the C gpu_ptr_t structure.
type GpuPtrT = C.gpu_ptr_t

// Clone passes ptr to the clone_gpu_ptr_t wrapper and returns the result.
func (ptr *GpuPtrT) Clone() GpuPtrT {
	cloned := C.go_clone_gpu_ptr_t(ptr)
	return cloned
}

// Drop passes ptr to the drop_gpu_ptr_t wrapper and then clears the wrapped
// pointer.
func (ptr *GpuPtrT) Drop() {
	C.go_drop_gpu_ptr_t(ptr)
	(*ptr).ptr = nil
}

// IsCudaAvailable returns the result of the cuda_available wrapper as a Go
// bool.
func IsCudaAvailable() bool {
	available := C.go_cuda_available()
	return bool(available)
}
Loading
Loading