//===-RTLs/nec-aurora/src/rtl.cpp - Target RTLs Implementation - C++ -*-======//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//
//
// RTL for NEC Aurora TSUBASA machines
//
//===----------------------------------------------------------------------===//
136b9e43c6SManoel Roemmer
#include <algorithm>
#include <cassert>
#include <cerrno>
#include <cstring>
#include <list>
#include <stdlib.h>
#include <string>
#include <sys/stat.h>
#include <unistd.h>
#include <ve_offload.h>
#include <vector>
#include <veosinfo/veosinfo.h>
256b9e43c6SManoel Roemmer
26c816ee13SManoel Roemmer #include "Debug.h"
27c816ee13SManoel Roemmer #include "omptargetplugin.h"
28c816ee13SManoel Roemmer
29c816ee13SManoel Roemmer #ifndef TARGET_NAME
30c816ee13SManoel Roemmer #define TARGET_NAME VE
31c816ee13SManoel Roemmer #endif
32c816ee13SManoel Roemmer
33c816ee13SManoel Roemmer #define DEBUG_PREFIX "Target " GETNAME(TARGET_NAME) " RTL"
34c816ee13SManoel Roemmer
356b9e43c6SManoel Roemmer #ifndef TARGET_ELF_ID
366b9e43c6SManoel Roemmer #define TARGET_ELF_ID 0
376b9e43c6SManoel Roemmer #endif
386b9e43c6SManoel Roemmer
39a81c68aeSShilei Tian #include "elf_common.h"
406b9e43c6SManoel Roemmer
/// Record of one target image that was written to a temporary file and handed
/// to VEO.
struct DynLibTy {
  /// Path of the temporary image file. RTLDeviceInfoTy's destructor passes it
  /// to remove() when it is non-null.
  char *FileName;
  /// Library handle stored alongside the file name by __tgt_rtl_load_binary():
  /// the veo_load_library() result for dynamically linked images, 0 otherwise.
  uint64_t VeoLibHandle;
};
456b9e43c6SManoel Roemmer
466b9e43c6SManoel Roemmer /// Keep entries table per device.
476b9e43c6SManoel Roemmer struct FuncOrGblEntryTy {
486b9e43c6SManoel Roemmer __tgt_target_table Table;
496b9e43c6SManoel Roemmer std::vector<__tgt_offload_entry> Entries;
506b9e43c6SManoel Roemmer };
516b9e43c6SManoel Roemmer
526b9e43c6SManoel Roemmer class RTLDeviceInfoTy {
536b9e43c6SManoel Roemmer std::vector<std::list<FuncOrGblEntryTy>> FuncOrGblEntry;
546b9e43c6SManoel Roemmer
556b9e43c6SManoel Roemmer public:
566b9e43c6SManoel Roemmer std::vector<struct veo_proc_handle *> ProcHandles;
576b9e43c6SManoel Roemmer std::vector<struct veo_thr_ctxt *> Contexts;
586b9e43c6SManoel Roemmer std::vector<uint64_t> LibraryHandles;
596b9e43c6SManoel Roemmer std::list<DynLibTy> DynLibs;
606b9e43c6SManoel Roemmer // Maps OpenMP device Ids to Ve nodeids
616b9e43c6SManoel Roemmer std::vector<int> NodeIds;
626b9e43c6SManoel Roemmer
buildOffloadTableFromHost(int32_t device_id,uint64_t VeoLibHandle,__tgt_offload_entry * HostBegin,__tgt_offload_entry * HostEnd)636b9e43c6SManoel Roemmer void buildOffloadTableFromHost(int32_t device_id, uint64_t VeoLibHandle,
646b9e43c6SManoel Roemmer __tgt_offload_entry *HostBegin,
656b9e43c6SManoel Roemmer __tgt_offload_entry *HostEnd) {
666b9e43c6SManoel Roemmer FuncOrGblEntry[device_id].emplace_back();
676b9e43c6SManoel Roemmer std::vector<__tgt_offload_entry> &T =
686b9e43c6SManoel Roemmer FuncOrGblEntry[device_id].back().Entries;
696b9e43c6SManoel Roemmer T.clear();
706b9e43c6SManoel Roemmer for (__tgt_offload_entry *i = HostBegin; i != HostEnd; ++i) {
716b9e43c6SManoel Roemmer char *SymbolName = i->name;
726b9e43c6SManoel Roemmer // we have not enough access to the target memory to conveniently parse
736b9e43c6SManoel Roemmer // the offload table there so we need to lookup every symbol with the host
746b9e43c6SManoel Roemmer // table
756b9e43c6SManoel Roemmer DP("Looking up symbol: %s\n", SymbolName);
766b9e43c6SManoel Roemmer uint64_t SymbolTargetAddr =
776b9e43c6SManoel Roemmer veo_get_sym(ProcHandles[device_id], VeoLibHandle, SymbolName);
786b9e43c6SManoel Roemmer __tgt_offload_entry Entry;
796b9e43c6SManoel Roemmer
806b9e43c6SManoel Roemmer if (!SymbolTargetAddr) {
816b9e43c6SManoel Roemmer DP("Symbol %s not found in target image\n", SymbolName);
826b9e43c6SManoel Roemmer Entry = {NULL, NULL, 0, 0, 0};
836b9e43c6SManoel Roemmer } else {
846b9e43c6SManoel Roemmer DP("Found symbol %s successfully in target image (addr: %p)\n",
856b9e43c6SManoel Roemmer SymbolName, reinterpret_cast<void *>(SymbolTargetAddr));
86ea939571SJohannes Doerfert Entry = {reinterpret_cast<void *>(SymbolTargetAddr), i->name, i->size,
87ea939571SJohannes Doerfert i->flags, 0};
886b9e43c6SManoel Roemmer }
896b9e43c6SManoel Roemmer
906b9e43c6SManoel Roemmer T.push_back(Entry);
916b9e43c6SManoel Roemmer }
926b9e43c6SManoel Roemmer
936b9e43c6SManoel Roemmer FuncOrGblEntry[device_id].back().Table.EntriesBegin = &T.front();
946b9e43c6SManoel Roemmer FuncOrGblEntry[device_id].back().Table.EntriesEnd = &T.back() + 1;
956b9e43c6SManoel Roemmer }
966b9e43c6SManoel Roemmer
getOffloadTable(int32_t device_id)976b9e43c6SManoel Roemmer __tgt_target_table *getOffloadTable(int32_t device_id) {
986b9e43c6SManoel Roemmer return &FuncOrGblEntry[device_id].back().Table;
996b9e43c6SManoel Roemmer }
1006b9e43c6SManoel Roemmer
RTLDeviceInfoTy()1016b9e43c6SManoel Roemmer RTLDeviceInfoTy() {
1026b9e43c6SManoel Roemmer
1036b9e43c6SManoel Roemmer struct ve_nodeinfo node_info;
1046b9e43c6SManoel Roemmer ve_node_info(&node_info);
1056b9e43c6SManoel Roemmer
1066b9e43c6SManoel Roemmer // Build a predictable mapping between VE node ids and OpenMP device ids.
1076b9e43c6SManoel Roemmer // This is necessary, because nodes can be missing or offline and (active)
1086b9e43c6SManoel Roemmer // node ids are thus not consecutive. The entries in ve_nodeinfo may also
1096b9e43c6SManoel Roemmer // not be in the order of their node ids.
1106b9e43c6SManoel Roemmer for (int i = 0; i < node_info.total_node_count; ++i) {
1116b9e43c6SManoel Roemmer if (node_info.status[i] == 0) {
1126b9e43c6SManoel Roemmer NodeIds.push_back(node_info.nodeid[i]);
1136b9e43c6SManoel Roemmer }
1146b9e43c6SManoel Roemmer }
1156b9e43c6SManoel Roemmer
1166b9e43c6SManoel Roemmer // Because the entries in ve_nodeinfo may not be in the order of their node
1176b9e43c6SManoel Roemmer // ids, we sort NodeIds to get a predictable mapping.
1186b9e43c6SManoel Roemmer std::sort(NodeIds.begin(), NodeIds.end());
1196b9e43c6SManoel Roemmer
1206b9e43c6SManoel Roemmer int NumDevices = NodeIds.size();
1216b9e43c6SManoel Roemmer DP("Found %i VE devices\n", NumDevices);
1226b9e43c6SManoel Roemmer ProcHandles.resize(NumDevices, NULL);
1236b9e43c6SManoel Roemmer Contexts.resize(NumDevices, NULL);
1246b9e43c6SManoel Roemmer FuncOrGblEntry.resize(NumDevices);
1256b9e43c6SManoel Roemmer LibraryHandles.resize(NumDevices);
1266b9e43c6SManoel Roemmer }
1276b9e43c6SManoel Roemmer
~RTLDeviceInfoTy()1286b9e43c6SManoel Roemmer ~RTLDeviceInfoTy() {
1296b9e43c6SManoel Roemmer for (auto &ctx : Contexts) {
1306b9e43c6SManoel Roemmer if (ctx != NULL) {
1316b9e43c6SManoel Roemmer if (veo_context_close(ctx) != 0) {
1326b9e43c6SManoel Roemmer DP("Failed to close VEO context.\n");
1336b9e43c6SManoel Roemmer }
1346b9e43c6SManoel Roemmer }
1356b9e43c6SManoel Roemmer }
1366b9e43c6SManoel Roemmer
1376b9e43c6SManoel Roemmer for (auto &hdl : ProcHandles) {
1386b9e43c6SManoel Roemmer if (hdl != NULL) {
1396b9e43c6SManoel Roemmer veo_proc_destroy(hdl);
1406b9e43c6SManoel Roemmer }
1416b9e43c6SManoel Roemmer }
1426b9e43c6SManoel Roemmer
1436b9e43c6SManoel Roemmer for (auto &lib : DynLibs) {
1446b9e43c6SManoel Roemmer if (lib.FileName) {
1456b9e43c6SManoel Roemmer remove(lib.FileName);
1466b9e43c6SManoel Roemmer }
1476b9e43c6SManoel Roemmer }
1486b9e43c6SManoel Roemmer }
1496b9e43c6SManoel Roemmer };
1506b9e43c6SManoel Roemmer
1516b9e43c6SManoel Roemmer static RTLDeviceInfoTy DeviceInfo;
1526b9e43c6SManoel Roemmer
target_run_function_wait(uint32_t DeviceID,uint64_t FuncAddr,struct veo_args * args,uint64_t * RetVal)1536b9e43c6SManoel Roemmer static int target_run_function_wait(uint32_t DeviceID, uint64_t FuncAddr,
1546b9e43c6SManoel Roemmer struct veo_args *args, uint64_t *RetVal) {
1556b9e43c6SManoel Roemmer DP("Running function with entry point %p\n",
1566b9e43c6SManoel Roemmer reinterpret_cast<void *>(FuncAddr));
1576b9e43c6SManoel Roemmer uint64_t RequestHandle =
1586b9e43c6SManoel Roemmer veo_call_async(DeviceInfo.Contexts[DeviceID], FuncAddr, args);
1596b9e43c6SManoel Roemmer if (RequestHandle == VEO_REQUEST_ID_INVALID) {
1606b9e43c6SManoel Roemmer DP("Execution of entry point %p failed\n",
1616b9e43c6SManoel Roemmer reinterpret_cast<void *>(FuncAddr));
1626b9e43c6SManoel Roemmer return OFFLOAD_FAIL;
1636b9e43c6SManoel Roemmer }
1646b9e43c6SManoel Roemmer
1656b9e43c6SManoel Roemmer DP("Function at address %p called (VEO request ID: %" PRIu64 ")\n",
1666b9e43c6SManoel Roemmer reinterpret_cast<void *>(FuncAddr), RequestHandle);
1676b9e43c6SManoel Roemmer
1686b9e43c6SManoel Roemmer int ret = veo_call_wait_result(DeviceInfo.Contexts[DeviceID], RequestHandle,
1696b9e43c6SManoel Roemmer RetVal);
1706b9e43c6SManoel Roemmer if (ret != 0) {
1716b9e43c6SManoel Roemmer DP("Waiting for entry point %p failed (Error code %d)\n",
1726b9e43c6SManoel Roemmer reinterpret_cast<void *>(FuncAddr), ret);
1736b9e43c6SManoel Roemmer return OFFLOAD_FAIL;
1746b9e43c6SManoel Roemmer }
1756b9e43c6SManoel Roemmer return OFFLOAD_SUCCESS;
1766b9e43c6SManoel Roemmer }
1776b9e43c6SManoel Roemmer
1786b9e43c6SManoel Roemmer // Return the number of available devices of the type supported by the
1796b9e43c6SManoel Roemmer // target RTL.
__tgt_rtl_number_of_devices(void)1806b9e43c6SManoel Roemmer int32_t __tgt_rtl_number_of_devices(void) { return DeviceInfo.NodeIds.size(); }
1816b9e43c6SManoel Roemmer
1826b9e43c6SManoel Roemmer // Return an integer different from zero if the provided device image can be
1836b9e43c6SManoel Roemmer // supported by the runtime. The functionality is similar to comparing the
1846b9e43c6SManoel Roemmer // result of __tgt__rtl__load__binary to NULL. However, this is meant to be a
1856b9e43c6SManoel Roemmer // lightweight query to determine if the RTL is suitable for an image without
1866b9e43c6SManoel Roemmer // having to load the library, which can be expensive.
__tgt_rtl_is_valid_binary(__tgt_device_image * Image)1876b9e43c6SManoel Roemmer int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) {
1886b9e43c6SManoel Roemmer #if TARGET_ELF_ID < 1
1896b9e43c6SManoel Roemmer return 0;
1906b9e43c6SManoel Roemmer #else
1916b9e43c6SManoel Roemmer return elf_check_machine(Image, TARGET_ELF_ID);
1926b9e43c6SManoel Roemmer #endif
1936b9e43c6SManoel Roemmer }
1946b9e43c6SManoel Roemmer
1956b9e43c6SManoel Roemmer // Initialize the specified device. In case of success return 0; otherwise
1966b9e43c6SManoel Roemmer // return an error code.
__tgt_rtl_init_device(int32_t ID)1976b9e43c6SManoel Roemmer int32_t __tgt_rtl_init_device(int32_t ID) {
1986b9e43c6SManoel Roemmer DP("Available VEO version: %i\n", veo_api_version());
1996b9e43c6SManoel Roemmer
2006b9e43c6SManoel Roemmer // At the moment we do not really initialize (i.e. create a process or
2016b9e43c6SManoel Roemmer // context on) the device here, but in "__tgt_rtl_load_binary".
2026b9e43c6SManoel Roemmer // The reason for this is, that, when we create a process for a statically
2036b9e43c6SManoel Roemmer // linked binary, the VEO api needs us to already supply the binary (but we
2046b9e43c6SManoel Roemmer // can load a dynamically linked binary later, after we create the process).
2056b9e43c6SManoel Roemmer // At this stage, we cannot check if we have a dynamically or statically
2066b9e43c6SManoel Roemmer // linked binary so we defer process creation until we know.
2076b9e43c6SManoel Roemmer return OFFLOAD_SUCCESS;
2086b9e43c6SManoel Roemmer }
2096b9e43c6SManoel Roemmer
2106b9e43c6SManoel Roemmer // Pass an executable image section described by image to the specified
2116b9e43c6SManoel Roemmer // device and prepare an address table of target entities. In case of error,
2126b9e43c6SManoel Roemmer // return NULL. Otherwise, return a pointer to the built address table.
2136b9e43c6SManoel Roemmer // Individual entries in the table may also be NULL, when the corresponding
2146b9e43c6SManoel Roemmer // offload region is not supported on the target device.
__tgt_rtl_load_binary(int32_t ID,__tgt_device_image * Image)2156b9e43c6SManoel Roemmer __tgt_target_table *__tgt_rtl_load_binary(int32_t ID,
2166b9e43c6SManoel Roemmer __tgt_device_image *Image) {
2176b9e43c6SManoel Roemmer DP("Dev %d: load binary from " DPxMOD " image\n", ID,
2186b9e43c6SManoel Roemmer DPxPTR(Image->ImageStart));
2196b9e43c6SManoel Roemmer
2206b9e43c6SManoel Roemmer assert(ID >= 0 && "bad dev id");
2216b9e43c6SManoel Roemmer
2226b9e43c6SManoel Roemmer size_t ImageSize = (size_t)Image->ImageEnd - (size_t)Image->ImageStart;
2236b9e43c6SManoel Roemmer size_t NumEntries = (size_t)(Image->EntriesEnd - Image->EntriesBegin);
2246b9e43c6SManoel Roemmer DP("Expecting to have %zd entries defined.\n", NumEntries);
2256b9e43c6SManoel Roemmer
2266b9e43c6SManoel Roemmer // load dynamic library and get the entry points. We use the dl library
2276b9e43c6SManoel Roemmer // to do the loading of the library, but we could do it directly to avoid the
2286b9e43c6SManoel Roemmer // dump to the temporary file.
2296b9e43c6SManoel Roemmer //
2306b9e43c6SManoel Roemmer // 1) Create tmp file with the library contents.
2316b9e43c6SManoel Roemmer // 2) Use dlopen to load the file and dlsym to retrieve the symbols.
2326b9e43c6SManoel Roemmer char tmp_name[] = "/tmp/tmpfile_XXXXXX";
2336b9e43c6SManoel Roemmer int tmp_fd = mkstemp(tmp_name);
2346b9e43c6SManoel Roemmer
2356b9e43c6SManoel Roemmer if (tmp_fd == -1) {
2366b9e43c6SManoel Roemmer return NULL;
2376b9e43c6SManoel Roemmer }
2386b9e43c6SManoel Roemmer
2396b9e43c6SManoel Roemmer FILE *ftmp = fdopen(tmp_fd, "wb");
2406b9e43c6SManoel Roemmer
2416b9e43c6SManoel Roemmer if (!ftmp) {
2426b9e43c6SManoel Roemmer DP("fdopen() for %s failed. Could not write target image\n", tmp_name);
2436b9e43c6SManoel Roemmer return NULL;
2446b9e43c6SManoel Roemmer }
2456b9e43c6SManoel Roemmer
2466b9e43c6SManoel Roemmer fwrite(Image->ImageStart, ImageSize, 1, ftmp);
2476b9e43c6SManoel Roemmer
2486b9e43c6SManoel Roemmer // at least for the static case we need to change the permissions
2496b9e43c6SManoel Roemmer chmod(tmp_name, 0700);
2506b9e43c6SManoel Roemmer
2516b9e43c6SManoel Roemmer DP("Wrote target image to %s. ImageSize=%zu\n", tmp_name, ImageSize);
2526b9e43c6SManoel Roemmer
2536b9e43c6SManoel Roemmer fclose(ftmp);
2546b9e43c6SManoel Roemmer
2556b9e43c6SManoel Roemmer // See comment in "__tgt_rtl_init_device"
2566b9e43c6SManoel Roemmer bool is_dyn = true;
2576b9e43c6SManoel Roemmer if (DeviceInfo.ProcHandles[ID] == NULL) {
2586b9e43c6SManoel Roemmer struct veo_proc_handle *proc_handle;
2596b9e43c6SManoel Roemmer is_dyn = elf_is_dynamic(Image);
2606b9e43c6SManoel Roemmer // If we have a dynamically linked image, we create the process handle, then
2616b9e43c6SManoel Roemmer // the thread, and then load the image.
2626b9e43c6SManoel Roemmer // If we have a statically linked image, we need to create the process
2636b9e43c6SManoel Roemmer // handle and load the image at the same time with veo_proc_create_static().
2646b9e43c6SManoel Roemmer if (is_dyn) {
2656b9e43c6SManoel Roemmer proc_handle = veo_proc_create(DeviceInfo.NodeIds[ID]);
2666b9e43c6SManoel Roemmer if (!proc_handle) {
2676b9e43c6SManoel Roemmer DP("veo_proc_create() failed for device %d\n", ID);
2686b9e43c6SManoel Roemmer return NULL;
2696b9e43c6SManoel Roemmer }
2706b9e43c6SManoel Roemmer } else {
2716b9e43c6SManoel Roemmer proc_handle = veo_proc_create_static(DeviceInfo.NodeIds[ID], tmp_name);
2726b9e43c6SManoel Roemmer if (!proc_handle) {
2736b9e43c6SManoel Roemmer DP("veo_proc_create_static() failed for device %d, image=%s\n", ID,
2746b9e43c6SManoel Roemmer tmp_name);
2756b9e43c6SManoel Roemmer return NULL;
2766b9e43c6SManoel Roemmer }
2776b9e43c6SManoel Roemmer }
2786b9e43c6SManoel Roemmer DeviceInfo.ProcHandles[ID] = proc_handle;
2796b9e43c6SManoel Roemmer }
2806b9e43c6SManoel Roemmer
2816b9e43c6SManoel Roemmer if (DeviceInfo.Contexts[ID] == NULL) {
2826b9e43c6SManoel Roemmer struct veo_thr_ctxt *ctx = veo_context_open(DeviceInfo.ProcHandles[ID]);
2836b9e43c6SManoel Roemmer
2846b9e43c6SManoel Roemmer if (!ctx) {
2856b9e43c6SManoel Roemmer DP("veo_context_open() failed: %s\n", std::strerror(errno));
2866b9e43c6SManoel Roemmer return NULL;
2876b9e43c6SManoel Roemmer }
2886b9e43c6SManoel Roemmer
2896b9e43c6SManoel Roemmer DeviceInfo.Contexts[ID] = ctx;
2906b9e43c6SManoel Roemmer }
2916b9e43c6SManoel Roemmer
2926b9e43c6SManoel Roemmer DP("Aurora device successfully initialized with loaded binary: "
2936b9e43c6SManoel Roemmer "proc_handle=%p, ctx=%p\n",
2946b9e43c6SManoel Roemmer DeviceInfo.ProcHandles[ID], DeviceInfo.Contexts[ID]);
2956b9e43c6SManoel Roemmer
2966b9e43c6SManoel Roemmer uint64_t LibHandle = 0UL;
2976b9e43c6SManoel Roemmer if (is_dyn) {
2986b9e43c6SManoel Roemmer LibHandle = veo_load_library(DeviceInfo.ProcHandles[ID], tmp_name);
2996b9e43c6SManoel Roemmer
3006b9e43c6SManoel Roemmer if (!LibHandle) {
3016b9e43c6SManoel Roemmer DP("veo_load_library() failed: LibHandle=%" PRIu64
3026b9e43c6SManoel Roemmer " Name=%s. Set env VEORUN_BIN for static linked target code.\n",
3036b9e43c6SManoel Roemmer LibHandle, tmp_name);
3046b9e43c6SManoel Roemmer return NULL;
3056b9e43c6SManoel Roemmer }
3066b9e43c6SManoel Roemmer
3076b9e43c6SManoel Roemmer DP("Successfully loaded library dynamically\n");
3086b9e43c6SManoel Roemmer } else {
3096b9e43c6SManoel Roemmer DP("Symbol table is expected to have been created by "
3106b9e43c6SManoel Roemmer "veo_create_proc_static()\n");
3116b9e43c6SManoel Roemmer }
3126b9e43c6SManoel Roemmer
3136b9e43c6SManoel Roemmer DynLibTy Lib = {tmp_name, LibHandle};
3146b9e43c6SManoel Roemmer DeviceInfo.DynLibs.push_back(Lib);
3156b9e43c6SManoel Roemmer DeviceInfo.LibraryHandles[ID] = LibHandle;
3166b9e43c6SManoel Roemmer
3176b9e43c6SManoel Roemmer DeviceInfo.buildOffloadTableFromHost(ID, LibHandle, Image->EntriesBegin,
3186b9e43c6SManoel Roemmer Image->EntriesEnd);
3196b9e43c6SManoel Roemmer
3206b9e43c6SManoel Roemmer return DeviceInfo.getOffloadTable(ID);
3216b9e43c6SManoel Roemmer }
3226b9e43c6SManoel Roemmer
3236b9e43c6SManoel Roemmer // Allocate data on the particular target device, of the specified size.
3246b9e43c6SManoel Roemmer // HostPtr is a address of the host data the allocated target data
3256b9e43c6SManoel Roemmer // will be associated with (HostPtr may be NULL if it is not known at
3266b9e43c6SManoel Roemmer // allocation time, like for example it would be for target data that
3276b9e43c6SManoel Roemmer // is allocated by omp_target_alloc() API). Return address of the
3286b9e43c6SManoel Roemmer // allocated data on the target that will be used by libomptarget.so to
3296b9e43c6SManoel Roemmer // initialize the target data mapping structures. These addresses are
3306b9e43c6SManoel Roemmer // used to generate a table of target variables to pass to
3316b9e43c6SManoel Roemmer // __tgt_rtl_run_region(). The __tgt_rtl_data_alloc() returns NULL in
3326b9e43c6SManoel Roemmer // case an error occurred on the target device.
__tgt_rtl_data_alloc(int32_t ID,int64_t Size,void * HostPtr,int32_t kind)3332468fdd9SGeorge Rokos void *__tgt_rtl_data_alloc(int32_t ID, int64_t Size, void *HostPtr,
3342468fdd9SGeorge Rokos int32_t kind) {
3356b9e43c6SManoel Roemmer int ret;
3366b9e43c6SManoel Roemmer uint64_t addr;
3376b9e43c6SManoel Roemmer
3382468fdd9SGeorge Rokos if (kind != TARGET_ALLOC_DEFAULT) {
3392468fdd9SGeorge Rokos REPORT("Invalid target data allocation kind or requested allocator not "
3402468fdd9SGeorge Rokos "implemented yet\n");
3412468fdd9SGeorge Rokos return NULL;
3422468fdd9SGeorge Rokos }
3432468fdd9SGeorge Rokos
3446b9e43c6SManoel Roemmer if (DeviceInfo.ProcHandles[ID] == NULL) {
3456b9e43c6SManoel Roemmer struct veo_proc_handle *proc_handle;
3466b9e43c6SManoel Roemmer proc_handle = veo_proc_create(DeviceInfo.NodeIds[ID]);
3476b9e43c6SManoel Roemmer if (!proc_handle) {
3486b9e43c6SManoel Roemmer DP("veo_proc_create() failed for device %d\n", ID);
3496b9e43c6SManoel Roemmer return NULL;
3506b9e43c6SManoel Roemmer }
3516b9e43c6SManoel Roemmer DeviceInfo.ProcHandles[ID] = proc_handle;
3526b9e43c6SManoel Roemmer DP("Aurora device successfully initialized: proc_handle=%p", proc_handle);
3536b9e43c6SManoel Roemmer }
3546b9e43c6SManoel Roemmer
3556b9e43c6SManoel Roemmer ret = veo_alloc_mem(DeviceInfo.ProcHandles[ID], &addr, Size);
3566b9e43c6SManoel Roemmer DP("Allocate target memory: device=%d, target addr=%p, size=%" PRIu64 "\n",
3576b9e43c6SManoel Roemmer ID, reinterpret_cast<void *>(addr), Size);
3586b9e43c6SManoel Roemmer if (ret != 0) {
359ea939571SJohannes Doerfert DP("veo_alloc_mem(%d, %p, %" PRIu64 ") failed with error code %d\n", ID,
360ea939571SJohannes Doerfert reinterpret_cast<void *>(addr), Size, ret);
3616b9e43c6SManoel Roemmer return NULL;
3626b9e43c6SManoel Roemmer }
3636b9e43c6SManoel Roemmer
3646b9e43c6SManoel Roemmer return reinterpret_cast<void *>(addr);
3656b9e43c6SManoel Roemmer }
3666b9e43c6SManoel Roemmer
3676b9e43c6SManoel Roemmer // Pass the data content to the target device using the target address.
3686b9e43c6SManoel Roemmer // In case of success, return zero. Otherwise, return an error code.
__tgt_rtl_data_submit(int32_t ID,void * TargetPtr,void * HostPtr,int64_t Size)3696b9e43c6SManoel Roemmer int32_t __tgt_rtl_data_submit(int32_t ID, void *TargetPtr, void *HostPtr,
3706b9e43c6SManoel Roemmer int64_t Size) {
3716b9e43c6SManoel Roemmer int ret = veo_write_mem(DeviceInfo.ProcHandles[ID], (uint64_t)TargetPtr,
3726b9e43c6SManoel Roemmer HostPtr, (size_t)Size);
3736b9e43c6SManoel Roemmer if (ret != 0) {
3746b9e43c6SManoel Roemmer DP("veo_write_mem() failed with error code %d\n", ret);
3756b9e43c6SManoel Roemmer return OFFLOAD_FAIL;
3766b9e43c6SManoel Roemmer }
3776b9e43c6SManoel Roemmer return OFFLOAD_SUCCESS;
3786b9e43c6SManoel Roemmer }
3796b9e43c6SManoel Roemmer
3806b9e43c6SManoel Roemmer // Retrieve the data content from the target device using its address.
3816b9e43c6SManoel Roemmer // In case of success, return zero. Otherwise, return an error code.
__tgt_rtl_data_retrieve(int32_t ID,void * HostPtr,void * TargetPtr,int64_t Size)3826b9e43c6SManoel Roemmer int32_t __tgt_rtl_data_retrieve(int32_t ID, void *HostPtr, void *TargetPtr,
3836b9e43c6SManoel Roemmer int64_t Size) {
3846b9e43c6SManoel Roemmer int ret = veo_read_mem(DeviceInfo.ProcHandles[ID], HostPtr,
3856b9e43c6SManoel Roemmer (uint64_t)TargetPtr, Size);
3866b9e43c6SManoel Roemmer if (ret != 0) {
3876b9e43c6SManoel Roemmer DP("veo_read_mem() failed with error code %d\n", ret);
3886b9e43c6SManoel Roemmer return OFFLOAD_FAIL;
3896b9e43c6SManoel Roemmer }
3906b9e43c6SManoel Roemmer return OFFLOAD_SUCCESS;
3916b9e43c6SManoel Roemmer }
3926b9e43c6SManoel Roemmer
3936b9e43c6SManoel Roemmer // De-allocate the data referenced by target ptr on the device. In case of
3946b9e43c6SManoel Roemmer // success, return zero. Otherwise, return an error code.
__tgt_rtl_data_delete(int32_t ID,void * TargetPtr)3956b9e43c6SManoel Roemmer int32_t __tgt_rtl_data_delete(int32_t ID, void *TargetPtr) {
3966b9e43c6SManoel Roemmer int ret = veo_free_mem(DeviceInfo.ProcHandles[ID], (uint64_t)TargetPtr);
3976b9e43c6SManoel Roemmer
3986b9e43c6SManoel Roemmer if (ret != 0) {
3996b9e43c6SManoel Roemmer DP("veo_free_mem() failed with error code %d\n", ret);
4006b9e43c6SManoel Roemmer return OFFLOAD_FAIL;
4016b9e43c6SManoel Roemmer }
4026b9e43c6SManoel Roemmer return OFFLOAD_SUCCESS;
4036b9e43c6SManoel Roemmer }
4046b9e43c6SManoel Roemmer
4056b9e43c6SManoel Roemmer // Similar to __tgt_rtl_run_target_region, but additionally specify the
4066b9e43c6SManoel Roemmer // number of teams to be created and a number of threads in each team.
__tgt_rtl_run_target_team_region(int32_t ID,void * Entry,void ** Args,ptrdiff_t * Offsets,int32_t NumArgs,int32_t NumTeams,int32_t ThreadLimit,uint64_t loop_tripcount)4076b9e43c6SManoel Roemmer int32_t __tgt_rtl_run_target_team_region(int32_t ID, void *Entry, void **Args,
4086b9e43c6SManoel Roemmer ptrdiff_t *Offsets, int32_t NumArgs,
4096b9e43c6SManoel Roemmer int32_t NumTeams, int32_t ThreadLimit,
4106b9e43c6SManoel Roemmer uint64_t loop_tripcount) {
4116b9e43c6SManoel Roemmer int ret;
4126b9e43c6SManoel Roemmer
4136b9e43c6SManoel Roemmer // ignore team num and thread limit.
4146b9e43c6SManoel Roemmer std::vector<void *> ptrs(NumArgs);
4156b9e43c6SManoel Roemmer
4166b9e43c6SManoel Roemmer struct veo_args *TargetArgs;
4176b9e43c6SManoel Roemmer TargetArgs = veo_args_alloc();
4186b9e43c6SManoel Roemmer
4196b9e43c6SManoel Roemmer if (TargetArgs == NULL) {
4206b9e43c6SManoel Roemmer DP("Could not allocate VEO args\n");
4216b9e43c6SManoel Roemmer return OFFLOAD_FAIL;
4226b9e43c6SManoel Roemmer }
4236b9e43c6SManoel Roemmer
4246b9e43c6SManoel Roemmer for (int i = 0; i < NumArgs; ++i) {
4256b9e43c6SManoel Roemmer ret = veo_args_set_u64(TargetArgs, i, (intptr_t)Args[i]);
4266b9e43c6SManoel Roemmer
4276b9e43c6SManoel Roemmer if (ret != 0) {
428ea939571SJohannes Doerfert DP("veo_args_set_u64() has returned %d for argnum=%d and value %p\n", ret,
429ea939571SJohannes Doerfert i, Args[i]);
4306b9e43c6SManoel Roemmer return OFFLOAD_FAIL;
4316b9e43c6SManoel Roemmer }
4326b9e43c6SManoel Roemmer }
4336b9e43c6SManoel Roemmer
4346b9e43c6SManoel Roemmer uint64_t RetVal;
4356b9e43c6SManoel Roemmer if (target_run_function_wait(ID, reinterpret_cast<uint64_t>(Entry),
4366b9e43c6SManoel Roemmer TargetArgs, &RetVal) != OFFLOAD_SUCCESS) {
4376b9e43c6SManoel Roemmer veo_args_free(TargetArgs);
4386b9e43c6SManoel Roemmer return OFFLOAD_FAIL;
4396b9e43c6SManoel Roemmer }
4406b9e43c6SManoel Roemmer veo_args_free(TargetArgs);
4416b9e43c6SManoel Roemmer return OFFLOAD_SUCCESS;
4426b9e43c6SManoel Roemmer }
4436b9e43c6SManoel Roemmer
4446b9e43c6SManoel Roemmer // Transfer control to the offloaded entry Entry on the target device.
4456b9e43c6SManoel Roemmer // Args and Offsets are arrays of NumArgs size of target addresses and
4466b9e43c6SManoel Roemmer // offsets. An offset should be added to the target address before passing it
4476b9e43c6SManoel Roemmer // to the outlined function on device side. In case of success, return zero.
4486b9e43c6SManoel Roemmer // Otherwise, return an error code.
__tgt_rtl_run_target_region(int32_t ID,void * Entry,void ** Args,ptrdiff_t * Offsets,int32_t NumArgs)4496b9e43c6SManoel Roemmer int32_t __tgt_rtl_run_target_region(int32_t ID, void *Entry, void **Args,
4506b9e43c6SManoel Roemmer ptrdiff_t *Offsets, int32_t NumArgs) {
4516b9e43c6SManoel Roemmer return __tgt_rtl_run_target_team_region(ID, Entry, Args, Offsets, NumArgs, 1,
4526b9e43c6SManoel Roemmer 1, 0);
4536b9e43c6SManoel Roemmer }
454542d9c21SManoel Roemmer
// Always returns 1, i.e. this plugin reports that it supports empty images.
int32_t __tgt_rtl_supports_empty_images() {
  return 1;
}
456*2b6f2008SJoseph Huber
// VE plugin's internal InfoLevel.
458*2b6f2008SJoseph Huber std::atomic<uint32_t> InfoLevel;
459