1 //===----------- rtl.cpp - Target independent OpenMP target RTL -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Functionality for handling RTL plugins. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "device.h" 14 #include "private.h" 15 #include "rtl.h" 16 17 #include <cassert> 18 #include <cstdlib> 19 #include <cstring> 20 #include <dlfcn.h> 21 #include <mutex> 22 #include <string> 23 24 // List of all plugins that can support offloading. 25 static const char *RTLNames[] = { 26 /* PowerPC target */ "libomptarget.rtl.ppc64.so", 27 /* x86_64 target */ "libomptarget.rtl.x86_64.so", 28 /* CUDA target */ "libomptarget.rtl.cuda.so", 29 /* AArch64 target */ "libomptarget.rtl.aarch64.so"}; 30 31 RTLsTy *RTLs; 32 std::mutex *RTLsMtx; 33 34 HostEntriesBeginToTransTableTy *HostEntriesBeginToTransTable; 35 std::mutex *TrlTblMtx; 36 37 HostPtrToTableMapTy *HostPtrToTableMap; 38 std::mutex *TblMapMtx; 39 40 __attribute__((constructor(101))) void init() { 41 DP("Init target library!\n"); 42 RTLs = new RTLsTy(); 43 RTLsMtx = new std::mutex(); 44 HostEntriesBeginToTransTable = new HostEntriesBeginToTransTableTy(); 45 TrlTblMtx = new std::mutex(); 46 HostPtrToTableMap = new HostPtrToTableMapTy(); 47 TblMapMtx = new std::mutex(); 48 } 49 50 __attribute__((destructor(101))) void deinit() { 51 DP("Deinit target library!\n"); 52 delete RTLs; 53 delete RTLsMtx; 54 delete HostEntriesBeginToTransTable; 55 delete TrlTblMtx; 56 delete HostPtrToTableMap; 57 delete TblMapMtx; 58 } 59 60 void RTLsTy::LoadRTLs() { 61 #ifdef OMPTARGET_DEBUG 62 if (char *envStr = getenv("LIBOMPTARGET_DEBUG")) { 63 DebugLevel = std::stoi(envStr); 64 } 65 #endif // OMPTARGET_DEBUG 66 67 // Parse environment variable OMP_TARGET_OFFLOAD (if set) 68 TargetOffloadPolicy = (kmp_target_offload_kind_t) __kmpc_get_target_offload(); 69 if (TargetOffloadPolicy == tgt_disabled) { 70 return; 71 } 72 73 DP("Loading RTLs...\n"); 74 75 // Attempt to open all the plugins and, if they exist, check if the interface 76 // is correct and if they are supporting any devices. 77 for (auto *Name : RTLNames) { 78 DP("Loading library '%s'...\n", Name); 79 void *dynlib_handle = dlopen(Name, RTLD_NOW); 80 81 if (!dynlib_handle) { 82 // Library does not exist or cannot be found. 83 DP("Unable to load library '%s': %s!\n", Name, dlerror()); 84 continue; 85 } 86 87 DP("Successfully loaded library '%s'!\n", Name); 88 89 // Retrieve the RTL information from the runtime library. 90 RTLInfoTy R; 91 92 R.LibraryHandler = dynlib_handle; 93 R.isUsed = false; 94 95 #ifdef OMPTARGET_DEBUG 96 R.RTLName = Name; 97 #endif 98 99 if (!(*((void **)&R.is_valid_binary) = 100 dlsym(dynlib_handle, "__tgt_rtl_is_valid_binary"))) 101 continue; 102 if (!(*((void **)&R.number_of_devices) = 103 dlsym(dynlib_handle, "__tgt_rtl_number_of_devices"))) 104 continue; 105 if (!(*((void **)&R.init_device) = 106 dlsym(dynlib_handle, "__tgt_rtl_init_device"))) 107 continue; 108 if (!(*((void **)&R.load_binary) = 109 dlsym(dynlib_handle, "__tgt_rtl_load_binary"))) 110 continue; 111 if (!(*((void **)&R.data_alloc) = 112 dlsym(dynlib_handle, "__tgt_rtl_data_alloc"))) 113 continue; 114 if (!(*((void **)&R.data_submit) = 115 dlsym(dynlib_handle, "__tgt_rtl_data_submit"))) 116 continue; 117 if (!(*((void **)&R.data_retrieve) = 118 dlsym(dynlib_handle, "__tgt_rtl_data_retrieve"))) 119 continue; 120 if (!(*((void **)&R.data_delete) = 121 dlsym(dynlib_handle, "__tgt_rtl_data_delete"))) 122 continue; 123 if (!(*((void **)&R.run_region) = 124 dlsym(dynlib_handle, "__tgt_rtl_run_target_region"))) 125 continue; 126 if (!(*((void **)&R.run_team_region) = 127 dlsym(dynlib_handle, "__tgt_rtl_run_target_team_region"))) 128 continue; 129 130 // Optional functions 131 *((void **)&R.init_requires) = 132 dlsym(dynlib_handle, "__tgt_rtl_init_requires"); 133 *((void **)&R.data_submit_async) = 134 dlsym(dynlib_handle, "__tgt_rtl_data_submit_async"); 135 *((void **)&R.data_retrieve_async) = 136 dlsym(dynlib_handle, "__tgt_rtl_data_retrieve_async"); 137 *((void **)&R.run_region_async) = 138 dlsym(dynlib_handle, "__tgt_rtl_run_target_region_async"); 139 *((void **)&R.run_team_region_async) = 140 dlsym(dynlib_handle, "__tgt_rtl_run_target_team_region_async"); 141 *((void **)&R.synchronize) = dlsym(dynlib_handle, "__tgt_rtl_synchronize"); 142 143 // No devices are supported by this RTL? 144 if (!(R.NumberOfDevices = R.number_of_devices())) { 145 DP("No devices supported in this RTL\n"); 146 continue; 147 } 148 149 DP("Registering RTL %s supporting %d devices!\n", R.RTLName.c_str(), 150 R.NumberOfDevices); 151 152 // The RTL is valid! Will save the information in the RTLs list. 153 AllRTLs.push_back(R); 154 } 155 156 DP("RTLs loaded!\n"); 157 158 return; 159 } 160 161 //////////////////////////////////////////////////////////////////////////////// 162 // Functionality for registering libs 163 164 static void RegisterImageIntoTranslationTable(TranslationTable &TT, 165 RTLInfoTy &RTL, __tgt_device_image *image) { 166 167 // same size, as when we increase one, we also increase the other. 168 assert(TT.TargetsTable.size() == TT.TargetsImages.size() && 169 "We should have as many images as we have tables!"); 170 171 // Resize the Targets Table and Images to accommodate the new targets if 172 // required 173 unsigned TargetsTableMinimumSize = RTL.Idx + RTL.NumberOfDevices; 174 175 if (TT.TargetsTable.size() < TargetsTableMinimumSize) { 176 TT.TargetsImages.resize(TargetsTableMinimumSize, 0); 177 TT.TargetsTable.resize(TargetsTableMinimumSize, 0); 178 } 179 180 // Register the image in all devices for this target type. 181 for (int32_t i = 0; i < RTL.NumberOfDevices; ++i) { 182 // If we are changing the image we are also invalidating the target table. 183 if (TT.TargetsImages[RTL.Idx + i] != image) { 184 TT.TargetsImages[RTL.Idx + i] = image; 185 TT.TargetsTable[RTL.Idx + i] = 0; // lazy initialization of target table. 186 } 187 } 188 } 189 190 //////////////////////////////////////////////////////////////////////////////// 191 // Functionality for registering Ctors/Dtors 192 193 static void RegisterGlobalCtorsDtorsForImage(__tgt_bin_desc *desc, 194 __tgt_device_image *img, RTLInfoTy *RTL) { 195 196 for (int32_t i = 0; i < RTL->NumberOfDevices; ++i) { 197 DeviceTy &Device = Devices[RTL->Idx + i]; 198 Device.PendingGlobalsMtx.lock(); 199 Device.HasPendingGlobals = true; 200 for (__tgt_offload_entry *entry = img->EntriesBegin; 201 entry != img->EntriesEnd; ++entry) { 202 if (entry->flags & OMP_DECLARE_TARGET_CTOR) { 203 DP("Adding ctor " DPxMOD " to the pending list.\n", 204 DPxPTR(entry->addr)); 205 Device.PendingCtorsDtors[desc].PendingCtors.push_back(entry->addr); 206 } else if (entry->flags & OMP_DECLARE_TARGET_DTOR) { 207 // Dtors are pushed in reverse order so they are executed from end 208 // to beginning when unregistering the library! 209 DP("Adding dtor " DPxMOD " to the pending list.\n", 210 DPxPTR(entry->addr)); 211 Device.PendingCtorsDtors[desc].PendingDtors.push_front(entry->addr); 212 } 213 214 if (entry->flags & OMP_DECLARE_TARGET_LINK) { 215 DP("The \"link\" attribute is not yet supported!\n"); 216 } 217 } 218 Device.PendingGlobalsMtx.unlock(); 219 } 220 } 221 222 void RTLsTy::RegisterRequires(int64_t flags) { 223 // TODO: add more elaborate check. 224 // Minimal check: only set requires flags if previous value 225 // is undefined. This ensures that only the first call to this 226 // function will set the requires flags. All subsequent calls 227 // will be checked for compatibility. 228 assert(flags != OMP_REQ_UNDEFINED && 229 "illegal undefined flag for requires directive!"); 230 if (RequiresFlags == OMP_REQ_UNDEFINED) { 231 RequiresFlags = flags; 232 return; 233 } 234 235 // If multiple compilation units are present enforce 236 // consistency across all of them for require clauses: 237 // - reverse_offload 238 // - unified_address 239 // - unified_shared_memory 240 if ((RequiresFlags & OMP_REQ_REVERSE_OFFLOAD) != 241 (flags & OMP_REQ_REVERSE_OFFLOAD)) { 242 FATAL_MESSAGE0(1, 243 "'#pragma omp requires reverse_offload' not used consistently!"); 244 } 245 if ((RequiresFlags & OMP_REQ_UNIFIED_ADDRESS) != 246 (flags & OMP_REQ_UNIFIED_ADDRESS)) { 247 FATAL_MESSAGE0(1, 248 "'#pragma omp requires unified_address' not used consistently!"); 249 } 250 if ((RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) != 251 (flags & OMP_REQ_UNIFIED_SHARED_MEMORY)) { 252 FATAL_MESSAGE0(1, 253 "'#pragma omp requires unified_shared_memory' not used consistently!"); 254 } 255 256 // TODO: insert any other missing checks 257 258 DP("New requires flags %ld compatible with existing %ld!\n", 259 flags, RequiresFlags); 260 } 261 262 void RTLsTy::RegisterLib(__tgt_bin_desc *desc) { 263 // Attempt to load all plugins available in the system. 264 std::call_once(initFlag, &RTLsTy::LoadRTLs, this); 265 266 RTLsMtx->lock(); 267 // Register the images with the RTLs that understand them, if any. 268 for (int32_t i = 0; i < desc->NumDeviceImages; ++i) { 269 // Obtain the image. 270 __tgt_device_image *img = &desc->DeviceImages[i]; 271 272 RTLInfoTy *FoundRTL = NULL; 273 274 // Scan the RTLs that have associated images until we find one that supports 275 // the current image. 276 for (auto &R : AllRTLs) { 277 if (!R.is_valid_binary(img)) { 278 DP("Image " DPxMOD " is NOT compatible with RTL %s!\n", 279 DPxPTR(img->ImageStart), R.RTLName.c_str()); 280 continue; 281 } 282 283 DP("Image " DPxMOD " is compatible with RTL %s!\n", 284 DPxPTR(img->ImageStart), R.RTLName.c_str()); 285 286 // If this RTL is not already in use, initialize it. 287 if (!R.isUsed) { 288 // Initialize the device information for the RTL we are about to use. 289 DeviceTy device(&R); 290 size_t start = Devices.size(); 291 Devices.resize(start + R.NumberOfDevices, device); 292 for (int32_t device_id = 0; device_id < R.NumberOfDevices; 293 device_id++) { 294 // global device ID 295 Devices[start + device_id].DeviceID = start + device_id; 296 // RTL local device ID 297 Devices[start + device_id].RTLDeviceID = device_id; 298 } 299 300 // Initialize the index of this RTL and save it in the used RTLs. 301 R.Idx = (UsedRTLs.empty()) 302 ? 0 303 : UsedRTLs.back()->Idx + UsedRTLs.back()->NumberOfDevices; 304 assert((size_t) R.Idx == start && 305 "RTL index should equal the number of devices used so far."); 306 R.isUsed = true; 307 UsedRTLs.push_back(&R); 308 309 DP("RTL " DPxMOD " has index %d!\n", DPxPTR(R.LibraryHandler), R.Idx); 310 } 311 312 // Initialize (if necessary) translation table for this library. 313 TrlTblMtx->lock(); 314 if(!HostEntriesBeginToTransTable->count(desc->HostEntriesBegin)){ 315 TranslationTable &tt = 316 (*HostEntriesBeginToTransTable)[desc->HostEntriesBegin]; 317 tt.HostTable.EntriesBegin = desc->HostEntriesBegin; 318 tt.HostTable.EntriesEnd = desc->HostEntriesEnd; 319 } 320 321 // Retrieve translation table for this library. 322 TranslationTable &TransTable = 323 (*HostEntriesBeginToTransTable)[desc->HostEntriesBegin]; 324 325 DP("Registering image " DPxMOD " with RTL %s!\n", 326 DPxPTR(img->ImageStart), R.RTLName.c_str()); 327 RegisterImageIntoTranslationTable(TransTable, R, img); 328 TrlTblMtx->unlock(); 329 FoundRTL = &R; 330 331 // Load ctors/dtors for static objects 332 RegisterGlobalCtorsDtorsForImage(desc, img, FoundRTL); 333 334 // if an RTL was found we are done - proceed to register the next image 335 break; 336 } 337 338 if (!FoundRTL) { 339 DP("No RTL found for image " DPxMOD "!\n", DPxPTR(img->ImageStart)); 340 } 341 } 342 RTLsMtx->unlock(); 343 344 345 DP("Done registering entries!\n"); 346 } 347 348 void RTLsTy::UnregisterLib(__tgt_bin_desc *desc) { 349 DP("Unloading target library!\n"); 350 351 RTLsMtx->lock(); 352 // Find which RTL understands each image, if any. 353 for (int32_t i = 0; i < desc->NumDeviceImages; ++i) { 354 // Obtain the image. 355 __tgt_device_image *img = &desc->DeviceImages[i]; 356 357 RTLInfoTy *FoundRTL = NULL; 358 359 // Scan the RTLs that have associated images until we find one that supports 360 // the current image. We only need to scan RTLs that are already being used. 361 for (auto *R : UsedRTLs) { 362 363 assert(R->isUsed && "Expecting used RTLs."); 364 365 if (!R->is_valid_binary(img)) { 366 DP("Image " DPxMOD " is NOT compatible with RTL " DPxMOD "!\n", 367 DPxPTR(img->ImageStart), DPxPTR(R->LibraryHandler)); 368 continue; 369 } 370 371 DP("Image " DPxMOD " is compatible with RTL " DPxMOD "!\n", 372 DPxPTR(img->ImageStart), DPxPTR(R->LibraryHandler)); 373 374 FoundRTL = R; 375 376 // Execute dtors for static objects if the device has been used, i.e. 377 // if its PendingCtors list has been emptied. 378 for (int32_t i = 0; i < FoundRTL->NumberOfDevices; ++i) { 379 DeviceTy &Device = Devices[FoundRTL->Idx + i]; 380 Device.PendingGlobalsMtx.lock(); 381 if (Device.PendingCtorsDtors[desc].PendingCtors.empty()) { 382 for (auto &dtor : Device.PendingCtorsDtors[desc].PendingDtors) { 383 int rc = target(Device.DeviceID, dtor, 0, NULL, NULL, NULL, NULL, 1, 384 1, true /*team*/); 385 if (rc != OFFLOAD_SUCCESS) { 386 DP("Running destructor " DPxMOD " failed.\n", DPxPTR(dtor)); 387 } 388 } 389 // Remove this library's entry from PendingCtorsDtors 390 Device.PendingCtorsDtors.erase(desc); 391 } 392 Device.PendingGlobalsMtx.unlock(); 393 } 394 395 DP("Unregistered image " DPxMOD " from RTL " DPxMOD "!\n", 396 DPxPTR(img->ImageStart), DPxPTR(R->LibraryHandler)); 397 398 break; 399 } 400 401 // if no RTL was found proceed to unregister the next image 402 if (!FoundRTL){ 403 DP("No RTLs in use support the image " DPxMOD "!\n", 404 DPxPTR(img->ImageStart)); 405 } 406 } 407 RTLsMtx->unlock(); 408 DP("Done unregistering images!\n"); 409 410 // Remove entries from HostPtrToTableMap 411 TblMapMtx->lock(); 412 for (__tgt_offload_entry *cur = desc->HostEntriesBegin; 413 cur < desc->HostEntriesEnd; ++cur) { 414 HostPtrToTableMap->erase(cur->addr); 415 } 416 417 // Remove translation table for this descriptor. 418 auto tt = HostEntriesBeginToTransTable->find(desc->HostEntriesBegin); 419 if (tt != HostEntriesBeginToTransTable->end()) { 420 DP("Removing translation table for descriptor " DPxMOD "\n", 421 DPxPTR(desc->HostEntriesBegin)); 422 HostEntriesBeginToTransTable->erase(tt); 423 } else { 424 DP("Translation table for descriptor " DPxMOD " cannot be found, probably " 425 "it has been already removed.\n", DPxPTR(desc->HostEntriesBegin)); 426 } 427 428 TblMapMtx->unlock(); 429 430 // TODO: Remove RTL and the devices it manages if it's not used anymore? 431 // TODO: Write some RTL->unload_image(...) function? 432 433 DP("Done unregistering library!\n"); 434 } 435