diff options
author | Joseph Huber <jhuber6@vols.utk.edu> | 2022-09-15 18:28:52 -0500 |
---|---|---|
committer | Tobias Hieta <tobias@hieta.se> | 2022-09-19 08:43:01 +0200 |
commit | 046d5b917bcdb415e099805cadaca121c0caad49 (patch) | |
tree | 12468947273aa0a926927f8f50dfedd2331b4358 | |
parent | [llvm-objdump] Change printSymbolVersionDependency to use ELFFile API (diff) | |
download | llvm-project-046d5b917bcdb415e099805cadaca121c0caad49.tar.gz llvm-project-046d5b917bcdb415e099805cadaca121c0caad49.tar.bz2 llvm-project-046d5b917bcdb415e099805cadaca121c0caad49.zip |
[Libomptarget] Revert changes to AMDGPU plugin destructors
These patches exposed a lot of problems in the AMD toolchain. Rather
than keep it broken we should revert it to its old semi-functional
state. This will prevent us from using device destructors but should
remove some new bugs. In the future this interface should be changed
once these problems are addressed more correctly.
This reverts commit ed0f21811544320f829124efbb6a38ee12eb9155.
This reverts commit 2b7203a35972e98b8521f92d2791043dc539ae88.
Fixes #57536
Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D133997
-rw-r--r-- | openmp/libomptarget/plugins/amdgpu/src/rtl.cpp | 55 | ||||
-rw-r--r-- | openmp/libomptarget/src/rtl.cpp | 24 |
2 files changed, 29 insertions, 50 deletions
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp index 6b95756f1d30..6a6fbdc7c638 100644 --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -210,6 +210,9 @@ private: }; pthread_mutex_t KernelArgPool::Mutex = PTHREAD_MUTEX_INITIALIZER; +std::unordered_map<std::string /*kernel*/, std::unique_ptr<KernelArgPool>> + KernelArgPoolMap; + /// Use a single entity to encode a kernel and a set of flags struct KernelTy { llvm::omp::OMPTgtExecModeFlags ExecutionMode; @@ -221,9 +224,7 @@ struct KernelTy { KernelTy(llvm::omp::OMPTgtExecModeFlags ExecutionMode, int16_t ConstWgSize, int32_t DeviceId, void *CallStackAddr, const char *Name, uint32_t KernargSegmentSize, - hsa_amd_memory_pool_t &KernArgMemoryPool, - std::unordered_map<std::string, std::unique_ptr<KernelArgPool>> - &KernelArgPoolMap) + hsa_amd_memory_pool_t &KernArgMemoryPool) : ExecutionMode(ExecutionMode), ConstWGSize(ConstWgSize), DeviceId(DeviceId), CallStackAddr(CallStackAddr), Name(Name) { DP("Construct kernelinfo: ExecMode %d\n", ExecutionMode); @@ -237,6 +238,10 @@ struct KernelTy { } }; +/// List that contains all the kernels. +/// FIXME: we may need this to be per device and per library. +std::list<KernelTy> KernelsList; + template <typename Callback> static hsa_status_t findAgents(Callback CB) { hsa_status_t Err = @@ -451,12 +456,6 @@ public: int NumberOfDevices = 0; - /// List that contains all the kernels. - /// FIXME: we may need this to be per device and per library. - std::list<KernelTy> KernelsList; - std::unordered_map<std::string /*kernel*/, std::unique_ptr<KernelArgPool>> - KernelArgPoolMap; - // GPU devices std::vector<hsa_agent_t> HSAAgents; std::vector<HSAQueueScheduler> HSAQueueSchedulers; // one per gpu @@ -858,6 +857,7 @@ public: "Unexpected device id!"); FuncGblEntries[DeviceId].emplace_back(); FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back(); + // KernelArgPoolMap.clear(); E.Entries.clear(); E.Table.EntriesBegin = E.Table.EntriesEnd = 0; } @@ -1113,8 +1113,10 @@ public: pthread_mutex_t SignalPoolT::mutex = PTHREAD_MUTEX_INITIALIZER; -static RTLDeviceInfoTy *DeviceInfoState = nullptr; -static RTLDeviceInfoTy &DeviceInfo() { return *DeviceInfoState; } +// Putting accesses to DeviceInfo global behind a function call prior +// to changing to use init_plugin/deinit_plugin calls +static RTLDeviceInfoTy DeviceInfoState; +static RTLDeviceInfoTy &DeviceInfo() { return DeviceInfoState; } namespace { @@ -1455,9 +1457,8 @@ int32_t runRegionLocked(int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, KernelArgPool *ArgPool = nullptr; void *KernArg = nullptr; { - auto It = - DeviceInfo().KernelArgPoolMap.find(std::string(KernelInfo->Name)); - if (It != DeviceInfo().KernelArgPoolMap.end()) { + auto It = KernelArgPoolMap.find(std::string(KernelInfo->Name)); + if (It != KernelArgPoolMap.end()) { ArgPool = (It->second).get(); } } @@ -2019,20 +2020,6 @@ bool IsImageCompatibleWithEnv(const char *ImgInfo, std::string EnvInfo) { } extern "C" { - -int32_t __tgt_rtl_init_plugin() { - DeviceInfoState = new RTLDeviceInfoTy; - return (DeviceInfoState && DeviceInfoState->ConstructionSucceeded) - ? OFFLOAD_SUCCESS - : OFFLOAD_FAIL; -} - -int32_t __tgt_rtl_deinit_plugin() { - if (DeviceInfoState) - delete DeviceInfoState; - return OFFLOAD_SUCCESS; -} - int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) { return elfMachineIdIsAmdgcn(Image); } @@ -2064,6 +2051,9 @@ int32_t __tgt_rtl_is_valid_binary_info(__tgt_device_image *image, return true; } +int32_t __tgt_rtl_init_plugin() { return OFFLOAD_SUCCESS; } +int32_t __tgt_rtl_deinit_plugin() { return OFFLOAD_SUCCESS; } + int __tgt_rtl_number_of_devices() { // If the construction failed, no methods are safe to call if (DeviceInfo().ConstructionSucceeded) { @@ -2600,12 +2590,11 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t DeviceId, } check("Loading computation property", Err); - DeviceInfo().KernelsList.push_back( - KernelTy(ExecModeVal, WGSizeVal, DeviceId, CallStackAddr, E->name, - KernargSegmentSize, DeviceInfo().KernArgPool, - DeviceInfo().KernelArgPoolMap)); + KernelsList.push_back(KernelTy(ExecModeVal, WGSizeVal, DeviceId, + CallStackAddr, E->name, KernargSegmentSize, + DeviceInfo().KernArgPool)); __tgt_offload_entry Entry = *E; - Entry.addr = (void *)&DeviceInfo().KernelsList.back(); + Entry.addr = (void *)&KernelsList.back(); DeviceInfo().addOffloadEntry(DeviceId, Entry); DP("Entry point %ld maps to %s\n", E - HostBegin, E->name); } diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp index aef6fc6ec82e..9acf5f3e19c3 100644 --- a/openmp/libomptarget/src/rtl.cpp +++ b/openmp/libomptarget/src/rtl.cpp @@ -67,23 +67,6 @@ __attribute__((constructor(101))) void init() { __attribute__((destructor(101))) void deinit() { DP("Deinit target library!\n"); - - for (auto *R : PM->RTLs.UsedRTLs) { - // Plugins can either destroy their local state using global variables - // or attribute(destructor) functions or by implementing deinit_plugin - // The hazard with plugin local destructors is they may be called before - // or after this destructor. If the plugin is destroyed using global - // state before this library finishes calling into it the plugin is - // likely to crash. If good fortune means the plugin outlives this - // library then there may be no crash. - // Using deinit_plugin and no global destructors from the plugin works. - if (R->deinit_plugin) { - if (R->deinit_plugin() != OFFLOAD_SUCCESS) { - DP("Failure deinitializing RTL %s!\n", R->RTLName.c_str()); - } - } - } - delete PM; #ifdef OMPTARGET_PROFILE_ENABLED @@ -584,6 +567,13 @@ void RTLsTy::unregisterLib(__tgt_bin_desc *Desc) { PM->TblMapMtx.unlock(); // TODO: Write some RTL->unload_image(...) function? + for (auto *R : UsedRTLs) { + if (R->deinit_plugin) { + if (R->deinit_plugin() != OFFLOAD_SUCCESS) { + DP("Failure deinitializing RTL %s!\n", R->RTLName.c_str()); + } + } + } DP("Done unregistering library!\n"); } |