// RUN: %libomptarget-compile-aarch64-unknown-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-aarch64-unknown-linux-gnu 2>&1 | %fcheck-aarch64-unknown-linux-gnu -allow-empty -check-prefix=DEBUG
// RUN: %libomptarget-compile-powerpc64-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64-ibm-linux-gnu 2>&1 | %fcheck-powerpc64-ibm-linux-gnu -allow-empty -check-prefix=DEBUG
// RUN: %libomptarget-compile-powerpc64le-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64le-ibm-linux-gnu 2>&1 | %fcheck-powerpc64le-ibm-linux-gnu -allow-empty -check-prefix=DEBUG
// RUN: %libomptarget-compile-x86_64-pc-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-x86_64-pc-linux-gnu 2>&1 | %fcheck-x86_64-pc-linux-gnu -allow-empty -check-prefix=DEBUG
// RUN: %libomptarget-compile-nvptx64-nvidia-cuda && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-nvptx64-nvidia-cuda 2>&1 | %fcheck-nvptx64-nvidia-cuda -allow-empty -check-prefix=DEBUG
// REQUIRES: libomptarget-debug

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>

// Data structure definitions copied from OpenMP RTL.
// Describes one dimension of a non-contiguous array section. The descriptors
// built in main() list the outermost dimension first and follow the pattern
// {first index, element count, byte stride} for the section named in each
// case's pragma comment.
struct __tgt_target_non_contig {
  int64_t offset; // index of the first selected element in this dimension
  int64_t width;  // number of selected elements in this dimension
  int64_t stride; // bytes between consecutive selected elements
};

// Map-type flag stored in arg_types for the descriptor arguments below.
// The value does not fit in 32 bits, so it is kept in int64_t variables.
enum tgt_map_type {
  OMP_TGT_MAPTYPE_NON_CONTIG = 0x100000000000
};

// OpenMP RTL interfaces
#ifdef __cplusplus
extern "C" {
#endif
void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
                              void **args_base, void **args, int64_t *arg_sizes,
                              int64_t *arg_types);
#ifdef __cplusplus
}
#endif

// Calls __tgt_target_data_update directly with hand-built non-contiguous
// descriptors. The expected byte offsets are listed after each call as
// FileCheck directives matched against the runtime's debug output (see the
// lit header of this file); do not reword or reorder those directive lines.
int main() {
  // case 1
  // int arr[3][4][5][6];
  // #pragma omp target update to(arr[0:2][1:3][1:2][:])
  // set up descriptor
  // NOTE(review): the descriptor array has five entries while `size` is 4.
  // The expected offsets below were written for these exact values, so confirm
  // the intent before changing either.
  __tgt_target_non_contig non_contig[5] = {
      {0, 2, 480}, {1, 3, 120}, {1, 2, 24}, {0, 6, 4}, {0, 1, 4}};
  int64_t size = 4, type = OMP_TGT_MAPTYPE_NON_CONTIG;

  // No host array is actually allocated; the base pointer is initialized so
  // that the runtime is never handed the address of an indeterminate value.
  void *base = NULL;
  void *begin = &non_contig;
  int64_t *sizes = &size;
  int64_t *types = &type;

  // The below diagram is the visualization of the non-contiguous transfer after
  // optimization. Note that each element represents the innermost dimension
  // (unit size = 24) since the stride * count of last dimension is equal to the
  // stride of second last dimension.
  //
  // OOOOO OOOOO OOOOO
  // OXXOO OXXOO OOOOO
  // OXXOO OXXOO OOOOO
  // OXXOO OXXOO OOOOO
  __tgt_target_data_update(/*device_id*/ -1, /*arg_num*/ 1, &base, &begin,
                           sizes, types);
  // DEBUG: offset 144
  // DEBUG: offset 264
  // DEBUG: offset 384
  // DEBUG: offset 624
  // DEBUG: offset 744
  // DEBUG: offset 864


  // case 2
  // double darr[3][4][5];
  // #pragma omp target update to(darr[0:2:2][2:2][:2:2])
  // set up descriptor
  __tgt_target_non_contig non_contig_2[4] = {
      {0, 2, 320}, {2, 2, 40}, {0, 2, 16}, {0, 1, 8}};
  int64_t size_2 = 4, type_2 = OMP_TGT_MAPTYPE_NON_CONTIG;

  // As in case 1, the base pointer is only a placeholder and is initialized
  // so its value is well defined.
  void *base_2 = NULL;
  void *begin_2 = &non_contig_2;
  int64_t *sizes_2 = &size_2;
  int64_t *types_2 = &type_2;

  // The below diagram is the visualization of the non-contiguous transfer.
  // Note that each element represents a single double (unit size = 8): unlike
  // case 1, no dimension satisfies stride * count == stride of the enclosing
  // dimension (1 * 8 != 16, 2 * 16 != 40, 2 * 40 != 320), so the expected
  // offsets below step through individual elements.
  //
  // OOOOO OOOOO OOOOO
  // OOOOO OOOOO OOOOO
  // XOXOO OOOOO XOXOO
  // XOXOO OOOOO XOXOO
  __tgt_target_data_update(/*device_id*/ -1, /*arg_num*/ 1, &base_2, &begin_2,
                           sizes_2, types_2);
  // DEBUG: offset 80
  // DEBUG: offset 96
  // DEBUG: offset 120
  // DEBUG: offset 136
  // DEBUG: offset 400
  // DEBUG: offset 416
  // DEBUG: offset 440
  // DEBUG: offset 456
  return 0;
}