// RUN: %libomp-cxx-compile-and-run

#include <stdio.h>
#include <omp.h>

#define NT 4
#define INIT 10

/*
The test emulates code generation needed for reduction with task modifier on
a worksharing (for) construct.

Note: tasks could just use in_reduction clause, but compiler does not accept
this because of bug: it mistakenly requires reduction item to be shared, which
is only true for reduction on worksharing and wrong for task reductions.
*/

//------------------------------------------------
// OpenMP runtime library routines
#ifdef __cplusplus
extern "C" {
#endif
extern void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *item);
// extern void* __kmpc_task_reduction_modifier_init(void *loc, int gtid, int
// flags, int num, void* data);
extern void *__kmpc_taskred_modifier_init(void *loc, int gtid, int is_ws,
                                          int num, void *data);
extern void __kmpc_task_reduction_modifier_fini(void *loc, int gtid, int is_ws);
extern int __kmpc_global_thread_num(void *);
#ifdef __cplusplus
}
#endif

//------------------------------------------------
// Compiler-generated code

/* Descriptor of one reduction item, handed to __kmpc_taskred_modifier_init. */
typedef struct red_input {
  void *reduce_shar; /**< shared between tasks item to reduce into */
  void *reduce_orig; /**< original reduction item used for initialization */
  size_t reduce_size; /**< size of data item in bytes */
  // three compiler-generated routines (init, fini are optional):
  void *reduce_init; /**< data initialization routine (single parameter) */
  void *reduce_fini; /**< data finalization routine */
  void *reduce_comb; /**< data combiner routine */
  unsigned flags; /**< flags for additional info from compiler */
} red_input_t;

/* Combiner for the int reduction item: accumulates *rhs into *lhs. */
void i_comb(void *lhs, void *rhs) { *(int *)lhs += *(int *)rhs; }

/*
Runs NT loop iterations under "omp for reduction(+ : var)". Each iteration
increments var directly and spawns one task (two on threads other than
thread 0) that adds 1 through the task-reduction runtime interface.
Prints "passed" and returns 0 when var ends up equal to INIT + NT * 3 - 1,
otherwise prints a failure message and returns 1.
*/
int main() {
  int var = INIT;
  int *p_var_orig = &var;
  int i;
  // NT direct increments + NT unconditional tasks + (NT - 1) extra tasks
  const int expected = INIT + NT * 3 - 1;

  omp_set_dynamic(0);
  omp_set_num_threads(NT);
#pragma omp parallel private(i) shared(p_var_orig)
// #pragma omp for reduction(task,+:var)
#pragma omp for reduction(+ : var)
  for (i = 0; i < NT; ++i) // single iteration per thread
  {
    // generated code, which actually should be placed before
    // loop iterations distribution, but placed here just to show the idea,
    // and to keep correctness the loop count is equal to number of threads
    int gtid = __kmpc_global_thread_num(NULL);
    void *tg; // pointer to taskgroup (optional)
    red_input_t r_var;
    r_var.reduce_shar = &var;
    r_var.reduce_orig =
        p_var_orig; // not used in this test but illustrates codegen
    r_var.reduce_size = sizeof(var);
    r_var.reduce_init = NULL;
    r_var.reduce_fini = NULL;
    r_var.reduce_comb = (void *)&i_comb;
    // No additional flags. The whole descriptor is handed to the runtime,
    // so no field may be left with an indeterminate value.
    r_var.flags = 0;
    tg = __kmpc_taskred_modifier_init(
        NULL, // ident_t loc;
        gtid,
        1, // 1 - worksharing construct, 0 - parallel
        1, // number of reduction objects
        &r_var // related data
    );
    // end of generated code
    var++;
#pragma omp task /*in_reduction(+:var)*/ shared(var)
    {
      // emulate task reduction here because of compiler bug:
      // it mistakenly declines to accept in_reduction because var is private
      // outside.
      // gtid is queried again inside the task body rather than reusing the
      // value obtained by the encountering thread.
      int gtid = __kmpc_global_thread_num(NULL);
      int *p_var = (int *)__kmpc_task_reduction_get_th_data(gtid, tg, &var);
      *p_var += 1;
    }
    if (omp_get_thread_num() > 0) {
#pragma omp task /*in_reduction(+:var)*/ shared(var)
      {
        int gtid = __kmpc_global_thread_num(NULL);
        int *p_var = (int *)__kmpc_task_reduction_get_th_data(gtid, tg, &var);
        *p_var += 1;
      }
    }
    // generated code, which actually should be placed after loop completion
    // but before barrier and before loop reduction. It is placed here just to
    // show the idea,
    // and to keep correctness the loop count is equal to number of threads
    __kmpc_task_reduction_modifier_fini(NULL, gtid, 1);
    // end of generated code
  }
  if (var == expected) {
    printf("passed\n");
    return 0;
  } else {
    printf("failed: var = %d (!= %d)\n", var, expected);
    return 1;
  }
}