1// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefix=COMMON --check-prefix=B32 2// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefix=COMMON --check-prefix=B64 3 4typedef void (^bl_t)(local void *); 5 6// N.B. The check here only exists to set BL_GLOBAL 7// COMMON: @block_G = {{.*}}bitcast ([[BL_GLOBAL:[^@]+@__block_literal_global(\.[0-9]+)?]] 8const bl_t block_G = (bl_t) ^ (local void *a) {}; 9 10kernel void device_side_enqueue(global int *a, global int *b, int i) { 11 // COMMON: %default_queue = alloca %opencl.queue_t* 12 queue_t default_queue; 13 // COMMON: %flags = alloca i32 14 unsigned flags = 0; 15 // COMMON: %ndrange = alloca %opencl.ndrange_t* 16 ndrange_t ndrange; 17 // COMMON: %clk_event = alloca %opencl.clk_event_t* 18 clk_event_t clk_event; 19 // COMMON: %event_wait_list = alloca %opencl.clk_event_t* 20 clk_event_t event_wait_list; 21 // COMMON: %event_wait_list2 = alloca [1 x %opencl.clk_event_t*] 22 clk_event_t event_wait_list2[] = {clk_event}; 23 24 // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue 25 // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags 26 // COMMON: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange 27 // COMMON: [[BL:%[0-9]+]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor addrspace(2)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block to void ()* 28 // COMMON: [[BL_I8:%[0-9]+]] = bitcast void ()* [[BL]] to i8* 29 // COMMON: call i32 @__enqueue_kernel_basic(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* [[BL_I8]]) 30 enqueue_kernel(default_queue, flags, ndrange, 31 ^(void) { 32 a[i] = b[i]; 33 }); 34 35 // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue 36 // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags 37 // COMMON: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange 38 // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %event_wait_list to %opencl.clk_event_t{{.*}}* addrspace(4)* 39 // COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)* 40 // COMMON: [[BL:%[0-9]+]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor addrspace(2)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()* 41 // COMMON: [[BL_I8:%[0-9]+]] = bitcast void ()* [[BL]] to i8* 42 // COMMON: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], i8* [[BL_I8]]) 43 enqueue_kernel(default_queue, flags, ndrange, 2, &event_wait_list, &clk_event, 44 ^(void) { 45 a[i] = b[i]; 46 }); 47 48 // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue 49 // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags 50 // COMMON: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange 51 // B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 256) 52 // B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i64 256) 53 enqueue_kernel(default_queue, flags, ndrange, 54 ^(local void *p) { 55 return; 56 }, 57 256); 58 char c; 59 // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue 60 // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags 61 // COMMON: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange 62 // B32: [[SIZE:%[0-9]+]] = zext i8 {{%[0-9]+}} to i32 63 // B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 [[SIZE]]) 64 // B64: [[SIZE:%[0-9]+]] = zext i8 {{%[0-9]+}} to i64 65 // B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i64 [[SIZE]]) 66 enqueue_kernel(default_queue, flags, ndrange, 67 ^(local void *p) { 68 return; 69 }, 70 c); 71 72 // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue 73 // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags 74 // COMMON: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange 75 // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0 76 // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}* addrspace(4)* 77 // COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)* 78 // B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t{{.*}}* addrspace(4)*, %opencl.clk_event_t{{.*}}* addrspace(4)*, i8*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}} [[WAIT_EVNT]], %opencl.clk_event_t{{.*}} [[EVNT]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 256) 79 // B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t{{.*}}* addrspace(4)*, %opencl.clk_event_t{{.*}}* addrspace(4)*, i8*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}} [[WAIT_EVNT]], %opencl.clk_event_t{{.*}} [[EVNT]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i64 256) 80 enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event, 81 ^(local void *p) { 82 return; 83 }, 84 256); 85 86 // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue 87 // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags 88 // COMMON: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange 89 // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0 90 // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}* addrspace(4)* 91 // COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)* 92 // B32: [[SIZE:%[0-9]+]] = zext i8 {{%[0-9]+}} to i32 93 // B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t{{.*}}* addrspace(4)*, %opencl.clk_event_t{{.*}}* addrspace(4)*, i8*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 [[SIZE]]) 94 // B64: [[SIZE:%[0-9]+]] = zext i8 {{%[0-9]+}} to i64 95 // B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t{{.*}}* addrspace(4)*, %opencl.clk_event_t{{.*}}* addrspace(4)*, i8*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i64 [[SIZE]]) 96 enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event, 97 ^(local void *p) { 98 return; 99 }, 100 c); 101 102 long l; 103 // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue 104 // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags 105 // COMMON: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange 106 // B32: [[SIZE:%[0-9]+]] = trunc i64 {{%[0-9]+}} to i32 107 // B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 [[SIZE]]) 108 // B64: [[SIZE:%[0-9]+]] = load i64, i64* %l 109 // B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i64 [[SIZE]]) 110 enqueue_kernel(default_queue, flags, ndrange, 111 ^(local void *p) { 112 return; 113 }, 114 l); 115 116 // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue 117 // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags 118 // COMMON: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange 119 // B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 0) 120 // B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i64 4294967296) 121 enqueue_kernel(default_queue, flags, ndrange, 122 ^(local void *p) { 123 return; 124 }, 125 4294967296L); 126 127 // The full type of these expressions are long (and repeated elsewhere), so we 128 // capture it as part of the regex for convenience and clarity. 129 // COMMON: store void ()* bitcast ([[BL_A:[^@]+@__block_literal_global.[0-9]+]] to void ()*), void ()** %block_A 130 void (^const block_A)(void) = ^{ 131 return; 132 }; 133 134 // COMMON: store void (i8 addrspace(3)*)* bitcast ([[BL_B:[^@]+@__block_literal_global.[0-9]+]] to void (i8 addrspace(3)*)*), void (i8 addrspace(3)*)** %block_B 135 void (^const block_B)(local void *) = ^(local void *a) { 136 return; 137 }; 138 139 // COMMON: call i32 @__get_kernel_work_group_size_impl(i8* bitcast ([[BL_A]] to i8*)) 140 unsigned size = get_kernel_work_group_size(block_A); 141 // COMMON: call i32 @__get_kernel_work_group_size_impl(i8* bitcast ([[BL_B]] to i8*)) 142 size = get_kernel_work_group_size(block_B); 143 // COMMON: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8* bitcast ([[BL_A]] to i8*)) 144 size = get_kernel_preferred_work_group_size_multiple(block_A); 145 // COMMON: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8* bitcast ([[BL_GLOBAL]] to i8*)) 146 size = get_kernel_preferred_work_group_size_multiple(block_G); 147} 148