; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=HSA,AKF_HSA %s
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=HSA,ATTRIBUTOR_HSA %s

; The same IR is fed to opt twice: once with -amdgpu-annotate-kernel-features
; and once with -amdgpu-attributor. Expectations that are identical for both
; invocations use the shared check prefix; expectations that differ use the
; per-invocation prefixes given on the command lines above.

; memcpy intrinsic with an addrspace(1) destination, an addrspace(4) source and
; an i32 length. Called by @memcpy_constant_cast_group_gv_gep_to_flat.
declare void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* nocapture, i32 addrspace(4)* nocapture, i32, i1) #0

; Globals placed in addrspace(3); the functions in this file refer to casts
; from these as "group"/"lds" casts.
@lds.i32 = unnamed_addr addrspace(3) global i32 undef, align 4
@lds.arr = unnamed_addr addrspace(3) global [256 x i32] undef, align 4

; Globals placed in addrspace(1); the functions in this file refer to casts
; from these as "global" casts.
@global.i32 = unnamed_addr addrspace(1) global i32 undef, align 4
@global.arr = unnamed_addr addrspace(1) global [256 x i32] undef, align 4

;.
; HSA: @[[LDS_I32:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(3) global i32 undef, align 4
; HSA: @[[LDS_ARR:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(3) global [256 x i32] undef, align 4
; HSA: @[[GLOBAL_I32:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(1) global i32 undef, align 4
; HSA: @[[GLOBAL_ARR:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(1) global [256 x i32] undef, align 4
;.
; Stores 7 through a constant-expression addrspacecast of null from
; addrspace(4) to addrspace(3). Both opt invocations share one expectation and
; the kernel stays on attribute group ATTR1.
define amdgpu_kernel void @store_cast_0_flat_to_group_addrspacecast() #1 {
; HSA-LABEL: define {{[^@]+}}@store_cast_0_flat_to_group_addrspacecast
; HSA-SAME: () #[[ATTR1:[0-9]+]] {
; HSA-NEXT:    store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*), align 4
; HSA-NEXT:    ret void
;
  store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*)
  ret void
}

; Opposite direction: null is cast from addrspace(3) to addrspace(4). The
; attributor invocation expects attribute group ATTR2 here, which (per the
; attribute checks at the end of the file) is ATTR1 without
; "amdgpu-no-queue-ptr".
; NOTE(review): presumably the addrspace(3) -> addrspace(4) cast is what makes
; the queue pointer needed; confirm against the pass implementation.
define amdgpu_kernel void @store_cast_0_group_to_flat_addrspacecast() #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@store_cast_0_group_to_flat_addrspacecast
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT:    store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), align 4
; AKF_HSA-NEXT:    ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_cast_0_group_to_flat_addrspacecast
; ATTRIBUTOR_HSA-SAME: () #[[ATTR2:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT:    store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), align 4
; ATTRIBUTOR_HSA-NEXT:    ret void
;
  store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*)
  ret void
}

; Same cast direction as above, but the cast operand is the addrspace(3)
; global @lds.i32 instead of null.
define amdgpu_kernel void @store_constant_cast_group_gv_to_flat() #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_to_flat
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT:    store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds.i32 to i32 addrspace(4)*), align 4
; AKF_HSA-NEXT:    ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_to_flat
; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] {
; ATTRIBUTOR_HSA-NEXT:    store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds.i32 to i32 addrspace(4)*), align 4
; ATTRIBUTOR_HSA-NEXT:    ret void
;
  store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds.i32 to i32 addrspace(4)*)
  ret void
}

; The addrspacecast of @lds.arr is nested inside a constant getelementptr
; (element index 8), so the cast is not the store's direct pointer operand.
define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat() #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT:    store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4
; AKF_HSA-NEXT:    ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat
; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] {
; ATTRIBUTOR_HSA-NEXT:    store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4
; ATTRIBUTOR_HSA-NEXT:    ret void
;
  store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8)
  ret void
}

; Cast source is the addrspace(1) global @global.i32. Both opt invocations
; share one expectation and the kernel stays on ATTR1, unlike the addrspace(3)
; source cases above.
define amdgpu_kernel void @store_constant_cast_global_gv_to_flat() #1 {
; HSA-LABEL: define {{[^@]+}}@store_constant_cast_global_gv_to_flat
; HSA-SAME: () #[[ATTR1]] {
; HSA-NEXT:    store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global.i32 to i32 addrspace(4)*), align 4
; HSA-NEXT:    ret void
;
  store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global.i32 to i32 addrspace(4)*)
  ret void
}

; addrspace(1) source (@global.arr) with the cast nested inside a constant
; getelementptr; still expected to stay on ATTR1 for both invocations.
define amdgpu_kernel void @store_constant_cast_global_gv_gep_to_flat() #1 {
; HSA-LABEL: define {{[^@]+}}@store_constant_cast_global_gv_gep_to_flat
; HSA-SAME: () #[[ATTR1]] {
; HSA-NEXT:    store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(1)* @global.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4
; HSA-NEXT:    ret void
;
  store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(1)* @global.arr to [256 x i32] addrspace(4)*), i64 0, i64 8)
  ret void
}

; The functions below reuse one constant expression -- a getelementptr
; (element index 8) over an addrspacecast of @lds.arr from addrspace(3) to
; addrspace(4) -- in different operand positions and instruction kinds. In
; every case the attributor invocation expects attribute group ATTR2.

; Constant expression used as the pointer operand of a load.
define amdgpu_kernel void @load_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@load_constant_cast_group_gv_gep_to_flat
; AKF_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT:    [[VAL:%.*]] = load i32, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4
; AKF_HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[OUT]], align 4
; AKF_HSA-NEXT:    ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@load_constant_cast_group_gv_gep_to_flat
; ATTRIBUTOR_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
; ATTRIBUTOR_HSA-NEXT:    [[VAL:%.*]] = load i32, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4
; ATTRIBUTOR_HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[OUT]], align 4
; ATTRIBUTOR_HSA-NEXT:    ret void
;
  %val = load i32, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8)
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; Constant expression used as the pointer operand of an atomicrmw add.
define amdgpu_kernel void @atomicrmw_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@atomicrmw_constant_cast_group_gv_gep_to_flat
; AKF_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT:    [[VAL:%.*]] = atomicrmw add i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 1 seq_cst, align 4
; AKF_HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[OUT]], align 4
; AKF_HSA-NEXT:    ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@atomicrmw_constant_cast_group_gv_gep_to_flat
; ATTRIBUTOR_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
; ATTRIBUTOR_HSA-NEXT:    [[VAL:%.*]] = atomicrmw add i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 1 seq_cst, align 4
; ATTRIBUTOR_HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[OUT]], align 4
; ATTRIBUTOR_HSA-NEXT:    ret void
;
  %val = atomicrmw add i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 1 seq_cst
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; Constant expression used as the pointer operand of a cmpxchg; element 0 of
; the { i32, i1 } result is then stored to %out.
define amdgpu_kernel void @cmpxchg_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@cmpxchg_constant_cast_group_gv_gep_to_flat
; AKF_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT:    [[VAL:%.*]] = cmpxchg i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst, align 4
; AKF_HSA-NEXT:    [[VAL0:%.*]] = extractvalue { i32, i1 } [[VAL]], 0
; AKF_HSA-NEXT:    store i32 [[VAL0]], i32 addrspace(1)* [[OUT]], align 4
; AKF_HSA-NEXT:    ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@cmpxchg_constant_cast_group_gv_gep_to_flat
; ATTRIBUTOR_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
; ATTRIBUTOR_HSA-NEXT:    [[VAL:%.*]] = cmpxchg i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst, align 4
; ATTRIBUTOR_HSA-NEXT:    [[VAL0:%.*]] = extractvalue { i32, i1 } [[VAL]], 0
; ATTRIBUTOR_HSA-NEXT:    store i32 [[VAL0]], i32 addrspace(1)* [[OUT]], align 4
; ATTRIBUTOR_HSA-NEXT:    ret void
;
  %val = cmpxchg i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, i32 addrspace(1)* %out
  ret void
}

; Constant expression passed as the source argument of the memcpy intrinsic
; call (32 bytes, non-volatile) rather than as a load/store operand.
define amdgpu_kernel void @memcpy_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@memcpy_constant_cast_group_gv_gep_to_flat
; AKF_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT:    call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* align 4 [[OUT]], i32 addrspace(4)* align 4 getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i1 false)
; AKF_HSA-NEXT:    ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@memcpy_constant_cast_group_gv_gep_to_flat
; ATTRIBUTOR_HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
; ATTRIBUTOR_HSA-NEXT:    call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* align 4 [[OUT]], i32 addrspace(4)* align 4 getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i1 false)
; ATTRIBUTOR_HSA-NEXT:    ret void
;
  call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* align 4 %out, i32 addrspace(4)* align 4 getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i1 false)
  ret void
}

; Can't just search the pointer value
; Here the constant expression is the *value* operand of the store; the
; store's pointer operand is the plain argument %out.
define amdgpu_kernel void @store_value_constant_cast_lds_gv_gep_to_flat(i32 addrspace(4)* addrspace(1)* %out) #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@store_value_constant_cast_lds_gv_gep_to_flat
; AKF_HSA-SAME: (i32 addrspace(4)* addrspace(1)* [[OUT:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT:    store i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 addrspace(4)* addrspace(1)* [[OUT]], align 8
; AKF_HSA-NEXT:    ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_value_constant_cast_lds_gv_gep_to_flat
; ATTRIBUTOR_HSA-SAME: (i32 addrspace(4)* addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
; ATTRIBUTOR_HSA-NEXT:    store i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 addrspace(4)* addrspace(1)* [[OUT]], align 8
; ATTRIBUTOR_HSA-NEXT:    ret void
;
  store i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 addrspace(4)* addrspace(1)* %out
  ret void
}

; Can't just search pointer types
; The constant expression is wrapped in a ptrtoint, so the stored value has
; type i64 and no operand of the store has an addrspace(4) pointer type.
define amdgpu_kernel void @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat(i64 addrspace(1)* %out) #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat
; AKF_HSA-SAME: (i64 addrspace(1)* [[OUT:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT:    store i64 ptrtoint (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i64), i64 addrspace(1)* [[OUT]], align 4
; AKF_HSA-NEXT:    ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat
; ATTRIBUTOR_HSA-SAME: (i64 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
; ATTRIBUTOR_HSA-NEXT:    store i64 ptrtoint (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i64), i64 addrspace(1)* [[OUT]], align 4
; ATTRIBUTOR_HSA-NEXT:    ret void
;
  store i64 ptrtoint (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i64), i64 addrspace(1)* %out
  ret void
}

; Cast group to flat, do GEP, cast back to group
; The outermost cast yields an addrspace(3) pointer again, but the nested
; addrspace(3) -> addrspace(4) cast is still present in the expression; the
; attributor invocation still expects ATTR2.
define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat_to_group() #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat_to_group
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT:    store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*), align 4
; AKF_HSA-NEXT:    ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat_to_group
; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] {
; ATTRIBUTOR_HSA-NEXT:    store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*), align 4
; ATTRIBUTOR_HSA-NEXT:    ret void
;
  store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*)
  ret void
}

; Same round-trip expression, but used as a return value. This is the only
; function in the file declared without the amdgpu_kernel calling convention.
define i32 addrspace(3)* @ret_constant_cast_group_gv_gep_to_flat_to_group() #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@ret_constant_cast_group_gv_gep_to_flat_to_group
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT:    ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*)
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@ret_constant_cast_group_gv_gep_to_flat_to_group
; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] {
; ATTRIBUTOR_HSA-NEXT:    ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*)
;
  ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*)
}

; Input attribute groups: #0 is attached to the memcpy declaration, #1 to
; every function definition in this file.
attributes #0 = { argmemonly nounwind }
attributes #1 = { nounwind }
; Expected output attribute groups follow. For the attributor invocation,
; ATTR1 and ATTR2 differ only in that ATTR2 lacks "amdgpu-no-queue-ptr".
;.
; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn }
; AKF_HSA: attributes #[[ATTR1]] = { nounwind }
;.
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn }
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.