; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-promote-alloca < %s | FileCheck %s

; Make sure that array allocas which are loaded and stored as multi-element
; aggregates are handled correctly.
; Strictly speaking, the promote-alloca pass shouldn't have to deal with this
; case because it is non-canonical IR, but the pass should handle it gracefully
; if it is encountered.
; The checks look for the (non-canonical) lines that previously caused issues
; in PromoteAlloca; opt should now leave these unchanged.

%Block = type { [1 x float], i32 }
%gl_PerVertex = type { <4 x float>, float, [1 x float], [1 x float] }
%struct = type { i32, i32 }

@block = external addrspace(1) global %Block
@pv = external addrspace(1) global %gl_PerVertex

; A [1 x float] alloca is written with a whole-aggregate store and then read
; through a dynamically indexed scalar load. The expected output keeps every
; instruction of the input (only explicit alignments are added).
define amdgpu_vs void @promote_1d_aggr() #0 {
; CHECK-LABEL: @promote_1d_aggr(
; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[F1:%.*]] = alloca [1 x float], align 4
; CHECK-NEXT:    [[FOO:%.*]] = getelementptr [[BLOCK:%.*]], [[BLOCK]] addrspace(1)* @block, i32 0, i32 1
; CHECK-NEXT:    [[FOO1:%.*]] = load i32, i32 addrspace(1)* [[FOO]], align 4
; CHECK-NEXT:    store i32 [[FOO1]], i32* [[I]], align 4
; CHECK-NEXT:    [[FOO2:%.*]] = getelementptr [[BLOCK]], [[BLOCK]] addrspace(1)* @block, i32 0, i32 0
; CHECK-NEXT:    [[FOO3:%.*]] = load [1 x float], [1 x float] addrspace(1)* [[FOO2]], align 4
; CHECK-NEXT:    store [1 x float] [[FOO3]], [1 x float]* [[F1]], align 4
; CHECK-NEXT:    [[FOO4:%.*]] = load i32, i32* [[I]], align 4
; CHECK-NEXT:    [[FOO5:%.*]] = getelementptr [1 x float], [1 x float]* [[F1]], i32 0, i32 [[FOO4]]
; CHECK-NEXT:    [[FOO6:%.*]] = load float, float* [[FOO5]], align 4
; CHECK-NEXT:    [[FOO7:%.*]] = alloca <4 x float>, align 16
; CHECK-NEXT:    [[FOO8:%.*]] = load <4 x float>, <4 x float>* [[FOO7]], align 16
; CHECK-NEXT:    [[FOO9:%.*]] = insertelement <4 x float> [[FOO8]], float [[FOO6]], i32 0
; CHECK-NEXT:    [[FOO10:%.*]] = insertelement <4 x float> [[FOO9]], float [[FOO6]], i32 1
; CHECK-NEXT:    [[FOO11:%.*]] = insertelement <4 x float> [[FOO10]], float [[FOO6]], i32 2
; CHECK-NEXT:    [[FOO12:%.*]] = insertelement <4 x float> [[FOO11]], float [[FOO6]], i32 3
; CHECK-NEXT:    [[FOO13:%.*]] = getelementptr [[GL_PERVERTEX:%.*]], [[GL_PERVERTEX]] addrspace(1)* @pv, i32 0, i32 0
; CHECK-NEXT:    store <4 x float> [[FOO12]], <4 x float> addrspace(1)* [[FOO13]], align 16
; CHECK-NEXT:    ret void
;
  %i = alloca i32
  %f1 = alloca [1 x float]
  %foo = getelementptr %Block, %Block addrspace(1)* @block, i32 0, i32 1
  %foo1 = load i32, i32 addrspace(1)* %foo
  store i32 %foo1, i32* %i
  %foo2 = getelementptr %Block, %Block addrspace(1)* @block, i32 0, i32 0
  %foo3 = load [1 x float], [1 x float] addrspace(1)* %foo2
  store [1 x float] %foo3, [1 x float]* %f1
  %foo4 = load i32, i32* %i
  %foo5 = getelementptr [1 x float], [1 x float]* %f1, i32 0, i32 %foo4
  %foo6 = load float, float* %foo5
  %foo7 = alloca <4 x float>
  %foo8 = load <4 x float>, <4 x float>* %foo7
  %foo9 = insertelement <4 x float> %foo8, float %foo6, i32 0
  %foo10 = insertelement <4 x float> %foo9, float %foo6, i32 1
  %foo11 = insertelement <4 x float> %foo10, float %foo6, i32 2
  %foo12 = insertelement <4 x float> %foo11, float %foo6, i32 3
  %foo13 = getelementptr %gl_PerVertex, %gl_PerVertex addrspace(1)* @pv, i32 0, i32 0
  store <4 x float> %foo12, <4 x float> addrspace(1)* %foo13
  ret void
}

%Block2 = type { i32, [2 x float] }
@block2 = external addrspace(1) global %Block2

; A [2 x float] alloca is written with two scalar element stores and then read
; back with a whole-aggregate load. The expected output rewrites each scalar
; store as a load / insertelement / store on a <2 x float> bitcast of the
; alloca, while the aggregate load of the alloca is kept as is.
define amdgpu_vs void @promote_store_aggr() #0 {
; CHECK-LABEL: @promote_store_aggr(
; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[F1:%.*]] = alloca [2 x float], align 4
; CHECK-NEXT:    [[FOO:%.*]] = getelementptr [[BLOCK2:%.*]], [[BLOCK2]] addrspace(1)* @block2, i32 0, i32 0
; CHECK-NEXT:    [[FOO1:%.*]] = load i32, i32 addrspace(1)* [[FOO]], align 4
; CHECK-NEXT:    store i32 [[FOO1]], i32* [[I]], align 4
; CHECK-NEXT:    [[FOO2:%.*]] = load i32, i32* [[I]], align 4
; CHECK-NEXT:    [[FOO3:%.*]] = sitofp i32 [[FOO2]] to float
; CHECK-NEXT:    [[FOO4:%.*]] = getelementptr [2 x float], [2 x float]* [[F1]], i32 0, i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast [2 x float]* [[F1]] to <2 x float>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[FOO3]], i32 0
; CHECK-NEXT:    store <2 x float> [[TMP3]], <2 x float>* [[TMP1]], align 8
; CHECK-NEXT:    [[FOO5:%.*]] = getelementptr [2 x float], [2 x float]* [[F1]], i32 0, i32 1
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast [2 x float]* [[F1]] to <2 x float>*
; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[TMP4]], align 8
; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float 2.000000e+00, i64 1
; CHECK-NEXT:    store <2 x float> [[TMP6]], <2 x float>* [[TMP4]], align 8
; CHECK-NEXT:    [[FOO6:%.*]] = load [2 x float], [2 x float]* [[F1]], align 4
; CHECK-NEXT:    [[FOO7:%.*]] = getelementptr [[BLOCK2]], [[BLOCK2]] addrspace(1)* @block2, i32 0, i32 1
; CHECK-NEXT:    store [2 x float] [[FOO6]], [2 x float] addrspace(1)* [[FOO7]], align 4
; CHECK-NEXT:    [[FOO8:%.*]] = getelementptr [[GL_PERVERTEX:%.*]], [[GL_PERVERTEX]] addrspace(1)* @pv, i32 0, i32 0
; CHECK-NEXT:    store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> addrspace(1)* [[FOO8]], align 16
; CHECK-NEXT:    ret void
;
  %i = alloca i32
  %f1 = alloca [2 x float]
  %foo = getelementptr %Block2, %Block2 addrspace(1)* @block2, i32 0, i32 0
  %foo1 = load i32, i32 addrspace(1)* %foo
  store i32 %foo1, i32* %i
  %foo2 = load i32, i32* %i
  %foo3 = sitofp i32 %foo2 to float
  %foo4 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 0
  store float %foo3, float* %foo4
  %foo5 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 1
  store float 2.000000e+00, float* %foo5
  %foo6 = load [2 x float], [2 x float]* %f1
  %foo7 = getelementptr %Block2, %Block2 addrspace(1)* @block2, i32 0, i32 1
  store [2 x float] %foo6, [2 x float] addrspace(1)* %foo7
  %foo8 = getelementptr %gl_PerVertex, %gl_PerVertex addrspace(1)* @pv, i32 0, i32 0
  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> addrspace(1)* %foo8
  ret void
}

%Block3 = type { [2 x float], i32 }
@block3 = external addrspace(1) global %Block3

; A [2 x float] alloca is written with a whole-aggregate store and then read
; through a dynamically indexed scalar load. The expected output keeps the
; aggregate store and replaces the scalar load with a <2 x float> load (via a
; bitcast of the alloca) followed by an extractelement at the dynamic index.
define amdgpu_vs void @promote_load_from_store_aggr() #0 {
; CHECK-LABEL: @promote_load_from_store_aggr(
; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[F1:%.*]] = alloca [2 x float], align 4
; CHECK-NEXT:    [[FOO:%.*]] = getelementptr [[BLOCK3:%.*]], [[BLOCK3]] addrspace(1)* @block3, i32 0, i32 1
; CHECK-NEXT:    [[FOO1:%.*]] = load i32, i32 addrspace(1)* [[FOO]], align 4
; CHECK-NEXT:    store i32 [[FOO1]], i32* [[I]], align 4
; CHECK-NEXT:    [[FOO2:%.*]] = getelementptr [[BLOCK3]], [[BLOCK3]] addrspace(1)* @block3, i32 0, i32 0
; CHECK-NEXT:    [[FOO3:%.*]] = load [2 x float], [2 x float] addrspace(1)* [[FOO2]], align 4
; CHECK-NEXT:    store [2 x float] [[FOO3]], [2 x float]* [[F1]], align 4
; CHECK-NEXT:    [[FOO4:%.*]] = load i32, i32* [[I]], align 4
; CHECK-NEXT:    [[FOO5:%.*]] = getelementptr [2 x float], [2 x float]* [[F1]], i32 0, i32 [[FOO4]]
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast [2 x float]* [[F1]] to <2 x float>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 [[FOO4]]
; CHECK-NEXT:    [[FOO7:%.*]] = alloca <4 x float>, align 16
; CHECK-NEXT:    [[FOO8:%.*]] = load <4 x float>, <4 x float>* [[FOO7]], align 16
; CHECK-NEXT:    [[FOO9:%.*]] = insertelement <4 x float> [[FOO8]], float [[TMP3]], i32 0
; CHECK-NEXT:    [[FOO10:%.*]] = insertelement <4 x float> [[FOO9]], float [[TMP3]], i32 1
; CHECK-NEXT:    [[FOO11:%.*]] = insertelement <4 x float> [[FOO10]], float [[TMP3]], i32 2
; CHECK-NEXT:    [[FOO12:%.*]] = insertelement <4 x float> [[FOO11]], float [[TMP3]], i32 3
; CHECK-NEXT:    [[FOO13:%.*]] = getelementptr [[GL_PERVERTEX:%.*]], [[GL_PERVERTEX]] addrspace(1)* @pv, i32 0, i32 0
; CHECK-NEXT:    store <4 x float> [[FOO12]], <4 x float> addrspace(1)* [[FOO13]], align 16
; CHECK-NEXT:    ret void
;
  %i = alloca i32
  %f1 = alloca [2 x float]
  %foo = getelementptr %Block3, %Block3 addrspace(1)* @block3, i32 0, i32 1
  %foo1 = load i32, i32 addrspace(1)* %foo
  store i32 %foo1, i32* %i
  %foo2 = getelementptr %Block3, %Block3 addrspace(1)* @block3, i32 0, i32 0
  %foo3 = load [2 x float], [2 x float] addrspace(1)* %foo2
  store [2 x float] %foo3, [2 x float]* %f1
  %foo4 = load i32, i32* %i
  %foo5 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 %foo4
  %foo6 = load float, float* %foo5
  %foo7 = alloca <4 x float>
  %foo8 = load <4 x float>, <4 x float>* %foo7
  %foo9 = insertelement <4 x float> %foo8, float %foo6, i32 0
  %foo10 = insertelement <4 x float> %foo9, float %foo6, i32 1
  %foo11 = insertelement <4 x float> %foo10, float %foo6, i32 2
  %foo12 = insertelement <4 x float> %foo11, float %foo6, i32 3
  %foo13 = getelementptr %gl_PerVertex, %gl_PerVertex addrspace(1)* @pv, i32 0, i32 0
  store <4 x float> %foo12, <4 x float> addrspace(1)* %foo13
  ret void
}

@tmp_g = external addrspace(1) global { [4 x double], <2 x double>, <3 x double>, <4 x double> }
@frag_color = external addrspace(1) global <4 x float>

; A [2 x double] alloca is initialised with a whole-aggregate store of a value
; built with insertvalue, then accessed with scalar loads and a scalar store at
; constant indices. The expected output keeps the aggregate store and turns the
; scalar accesses into <2 x double> loads with extractelement, and a
; load / insertelement / store sequence for the scalar store.
define amdgpu_ps void @promote_double_aggr() #0 {
; CHECK-LABEL: @promote_double_aggr(
; CHECK-NEXT:    [[S:%.*]] = alloca [2 x double], align 8
; CHECK-NEXT:    [[FOO:%.*]] = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 0
; CHECK-NEXT:    [[FOO1:%.*]] = load double, double addrspace(1)* [[FOO]], align 8
; CHECK-NEXT:    [[FOO2:%.*]] = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 1
; CHECK-NEXT:    [[FOO3:%.*]] = load double, double addrspace(1)* [[FOO2]], align 8
; CHECK-NEXT:    [[FOO4:%.*]] = insertvalue [2 x double] undef, double [[FOO1]], 0
; CHECK-NEXT:    [[FOO5:%.*]] = insertvalue [2 x double] [[FOO4]], double [[FOO3]], 1
; CHECK-NEXT:    store [2 x double] [[FOO5]], [2 x double]* [[S]], align 8
; CHECK-NEXT:    [[FOO6:%.*]] = getelementptr [2 x double], [2 x double]* [[S]], i32 0, i32 1
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast [2 x double]* [[S]] to <2 x double>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 16
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i64 1
; CHECK-NEXT:    [[FOO8:%.*]] = getelementptr [2 x double], [2 x double]* [[S]], i32 0, i32 1
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast [2 x double]* [[S]] to <2 x double>*
; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 16
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i64 1
; CHECK-NEXT:    [[FOO10:%.*]] = fadd double [[TMP3]], [[TMP6]]
; CHECK-NEXT:    [[FOO11:%.*]] = getelementptr [2 x double], [2 x double]* [[S]], i32 0, i32 0
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast [2 x double]* [[S]] to <2 x double>*
; CHECK-NEXT:    [[TMP8:%.*]] = load <2 x double>, <2 x double>* [[TMP7]], align 16
; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[FOO10]], i32 0
; CHECK-NEXT:    store <2 x double> [[TMP9]], <2 x double>* [[TMP7]], align 16
; CHECK-NEXT:    [[FOO12:%.*]] = getelementptr [2 x double], [2 x double]* [[S]], i32 0, i32 0
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast [2 x double]* [[S]] to <2 x double>*
; CHECK-NEXT:    [[TMP11:%.*]] = load <2 x double>, <2 x double>* [[TMP10]], align 16
; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[TMP11]], i32 0
; CHECK-NEXT:    [[FOO14:%.*]] = getelementptr [2 x double], [2 x double]* [[S]], i32 0, i32 1
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast [2 x double]* [[S]] to <2 x double>*
; CHECK-NEXT:    [[TMP14:%.*]] = load <2 x double>, <2 x double>* [[TMP13]], align 16
; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x double> [[TMP14]], i64 1
; CHECK-NEXT:    [[FOO16:%.*]] = fadd double [[TMP12]], [[TMP15]]
; CHECK-NEXT:    [[FOO17:%.*]] = fptrunc double [[FOO16]] to float
; CHECK-NEXT:    [[FOO18:%.*]] = insertelement <4 x float> undef, float [[FOO17]], i32 0
; CHECK-NEXT:    [[FOO19:%.*]] = insertelement <4 x float> [[FOO18]], float [[FOO17]], i32 1
; CHECK-NEXT:    [[FOO20:%.*]] = insertelement <4 x float> [[FOO19]], float [[FOO17]], i32 2
; CHECK-NEXT:    [[FOO21:%.*]] = insertelement <4 x float> [[FOO20]], float [[FOO17]], i32 3
; CHECK-NEXT:    store <4 x float> [[FOO21]], <4 x float> addrspace(1)* @frag_color, align 16
; CHECK-NEXT:    ret void
;
  %s = alloca [2 x double]
  %foo = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 0
  %foo1 = load double, double addrspace(1)* %foo
  %foo2 = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 1
  %foo3 = load double, double addrspace(1)* %foo2
  %foo4 = insertvalue [2 x double] undef, double %foo1, 0
  %foo5 = insertvalue [2 x double] %foo4, double %foo3, 1
  store [2 x double] %foo5, [2 x double]* %s
  %foo6 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1
  %foo7 = load double, double* %foo6
  %foo8 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1
  %foo9 = load double, double* %foo8
  %foo10 = fadd double %foo7, %foo9
  %foo11 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 0
  store double %foo10, double* %foo11
  %foo12 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 0
  %foo13 = load double, double* %foo12
  %foo14 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1
  %foo15 = load double, double* %foo14
  %foo16 = fadd double %foo13, %foo15
  %foo17 = fptrunc double %foo16 to float
  %foo18 = insertelement <4 x float> undef, float %foo17, i32 0
  %foo19 = insertelement <4 x float> %foo18, float %foo17, i32 1
  %foo20 = insertelement <4 x float> %foo19, float %foo17, i32 2
  %foo21 = insertelement <4 x float> %foo20, float %foo17, i32 3
  store <4 x float> %foo21, <4 x float> addrspace(1)* @frag_color
  ret void
}

; Don't crash on a type that isn't a valid vector element.
; The alloca here is an array of %struct and has no users. The expected output
; no longer contains the alloca; instead it computes an index from the dispatch
; pointer and the workitem ids and forms a getelementptr into the addrspace(3)
; array @alloca_struct.alloca.
define amdgpu_kernel void @alloca_struct() #0 {
; CHECK-LABEL: @alloca_struct(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call noalias nonnull dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8 addrspace(4)* [[TMP0]] to i32 addrspace(4)*
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32 addrspace(4)* [[TMP1]], i64 1
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[TMP2]], align 4, !invariant.load !0
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32 addrspace(4)* [[TMP1]], i64 2
; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32 addrspace(4)* [[TMP4]], align 4, !range [[RNG1:![0-9]+]], !invariant.load !0
; CHECK-NEXT:    [[TMP6:%.*]] = lshr i32 [[TMP3]], 16
; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.workitem.id.x(), !range [[RNG2:![0-9]+]]
; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.workitem.id.y(), !range [[RNG2]]
; CHECK-NEXT:    [[TMP9:%.*]] = call i32 @llvm.amdgcn.workitem.id.z(), !range [[RNG2]]
; CHECK-NEXT:    [[TMP10:%.*]] = mul nuw nsw i32 [[TMP6]], [[TMP5]]
; CHECK-NEXT:    [[TMP11:%.*]] = mul i32 [[TMP10]], [[TMP7]]
; CHECK-NEXT:    [[TMP12:%.*]] = mul nuw nsw i32 [[TMP8]], [[TMP5]]
; CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[TMP11]], [[TMP12]]
; CHECK-NEXT:    [[TMP14:%.*]] = add i32 [[TMP13]], [[TMP9]]
; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1024 x [2 x %struct]], [1024 x [2 x %struct]] addrspace(3)* @alloca_struct.alloca, i32 0, i32 [[TMP14]]
; CHECK-NEXT:    ret void
;
entry:
  %alloca = alloca [2 x %struct], align 4
  ret void
}