; RUN: llc -march=amdgcn -mcpu=verde -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=verde -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-promote-alloca -sroa -instcombine < %s | FileCheck -check-prefix=OPT %s

; Tests for small [4 x i32] allocas that are accessed with a mix of constant
; and dynamic indices. The opt run line checks the IR left after
; -amdgpu-promote-alloca, -sroa and -instcombine; the llc run lines check
; code generation with the promote-alloca feature both disabled and enabled.

; Fill a [4 x i32] alloca with the constants 0, 1, 2, 3 and read back one
; element at a dynamic index. The opt checks expect the whole sequence to
; become a single extractelement from a constant vector.

; OPT-LABEL: @vector_read(
; OPT: %0 = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
; OPT: store i32 %0, i32 addrspace(1)* %out, align 4

; FUNC-LABEL: {{^}}vector_read:
; EG: MOV
; EG: MOV
; EG: MOV
; EG: MOV
; EG: MOVA_INT
define void @vector_read(i32 addrspace(1)* %out, i32 %index) {
entry:
  %tmp = alloca [4 x i32]
  %x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0
  %y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
  %z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2
  %w = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3
  store i32 0, i32* %x
  store i32 1, i32* %y
  store i32 2, i32* %z
  store i32 3, i32* %w
  %tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %index
  %tmp2 = load i32, i32* %tmp1
  store i32 %tmp2, i32 addrspace(1)* %out
  ret void
}

; Zero a [4 x i32] alloca, write 1 at a dynamic index, then read at a second
; dynamic index. The opt checks expect an insertelement into zeroinitializer
; followed by an extractelement.

; OPT-LABEL: @vector_write(
; OPT: %0 = insertelement <4 x i32> zeroinitializer, i32 1, i32 %w_index
; OPT: %1 = extractelement <4 x i32> %0, i32 %r_index
; OPT: store i32 %1, i32 addrspace(1)* %out, align 4

; FUNC-LABEL: {{^}}vector_write:
; EG: MOV
; EG: MOV
; EG: MOV
; EG: MOV
; EG: MOVA_INT
; EG: MOVA_INT
define void @vector_write(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) {
entry:
  %tmp = alloca [4 x i32]
  %x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0
  %y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
  %z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2
  %w = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3
  store i32 0, i32* %x
  store i32 0, i32* %y
  store i32 0, i32* %z
  store i32 0, i32* %w
  %tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %w_index
  store i32 1, i32* %tmp1
  %tmp2 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %r_index
  %tmp3 = load i32, i32* %tmp2
  store i32 %tmp3, i32 addrspace(1)* %out
  ret void
}

; This test should be optimized to:
; store i32 0, i32 addrspace(1)* %out
;
; The pointer to element 1 is bitcast back to an array pointer and re-indexed
; at element 0, so the load reads a slot that was stored with 0. The store
; check below is a plain check (not a label) so that it is matched inside the
; body of @bitcast_gep rather than acting as a block separator.

; OPT-LABEL: @bitcast_gep(
; OPT: store i32 0, i32 addrspace(1)* %out, align 4

; FUNC-LABEL: {{^}}bitcast_gep:
; EG: STORE_RAW
define void @bitcast_gep(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) {
entry:
  %tmp = alloca [4 x i32]
  %x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0
  %y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
  %z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2
  %w = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3
  store i32 0, i32* %x
  store i32 0, i32* %y
  store i32 0, i32* %z
  store i32 0, i32* %w
  %tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
  %tmp2 = bitcast i32* %tmp1 to [4 x i32]*
  %tmp3 = getelementptr [4 x i32], [4 x i32]* %tmp2, i32 0, i32 0
  %tmp4 = load i32, i32* %tmp3
  store i32 %tmp4, i32 addrspace(1)* %out
  ret void
}

; Element 0 is written as float 1.0 through a bitcast of its i32 pointer; the
; remaining elements are written as i32. The opt checks expect element 0 of
; the resulting constant vector to be 1065353216, the i32 bit pattern of 1.0.

; OPT-LABEL: @vector_read_bitcast_gep(
; OPT: %0 = extractelement <4 x i32> <i32 1065353216, i32 1, i32 2, i32 3>, i32 %index
; OPT: store i32 %0, i32 addrspace(1)* %out, align 4
define void @vector_read_bitcast_gep(i32 addrspace(1)* %out, i32 %index) {
entry:
  %tmp = alloca [4 x i32]
  %x = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 0
  %y = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
  %z = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 2
  %w = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 3
  %bc = bitcast i32* %x to float*
  store float 1.0, float* %bc
  store i32 1, i32* %y
  store i32 2, i32* %z
  store i32 3, i32* %w
  %tmp1 = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 %index
  %tmp2 = load i32, i32* %tmp1
  store i32 %tmp2, i32 addrspace(1)* %out
  ret void
}

; FIXME: Should be able to promote this. Instcombine should fold the
; cast in the hasOneUse case so it might not matter in practice
;
; Here the [4 x i32] alloca itself is bitcast to [4 x float]* and every access
; goes through the cast pointer. The opt checks record the current result: an
; alloca of [4 x float] with four float stores and a float load remaining.

; OPT-LABEL: @vector_read_bitcast_alloca(
; OPT: alloca [4 x float]
; OPT: store float
; OPT: store float
; OPT: store float
; OPT: store float
; OPT: load float
define void @vector_read_bitcast_alloca(float addrspace(1)* %out, i32 %index) {
entry:
  %tmp = alloca [4 x i32]
  %tmp.bc = bitcast [4 x i32]* %tmp to [4 x float]*
  %x = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 0
  %y = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 1
  %z = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 2
  %w = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 3
  store float 0.0, float* %x
  store float 1.0, float* %y
  store float 2.0, float* %z
  store float 4.0, float* %w
  %tmp1 = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 %index
  %tmp2 = load float, float* %tmp1
  store float %tmp2, float addrspace(1)* %out
  ret void
}