; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC %s
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=redwood < %s | FileCheck --check-prefixes=EG,FUNC %s
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-promote-alloca -sroa -instcombine < %s | FileCheck -check-prefix=OPT %s
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-promote-alloca,sroa,instcombine < %s | FileCheck -check-prefix=OPT %s

; Each kernel below fills a four-element alloca and reads it back. The opt
; invocations (promote-alloca, then SROA, then instcombine) are expected to
; leave only vector element operations or a folded constant. The amdgcn llc
; invocations only verify that every kernel is emitted, with alloca promotion
; both disabled and enabled. The r600 invocation additionally matches the
; MOV / MOVA_INT / STORE_RAW sequences listed per kernel.

; The datalayout places allocas in address space 5.
target datalayout = "A5"

; Constants 0..3 are stored to fixed slots and one slot is loaded through the
; runtime %index. Expected result: a single extractelement from the constant
; vector <0, 1, 2, 3>.

; OPT-LABEL: @vector_read(
; OPT: %0 = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
; OPT: store i32 %0, i32 addrspace(1)* %out, align 4

; FUNC-LABEL: {{^}}vector_read:
; EG: MOV
; EG: MOV
; EG: MOV
; EG: MOV
; EG: MOVA_INT
define amdgpu_kernel void @vector_read(i32 addrspace(1)* %out, i32 %index) {
entry:
  %tmp = alloca [4 x i32], addrspace(5)
  %x = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0
  %y = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
  %z = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2
  %w = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3
  store i32 0, i32 addrspace(5)* %x
  store i32 1, i32 addrspace(5)* %y
  store i32 2, i32 addrspace(5)* %z
  store i32 3, i32 addrspace(5)* %w
  %tmp1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %index
  %tmp2 = load i32, i32 addrspace(5)* %tmp1
  store i32 %tmp2, i32 addrspace(1)* %out
  ret void
}

; All four slots are zeroed, then 1 is written at the runtime %w_index and a
; slot is read back at the runtime %r_index. Expected result: an insertelement
; into a zero vector followed by an extractelement.

; OPT-LABEL: @vector_write(
; OPT: %0 = insertelement <4 x i32> zeroinitializer, i32 1, i32 %w_index
; OPT: %1 = extractelement <4 x i32> %0, i32 %r_index
; OPT: store i32 %1, i32 addrspace(1)* %out, align 4

; FUNC-LABEL: {{^}}vector_write:
; EG: MOV
; EG: MOV
; EG: MOV
; EG: MOV
; EG: MOVA_INT
; EG: MOVA_INT
define amdgpu_kernel void @vector_write(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) {
entry:
  %tmp = alloca [4 x i32], addrspace(5)
  %x = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0
  %y = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
  %z = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2
  %w = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3
  store i32 0, i32 addrspace(5)* %x
  store i32 0, i32 addrspace(5)* %y
  store i32 0, i32 addrspace(5)* %z
  store i32 0, i32 addrspace(5)* %w
  %tmp1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %w_index
  store i32 1, i32 addrspace(5)* %tmp1
  %tmp2 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %r_index
  %tmp3 = load i32, i32 addrspace(5)* %tmp2
  store i32 %tmp3, i32 addrspace(1)* %out
  ret void
}

; This test should be optimized to:
; store i32 0, i32 addrspace(1)* %out
;
; The load goes through a pointer to element 1 that is bitcast back to an
; array pointer and indexed at 0, so it reads one of the zeroed slots. The
; %w_index and %r_index parameters are not used by the body.
;
; The expected store is matched with a plain check rather than a label
; directive, so that it is looked for inside this kernel's block instead of
; being used to split the input into blocks.

; OPT-LABEL: @bitcast_gep(
; OPT: store i32 0, i32 addrspace(1)* %out, align 4

; FUNC-LABEL: {{^}}bitcast_gep:
; EG: STORE_RAW
define amdgpu_kernel void @bitcast_gep(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) {
entry:
  %tmp = alloca [4 x i32], addrspace(5)
  %x = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0
  %y = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
  %z = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2
  %w = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3
  store i32 0, i32 addrspace(5)* %x
  store i32 0, i32 addrspace(5)* %y
  store i32 0, i32 addrspace(5)* %z
  store i32 0, i32 addrspace(5)* %w
  %tmp1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
  %tmp2 = bitcast i32 addrspace(5)* %tmp1 to [4 x i32] addrspace(5)*
  %tmp3 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp2, i32 0, i32 0
  %tmp4 = load i32, i32 addrspace(5)* %tmp3
  store i32 %tmp4, i32 addrspace(1)* %out
  ret void
}

; Slot 0 is written as float 1.0 through a bitcast of its i32 pointer; the
; other slots are written as i32. Expected result: an extractelement from an
; i32 vector whose first lane is 1065353216, the bit pattern of float 1.0.

; OPT-LABEL: @vector_read_bitcast_gep(
; OPT: %0 = extractelement <4 x i32> <i32 1065353216, i32 1, i32 2, i32 3>, i32 %index
; OPT: store i32 %0, i32 addrspace(1)* %out, align 4
define amdgpu_kernel void @vector_read_bitcast_gep(i32 addrspace(1)* %out, i32 %index) {
entry:
  %tmp = alloca [4 x i32], addrspace(5)
  %x = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0
  %y = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
  %z = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2
  %w = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3
  %bc = bitcast i32 addrspace(5)* %x to float addrspace(5)*
  store float 1.0, float addrspace(5)* %bc
  store i32 1, i32 addrspace(5)* %y
  store i32 2, i32 addrspace(5)* %z
  store i32 3, i32 addrspace(5)* %w
  %tmp1 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %index
  %tmp2 = load i32, i32 addrspace(5)* %tmp1
  store i32 %tmp2, i32 addrspace(1)* %out
  ret void
}

; The alloca is declared as [4 x i32], but every access goes through a bitcast
; of the whole alloca to [4 x float]. Expected result: an extractelement from
; a constant float vector.

; OPT-LABEL: @vector_read_bitcast_alloca(
; OPT: %0 = extractelement <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 4.000000e+00>, i32 %index
; OPT: store float %0, float addrspace(1)* %out, align 4
define amdgpu_kernel void @vector_read_bitcast_alloca(float addrspace(1)* %out, i32 %index) {
entry:
  %tmp = alloca [4 x i32], addrspace(5)
  %tmp.bc = bitcast [4 x i32] addrspace(5)* %tmp to [4 x float] addrspace(5)*
  %x = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 0
  %y = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 1
  %z = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 2
  %w = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 3
  store float 0.0, float addrspace(5)* %x
  store float 1.0, float addrspace(5)* %y
  store float 2.0, float addrspace(5)* %z
  store float 4.0, float addrspace(5)* %w
  %tmp1 = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 %index
  %tmp2 = load float, float addrspace(5)* %tmp1
  store float %tmp2, float addrspace(1)* %out
  ret void
}

; The pointer arguments in local address space should not affect promotion to vector.
; The body matches @vector_read; the only difference is the extra, unused
; addrspace(3) pointer parameter %stopper.

; OPT-LABEL: @vector_read_with_local_arg(
; OPT: %0 = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
; OPT: store i32 %0, i32 addrspace(1)* %out, align 4
define amdgpu_kernel void @vector_read_with_local_arg(i32 addrspace(3)* %stopper, i32 addrspace(1)* %out, i32 %index) {
entry:
  %tmp = alloca [4 x i32], addrspace(5)
  %x = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0
  %y = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
  %z = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2
  %w = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3
  store i32 0, i32 addrspace(5)* %x
  store i32 1, i32 addrspace(5)* %y
  store i32 2, i32 addrspace(5)* %z
  store i32 3, i32 addrspace(5)* %w
  %tmp1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %index
  %tmp2 = load i32, i32 addrspace(5)* %tmp1
  store i32 %tmp2, i32 addrspace(1)* %out
  ret void
}
