; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Shader entry points (amdgpu_vs / amdgpu_ps) that return values instead of
; returning void. Every test below verifies the registers that hold the
; returned fields and that no s_endpgm is emitted for the function.
;
; The pixel-shader tests additionally match two (id, value) pairs of .long
; directives that precede the function label. 165580 == 0x286CC and
; 165584 == 0x286D0.
; NOTE(review): these look like the SPI_PS_INPUT_ENA / SPI_PS_INPUT_ADDR
; register ids -- confirm against the backend's register definitions.

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

; Vertex shader: exports the float argument, then returns {arg + 1.0, arg}.
; GCN-LABEL: {{^}}vgpr:
; GCN: v_mov_b32_e32 v1, v0
; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
; GCN-DAG: exp mrt0 v1, v1, v1, v1 done compr vm
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs {float, float} @vgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
  %x = fadd float %3, 1.0
  %a = insertvalue {float, float} undef, float %x, 0
  %b = insertvalue {float, float} %a, float %3, 1
  ret {float, float} %b
}

; Vertex shader: exports the float argument, then returns a constant struct of
; four floats. The argument has to be copied out of v0 before v0..v3 are
; overwritten with the literals.
; GCN-LABEL: {{^}}vgpr_literal:
; GCN: v_mov_b32_e32 v4, v0
; GCN: exp mrt0 v4, v4, v4, v4 done compr vm

; GCN-DAG: v_mov_b32_e32 v0, 1.0
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN-DAG: v_mov_b32_e32 v2, 4.0
; GCN-DAG: v_mov_b32_e32 v3, -1.0
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs {float, float, float, float} @vgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
  ret {float, float, float, float} {float 1.0, float 2.0, float 4.0, float -1.0}
}


; Pixel shader with "InitialPSInputAddr"="0" (attribute group #0): both .long
; values are expected to be 562. Returns both lanes of %4, lane 0 of %7,
; lane 0 of %8 and the float %12; the first three fields need no copy.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 562
; GCN-LABEL: {{^}}vgpr_ps_addr0:
; GCN-NOT: v_mov_b32_e32 v0
; GCN-NOT: v_mov_b32_e32 v1
; GCN-NOT: v_mov_b32_e32 v2
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v6
; GCN-NOT: s_endpgm
define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
  %i0 = extractelement <2 x i32> %4, i32 0
  %i1 = extractelement <2 x i32> %4, i32 1
  %i2 = extractelement <2 x i32> %7, i32 0
  %i3 = extractelement <2 x i32> %8, i32 0
  %f0 = bitcast i32 %i0 to float
  %f1 = bitcast i32 %i1 to float
  %f2 = bitcast i32 %i2 to float
  %f3 = bitcast i32 %i3 to float
  %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
  %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
  %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
  %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
  %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
  ret {float, float, float, float, float} %r4
}


; Pixel shader that reads none of its arguments and returns the constant 1.0;
; both .long values are expected to be 1.
; GCN: .long 165580
; GCN-NEXT: .long 1
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 1
; GCN-LABEL: {{^}}ps_input_ena_no_inputs:
; GCN: v_mov_b32_e32 v0, 1.0
; GCN-NOT: s_endpgm
define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
  ret float 1.0
}


; Pixel shader that reads only %8 and %14 and returns {%14, %8 as <2 x float>};
; both .long values are expected to be 2081.
; NOTE(review): going by the function name, %14 is presumably the POS_W
; input -- confirm against the PS input layout.
; GCN: .long 165580
; GCN-NEXT: .long 2081
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 2081
; GCN-LABEL: {{^}}ps_input_ena_pos_w:
; GCN-DAG: v_mov_b32_e32 v0, v4
; GCN-DAG: v_mov_b32_e32 v1, v2
; GCN: v_mov_b32_e32 v2, v3
; GCN-NOT: s_endpgm
define amdgpu_ps {float, <2 x float>} @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
  %f = bitcast <2 x i32> %8 to <2 x float>
  %s = insertvalue {float, <2 x float>} undef, float %14, 0
  %s1 = insertvalue {float, <2 x float>} %s, <2 x float> %f, 1
  ret {float, <2 x float>} %s1
}


; Same body as vgpr_ps_addr0 but with "InitialPSInputAddr"="1". The first
; value stays 562 while the second becomes 563 (== 562 | 1), and the source
; registers of the returned fields shift accordingly.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 563
; GCN-LABEL: {{^}}vgpr_ps_addr1:
; GCN-DAG: v_mov_b32_e32 v0, v2
; GCN-DAG: v_mov_b32_e32 v1, v3
; GCN: v_mov_b32_e32 v2, v4
; GCN-DAG: v_mov_b32_e32 v3, v6
; GCN-DAG: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
attributes #1 = { "InitialPSInputAddr"="1" }
define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr1([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
  %i0 = extractelement <2 x i32> %4, i32 0
  %i1 = extractelement <2 x i32> %4, i32 1
  %i2 = extractelement <2 x i32> %7, i32 0
  %i3 = extractelement <2 x i32> %8, i32 0
  %f0 = bitcast i32 %i0 to float
  %f1 = bitcast i32 %i1 to float
  %f2 = bitcast i32 %i2 to float
  %f3 = bitcast i32 %i3 to float
  %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
  %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
  %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
  %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
  %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
  ret {float, float, float, float, float} %r4
}


; Same body with "InitialPSInputAddr"="119"; the second value becomes 631
; (== 562 | 119).
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 631
; GCN-LABEL: {{^}}vgpr_ps_addr119:
; GCN-DAG: v_mov_b32_e32 v0, v2
; GCN-DAG: v_mov_b32_e32 v1, v3
; GCN: v_mov_b32_e32 v2, v6
; GCN: v_mov_b32_e32 v3, v8
; GCN: v_mov_b32_e32 v4, v12
; GCN-NOT: s_endpgm
attributes #2 = { "InitialPSInputAddr"="119" }
define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #2 {
  %i0 = extractelement <2 x i32> %4, i32 0
  %i1 = extractelement <2 x i32> %4, i32 1
  %i2 = extractelement <2 x i32> %7, i32 0
  %i3 = extractelement <2 x i32> %8, i32 0
  %f0 = bitcast i32 %i0 to float
  %f1 = bitcast i32 %i1 to float
  %f2 = bitcast i32 %i2 to float
  %f3 = bitcast i32 %i3 to float
  %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
  %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
  %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
  %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
  %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
  ret {float, float, float, float, float} %r4
}


; Same body with "InitialPSInputAddr"="418"; the second value becomes 946
; (== 562 | 418). As in vgpr_ps_addr0, the first three fields need no copy.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 946
; GCN-LABEL: {{^}}vgpr_ps_addr418:
; GCN-NOT: v_mov_b32_e32 v0
; GCN-NOT: v_mov_b32_e32 v1
; GCN-NOT: v_mov_b32_e32 v2
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
attributes #3 = { "InitialPSInputAddr"="418" }
define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr418([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #3 {
  %i0 = extractelement <2 x i32> %4, i32 0
  %i1 = extractelement <2 x i32> %4, i32 1
  %i2 = extractelement <2 x i32> %7, i32 0
  %i3 = extractelement <2 x i32> %8, i32 0
  %f0 = bitcast i32 %i0 to float
  %f1 = bitcast i32 %i1 to float
  %f2 = bitcast i32 %i2 to float
  %f3 = bitcast i32 %i3 to float
  %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
  %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
  %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
  %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
  %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
  ret {float, float, float, float, float} %r4
}


; Vertex shader returning three integers {%2 + 2, %1, %2}.
; %c is chained from %b (not %a) so that field 1 really carries %1; chaining
; from %a would leave %b dead and field 1 undefined. The expected register
; sequence is the same as the integer part of the "both" test further down.
; GCN-LABEL: {{^}}sgpr:
; GCN-DAG: s_add_i32 s0, s3, 2
; GCN-DAG: s_mov_b32 s1, s2
; GCN: s_mov_b32 s2, s3
; GCN-NOT: s_endpgm
define amdgpu_vs {i32, i32, i32} @sgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  %x = add i32 %2, 2
  %a = insertvalue {i32, i32, i32} undef, i32 %x, 0
  %b = insertvalue {i32, i32, i32} %a, i32 %1, 1
  %c = insertvalue {i32, i32, i32} %b, i32 %2, 2
  ret {i32, i32, i32} %c
}


; Vertex shader returning a constant struct of four integers; no argument is
; read, so the body is just the return.
; GCN-LABEL: {{^}}sgpr_literal:
; GCN: s_mov_b32 s0, 5
; GCN-NOT: s_mov_b32 s0, s0
; GCN-DAG: s_mov_b32 s1, 6
; GCN-DAG: s_mov_b32 s2, 7
; GCN-DAG: s_mov_b32 s3, 8
; GCN-NOT: s_endpgm
define amdgpu_vs {i32, i32, i32, i32} @sgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  ret {i32, i32, i32, i32} {i32 5, i32 6, i32 7, i32 8}
}


; Vertex shader mixing float and integer fields in one returned struct:
; {%3 + 1.0, %2 + 2, %3, %1, %2}, after exporting %3.
; GCN-LABEL: {{^}}both:
; GCN: v_mov_b32_e32 v1, v0
; GCN-DAG: exp mrt0 v1, v1, v1, v1 done compr vm
; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
; GCN-DAG: s_add_i32 s0, s3, 2
; GCN-DAG: s_mov_b32 s1, s2
; GCN: s_mov_b32 s2, s3
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs {float, i32, float, i32, i32} @both([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
  %v = fadd float %3, 1.0
  %s = add i32 %2, 2
  %a0 = insertvalue {float, i32, float, i32, i32} undef, float %v, 0
  %a1 = insertvalue {float, i32, float, i32, i32} %a0, i32 %s, 1
  %a2 = insertvalue {float, i32, float, i32, i32} %a1, float %3, 2
  %a3 = insertvalue {float, i32, float, i32, i32} %a2, i32 %1, 3
  %a4 = insertvalue {float, i32, float, i32, i32} %a3, i32 %2, 4
  ret {float, i32, float, i32, i32} %a4
}


; Vertex shader returning a constant nested struct after exporting %3.
; Like every other value-returning shader in this file, it must not end in
; s_endpgm.
; GCN-LABEL: {{^}}structure_literal:
; GCN: v_mov_b32_e32 v3, v0
; GCN: exp mrt0 v3, v3, v3, v3 done compr vm

; GCN-DAG: v_mov_b32_e32 v0, 1.0
; GCN-DAG: s_mov_b32 s0, 2
; GCN-DAG: s_mov_b32 s1, 3
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN-DAG: v_mov_b32_e32 v2, 4.0
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs {{float, i32}, {i32, <2 x float>}} @structure_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
  ret {{float, i32}, {i32, <2 x float>}} {{float, i32} {float 1.0, i32 2}, {i32, <2 x float>} {i32 3, <2 x float> <float 2.0, float 4.0>}}
}

attributes #0 = { nounwind "InitialPSInputAddr"="0" }