; Codegen test for stores of fully constant and partially constant build
; vectors. It is run for r600 (redwood) and for amdgcn with no explicit cpu,
; with tonga, and with gfx1030.
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefixes=R600,ALL
; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefixes=SI,GFX6,GFX678,ALL
; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefixes=SI,GFX8,GFX678,ALL
; RUN: llc < %s -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX10,SI,ALL

; Stores the constant <2 x i32> <5, 6> to %out. The checks expect one move per
; element and, on the amdgcn runs, a single dwordx2 store. Register numbers
; are captured with a multi-digit pattern so the checks do not depend on the
; allocator picking a register below 10.
; ALL-LABEL: {{^}}build_vector2:
; R600: MOV
; R600: MOV
; R600-NOT: MOV
; SI-DAG: v_mov_b32_e32 v[[X:[0-9]+]], 5
; SI-DAG: v_mov_b32_e32 v[[Y:[0-9]+]], 6
; GFX678: buffer_store_dwordx2 v[[[X]]:[[Y]]]
; GFX10: global_store_dwordx2 v2, v[0:1], s[0:1]
define amdgpu_kernel void @build_vector2 (<2 x i32> addrspace(1)* %out) {
entry:
  store <2 x i32> <i32 5, i32 6>, <2 x i32> addrspace(1)* %out
  ret void
}

; Stores the constant <4 x i32> <5, 6, 7, 8> to %out. The checks expect one
; move per element and, on the amdgcn runs, a single dwordx4 store.
; ALL-LABEL: {{^}}build_vector4:
; R600: MOV
; R600: MOV
; R600: MOV
; R600: MOV
; R600-NOT: MOV
; SI-DAG: v_mov_b32_e32 v[[X:[0-9]+]], 5
; SI-DAG: v_mov_b32_e32 v[[Y:[0-9]+]], 6
; SI-DAG: v_mov_b32_e32 v[[Z:[0-9]+]], 7
; SI-DAG: v_mov_b32_e32 v[[W:[0-9]+]], 8
; GFX678: buffer_store_dwordx4 v[[[X]]:[[W]]]
; GFX10: global_store_dwordx4 v4, v[0:3], s[0:1]
define amdgpu_kernel void @build_vector4 (<4 x i32> addrspace(1)* %out) {
entry:
  store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> addrspace(1)* %out
  ret void
}


; Stores the constant <2 x i16> <5, 6> to %out. The checks expect both
; elements to be folded into one 32-bit immediate, 0x60005 (element 1 = 6 in
; the high half, element 0 = 5 in the low half), written with one dword store.
; ALL-LABEL: {{^}}build_vector_v2i16:
; R600: MOV
; R600-NOT: MOV
; GFX678: s_mov_b32 s3, 0xf000
; GFX678: s_mov_b32 s2, -1
; GFX678: v_mov_b32_e32 v0, 0x60005
; GFX678: s_waitcnt lgkmcnt(0)
; GFX678: buffer_store_dword v0, off, s[0:3], 0
; GFX10: v_mov_b32_e32 v0, 0
; GFX10: v_mov_b32_e32 v1, 0x60005
; GFX10: s_waitcnt lgkmcnt(0)
; GFX10: global_store_dword v0, v1, s[0:1]
define amdgpu_kernel void @build_vector_v2i16 (<2 x i16> addrspace(1)* %out) {
entry:
  store <2 x i16> <i16 5, i16 6>, <2 x i16> addrspace(1)* %out
  ret void
}

; Builds a <2 x i16> whose element 0 is the upper 16 bits of %a (lshr by 16,
; then trunc) and whose element 1 is the constant 5, and stores it to %out.
; The GFX6 and GFX8 checks expect a shift followed by an OR with 0x50000
; (5 placed in the high half); the gfx1030 checks expect s_pack_ll_b32_b16.
; ALL-LABEL: {{^}}build_vector_v2i16_trunc:
; R600: LSHR
; R600: OR_INT
; R600: LSHR
; R600-NOT: MOV
; GFX6: s_mov_b32 s3, 0xf000
; GFX6: s_waitcnt lgkmcnt(0)
; GFX6: s_lshr_b32 s2, s2, 16
; GFX6: s_or_b32 s4, s2, 0x50000
; GFX6: s_mov_b32 s2, -1
; GFX6: v_mov_b32_e32 v0, s4
; GFX6: buffer_store_dword v0, off, s[0:3], 0
; GFX8: s_mov_b32 s3, 0xf000
; GFX8: s_mov_b32 s2, -1
; GFX8: s_waitcnt lgkmcnt(0)
; GFX8: s_lshr_b32 s4, s4, 16
; GFX8: s_or_b32 s4, s4, 0x50000
; GFX8: v_mov_b32_e32 v0, s4
; GFX8: buffer_store_dword v0, off, s[0:3], 0
; GFX10: v_mov_b32_e32 v0, 0
; GFX10: s_waitcnt lgkmcnt(0)
; GFX10: s_lshr_b32 s2, s2, 16
; GFX10: s_pack_ll_b32_b16 s2, s2, 5
; GFX10: v_mov_b32_e32 v1, s2
; GFX10: global_store_dword v0, v1, s[0:1]
define amdgpu_kernel void @build_vector_v2i16_trunc (<2 x i16> addrspace(1)* %out, i32 %a) {
  %srl = lshr i32 %a, 16
  %trunc = trunc i32 %srl to i16
  %ins.0 = insertelement <2 x i16> undef, i16 %trunc, i32 0
  %ins.1 = insertelement <2 x i16> %ins.0, i16 5, i32 1
  store <2 x i16> %ins.1, <2 x i16> addrspace(1)* %out
  ret void
}