; RUN: llc -march=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX9 %s

; Each function below builds a two-element vector of 16-bit values and
; reinterprets it as one 32-bit value. The checks inspect the machine
; instructions printed right after the amdgpu-isel pass. The "uniform_*"
; variants are amdgpu_kernel entry points and are checked for S_* opcodes;
; the "divergent_*" variants are ordinary functions and are checked for V_*
; opcodes. The second invocation (gfx900) only checks the functions that carry
; a directive for its prefix.

; Element 0 is the constant zero, element 1 is the kernel argument %a.
; GCN-LABEL: name: uniform_vec_0_i16
; GCN: S_LSHL_B32
define amdgpu_kernel void @uniform_vec_0_i16(i32 addrspace(1)* %out, i16 %a) {
  %lo.zero = insertelement <2 x i16> undef, i16 0, i32 0
  %packed = insertelement <2 x i16> %lo.zero, i16 %a, i32 1
  %bits = bitcast <2 x i16> %packed to i32
  store i32 %bits, i32 addrspace(1)* %out, align 4
  ret void
}

; Same vector as above, but %a is a plain function argument and the packed
; value is returned instead of stored.
; GCN-LABEL: name: divergent_vec_0_i16
; GCN: V_LSHLREV_B32_e64
define i32 @divergent_vec_0_i16(i16 %a) {
  %lo.zero = insertelement <2 x i16> undef, i16 0, i32 0
  %packed = insertelement <2 x i16> %lo.zero, i16 %a, i32 1
  %bits = bitcast <2 x i16> %packed to i32
  ret i32 %bits
}

; Element 0 is the kernel argument %a, element 1 is the constant zero.
; GCN-LABEL: name: uniform_vec_i16_0
; GCN: S_AND_B32
define amdgpu_kernel void @uniform_vec_i16_0(i32 addrspace(1)* %out, i16 %a) {
  %lo.only = insertelement <2 x i16> undef, i16 %a, i32 0
  %packed = insertelement <2 x i16> %lo.only, i16 0, i32 1
  %bits = bitcast <2 x i16> %packed to i32
  store i32 %bits, i32 addrspace(1)* %out, align 4
  ret void
}

; Function-argument counterpart of uniform_vec_i16_0.
; GCN-LABEL: name: divergent_vec_i16_0
; GCN: V_AND_B32_e64
define i32 @divergent_vec_i16_0(i16 %a) {
  %lo.only = insertelement <2 x i16> undef, i16 %a, i32 0
  %packed = insertelement <2 x i16> %lo.only, i16 0, i32 1
  %bits = bitcast <2 x i16> %packed to i32
  ret i32 %bits
}

; Half-precision version: %a in element 0, 0.0 in element 1, stored as float.
; GCN-LABEL: name: uniform_vec_f16_0
; GCN: S_AND_B32
define amdgpu_kernel void @uniform_vec_f16_0(float addrspace(1)* %out, half %a) {
  %lo.only = insertelement <2 x half> undef, half %a, i32 0
  %packed = insertelement <2 x half> %lo.only, half 0.0, i32 1
  %bits = bitcast <2 x half> %packed to float
  store float %bits, float addrspace(1)* %out, align 4
  ret void
}

; Function-argument counterpart of uniform_vec_f16_0. The default run expects
; a float-to-half conversion followed by a COPY, while the gfx900 run expects
; a vector AND.
; NOTE(review): the conversion presumably appears because the half argument is
; passed as a 32-bit float on the default target - confirm.
; GCN-LABEL: name: divergent_vec_f16_0
; GCN: V_CVT_F16_F32_e64 0, %0
; GCN: COPY %1

; GFX9-LABEL: name: divergent_vec_f16_0
; GFX9: V_AND_B32_e64
define float @divergent_vec_f16_0(half %a) {
  %lo.only = insertelement <2 x half> undef, half %a, i32 0
  %packed = insertelement <2 x half> %lo.only, half 0.0, i32 1
  %bits = bitcast <2 x half> %packed to float
  ret float %bits
}

; "LL": both elements are the low 16 bits of separately loaded 32-bit words.
; The packed result is consumed by inline asm through an "s" constraint.
; NOTE(review): attribute group #0 has no definition in the visible part of
; this file - confirm that is intentional.
; GCN-LABEL: name: uniform_vec_i16_LL
; GCN: %[[IMM:[0-9]+]]:sreg_32 = S_MOV_B32 65535
; GCN: %[[AND:[0-9]+]]:sreg_32 = S_AND_B32 killed %{{[0-9]+}}, killed %[[IMM]]
; GCN: %[[SHIFT:[0-9]+]]:sreg_32 = S_MOV_B32 16
; GCN: %[[SHL:[0-9]+]]:sreg_32 = S_LSHL_B32 killed %{{[0-9]+}}, killed %[[SHIFT]]
; GCN: S_OR_B32 killed %[[AND]], killed %[[SHL]]

; GFX9-LABEL: name: uniform_vec_i16_LL
; GFX9: S_PACK_LL_B32_B16
define amdgpu_kernel void @uniform_vec_i16_LL(i32 addrspace(4)* %in0, i32 addrspace(4)* %in1) {
  %word0 = load volatile i32, i32 addrspace(4)* %in0
  %word1 = load volatile i32, i32 addrspace(4)* %in1
  %lo16 = trunc i32 %word0 to i16
  %hi16 = trunc i32 %word1 to i16
  %pair.lo = insertelement <2 x i16> undef, i16 %lo16, i32 0
  %pair = insertelement <2 x i16> %pair.lo, i16 %hi16, i32 1
  %pair.bits = bitcast <2 x i16> %pair to i32
  call void asm sideeffect "; use $0", "s"(i32 %pair.bits) #0
  ret void
}

; Function-argument "LL" case: %a fills element 0 and %b fills element 1.
; GCN-LABEL: name: divergent_vec_i16_LL
; GCN: %[[SHIFT:[0-9]+]]:sreg_32 = S_MOV_B32 16
; GCN: %[[SHL:[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 killed %[[SHIFT]], %1, implicit $exec
; GCN: %[[IMM:[0-9]+]]:sreg_32 = S_MOV_B32 65535
; GCN: %[[AND:[0-9]+]]:vgpr_32 = V_AND_B32_e64 %0, killed %[[IMM]], implicit $exec
; GCN: V_OR_B32_e64 killed %[[AND]], killed %[[SHL]], implicit $exec

; GFX9-LABEL: name: divergent_vec_i16_LL
; GFX9: %[[IMM:[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535
; GFX9: %[[AND:[0-9]+]]:vgpr_32 = V_AND_B32_e64 killed %[[IMM]]
; GFX9: V_LSHL_OR_B32_e64 %{{[0-9]+}}, 16, killed %[[AND]]
define i32 @divergent_vec_i16_LL(i16 %a, i16 %b) {
  %lo.only = insertelement <2 x i16> undef, i16 %a, i32 0
  %packed = insertelement <2 x i16> %lo.only, i16 %b, i32 1
  %bits = bitcast <2 x i16> %packed to i32
  ret i32 %bits
}

; "LH": element 0 is the 16-bit argument %a, element 1 is the upper 16 bits of
; the 32-bit argument %b (logical shift right by 16, then truncate).
; GCN-LABEL: name: uniform_vec_i16_LH
; GCN-DAG: %[[IMM:[0-9]+]]:sreg_32 = S_MOV_B32 65535
; GCN-DAG: %[[AND:[0-9]+]]:sreg_32 = S_AND_B32 killed %{{[0-9]+}}, killed %[[IMM]]
; GCN-DAG: %[[NEG:[0-9]+]]:sreg_32 = S_MOV_B32 -65536
; GCN-DAG: %[[ANDN:[0-9]+]]:sreg_32 = S_AND_B32 killed %{{[0-9]+}}, killed %[[NEG]]
; GCN: S_OR_B32 killed %[[AND]], killed %[[ANDN]]

; GFX9-LABEL: name: uniform_vec_i16_LH
; GFX9: S_PACK_LH_B32_B16
define amdgpu_kernel void @uniform_vec_i16_LH(i32 addrspace(1)* %out, i16 %a, i32 %b) {
  %b.upper = lshr i32 %b, 16
  %b.hi16 = trunc i32 %b.upper to i16
  %lo.only = insertelement <2 x i16> undef, i16 %a, i32 0
  %packed = insertelement <2 x i16> %lo.only, i16 %b.hi16, i32 1
  %bits = bitcast <2 x i16> %packed to i32
  store i32 %bits, i32 addrspace(1)* %out, align 4
  ret void
}

; Function-argument "LH" case; only the default run has checks, and it expects
; a bitfield insert fed by the 65535 mask.
; GCN-LABEL: name: divergent_vec_i16_LH
; GCN: %[[IMM:[0-9]+]]:sreg_32 = S_MOV_B32 65535
; GCN: V_BFI_B32_e64 killed %[[IMM]]
define i32 @divergent_vec_i16_LH(i16 %a, i32 %b) {
  %b.upper = lshr i32 %b, 16
  %b.hi16 = trunc i32 %b.upper to i16
  %lo.only = insertelement <2 x i16> undef, i16 %a, i32 0
  %packed = insertelement <2 x i16> %lo.only, i16 %b.hi16, i32 1
  %bits = bitcast <2 x i16> %packed to i32
  ret i32 %bits
}

; "HH": both elements are the upper 16 bits of 32-bit kernel arguments.
; GCN-LABEL: name: uniform_vec_i16_HH
; GCN: %[[SHIFT:[0-9]+]]:sreg_32 = S_MOV_B32 16
; GCN: %[[SHR:[0-9]+]]:sreg_32 = S_LSHR_B32 killed %{{[0-9]+}}, killed %[[SHIFT]]
; GCN: %[[IMM:[0-9]+]]:sreg_32 = S_MOV_B32 -65536
; GCN: %[[AND:[0-9]+]]:sreg_32 = S_AND_B32 killed %{{[0-9]+}}, killed %[[IMM]]
; GCN: S_OR_B32 killed %[[SHR]], killed %[[AND]]

; GFX9-LABEL: name: uniform_vec_i16_HH
; GFX9: S_PACK_HH_B32_B16
define amdgpu_kernel void @uniform_vec_i16_HH(i32 addrspace(1)* %out, i32 %a, i32 %b) {
  %a.upper = lshr i32 %a, 16
  %a.hi16 = trunc i32 %a.upper to i16
  %b.upper = lshr i32 %b, 16
  %b.hi16 = trunc i32 %b.upper to i16
  %lo.only = insertelement <2 x i16> undef, i16 %a.hi16, i32 0
  %packed = insertelement <2 x i16> %lo.only, i16 %b.hi16, i32 1
  %bits = bitcast <2 x i16> %packed to i32
  store i32 %bits, i32 addrspace(1)* %out, align 4
  ret void
}

; Function-argument "HH" case. The checks name virtual registers %0 and %1
; directly, so the instruction order in the body must stay as it is.
; GCN-LABEL: name: divergent_vec_i16_HH
; GCN: %[[SHR:[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 killed %{{[0-9]+}}, %0, implicit $exec
; GCN: %[[IMM:[0-9]+]]:sreg_32 = S_MOV_B32 -65536
; GCN: %[[AND:[0-9]+]]:vgpr_32 = V_AND_B32_e64 %1, killed %[[IMM]], implicit $exec
; GCN: V_OR_B32_e64 killed %[[SHR]], killed %[[AND]], implicit $exec

; GFX9-LABEL: name: divergent_vec_i16_HH
; GFX9: %[[SHR:[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, %0
; GFX9: %[[IMM:[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -65536, implicit $exec
; GFX9: V_AND_OR_B32_e64 %1, killed %[[IMM]], killed %[[SHR]]
define i32 @divergent_vec_i16_HH(i32 %a, i32 %b) {
  %a.upper = lshr i32 %a, 16
  %a.hi16 = trunc i32 %a.upper to i16
  %b.upper = lshr i32 %b, 16
  %b.hi16 = trunc i32 %b.upper to i16
  %lo.only = insertelement <2 x i16> undef, i16 %a.hi16, i32 0
  %packed = insertelement <2 x i16> %lo.only, i16 %b.hi16, i32 1
  %bits = bitcast <2 x i16> %packed to i32
  ret i32 %bits
}

; Half-precision "LL": the low 16 bits of two loaded words are bitcast to half
; before being packed; the result again feeds inline asm via an "s" constraint.
; NOTE(review): attribute group #0 has no definition in the visible part of
; this file - confirm that is intentional.
; GCN-LABEL: name: uniform_vec_f16_LL
; GCN: %[[IMM:[0-9]+]]:sreg_32 = S_MOV_B32 65535
; GCN: %[[AND:[0-9]+]]:sreg_32 = S_AND_B32 killed %{{[0-9]+}}, killed %[[IMM]]
; GCN: %[[SHIFT:[0-9]+]]:sreg_32 = S_MOV_B32 16
; GCN: %[[SHL:[0-9]+]]:sreg_32 = S_LSHL_B32 killed %{{[0-9]+}}, killed %[[SHIFT]]
; GCN: S_OR_B32 killed %[[AND]], killed %[[SHL]]

; GFX9-LABEL: name: uniform_vec_f16_LL
; GFX9: S_PACK_LL_B32_B16
define amdgpu_kernel void @uniform_vec_f16_LL(i32 addrspace(4)* %in0, i32 addrspace(4)* %in1) {
  %word0 = load volatile i32, i32 addrspace(4)* %in0
  %word1 = load volatile i32, i32 addrspace(4)* %in1
  %lo.raw = trunc i32 %word0 to i16
  %hi.raw = trunc i32 %word1 to i16
  %lo.half = bitcast i16 %lo.raw to half
  %hi.half = bitcast i16 %hi.raw to half
  %pair.lo = insertelement <2 x half> undef, half %lo.half, i32 0
  %pair = insertelement <2 x half> %pair.lo, half %hi.half, i32 1
  %pair.bits = bitcast <2 x half> %pair to i32

  call void asm sideeffect "; use $0", "s"(i32 %pair.bits) #0
  ret void
}

; Function-argument half-precision "LL" case, returned as float.
; GCN-LABEL: name: divergent_vec_f16_LL
; GCN: %[[SHIFT:[0-9]+]]:sreg_32 = S_MOV_B32 16
; GCN: %[[SHL:[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 killed %[[SHIFT]]
; GCN: V_OR_B32_e64 killed %{{[0-9]+}}, killed %[[SHL]], implicit $exec

; GFX9-LABEL: name: divergent_vec_f16_LL
; GFX9: %[[IMM:[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535
; GFX9: %[[AND:[0-9]+]]:vgpr_32 = V_AND_B32_e64 killed %[[IMM]]
; GFX9: V_LSHL_OR_B32_e64 %{{[0-9]+}}, 16, killed %[[AND]]
define float @divergent_vec_f16_LL(half %a, half %b) {
  %lo.only = insertelement <2 x half> undef, half %a, i32 0
  %packed = insertelement <2 x half> %lo.only, half %b, i32 1
  %bits = bitcast <2 x half> %packed to float
  ret float %bits
}