; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; Byte-permute selection tests.
;
; Every kernel below loads one i32 from %arg[workitem.id.x], combines it with
; the scalar argument %arg1 using shifts and byte-granular and/or/xor masks,
; and stores the result back to the same address. The check lines pin the
; instruction sequence (and, where applicable, the selector constant) that llc
; is expected to emit for each combination.

; (load << 8) | (%arg1 & 0xff)
; GCN-LABEL: {{^}}lsh8_or_and:
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x6050400
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
define amdgpu_kernel void @lsh8_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
entry:
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %addr = getelementptr i32, i32 addrspace(1)* %arg, i32 %tid
  %val = load i32, i32 addrspace(1)* %addr, align 4
  %val.shl = shl i32 %val, 8
  %arg.lo = and i32 %arg1, 255 ; 0x000000ff
  %res = or i32 %val.shl, %arg.lo
  store i32 %res, i32 addrspace(1)* %addr, align 4
  ret void
}

; (load >> 24) | (%arg1 & 0xffffff00)
; GCN-LABEL: {{^}}lsr24_or_and:
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7060503
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
define amdgpu_kernel void @lsr24_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
entry:
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %addr = getelementptr i32, i32 addrspace(1)* %arg, i32 %tid
  %val = load i32, i32 addrspace(1)* %addr, align 4
  %val.shr = lshr i32 %val, 24
  %arg.hi = and i32 %arg1, 4294967040 ; 0xffffff00
  %res = or i32 %val.shr, %arg.hi
  store i32 %res, i32 addrspace(1)* %addr, align 4
  ret void
}

; ((load & 0xffffff00) | (%arg1 >> 24)) ^ 0x80000000
; The checks expect the same selector constant as lsr24_or_and even though the
; combined value is additionally xor-ed with the sign bit before the store.
; GCN-LABEL: {{^}}and_or_lsr24:
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7060503
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
define amdgpu_kernel void @and_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
entry:
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %addr = getelementptr i32, i32 addrspace(1)* %arg, i32 %tid
  %val = load i32, i32 addrspace(1)* %addr, align 4
  %val.hi = and i32 %val, 4294967040 ; 0xffffff00
  %arg.shr = lshr i32 %arg1, 24
  %merged = or i32 %val.hi, %arg.shr
  %res = xor i32 %merged, -2147483648 ; 0x80000000
  store i32 %res, i32 addrspace(1)* %addr, align 4
  ret void
}

; (load & 0xff00ff00) | (%arg1 & 0x00ff00ff)
; GCN-LABEL: {{^}}and_or_and:
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020500
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
define amdgpu_kernel void @and_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
entry:
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %addr = getelementptr i32, i32 addrspace(1)* %arg, i32 %tid
  %val = load i32, i32 addrspace(1)* %addr, align 4
  %val.odd = and i32 %val, -16711936 ; 0xff00ff00
  %arg.even = and i32 %arg1, 16711935 ; 0x00ff00ff
  %res = or i32 %val.odd, %arg.even
  store i32 %res, i32 addrspace(1)* %addr, align 4
  ret void
}

; (load << 8) | (%arg1 >> 24)
; The checks expect v_alignbit_b32 with a shift amount of 24 here rather than
; a byte permute.
; GCN-LABEL: {{^}}lsh8_or_lsr24:
; GCN: v_alignbit_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, 24
define amdgpu_kernel void @lsh8_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
entry:
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %addr = getelementptr i32, i32 addrspace(1)* %arg, i32 %tid
  %val = load i32, i32 addrspace(1)* %addr, align 4
  %val.shl = shl i32 %val, 8
  %arg.shr = lshr i32 %arg1, 24
  %res = or i32 %val.shl, %arg.shr
  store i32 %res, i32 addrspace(1)* %addr, align 4
  ret void
}

; (load << 16) | (%arg1 >> 24)
; GCN-LABEL: {{^}}lsh16_or_lsr24:
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x5040c03
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
define amdgpu_kernel void @lsh16_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
entry:
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %addr = getelementptr i32, i32 addrspace(1)* %arg, i32 %tid
  %val = load i32, i32 addrspace(1)* %addr, align 4
  %val.shl = shl i32 %val, 16
  %arg.shr = lshr i32 %arg1, 24
  %res = or i32 %val.shl, %arg.shr
  store i32 %res, i32 addrspace(1)* %addr, align 4
  ret void
}

; (load & 0xff0000ff) ^ (%arg1 & 0x00ffff00)
; The two masks select disjoint bytes and the halves are combined with xor.
; GCN-LABEL: {{^}}and_xor_and:
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020104
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
define amdgpu_kernel void @and_xor_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
entry:
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %addr = getelementptr i32, i32 addrspace(1)* %arg, i32 %tid
  %val = load i32, i32 addrspace(1)* %addr, align 4
  %val.outer = and i32 %val, -16776961 ; 0xff0000ff
  %arg.inner = and i32 %arg1, 16776960 ; 0x00ffff00
  %res = xor i32 %val.outer, %arg.inner
  store i32 %res, i32 addrspace(1)* %addr, align 4
  ret void
}

; ((%arg1 & 0xffffff00) | 0xffff0000) | (load & 0x00ff00ff)
; GCN-LABEL: {{^}}and_or_or_and:
; GCN: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0xff00
; GCN: s_or_b32 [[SREG:s[0-9]+]], s{{[0-9]+}}, 0xffff0000
; GCN: v_and_b32_e32 [[VREG:v[0-9]+]], 0xff00ff, v{{[0-9]+}}
; GCN: v_or_b32_e32 v{{[0-9]+}}, [[SREG]], [[VREG]]
; FIXME: this should have been a single "v_perm_b32" with a 0xffff0500 mask.
define amdgpu_kernel void @and_or_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
entry:
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %addr = getelementptr i32, i32 addrspace(1)* %arg, i32 %tid
  %val = load i32, i32 addrspace(1)* %addr, align 4
  %val.even = and i32 %val, 16711935 ; 0x00ff00ff
  %arg.hi = and i32 %arg1, 4294967040 ; 0xffffff00
  %arg.ones = or i32 %arg.hi, -65536 ; 0xffff0000
  %res = or i32 %arg.ones, %val.even
  store i32 %res, i32 addrspace(1)* %addr, align 4
  ret void
}

; ((load << 16) | (%arg1 & 0xffff)) & 0xff0000ff
; GCN-LABEL: {{^}}and_or_and_shl:
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x50c0c00
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
define amdgpu_kernel void @and_or_and_shl(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
entry:
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %addr = getelementptr i32, i32 addrspace(1)* %arg, i32 %tid
  %val = load i32, i32 addrspace(1)* %addr, align 4
  %val.shl = shl i32 %val, 16
  %arg.lo = and i32 %arg1, 65535 ; 0x0000ffff
  %merged = or i32 %val.shl, %arg.lo
  %res = and i32 %merged, 4278190335 ; 0xff0000ff
  store i32 %res, i32 addrspace(1)* %addr, align 4
  ret void
}

; (load | 0x00ffff00) & (%arg1 | 0xff0000ff)
; The checks expect the same selector constant as and_xor_and.
; GCN-LABEL: {{^}}or_and_or:
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020104
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
define amdgpu_kernel void @or_and_or(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
entry:
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %addr = getelementptr i32, i32 addrspace(1)* %arg, i32 %tid
  %val = load i32, i32 addrspace(1)* %addr, align 4
  %val.set = or i32 %val, 16776960 ; 0x00ffff00
  %arg.set = or i32 %arg1, 4278190335 ; 0xff0000ff
  %res = and i32 %val.set, %arg.set
  store i32 %res, i32 addrspace(1)* %addr, align 4
  ret void
}

; Same shape as and_or_or_and, but bit 15 of %arg1 and bit 2 of the loaded
; value are forced to one first. The combined value is stored to the
; per-workitem address; it is then masked with 0xffff8004 and stored to %arg
; itself. The checks expect that second store to use a materialized 0xffff8004
; constant.
; GCN-LABEL: {{^}}known_ffff0500:
; GCN: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004
; GCN: s_and_b32 [[SREG:s[0-9]+]], [[SREG]], 0xff00
; GCN: s_or_b32 [[SREG]], [[SREG]], 0xffff0000
; GCN: v_and_b32_e32 [[VREG:v[0-9]+]], 0xff00ff, [[VREG]]
; GCN: v_or_b32_e32 [[VREG]], [[SREG]], [[VREG]]
; GCN: store_dword v[{{[0-9:]+}}], [[VREG]]{{$}}
; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
; FIXME: this should have been a single "v_perm_b32" with a 0xffff0500 mask.
define amdgpu_kernel void @known_ffff0500(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
entry:
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %addr = getelementptr i32, i32 addrspace(1)* %arg, i32 %tid
  %val = load i32, i32 addrspace(1)* %addr, align 4
  %arg.b15 = or i32 %arg1, 32768 ; 0x8000
  %val.b2 = or i32 %val, 4
  %val.even = and i32 %val.b2, 16711935 ; 0x00ff00ff
  %arg.hi = and i32 %arg.b15, 4294967040 ; 0xffffff00
  %arg.ones = or i32 %arg.hi, 4294901760 ; 0xffff0000
  %merged = or i32 %arg.ones, %val.even
  store i32 %merged, i32 addrspace(1)* %addr, align 4
  %known = and i32 %merged, 4294934532 ; 0xffff8004
  store i32 %known, i32 addrspace(1)* %arg, align 4
  ret void
}

; Same shape as and_or_and_shl, but bit 2 of %arg1 is forced to one first. The
; combined value is stored to the per-workitem address; it is then masked with
; 0x00ffff04 and stored to %arg itself. The checks expect that second store to
; use the constant 4.
; GCN-LABEL: {{^}}known_050c0c00:
; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x50c0c00
; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 4{{$}}
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
define amdgpu_kernel void @known_050c0c00(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
entry:
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %addr = getelementptr i32, i32 addrspace(1)* %arg, i32 %tid
  %val = load i32, i32 addrspace(1)* %addr, align 4
  %val.shl = shl i32 %val, 16
  %arg.b2 = or i32 %arg1, 4
  %arg.lo = and i32 %arg.b2, 65535 ; 0x0000ffff
  %merged = or i32 %val.shl, %arg.lo
  %outer = and i32 %merged, 4278190335 ; 0xff0000ff
  store i32 %outer, i32 addrspace(1)* %addr, align 4
  %known = and i32 %outer, 16776964 ; 0x00ffff04
  store i32 %known, i32 addrspace(1)* %arg, align 4
  ret void
}

; Counterpart of known_ffff0500 with the two inputs swapped: bit 2 is forced
; on %arg1 and bit 15 on the loaded value, and the loaded value supplies the
; 0xffffff00 part. The checks expect a v_perm_b32 with selector 0xffff0500 and
; a materialized 0xffff8004 constant for the second store.
; GCN-LABEL: {{^}}known_ffff8004:
; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500
; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
define amdgpu_kernel void @known_ffff8004(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
entry:
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %addr = getelementptr i32, i32 addrspace(1)* %arg, i32 %tid
  %val = load i32, i32 addrspace(1)* %addr, align 4
  %arg.b2 = or i32 %arg1, 4
  %val.b15 = or i32 %val, 32768 ; 0x8000
  %arg.even = and i32 %arg.b2, 16711935 ; 0x00ff00ff
  %val.hi = and i32 %val.b15, 4294967040 ; 0xffffff00
  %val.ones = or i32 %val.hi, 4294901760 ; 0xffff0000
  %merged = or i32 %val.ones, %arg.even
  store i32 %merged, i32 addrspace(1)* %addr, align 4
  %known = and i32 %merged, 4294934532 ; 0xffff8004
  store i32 %known, i32 addrspace(1)* %arg, align 4
  ret void
}

; Workitem id intrinsic used by every kernel above to index %arg.
declare i32 @llvm.amdgcn.workitem.id.x()