1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=SI %s
3; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=VI %s
4
; Kernel variant of the full bitfield-mask pattern: stores
; ((1 << %x) - 1) << %y to %out.
; For both llc invocations the checks below expect the shl/sub/shl chain to be
; selected as one s_bfm_b32 whose two sources are the loaded i32 arguments.
5define amdgpu_kernel void @s_bfm_pattern(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
6; SI-LABEL: s_bfm_pattern:
7; SI:       ; %bb.0:
8; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
9; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
10; SI-NEXT:    s_mov_b32 s3, 0xf000
11; SI-NEXT:    s_waitcnt lgkmcnt(0)
12; SI-NEXT:    s_bfm_b32 s4, s4, s5
13; SI-NEXT:    s_mov_b32 s2, -1
14; SI-NEXT:    v_mov_b32_e32 v0, s4
15; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
16; SI-NEXT:    s_endpgm
17;
18; VI-LABEL: s_bfm_pattern:
19; VI:       ; %bb.0:
20; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
21; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
22; VI-NEXT:    s_waitcnt lgkmcnt(0)
23; VI-NEXT:    s_bfm_b32 s2, s2, s3
24; VI-NEXT:    v_mov_b32_e32 v0, s0
25; VI-NEXT:    v_mov_b32_e32 v1, s1
26; VI-NEXT:    v_mov_b32_e32 v2, s2
27; VI-NEXT:    flat_store_dword v[0:1], v2
28; VI-NEXT:    s_endpgm
  ; %a = 1 << %x
29  %a = shl i32 1, %x
  ; %b = %a - 1, i.e. a mask of the low %x bits
30  %b = sub i32 %a, 1
  ; %c = %b << %y, moving the mask up by %y bits
31  %c = shl i32 %b, %y
32  store i32 %c, i32 addrspace(1)* %out
33  ret void
34}
35
; Kernel variant of the reduced pattern with no trailing shift: stores
; (1 << %x) - 1 to %out.
; For both llc invocations the checks below expect one s_bfm_b32 whose second
; source operand is the literal 0.
36define amdgpu_kernel void @s_bfm_pattern_simple(i32 addrspace(1)* %out, i32 %x) #0 {
37; SI-LABEL: s_bfm_pattern_simple:
38; SI:       ; %bb.0:
39; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
40; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
41; SI-NEXT:    s_mov_b32 s3, 0xf000
42; SI-NEXT:    s_waitcnt lgkmcnt(0)
43; SI-NEXT:    s_bfm_b32 s4, s2, 0
44; SI-NEXT:    s_mov_b32 s2, -1
45; SI-NEXT:    v_mov_b32_e32 v0, s4
46; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
47; SI-NEXT:    s_endpgm
48;
49; VI-LABEL: s_bfm_pattern_simple:
50; VI:       ; %bb.0:
51; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
52; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
53; VI-NEXT:    s_waitcnt lgkmcnt(0)
54; VI-NEXT:    s_bfm_b32 s2, s2, 0
55; VI-NEXT:    v_mov_b32_e32 v0, s0
56; VI-NEXT:    v_mov_b32_e32 v1, s1
57; VI-NEXT:    v_mov_b32_e32 v2, s2
58; VI-NEXT:    flat_store_dword v[0:1], v2
59; VI-NEXT:    s_endpgm
  ; %a = 1 << %x
60  %a = shl i32 1, %x
  ; %b = %a - 1, i.e. a mask of the low %x bits; stored without further shifting
61  %b = sub i32 %a, 1
62  store i32 %b, i32 addrspace(1)* %out
63  ret void
64}
65
; Non-kernel (plain `define void`) variant of the full pattern: stores
; ((1 << %x) - 1) << %y to %out.
; In the expected output the i32 arguments are used directly from v2 and v3,
; and the shl/sub/shl chain is expected to become one v_bfm_b32; the function
; returns through s_setpc_b64 rather than ending the program.
66define void @v_bfm_pattern(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
67; SI-LABEL: v_bfm_pattern:
68; SI:       ; %bb.0:
69; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
70; SI-NEXT:    s_mov_b32 s7, 0xf000
71; SI-NEXT:    s_mov_b32 s6, 0
72; SI-NEXT:    v_bfm_b32_e32 v2, v2, v3
73; SI-NEXT:    s_mov_b32 s4, s6
74; SI-NEXT:    s_mov_b32 s5, s6
75; SI-NEXT:    buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
76; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
77; SI-NEXT:    s_setpc_b64 s[30:31]
78;
79; VI-LABEL: v_bfm_pattern:
80; VI:       ; %bb.0:
81; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
82; VI-NEXT:    v_bfm_b32 v2, v2, v3
83; VI-NEXT:    flat_store_dword v[0:1], v2
84; VI-NEXT:    s_waitcnt vmcnt(0)
85; VI-NEXT:    s_setpc_b64 s[30:31]
  ; %a = 1 << %x
86  %a = shl i32 1, %x
  ; %b = %a - 1, i.e. a mask of the low %x bits
87  %b = sub i32 %a, 1
  ; %c = %b << %y, moving the mask up by %y bits
88  %c = shl i32 %b, %y
89  store i32 %c, i32 addrspace(1)* %out
90  ret void
91}
92
; Non-kernel (plain `define void`) variant of the reduced pattern: stores
; (1 << %x) - 1 to %out.
; The checks below expect one v_bfm_b32 reading %x from v2 with the literal 0
; as its second source; the first check group prints it in the _e64 form.
93define void @v_bfm_pattern_simple(i32 addrspace(1)* %out, i32 %x) #0 {
94; SI-LABEL: v_bfm_pattern_simple:
95; SI:       ; %bb.0:
96; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
97; SI-NEXT:    s_mov_b32 s7, 0xf000
98; SI-NEXT:    s_mov_b32 s6, 0
99; SI-NEXT:    v_bfm_b32_e64 v2, v2, 0
100; SI-NEXT:    s_mov_b32 s4, s6
101; SI-NEXT:    s_mov_b32 s5, s6
102; SI-NEXT:    buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
103; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
104; SI-NEXT:    s_setpc_b64 s[30:31]
105;
106; VI-LABEL: v_bfm_pattern_simple:
107; VI:       ; %bb.0:
108; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
109; VI-NEXT:    v_bfm_b32 v2, v2, 0
110; VI-NEXT:    flat_store_dword v[0:1], v2
111; VI-NEXT:    s_waitcnt vmcnt(0)
112; VI-NEXT:    s_setpc_b64 s[30:31]
  ; %a = 1 << %x
113  %a = shl i32 1, %x
  ; %b = %a - 1, i.e. a mask of the low %x bits; stored without further shifting
114  %b = sub i32 %a, 1
115  store i32 %b, i32 addrspace(1)* %out
116  ret void
117}
118
; Attribute group #0: marks a function as nounwind.
119attributes #0 = { nounwind }
120