1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=GFX10 %s
3; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI %s
4; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
5
6; FIXME: Merge into imm.ll
7
; Volatile store of the i16 constant -32768 (bit pattern 0x8000) to %out.
; The GFX10 and VI checks expect the value to be moved into v0 as the
; sign-extended 32-bit literal 0xffff8000, while the SI checks expect 0x8000.
; Because the store is volatile, every run also expects a wait on the store
; (s_waitcnt_vscnt on GFX10, s_waitcnt vmcnt(0) on VI and SI) before s_endpgm.
8define amdgpu_kernel void @store_inline_imm_neg_0.0_i16(i16 addrspace(1)* %out) {
9; GFX10-LABEL: store_inline_imm_neg_0.0_i16:
10; GFX10:       ; %bb.0:
11; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
12; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
13; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
14; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
15; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
16; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
17; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
18; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
19;
20; VI-LABEL: store_inline_imm_neg_0.0_i16:
21; VI:       ; %bb.0:
22; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
23; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
24; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
25; VI-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
26; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
27; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
28; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
29; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
30;
31; SI-LABEL: store_inline_imm_neg_0.0_i16:
32; SI:       ; %bb.0:
33; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
34; SI-NEXT:    s_mov_b32 s3, 0xf000
35; SI-NEXT:    s_mov_b32 s2, -1
36; SI-NEXT:    v_mov_b32_e32 v0, 0x8000
37; SI-NEXT:    s_waitcnt lgkmcnt(0)
38; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
39; SI-NEXT:    s_waitcnt vmcnt(0)
40; SI-NEXT:    s_endpgm
41  store volatile i16 -32768, i16 addrspace(1)* %out
42  ret void
43}
44
; Stores the half constant 0.0 to %out.
; All three runs expect the value to be produced by `v_mov_b32_e32 v0, 0`;
; the GFX10 and VI encodings are 4 bytes long, i.e. no trailing literal dword.
45define amdgpu_kernel void @store_inline_imm_0.0_f16(half addrspace(1)* %out) {
46; GFX10-LABEL: store_inline_imm_0.0_f16:
47; GFX10:       ; %bb.0:
48; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
49; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
50; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
51; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
52; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
53; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
54; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
55;
56; VI-LABEL: store_inline_imm_0.0_f16:
57; VI:       ; %bb.0:
58; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
59; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
60; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
61; VI-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
62; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
63; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
64; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
65;
66; SI-LABEL: store_inline_imm_0.0_f16:
67; SI:       ; %bb.0:
68; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
69; SI-NEXT:    s_mov_b32 s3, 0xf000
70; SI-NEXT:    s_mov_b32 s2, -1
71; SI-NEXT:    v_mov_b32_e32 v0, 0
72; SI-NEXT:    s_waitcnt lgkmcnt(0)
73; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
74; SI-NEXT:    s_endpgm
75  store half 0.0, half addrspace(1)* %out
76  ret void
77}
78
; Stores the half constant -0.0 to %out.
; The GFX10 and VI checks expect v0 to be loaded with the 32-bit literal
; 0xffff8000 (8-byte v_mov_b32 encoding); the SI checks expect 0x8000.
79define amdgpu_kernel void @store_imm_neg_0.0_f16(half addrspace(1)* %out) {
80; GFX10-LABEL: store_imm_neg_0.0_f16:
81; GFX10:       ; %bb.0:
82; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
83; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
84; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
85; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
86; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
87; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
88; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
89;
90; VI-LABEL: store_imm_neg_0.0_f16:
91; VI:       ; %bb.0:
92; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
93; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
94; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
95; VI-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
96; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
97; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
98; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
99;
100; SI-LABEL: store_imm_neg_0.0_f16:
101; SI:       ; %bb.0:
102; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
103; SI-NEXT:    s_mov_b32 s3, 0xf000
104; SI-NEXT:    s_mov_b32 s2, -1
105; SI-NEXT:    v_mov_b32_e32 v0, 0x8000
106; SI-NEXT:    s_waitcnt lgkmcnt(0)
107; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
108; SI-NEXT:    s_endpgm
109  store half -0.0, half addrspace(1)* %out
110  ret void
111}
112
; Stores the half constant 0.5 to %out.
; All three runs expect v0 to be loaded with 0x3800 via v_mov_b32; the GFX10
; and VI encodings are 8 bytes long, ending in the literal dword 0x00003800.
113define amdgpu_kernel void @store_inline_imm_0.5_f16(half addrspace(1)* %out) {
114; GFX10-LABEL: store_inline_imm_0.5_f16:
115; GFX10:       ; %bb.0:
116; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
117; GFX10-NEXT:    v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00]
118; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
119; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
120; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
121; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
122; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
123;
124; VI-LABEL: store_inline_imm_0.5_f16:
125; VI:       ; %bb.0:
126; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
127; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
128; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
129; VI-NEXT:    v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00]
130; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
131; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
132; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
133;
134; SI-LABEL: store_inline_imm_0.5_f16:
135; SI:       ; %bb.0:
136; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
137; SI-NEXT:    s_mov_b32 s3, 0xf000
138; SI-NEXT:    s_mov_b32 s2, -1
139; SI-NEXT:    v_mov_b32_e32 v0, 0x3800
140; SI-NEXT:    s_waitcnt lgkmcnt(0)
141; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
142; SI-NEXT:    s_endpgm
143  store half 0.5, half addrspace(1)* %out
144  ret void
145}
146
; Stores the half constant -0.5 to %out.
; The GFX10 and VI checks expect v0 to be loaded with the 32-bit literal
; 0xffffb800 (8-byte v_mov_b32 encoding); the SI checks expect 0xb800.
147define amdgpu_kernel void @store_inline_imm_m_0.5_f16(half addrspace(1)* %out) {
148; GFX10-LABEL: store_inline_imm_m_0.5_f16:
149; GFX10:       ; %bb.0:
150; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
151; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff]
152; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
153; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
154; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
155; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
156; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
157;
158; VI-LABEL: store_inline_imm_m_0.5_f16:
159; VI:       ; %bb.0:
160; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
161; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
162; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
163; VI-NEXT:    v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff]
164; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
165; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
166; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
167;
168; SI-LABEL: store_inline_imm_m_0.5_f16:
169; SI:       ; %bb.0:
170; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
171; SI-NEXT:    s_mov_b32 s3, 0xf000
172; SI-NEXT:    s_mov_b32 s2, -1
173; SI-NEXT:    v_mov_b32_e32 v0, 0xb800
174; SI-NEXT:    s_waitcnt lgkmcnt(0)
175; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
176; SI-NEXT:    s_endpgm
177  store half -0.5, half addrspace(1)* %out
178  ret void
179}
180
; Stores the half constant 1.0 to %out.
; All three runs expect v0 to be loaded with 0x3c00 via v_mov_b32; the GFX10
; and VI encodings are 8 bytes long, ending in the literal dword 0x00003c00.
181define amdgpu_kernel void @store_inline_imm_1.0_f16(half addrspace(1)* %out) {
182; GFX10-LABEL: store_inline_imm_1.0_f16:
183; GFX10:       ; %bb.0:
184; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
185; GFX10-NEXT:    v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00]
186; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
187; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
188; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
189; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
190; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
191;
192; VI-LABEL: store_inline_imm_1.0_f16:
193; VI:       ; %bb.0:
194; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
195; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
196; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
197; VI-NEXT:    v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00]
198; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
199; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
200; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
201;
202; SI-LABEL: store_inline_imm_1.0_f16:
203; SI:       ; %bb.0:
204; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
205; SI-NEXT:    s_mov_b32 s3, 0xf000
206; SI-NEXT:    s_mov_b32 s2, -1
207; SI-NEXT:    v_mov_b32_e32 v0, 0x3c00
208; SI-NEXT:    s_waitcnt lgkmcnt(0)
209; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
210; SI-NEXT:    s_endpgm
211  store half 1.0, half addrspace(1)* %out
212  ret void
213}
214
; Stores the half constant -1.0 to %out.
; The GFX10 and VI checks expect v0 to be loaded with the 32-bit literal
; 0xffffbc00 (8-byte v_mov_b32 encoding); the SI checks expect 0xbc00.
215define amdgpu_kernel void @store_inline_imm_m_1.0_f16(half addrspace(1)* %out) {
216; GFX10-LABEL: store_inline_imm_m_1.0_f16:
217; GFX10:       ; %bb.0:
218; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
219; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff]
220; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
221; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
222; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
223; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
224; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
225;
226; VI-LABEL: store_inline_imm_m_1.0_f16:
227; VI:       ; %bb.0:
228; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
229; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
230; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
231; VI-NEXT:    v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff]
232; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
233; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
234; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
235;
236; SI-LABEL: store_inline_imm_m_1.0_f16:
237; SI:       ; %bb.0:
238; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
239; SI-NEXT:    s_mov_b32 s3, 0xf000
240; SI-NEXT:    s_mov_b32 s2, -1
241; SI-NEXT:    v_mov_b32_e32 v0, 0xbc00
242; SI-NEXT:    s_waitcnt lgkmcnt(0)
243; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
244; SI-NEXT:    s_endpgm
245  store half -1.0, half addrspace(1)* %out
246  ret void
247}
248
; Stores the half constant 2.0 to %out.
; All three runs expect v0 to be loaded with 0x4000 via v_mov_b32; the GFX10
; and VI encodings are 8 bytes long, ending in the literal dword 0x00004000.
249define amdgpu_kernel void @store_inline_imm_2.0_f16(half addrspace(1)* %out) {
250; GFX10-LABEL: store_inline_imm_2.0_f16:
251; GFX10:       ; %bb.0:
252; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
253; GFX10-NEXT:    v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00]
254; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
255; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
256; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
257; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
258; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
259;
260; VI-LABEL: store_inline_imm_2.0_f16:
261; VI:       ; %bb.0:
262; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
263; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
264; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
265; VI-NEXT:    v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00]
266; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
267; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
268; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
269;
270; SI-LABEL: store_inline_imm_2.0_f16:
271; SI:       ; %bb.0:
272; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
273; SI-NEXT:    s_mov_b32 s3, 0xf000
274; SI-NEXT:    s_mov_b32 s2, -1
275; SI-NEXT:    v_mov_b32_e32 v0, 0x4000
276; SI-NEXT:    s_waitcnt lgkmcnt(0)
277; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
278; SI-NEXT:    s_endpgm
279  store half 2.0, half addrspace(1)* %out
280  ret void
281}
282
; Stores the half constant -2.0 to %out.
; The GFX10 and VI checks expect v0 to be loaded with the 32-bit literal
; 0xffffc000 (8-byte v_mov_b32 encoding); the SI checks expect 0xc000.
283define amdgpu_kernel void @store_inline_imm_m_2.0_f16(half addrspace(1)* %out) {
284; GFX10-LABEL: store_inline_imm_m_2.0_f16:
285; GFX10:       ; %bb.0:
286; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
287; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff]
288; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
289; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
290; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
291; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
292; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
293;
294; VI-LABEL: store_inline_imm_m_2.0_f16:
295; VI:       ; %bb.0:
296; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
297; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
298; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
299; VI-NEXT:    v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff]
300; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
301; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
302; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
303;
304; SI-LABEL: store_inline_imm_m_2.0_f16:
305; SI:       ; %bb.0:
306; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
307; SI-NEXT:    s_mov_b32 s3, 0xf000
308; SI-NEXT:    s_mov_b32 s2, -1
309; SI-NEXT:    v_mov_b32_e32 v0, 0xc000
310; SI-NEXT:    s_waitcnt lgkmcnt(0)
311; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
312; SI-NEXT:    s_endpgm
313  store half -2.0, half addrspace(1)* %out
314  ret void
315}
316
; Stores the half constant 4.0 to %out.
; All three runs expect v0 to be loaded with 0x4400 via v_mov_b32; the GFX10
; and VI encodings are 8 bytes long, ending in the literal dword 0x00004400.
317define amdgpu_kernel void @store_inline_imm_4.0_f16(half addrspace(1)* %out) {
318; GFX10-LABEL: store_inline_imm_4.0_f16:
319; GFX10:       ; %bb.0:
320; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
321; GFX10-NEXT:    v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00]
322; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
323; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
324; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
325; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
326; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
327;
328; VI-LABEL: store_inline_imm_4.0_f16:
329; VI:       ; %bb.0:
330; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
331; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
332; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
333; VI-NEXT:    v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00]
334; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
335; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
336; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
337;
338; SI-LABEL: store_inline_imm_4.0_f16:
339; SI:       ; %bb.0:
340; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
341; SI-NEXT:    s_mov_b32 s3, 0xf000
342; SI-NEXT:    s_mov_b32 s2, -1
343; SI-NEXT:    v_mov_b32_e32 v0, 0x4400
344; SI-NEXT:    s_waitcnt lgkmcnt(0)
345; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
346; SI-NEXT:    s_endpgm
347  store half 4.0, half addrspace(1)* %out
348  ret void
349}
350
; Stores the half constant -4.0 to %out.
; The GFX10 and VI checks expect v0 to be loaded with the 32-bit literal
; 0xffffc400 (8-byte v_mov_b32 encoding); the SI checks expect 0xc400.
351define amdgpu_kernel void @store_inline_imm_m_4.0_f16(half addrspace(1)* %out) {
352; GFX10-LABEL: store_inline_imm_m_4.0_f16:
353; GFX10:       ; %bb.0:
354; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
355; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffc400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff]
356; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
357; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
358; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
359; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
360; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
361;
362; VI-LABEL: store_inline_imm_m_4.0_f16:
363; VI:       ; %bb.0:
364; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
365; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
366; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
367; VI-NEXT:    v_mov_b32_e32 v0, 0xffffc400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff]
368; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
369; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
370; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
371;
372; SI-LABEL: store_inline_imm_m_4.0_f16:
373; SI:       ; %bb.0:
374; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
375; SI-NEXT:    s_mov_b32 s3, 0xf000
376; SI-NEXT:    s_mov_b32 s2, -1
377; SI-NEXT:    v_mov_b32_e32 v0, 0xc400
378; SI-NEXT:    s_waitcnt lgkmcnt(0)
379; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
380; SI-NEXT:    s_endpgm
381  store half -4.0, half addrspace(1)* %out
382  ret void
383}
384
; Stores the half constant with bit pattern 0xH3118 to %out (per the function
; name this is the f16 value of 1/(2*pi)).
; All three runs expect v0 to be loaded with 0x3118 via v_mov_b32; the GFX10
; and VI encodings are 8 bytes long, ending in the literal dword 0x00003118.
385define amdgpu_kernel void @store_inline_imm_inv_2pi_f16(half addrspace(1)* %out) {
386; GFX10-LABEL: store_inline_imm_inv_2pi_f16:
387; GFX10:       ; %bb.0:
388; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
389; GFX10-NEXT:    v_mov_b32_e32 v0, 0x3118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00]
390; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
391; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
392; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
393; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
394; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
395;
396; VI-LABEL: store_inline_imm_inv_2pi_f16:
397; VI:       ; %bb.0:
398; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
399; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
400; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
401; VI-NEXT:    v_mov_b32_e32 v0, 0x3118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00]
402; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
403; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
404; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
405;
406; SI-LABEL: store_inline_imm_inv_2pi_f16:
407; SI:       ; %bb.0:
408; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
409; SI-NEXT:    s_mov_b32 s3, 0xf000
410; SI-NEXT:    s_mov_b32 s2, -1
411; SI-NEXT:    v_mov_b32_e32 v0, 0x3118
412; SI-NEXT:    s_waitcnt lgkmcnt(0)
413; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
414; SI-NEXT:    s_endpgm
415  store half 0xH3118, half addrspace(1)* %out
416  ret void
417}
418
; Stores the half constant with bit pattern 0xHB118 to %out (0xH3118 with the
; sign bit set; per the function name this is the f16 value of -1/(2*pi)).
; The GFX10 and VI checks expect v0 to be loaded with the 32-bit literal
; 0xffffb118 (8-byte v_mov_b32 encoding); the SI checks expect 0xb118.
419define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f16(half addrspace(1)* %out) {
420; GFX10-LABEL: store_inline_imm_m_inv_2pi_f16:
421; GFX10:       ; %bb.0:
422; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
423; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff]
424; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
425; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
426; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
427; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
428; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
429;
430; VI-LABEL: store_inline_imm_m_inv_2pi_f16:
431; VI:       ; %bb.0:
432; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
433; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
434; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
435; VI-NEXT:    v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff]
436; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
437; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
438; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
439;
440; SI-LABEL: store_inline_imm_m_inv_2pi_f16:
441; SI:       ; %bb.0:
442; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
443; SI-NEXT:    s_mov_b32 s3, 0xf000
444; SI-NEXT:    s_mov_b32 s2, -1
445; SI-NEXT:    v_mov_b32_e32 v0, 0xb118
446; SI-NEXT:    s_waitcnt lgkmcnt(0)
447; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
448; SI-NEXT:    s_endpgm
449  store half 0xHB118, half addrspace(1)* %out
450  ret void
451}
452
; Stores the half constant 4096.0 to %out.
; All three runs expect v0 to be loaded with 0x6c00 via v_mov_b32; the GFX10
; and VI encodings are 8 bytes long, ending in the literal dword 0x00006c00.
453define amdgpu_kernel void @store_literal_imm_f16(half addrspace(1)* %out) {
454; GFX10-LABEL: store_literal_imm_f16:
455; GFX10:       ; %bb.0:
456; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
457; GFX10-NEXT:    v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00]
458; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
459; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
460; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
461; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
462; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
463;
464; VI-LABEL: store_literal_imm_f16:
465; VI:       ; %bb.0:
466; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
467; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
468; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
469; VI-NEXT:    v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00]
470; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
471; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
472; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
473;
474; SI-LABEL: store_literal_imm_f16:
475; SI:       ; %bb.0:
476; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
477; SI-NEXT:    s_mov_b32 s3, 0xf000
478; SI-NEXT:    s_mov_b32 s2, -1
479; SI-NEXT:    v_mov_b32_e32 v0, 0x6c00
480; SI-NEXT:    s_waitcnt lgkmcnt(0)
481; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
482; SI-NEXT:    s_endpgm
483  store half 4096.0, half addrspace(1)* %out
484  ret void
485}
486
; Computes %x + 0.0 in half precision and stores the result to %out.
; The GFX10 and VI checks expect a single v_add_f16_e64 whose second source
; operand is printed as 0. The SI checks expect the kernel argument to be
; converted with v_cvt_f32_f16, added with v_add_f32 (operand 0), and
; converted back with v_cvt_f16_f32 before the store.
487define amdgpu_kernel void @add_inline_imm_0.0_f16(half addrspace(1)* %out, half %x) {
488; GFX10-LABEL: add_inline_imm_0.0_f16:
489; GFX10:       ; %bb.0:
490; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
491; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
492; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
493; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
494; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
495; GFX10-NEXT:    v_add_f16_e64 v0, s2, 0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x00,0x01,0x00]
496; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
497; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
498; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
499;
500; VI-LABEL: add_inline_imm_0.0_f16:
501; VI:       ; %bb.0:
502; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
503; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
504; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
505; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
506; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
507; VI-NEXT:    v_add_f16_e64 v0, s6, 0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x00,0x01,0x00]
508; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
509; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
510;
511; SI-LABEL: add_inline_imm_0.0_f16:
512; SI:       ; %bb.0:
513; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
514; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
515; SI-NEXT:    s_mov_b32 s3, 0xf000
516; SI-NEXT:    s_waitcnt lgkmcnt(0)
517; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
518; SI-NEXT:    s_mov_b32 s2, -1
519; SI-NEXT:    v_add_f32_e32 v0, 0, v0
520; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
521; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
522; SI-NEXT:    s_endpgm
523  %y = fadd half %x, 0.0
524  store half %y, half addrspace(1)* %out
525  ret void
526}
527
; Computes %x + 0.5 in half precision and stores the result to %out.
; The GFX10 and VI checks expect a single v_add_f16_e64 whose second source
; operand is printed as 0.5. The SI checks expect the kernel argument to be
; converted with v_cvt_f32_f16, added with v_add_f32 (operand 0.5), and
; converted back with v_cvt_f16_f32 before the store.
528define amdgpu_kernel void @add_inline_imm_0.5_f16(half addrspace(1)* %out, half %x) {
529; GFX10-LABEL: add_inline_imm_0.5_f16:
530; GFX10:       ; %bb.0:
531; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
532; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
533; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
534; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
535; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
536; GFX10-NEXT:    v_add_f16_e64 v0, s2, 0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe0,0x01,0x00]
537; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
538; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
539; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
540;
541; VI-LABEL: add_inline_imm_0.5_f16:
542; VI:       ; %bb.0:
543; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
544; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
545; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
546; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
547; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
548; VI-NEXT:    v_add_f16_e64 v0, s6, 0.5 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xe0,0x01,0x00]
549; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
550; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
551;
552; SI-LABEL: add_inline_imm_0.5_f16:
553; SI:       ; %bb.0:
554; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
555; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
556; SI-NEXT:    s_mov_b32 s3, 0xf000
557; SI-NEXT:    s_waitcnt lgkmcnt(0)
558; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
559; SI-NEXT:    s_mov_b32 s2, -1
560; SI-NEXT:    v_add_f32_e32 v0, 0.5, v0
561; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
562; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
563; SI-NEXT:    s_endpgm
564  %y = fadd half %x, 0.5
565  store half %y, half addrspace(1)* %out
566  ret void
567}
568
; Computes %x + (-0.5) in half precision and stores the result to %out.
; The GFX10 and VI checks expect a single v_add_f16_e64 whose second source
; operand is printed as -0.5. The SI checks expect the kernel argument to be
; converted with v_cvt_f32_f16, added with v_add_f32 (operand -0.5), and
; converted back with v_cvt_f16_f32 before the store.
569define amdgpu_kernel void @add_inline_imm_neg_0.5_f16(half addrspace(1)* %out, half %x) {
570; GFX10-LABEL: add_inline_imm_neg_0.5_f16:
571; GFX10:       ; %bb.0:
572; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
573; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
574; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
575; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
576; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
577; GFX10-NEXT:    v_add_f16_e64 v0, s2, -0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe2,0x01,0x00]
578; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
579; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
580; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
581;
582; VI-LABEL: add_inline_imm_neg_0.5_f16:
583; VI:       ; %bb.0:
584; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
585; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
586; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
587; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
588; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
589; VI-NEXT:    v_add_f16_e64 v0, s6, -0.5 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xe2,0x01,0x00]
590; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
591; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
592;
593; SI-LABEL: add_inline_imm_neg_0.5_f16:
594; SI:       ; %bb.0:
595; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
596; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
597; SI-NEXT:    s_mov_b32 s3, 0xf000
598; SI-NEXT:    s_waitcnt lgkmcnt(0)
599; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
600; SI-NEXT:    s_mov_b32 s2, -1
601; SI-NEXT:    v_add_f32_e32 v0, -0.5, v0
602; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
603; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
604; SI-NEXT:    s_endpgm
605  %y = fadd half %x, -0.5
606  store half %y, half addrspace(1)* %out
607  ret void
608}
609
; Adds the half constant 1.0 to the half kernel argument %x and stores the sum
; to %out. On GFX10 and VI the checks expect 1.0 to appear directly as the
; second source operand of v_add_f16_e64. On SI the checks expect the argument
; to be converted to f32, added with v_add_f32_e32 1.0, and converted back.
610define amdgpu_kernel void @add_inline_imm_1.0_f16(half addrspace(1)* %out, half %x) {
611; GFX10-LABEL: add_inline_imm_1.0_f16:
612; GFX10:       ; %bb.0:
613; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
614; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
615; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
616; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
617; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
618; GFX10-NEXT:    v_add_f16_e64 v0, s2, 1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe4,0x01,0x00]
619; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
620; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
621; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
622;
623; VI-LABEL: add_inline_imm_1.0_f16:
624; VI:       ; %bb.0:
625; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
626; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
627; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
628; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
629; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
630; VI-NEXT:    v_add_f16_e64 v0, s6, 1.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xe4,0x01,0x00]
631; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
632; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
633;
634; SI-LABEL: add_inline_imm_1.0_f16:
635; SI:       ; %bb.0:
636; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
637; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
638; SI-NEXT:    s_mov_b32 s3, 0xf000
639; SI-NEXT:    s_waitcnt lgkmcnt(0)
640; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
641; SI-NEXT:    s_mov_b32 s2, -1
642; SI-NEXT:    v_add_f32_e32 v0, 1.0, v0
643; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
644; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
645; SI-NEXT:    s_endpgm
646  %y = fadd half %x, 1.0
647  store half %y, half addrspace(1)* %out
648  ret void
649}
650
; Adds the half constant -1.0 to the half kernel argument %x and stores the
; sum to %out. On GFX10 and VI the checks expect -1.0 to appear directly as the
; second source operand of v_add_f16_e64. On SI the checks expect the argument
; to be converted to f32, added with v_add_f32_e32 -1.0, and converted back.
651define amdgpu_kernel void @add_inline_imm_neg_1.0_f16(half addrspace(1)* %out, half %x) {
652; GFX10-LABEL: add_inline_imm_neg_1.0_f16:
653; GFX10:       ; %bb.0:
654; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
655; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
656; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
657; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
658; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
659; GFX10-NEXT:    v_add_f16_e64 v0, s2, -1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe6,0x01,0x00]
660; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
661; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
662; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
663;
664; VI-LABEL: add_inline_imm_neg_1.0_f16:
665; VI:       ; %bb.0:
666; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
667; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
668; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
669; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
670; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
671; VI-NEXT:    v_add_f16_e64 v0, s6, -1.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xe6,0x01,0x00]
672; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
673; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
674;
675; SI-LABEL: add_inline_imm_neg_1.0_f16:
676; SI:       ; %bb.0:
677; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
678; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
679; SI-NEXT:    s_mov_b32 s3, 0xf000
680; SI-NEXT:    s_waitcnt lgkmcnt(0)
681; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
682; SI-NEXT:    s_mov_b32 s2, -1
683; SI-NEXT:    v_add_f32_e32 v0, -1.0, v0
684; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
685; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
686; SI-NEXT:    s_endpgm
687  %y = fadd half %x, -1.0
688  store half %y, half addrspace(1)* %out
689  ret void
690}
691
; Adds the half constant 2.0 to the half kernel argument %x and stores the sum
; to %out. On GFX10 and VI the checks expect 2.0 to appear directly as the
; second source operand of v_add_f16_e64. On SI the checks expect the argument
; to be converted to f32, added with v_add_f32_e32 2.0, and converted back.
692define amdgpu_kernel void @add_inline_imm_2.0_f16(half addrspace(1)* %out, half %x) {
693; GFX10-LABEL: add_inline_imm_2.0_f16:
694; GFX10:       ; %bb.0:
695; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
696; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
697; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
698; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
699; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
700; GFX10-NEXT:    v_add_f16_e64 v0, s2, 2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe8,0x01,0x00]
701; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
702; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
703; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
704;
705; VI-LABEL: add_inline_imm_2.0_f16:
706; VI:       ; %bb.0:
707; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
708; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
709; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
710; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
711; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
712; VI-NEXT:    v_add_f16_e64 v0, s6, 2.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xe8,0x01,0x00]
713; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
714; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
715;
716; SI-LABEL: add_inline_imm_2.0_f16:
717; SI:       ; %bb.0:
718; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
719; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
720; SI-NEXT:    s_mov_b32 s3, 0xf000
721; SI-NEXT:    s_waitcnt lgkmcnt(0)
722; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
723; SI-NEXT:    s_mov_b32 s2, -1
724; SI-NEXT:    v_add_f32_e32 v0, 2.0, v0
725; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
726; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
727; SI-NEXT:    s_endpgm
728  %y = fadd half %x, 2.0
729  store half %y, half addrspace(1)* %out
730  ret void
731}
732
; Adds the half constant -2.0 to the half kernel argument %x and stores the
; sum to %out. On GFX10 and VI the checks expect -2.0 to appear directly as the
; second source operand of v_add_f16_e64. On SI the checks expect the argument
; to be converted to f32, added with v_add_f32_e32 -2.0, and converted back.
733define amdgpu_kernel void @add_inline_imm_neg_2.0_f16(half addrspace(1)* %out, half %x) {
734; GFX10-LABEL: add_inline_imm_neg_2.0_f16:
735; GFX10:       ; %bb.0:
736; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
737; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
738; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
739; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
740; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
741; GFX10-NEXT:    v_add_f16_e64 v0, s2, -2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xea,0x01,0x00]
742; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
743; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
744; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
745;
746; VI-LABEL: add_inline_imm_neg_2.0_f16:
747; VI:       ; %bb.0:
748; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
749; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
750; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
751; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
752; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
753; VI-NEXT:    v_add_f16_e64 v0, s6, -2.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xea,0x01,0x00]
754; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
755; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
756;
757; SI-LABEL: add_inline_imm_neg_2.0_f16:
758; SI:       ; %bb.0:
759; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
760; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
761; SI-NEXT:    s_mov_b32 s3, 0xf000
762; SI-NEXT:    s_waitcnt lgkmcnt(0)
763; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
764; SI-NEXT:    s_mov_b32 s2, -1
765; SI-NEXT:    v_add_f32_e32 v0, -2.0, v0
766; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
767; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
768; SI-NEXT:    s_endpgm
769  %y = fadd half %x, -2.0
770  store half %y, half addrspace(1)* %out
771  ret void
772}
773
; Adds the half constant 4.0 to the half kernel argument %x and stores the sum
; to %out. On GFX10 and VI the checks expect 4.0 to appear directly as the
; second source operand of v_add_f16_e64. On SI the checks expect the argument
; to be converted to f32, added with v_add_f32_e32 4.0, and converted back.
774define amdgpu_kernel void @add_inline_imm_4.0_f16(half addrspace(1)* %out, half %x) {
775; GFX10-LABEL: add_inline_imm_4.0_f16:
776; GFX10:       ; %bb.0:
777; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
778; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
779; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
780; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
781; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
782; GFX10-NEXT:    v_add_f16_e64 v0, s2, 4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xec,0x01,0x00]
783; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
784; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
785; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
786;
787; VI-LABEL: add_inline_imm_4.0_f16:
788; VI:       ; %bb.0:
789; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
790; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
791; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
792; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
793; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
794; VI-NEXT:    v_add_f16_e64 v0, s6, 4.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xec,0x01,0x00]
795; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
796; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
797;
798; SI-LABEL: add_inline_imm_4.0_f16:
799; SI:       ; %bb.0:
800; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
801; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
802; SI-NEXT:    s_mov_b32 s3, 0xf000
803; SI-NEXT:    s_waitcnt lgkmcnt(0)
804; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
805; SI-NEXT:    s_mov_b32 s2, -1
806; SI-NEXT:    v_add_f32_e32 v0, 4.0, v0
807; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
808; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
809; SI-NEXT:    s_endpgm
810  %y = fadd half %x, 4.0
811  store half %y, half addrspace(1)* %out
812  ret void
813}
814
; Adds the half constant -4.0 to the half kernel argument %x and stores the
; sum to %out. On GFX10 and VI the checks expect -4.0 to appear directly as the
; second source operand of v_add_f16_e64. On SI the checks expect the argument
; to be converted to f32, added with v_add_f32_e32 -4.0, and converted back.
815define amdgpu_kernel void @add_inline_imm_neg_4.0_f16(half addrspace(1)* %out, half %x) {
816; GFX10-LABEL: add_inline_imm_neg_4.0_f16:
817; GFX10:       ; %bb.0:
818; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
819; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
820; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
821; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
822; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
823; GFX10-NEXT:    v_add_f16_e64 v0, s2, -4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xee,0x01,0x00]
824; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
825; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
826; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
827;
828; VI-LABEL: add_inline_imm_neg_4.0_f16:
829; VI:       ; %bb.0:
830; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
831; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
832; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
833; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
834; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
835; VI-NEXT:    v_add_f16_e64 v0, s6, -4.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xee,0x01,0x00]
836; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
837; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
838;
839; SI-LABEL: add_inline_imm_neg_4.0_f16:
840; SI:       ; %bb.0:
841; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
842; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
843; SI-NEXT:    s_mov_b32 s3, 0xf000
844; SI-NEXT:    s_waitcnt lgkmcnt(0)
845; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
846; SI-NEXT:    s_mov_b32 s2, -1
847; SI-NEXT:    v_add_f32_e32 v0, -4.0, v0
848; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
849; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
850; SI-NEXT:    s_endpgm
851  %y = fadd half %x, -4.0
852  store half %y, half addrspace(1)* %out
853  ret void
854}
855
; Loads a half from %in, adds the constant 0.5, and stores the sum to %out.
; Here the non-constant operand comes from a buffer load into a VGPR rather
; than from a kernel argument. On GFX10 and VI the checks expect the 4-byte
; v_add_f16_e32 form with 0.5 as the first source and the loaded value second.
; On SI the checks expect an f32 add of 0.5 between f16<->f32 conversions.
856define amdgpu_kernel void @commute_add_inline_imm_0.5_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
857; GFX10-LABEL: commute_add_inline_imm_0.5_f16:
858; GFX10:       ; %bb.0:
859; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
860; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
861; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
862; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
863; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
864; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
865; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
866; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
867; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
868; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
869; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
870; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
871; GFX10-NEXT:    v_add_f16_e32 v0, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x64]
872; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
873; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
874;
875; VI-LABEL: commute_add_inline_imm_0.5_f16:
876; VI:       ; %bb.0:
877; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00]
878; VI-NEXT:    s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11]
879; VI-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
880; VI-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
881; VI-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
882; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
883; VI-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
884; VI-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
885; VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
886; VI-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
887; VI-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
888; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
889; VI-NEXT:    v_add_f16_e32 v0, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x3e]
890; VI-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
891; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
892;
893; SI-LABEL: commute_add_inline_imm_0.5_f16:
894; SI:       ; %bb.0:
895; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
896; SI-NEXT:    s_mov_b32 s7, 0xf000
897; SI-NEXT:    s_mov_b32 s6, -1
898; SI-NEXT:    s_mov_b32 s10, s6
899; SI-NEXT:    s_mov_b32 s11, s7
900; SI-NEXT:    s_waitcnt lgkmcnt(0)
901; SI-NEXT:    s_mov_b32 s8, s2
902; SI-NEXT:    s_mov_b32 s9, s3
903; SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
904; SI-NEXT:    s_mov_b32 s4, s0
905; SI-NEXT:    s_mov_b32 s5, s1
906; SI-NEXT:    s_waitcnt vmcnt(0)
907; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
908; SI-NEXT:    v_add_f32_e32 v0, 0.5, v0
909; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
910; SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
911; SI-NEXT:    s_endpgm
912  %x = load half, half addrspace(1)* %in
913  %y = fadd half %x, 0.5
914  store half %y, half addrspace(1)* %out
915  ret void
916}
917
; Loads a half from %in, adds the constant 1024.0, and stores the sum to %out.
; On GFX10 and VI the checks expect v_add_f16_e32 with the constant as the
; first source, printed as 0x6400 (the half bit pattern of 1024.0) and carried
; in an extra trailing dword of the encoding. On SI the checks expect an f32
; add of 0x44800000 (the f32 bit pattern of 1024.0) between f16<->f32
; conversions.
918define amdgpu_kernel void @commute_add_literal_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
919; GFX10-LABEL: commute_add_literal_f16:
920; GFX10:       ; %bb.0:
921; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
922; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
923; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
924; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
925; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
926; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
927; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
928; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
929; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
930; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
931; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
932; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
933; GFX10-NEXT:    v_add_f16_e32 v0, 0x6400, v0 ; encoding: [0xff,0x00,0x00,0x64,0x00,0x64,0x00,0x00]
934; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
935; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
936;
937; VI-LABEL: commute_add_literal_f16:
938; VI:       ; %bb.0:
939; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00]
940; VI-NEXT:    s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11]
941; VI-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
942; VI-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
943; VI-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
944; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
945; VI-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
946; VI-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
947; VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
948; VI-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
949; VI-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
950; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
951; VI-NEXT:    v_add_f16_e32 v0, 0x6400, v0 ; encoding: [0xff,0x00,0x00,0x3e,0x00,0x64,0x00,0x00]
952; VI-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
953; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
954;
955; SI-LABEL: commute_add_literal_f16:
956; SI:       ; %bb.0:
957; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
958; SI-NEXT:    s_mov_b32 s7, 0xf000
959; SI-NEXT:    s_mov_b32 s6, -1
960; SI-NEXT:    s_mov_b32 s10, s6
961; SI-NEXT:    s_mov_b32 s11, s7
962; SI-NEXT:    s_waitcnt lgkmcnt(0)
963; SI-NEXT:    s_mov_b32 s8, s2
964; SI-NEXT:    s_mov_b32 s9, s3
965; SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
966; SI-NEXT:    s_mov_b32 s4, s0
967; SI-NEXT:    s_mov_b32 s5, s1
968; SI-NEXT:    s_waitcnt vmcnt(0)
969; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
970; SI-NEXT:    v_add_f32_e32 v0, 0x44800000, v0
971; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
972; SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
973; SI-NEXT:    s_endpgm
974  %x = load half, half addrspace(1)* %in
975  %y = fadd half %x, 1024.0
976  store half %y, half addrspace(1)* %out
977  ret void
978}
979
; Adds the half constant 0xH0001 (the half whose raw bit pattern is 0x0001) to
; the half kernel argument %x and stores the sum to %out. On GFX10 and VI the
; checks expect the constant to be printed as the integer operand 1 of
; v_add_f16_e64. On SI the checks expect an f32 add of the literal 0x33800000
; (2^-24 as f32) between f16<->f32 conversions.
980define amdgpu_kernel void @add_inline_imm_1_f16(half addrspace(1)* %out, half %x) {
981; GFX10-LABEL: add_inline_imm_1_f16:
982; GFX10:       ; %bb.0:
983; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
984; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
985; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
986; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
987; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
988; GFX10-NEXT:    v_add_f16_e64 v0, s2, 1 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x02,0x01,0x00]
989; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
990; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
991; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
992;
993; VI-LABEL: add_inline_imm_1_f16:
994; VI:       ; %bb.0:
995; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
996; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
997; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
998; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
999; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1000; VI-NEXT:    v_add_f16_e64 v0, s6, 1 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x02,0x01,0x00]
1001; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1002; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1003;
1004; SI-LABEL: add_inline_imm_1_f16:
1005; SI:       ; %bb.0:
1006; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
1007; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
1008; SI-NEXT:    s_mov_b32 s3, 0xf000
1009; SI-NEXT:    s_waitcnt lgkmcnt(0)
1010; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
1011; SI-NEXT:    s_mov_b32 s2, -1
1012; SI-NEXT:    v_add_f32_e32 v0, 0x33800000, v0
1013; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1014; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1015; SI-NEXT:    s_endpgm
1016  %y = fadd half %x, 0xH0001
1017  store half %y, half addrspace(1)* %out
1018  ret void
1019}
1020
; Adds the half constant 0xH0002 (the half whose raw bit pattern is 0x0002) to
; the half kernel argument %x and stores the sum to %out. On GFX10 and VI the
; checks expect the constant to be printed as the integer operand 2 of
; v_add_f16_e64. On SI the checks expect an f32 add of the literal 0x34000000
; (2^-23 as f32) between f16<->f32 conversions.
1021define amdgpu_kernel void @add_inline_imm_2_f16(half addrspace(1)* %out, half %x) {
1022; GFX10-LABEL: add_inline_imm_2_f16:
1023; GFX10:       ; %bb.0:
1024; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1025; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1026; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1027; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1028; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1029; GFX10-NEXT:    v_add_f16_e64 v0, s2, 2 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x04,0x01,0x00]
1030; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1031; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1032; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1033;
1034; VI-LABEL: add_inline_imm_2_f16:
1035; VI:       ; %bb.0:
1036; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1037; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1038; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1039; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1040; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1041; VI-NEXT:    v_add_f16_e64 v0, s6, 2 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x04,0x01,0x00]
1042; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1043; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1044;
1045; SI-LABEL: add_inline_imm_2_f16:
1046; SI:       ; %bb.0:
1047; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
1048; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
1049; SI-NEXT:    s_mov_b32 s3, 0xf000
1050; SI-NEXT:    s_waitcnt lgkmcnt(0)
1051; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
1052; SI-NEXT:    s_mov_b32 s2, -1
1053; SI-NEXT:    v_add_f32_e32 v0, 0x34000000, v0
1054; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1055; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1056; SI-NEXT:    s_endpgm
1057  %y = fadd half %x, 0xH0002
1058  store half %y, half addrspace(1)* %out
1059  ret void
1060}
1061
; Adds the half constant 0xH0010 (the half whose raw bit pattern is 0x0010) to
; the half kernel argument %x and stores the sum to %out. On GFX10 and VI the
; checks expect the constant to be printed as the integer operand 16 of
; v_add_f16_e64. On SI the checks expect an f32 add of the literal 0x35800000
; (2^-20 as f32) between f16<->f32 conversions.
1062define amdgpu_kernel void @add_inline_imm_16_f16(half addrspace(1)* %out, half %x) {
1063; GFX10-LABEL: add_inline_imm_16_f16:
1064; GFX10:       ; %bb.0:
1065; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1066; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1067; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1068; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1069; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1070; GFX10-NEXT:    v_add_f16_e64 v0, s2, 16 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x20,0x01,0x00]
1071; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1072; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1073; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1074;
1075; VI-LABEL: add_inline_imm_16_f16:
1076; VI:       ; %bb.0:
1077; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1078; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1079; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1080; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1081; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1082; VI-NEXT:    v_add_f16_e64 v0, s6, 16 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x20,0x01,0x00]
1083; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1084; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1085;
1086; SI-LABEL: add_inline_imm_16_f16:
1087; SI:       ; %bb.0:
1088; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
1089; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
1090; SI-NEXT:    s_mov_b32 s3, 0xf000
1091; SI-NEXT:    s_waitcnt lgkmcnt(0)
1092; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
1093; SI-NEXT:    s_mov_b32 s2, -1
1094; SI-NEXT:    v_add_f32_e32 v0, 0x35800000, v0
1095; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1096; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1097; SI-NEXT:    s_endpgm
1098  %y = fadd half %x, 0xH0010
1099  store half %y, half addrspace(1)* %out
1100  ret void
1101}
1102
; Despite the _f16 suffix, the arithmetic here is an integer add: an i16 is
; loaded from %in, -1 is added with `add i16`, and the result is bitcast to
; half only for the store to %out. The checks expect an integer add with -1 as
; an operand on every target: v_add_nc_u16 on GFX10, v_add_u16_e32 on VI, and
; v_add_i32_e32 (with a vcc operand) on SI.
1103define amdgpu_kernel void @add_inline_imm_neg_1_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) {
1104; GFX10-LABEL: add_inline_imm_neg_1_f16:
1105; GFX10:       ; %bb.0:
1106; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
1107; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
1108; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
1109; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
1110; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
1111; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1112; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
1113; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
1114; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
1115; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
1116; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
1117; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
1118; GFX10-NEXT:    v_add_nc_u16 v0, v0, -1 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0x83,0x01,0x00]
1119; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1120; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1121;
1122; VI-LABEL: add_inline_imm_neg_1_f16:
1123; VI:       ; %bb.0:
1124; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00]
1125; VI-NEXT:    s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11]
1126; VI-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1127; VI-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1128; VI-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1129; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1130; VI-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1131; VI-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1132; VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1133; VI-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1134; VI-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1135; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1136; VI-NEXT:    v_add_u16_e32 v0, -1, v0 ; encoding: [0xc1,0x00,0x00,0x4c]
1137; VI-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1138; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1139;
1140; SI-LABEL: add_inline_imm_neg_1_f16:
1141; SI:       ; %bb.0:
1142; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1143; SI-NEXT:    s_mov_b32 s7, 0xf000
1144; SI-NEXT:    s_mov_b32 s6, -1
1145; SI-NEXT:    s_mov_b32 s10, s6
1146; SI-NEXT:    s_mov_b32 s11, s7
1147; SI-NEXT:    s_waitcnt lgkmcnt(0)
1148; SI-NEXT:    s_mov_b32 s8, s2
1149; SI-NEXT:    s_mov_b32 s9, s3
1150; SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
1151; SI-NEXT:    s_mov_b32 s4, s0
1152; SI-NEXT:    s_mov_b32 s5, s1
1153; SI-NEXT:    s_waitcnt vmcnt(0)
1154; SI-NEXT:    v_add_i32_e32 v0, vcc, -1, v0
1155; SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
1156; SI-NEXT:    s_endpgm
1157  %x = load i16, i16 addrspace(1)* %in
1158  %y = add i16 %x, -1
1159  %ybc = bitcast i16 %y to half
1160  store half %ybc, half addrspace(1)* %out
1161  ret void
1162}
1163
; Kernel: loads an i16 from %in, adds the integer constant -2, bitcasts the
; sum to half and stores it to %out. Despite the _f16 suffix, the arithmetic
; is an integer add; only the stored type is half.
; The checks expect -2 to appear directly as an operand of the add:
; v_add_nc_u16 (GFX10), v_add_u16_e32 (VI) and the 32-bit v_add_i32_e32 (SI).
define amdgpu_kernel void @add_inline_imm_neg_2_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) {
; GFX10-LABEL: add_inline_imm_neg_2_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT:    v_add_nc_u16 v0, v0, -2 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0x85,0x01,0x00]
; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; VI-LABEL: add_inline_imm_neg_2_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT:    s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
; VI-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
; VI-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
; VI-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
; VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
; VI-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
; VI-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; VI-NEXT:    v_add_u16_e32 v0, -2, v0 ; encoding: [0xc2,0x00,0x00,0x4c]
; VI-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: add_inline_imm_neg_2_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_add_i32_e32 v0, vcc, -2, v0
; SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
  %x = load i16, i16 addrspace(1)* %in
  ; Integer add of -2; the result bits are reinterpreted as half below.
  %y = add i16 %x, -2
  %ybc = bitcast i16 %y to half
  store half %ybc, half addrspace(1)* %out
  ret void
}
1224
; Kernel: loads an i16 from %in, adds the integer constant -16, bitcasts the
; sum to half and stores it to %out. Despite the _f16 suffix, the arithmetic
; is an integer add; only the stored type is half.
; The checks expect -16 to appear directly as an operand of the add:
; v_add_nc_u16 (GFX10), v_add_u16_e32 (VI) and the 32-bit v_add_i32_e32 (SI).
define amdgpu_kernel void @add_inline_imm_neg_16_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) {
; GFX10-LABEL: add_inline_imm_neg_16_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT:    v_add_nc_u16 v0, v0, -16 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0xa1,0x01,0x00]
; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; VI-LABEL: add_inline_imm_neg_16_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT:    s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
; VI-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
; VI-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
; VI-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
; VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
; VI-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
; VI-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; VI-NEXT:    v_add_u16_e32 v0, -16, v0 ; encoding: [0xd0,0x00,0x00,0x4c]
; VI-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: add_inline_imm_neg_16_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_add_i32_e32 v0, vcc, -16, v0
; SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
  %x = load i16, i16 addrspace(1)* %in
  ; Integer add of -16; the result bits are reinterpreted as half below.
  %y = add i16 %x, -16
  %ybc = bitcast i16 %y to half
  store half %ybc, half addrspace(1)* %out
  ret void
}
1285
; Kernel: floating-point add of the half argument %x and the half constant
; whose bit pattern is 0x003F (0xH003F), stored to %out.
; GFX10 and VI checks expect the constant as the operand `63` of
; v_add_f16_e64. SI checks expect %x to be converted to f32, added to the
; 32-bit literal 0x367c0000, and converted back to f16 before the store.
define amdgpu_kernel void @add_inline_imm_63_f16(half addrspace(1)* %out, half %x) {
; GFX10-LABEL: add_inline_imm_63_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT:    v_add_f16_e64 v0, s2, 63 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x7e,0x01,0x00]
; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; VI-LABEL: add_inline_imm_63_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT:    v_add_f16_e64 v0, s6, 63 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x7e,0x01,0x00]
; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: add_inline_imm_63_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_add_f32_e32 v0, 0x367c0000, v0
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
  ; 0xH003F is a half given by its raw bits; as an integer those bits are 63.
  %y = fadd half %x, 0xH003F
  store half %y, half addrspace(1)* %out
  ret void
}
1326
; Kernel: floating-point add of the half argument %x and the half constant
; whose bit pattern is 0x0040 (0xH0040), stored to %out.
; GFX10 and VI checks expect the constant as the operand `64` of
; v_add_f16_e64. SI checks expect %x to be converted to f32, added to the
; 32-bit literal 0x36800000, and converted back to f16 before the store.
define amdgpu_kernel void @add_inline_imm_64_f16(half addrspace(1)* %out, half %x) {
; GFX10-LABEL: add_inline_imm_64_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT:    v_add_f16_e64 v0, s2, 64 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x80,0x01,0x00]
; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; VI-LABEL: add_inline_imm_64_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT:    v_add_f16_e64 v0, s6, 64 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x80,0x01,0x00]
; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: add_inline_imm_64_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_add_f32_e32 v0, 0x36800000, v0
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
  ; 0xH0040 is a half given by its raw bits; as an integer those bits are 64.
  %y = fadd half %x, 0xH0040
  store half %y, half addrspace(1)* %out
  ret void
}
1367
; This needs to be emitted as a literal constant since the 16-bit
; float values do not work for 16-bit integer operations.
;
; Non-kernel function: multiplies the i16 argument %x by the bit pattern of
; half 0.5 reinterpreted as i16, and stores the product to %out.
; All three check prefixes expect the multiplier as the hex literal 0x3800
; (v_mul_lo_u16 on GFX10/VI; v_mul_u32_u24 on SI after masking %x with
; 0xffff).
define void @mul_inline_imm_0.5_i16(i16 addrspace(1)* %out, i16 %x) {
; GFX10-LABEL: mul_inline_imm_0.5_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT:    v_mul_lo_u16 v2, 0x3800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00]
; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
;
; VI-LABEL: mul_inline_imm_0.5_i16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; VI-NEXT:    v_mul_lo_u16_e32 v2, 0x3800, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x38,0x00,0x00]
; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
;
; SI-LABEL: mul_inline_imm_0.5_i16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s6, 0
; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s4, s6
; SI-NEXT:    s_mov_b32 s5, s6
; SI-NEXT:    v_mul_u32_u24_e32 v2, 0x3800, v2
; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; SI-NEXT:    s_setpc_b64 s[30:31]
  %y = mul i16 %x, bitcast (half 0.5 to i16)
  store i16 %y, i16 addrspace(1)* %out
  ret void
}
1404
; Non-kernel function: multiplies the i16 argument %x by the bit pattern of
; half -0.5 reinterpreted as i16, and stores the product to %out.
; All three check prefixes expect the multiplier as the hex literal 0xb800
; (v_mul_lo_u16 on GFX10/VI; v_mul_u32_u24 on SI after masking %x with
; 0xffff) rather than as a floating-point operand.
define void @mul_inline_imm_neg_0.5_i16(i16 addrspace(1)* %out, i16 %x) {
; GFX10-LABEL: mul_inline_imm_neg_0.5_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT:    v_mul_lo_u16 v2, 0xb800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xb8,0xff,0xff]
; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
;
; VI-LABEL: mul_inline_imm_neg_0.5_i16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; VI-NEXT:    v_mul_lo_u16_e32 v2, 0xb800, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xb8,0xff,0xff]
; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
;
; SI-LABEL: mul_inline_imm_neg_0.5_i16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s6, 0
; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s4, s6
; SI-NEXT:    s_mov_b32 s5, s6
; SI-NEXT:    v_mul_u32_u24_e32 v2, 0xb800, v2
; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; SI-NEXT:    s_setpc_b64 s[30:31]
  %y = mul i16 %x, bitcast (half -0.5 to i16)
  store i16 %y, i16 addrspace(1)* %out
  ret void
}
1439
; Non-kernel function: multiplies the i16 argument %x by the bit pattern of
; half 1.0 reinterpreted as i16, and stores the product to %out.
; All three check prefixes expect the multiplier as the hex literal 0x3c00
; (v_mul_lo_u16 on GFX10/VI; v_mul_u32_u24 on SI after masking %x with
; 0xffff) rather than as a floating-point operand.
define void @mul_inline_imm_1.0_i16(i16 addrspace(1)* %out, i16 %x) {
; GFX10-LABEL: mul_inline_imm_1.0_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT:    v_mul_lo_u16 v2, 0x3c00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x3c,0x00,0x00]
; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
;
; VI-LABEL: mul_inline_imm_1.0_i16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; VI-NEXT:    v_mul_lo_u16_e32 v2, 0x3c00, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x3c,0x00,0x00]
; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
;
; SI-LABEL: mul_inline_imm_1.0_i16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s6, 0
; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s4, s6
; SI-NEXT:    s_mov_b32 s5, s6
; SI-NEXT:    v_mul_u32_u24_e32 v2, 0x3c00, v2
; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; SI-NEXT:    s_setpc_b64 s[30:31]
  %y = mul i16 %x, bitcast (half 1.0 to i16)
  store i16 %y, i16 addrspace(1)* %out
  ret void
}
1474
; Non-kernel function: multiplies the i16 argument %x by the bit pattern of
; half -1.0 reinterpreted as i16, and stores the product to %out.
; All three check prefixes expect the multiplier as the hex literal 0xbc00
; (v_mul_lo_u16 on GFX10/VI; v_mul_u32_u24 on SI after masking %x with
; 0xffff) rather than as a floating-point operand.
define void @mul_inline_imm_neg_1.0_i16(i16 addrspace(1)* %out, i16 %x) {
; GFX10-LABEL: mul_inline_imm_neg_1.0_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT:    v_mul_lo_u16 v2, 0xbc00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xbc,0xff,0xff]
; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
;
; VI-LABEL: mul_inline_imm_neg_1.0_i16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; VI-NEXT:    v_mul_lo_u16_e32 v2, 0xbc00, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xbc,0xff,0xff]
; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
;
; SI-LABEL: mul_inline_imm_neg_1.0_i16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s6, 0
; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s4, s6
; SI-NEXT:    s_mov_b32 s5, s6
; SI-NEXT:    v_mul_u32_u24_e32 v2, 0xbc00, v2
; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; SI-NEXT:    s_setpc_b64 s[30:31]
  %y = mul i16 %x, bitcast (half -1.0 to i16)
  store i16 %y, i16 addrspace(1)* %out
  ret void
}
1509
; Non-kernel function: shifts the bit pattern of half 2.0 (reinterpreted as
; i16) left by the i16 argument %x and stores the result to %out. The
; constant is the value being shifted; %x is the shift amount.
; Expected output per prefix:
;   GFX10: 0x4000 is a literal operand of v_lshlrev_b16.
;   VI:    0x4000 is first moved into s4 with s_movk_i32, then s4 is the
;          shifted operand of v_lshlrev_b16_e64.
;   SI:    0x4000 is a literal operand of the 32-bit v_lshl_b32_e32.
define void @shl_inline_imm_2.0_i16(i16 addrspace(1)* %out, i16 %x) {
; GFX10-LABEL: shl_inline_imm_2.0_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT:    v_lshlrev_b16 v2, v2, 0x4000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0x40,0x00,0x00]
; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
;
; VI-LABEL: shl_inline_imm_2.0_i16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; VI-NEXT:    s_movk_i32 s4, 0x4000 ; encoding: [0x00,0x40,0x04,0xb0]
; VI-NEXT:    v_lshlrev_b16_e64 v2, v2, s4 ; encoding: [0x02,0x00,0x2a,0xd1,0x02,0x09,0x00,0x00]
; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
;
; SI-LABEL: shl_inline_imm_2.0_i16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s6, 0
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s4, s6
; SI-NEXT:    s_mov_b32 s5, s6
; SI-NEXT:    v_lshl_b32_e32 v2, 0x4000, v2
; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; SI-NEXT:    s_setpc_b64 s[30:31]
  %y = shl i16 bitcast (half 2.0 to i16), %x
  store i16 %y, i16 addrspace(1)* %out
  ret void
}
1544
; Non-kernel function: shifts the bit pattern of half -2.0 (reinterpreted as
; i16) left by the i16 argument %x and stores the result to %out. The
; constant is the value being shifted; %x is the shift amount.
; Expected output per prefix:
;   GFX10: 0xc000 is a literal operand of v_lshlrev_b16.
;   VI:    0xc000 is first moved into s4 with s_movk_i32, then s4 is the
;          shifted operand of v_lshlrev_b16_e64.
;   SI:    the constant is printed as the 32-bit literal 0xffffc000 on
;          v_lshl_b32_e32.
define void @shl_inline_imm_neg_2.0_i16(i16 addrspace(1)* %out, i16 %x) {
; GFX10-LABEL: shl_inline_imm_neg_2.0_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT:    v_lshlrev_b16 v2, v2, 0xc000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0xc0,0xff,0xff]
; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
;
; VI-LABEL: shl_inline_imm_neg_2.0_i16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; VI-NEXT:    s_movk_i32 s4, 0xc000 ; encoding: [0x00,0xc0,0x04,0xb0]
; VI-NEXT:    v_lshlrev_b16_e64 v2, v2, s4 ; encoding: [0x02,0x00,0x2a,0xd1,0x02,0x09,0x00,0x00]
; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
;
; SI-LABEL: shl_inline_imm_neg_2.0_i16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s6, 0
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s4, s6
; SI-NEXT:    s_mov_b32 s5, s6
; SI-NEXT:    v_lshl_b32_e32 v2, 0xffffc000, v2
; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; SI-NEXT:    s_setpc_b64 s[30:31]
  %y = shl i16 bitcast (half -2.0 to i16), %x
  store i16 %y, i16 addrspace(1)* %out
  ret void
}
1579
; Non-kernel function: multiplies the i16 argument %x by the bit pattern of
; half 4.0 reinterpreted as i16, and stores the product to %out.
; All three check prefixes expect the multiplier as the hex literal 0x4400
; (v_mul_lo_u16 on GFX10/VI; v_mul_u32_u24 on SI after masking %x with
; 0xffff) rather than as a floating-point operand.
define void @mul_inline_imm_4.0_i16(i16 addrspace(1)* %out, i16 %x) {
; GFX10-LABEL: mul_inline_imm_4.0_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT:    v_mul_lo_u16 v2, 0x4400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x44,0x00,0x00]
; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
;
; VI-LABEL: mul_inline_imm_4.0_i16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; VI-NEXT:    v_mul_lo_u16_e32 v2, 0x4400, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x44,0x00,0x00]
; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
;
; SI-LABEL: mul_inline_imm_4.0_i16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s6, 0
; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s4, s6
; SI-NEXT:    s_mov_b32 s5, s6
; SI-NEXT:    v_mul_u32_u24_e32 v2, 0x4400, v2
; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; SI-NEXT:    s_setpc_b64 s[30:31]
  %y = mul i16 %x, bitcast (half 4.0 to i16)
  store i16 %y, i16 addrspace(1)* %out
  ret void
}
1614
; Non-kernel function: multiplies the i16 argument %x by the bit pattern of
; half -4.0 reinterpreted as i16, and stores the product to %out.
; All three check prefixes expect the multiplier as the hex literal 0xc400
; (v_mul_lo_u16 on GFX10/VI; v_mul_u32_u24 on SI after masking %x with
; 0xffff) rather than as a floating-point operand.
define void @mul_inline_imm_neg_4.0_i16(i16 addrspace(1)* %out, i16 %x) {
; GFX10-LABEL: mul_inline_imm_neg_4.0_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT:    v_mul_lo_u16 v2, 0xc400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0xff,0xff]
; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
;
; VI-LABEL: mul_inline_imm_neg_4.0_i16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; VI-NEXT:    v_mul_lo_u16_e32 v2, 0xc400, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xc4,0xff,0xff]
; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
;
; SI-LABEL: mul_inline_imm_neg_4.0_i16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s6, 0
; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s4, s6
; SI-NEXT:    s_mov_b32 s5, s6
; SI-NEXT:    v_mul_u32_u24_e32 v2, 0xc400, v2
; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; SI-NEXT:    s_setpc_b64 s[30:31]
  %y = mul i16 %x, bitcast (half -4.0 to i16)
  store i16 %y, i16 addrspace(1)* %out
  ret void
}
1649
; Non-kernel function: multiplies the i16 argument %x by the bit pattern of
; the half constant 0xH3118 (the value this test's name calls inv2pi)
; reinterpreted as i16, and stores the product to %out.
; All three check prefixes expect the multiplier as the hex literal 0x3118
; (v_mul_lo_u16 on GFX10/VI; v_mul_u32_u24 on SI after masking %x with
; 0xffff) rather than as a floating-point operand.
define void @mul_inline_imm_inv2pi_i16(i16 addrspace(1)* %out, i16 %x) {
; GFX10-LABEL: mul_inline_imm_inv2pi_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT:    v_mul_lo_u16 v2, 0x3118, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x18,0x31,0x00,0x00]
; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
;
; VI-LABEL: mul_inline_imm_inv2pi_i16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; VI-NEXT:    v_mul_lo_u16_e32 v2, 0x3118, v2 ; encoding: [0xff,0x04,0x04,0x52,0x18,0x31,0x00,0x00]
; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
;
; SI-LABEL: mul_inline_imm_inv2pi_i16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s6, 0
; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s4, s6
; SI-NEXT:    s_mov_b32 s5, s6
; SI-NEXT:    v_mul_u32_u24_e32 v2, 0x3118, v2
; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; SI-NEXT:    s_setpc_b64 s[30:31]
  %y = mul i16 %x, bitcast (half 0xH3118 to i16)
  store i16 %y, i16 addrspace(1)* %out
  ret void
}
1684