1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=GFX10 %s
3; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=GFX11 %s
4; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI %s
5; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
6
7; FIXME: Merge into imm.ll
8
; Volatile store of the i16 constant -32768 (bit pattern 0x8000) through %out.
; The GFX10, GFX11 and VI checks expect the value to be materialized as
; 0xffff8000, while the SI checks expect 0x8000. Every prefix also expects a
; wait after the store (s_waitcnt_vscnt or s_waitcnt vmcnt(0)), and the GFX11
; checks expect the dlc modifier on the store itself.
; NOTE(review): the checked v_mov encodings are 8 bytes (source byte 0xff plus
; a trailing dword), which looks like a 32-bit literal rather than an inline
; constant despite the "inline_imm" name -- confirm that this is intended.
9define amdgpu_kernel void @store_inline_imm_neg_0.0_i16(i16 addrspace(1)* %out) {
10; GFX10-LABEL: store_inline_imm_neg_0.0_i16:
11; GFX10:       ; %bb.0:
12; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
13; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
14; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
15; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
16; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
17; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
18; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
19; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
20;
21; GFX11-LABEL: store_inline_imm_neg_0.0_i16:
22; GFX11:       ; %bb.0:
23; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
24; GFX11-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
25; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
26; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
27; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
28; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 dlc ; encoding: [0x00,0x20,0x64,0xe0,0x00,0x00,0x00,0x80]
29; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
30; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
31; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
32;
33; VI-LABEL: store_inline_imm_neg_0.0_i16:
34; VI:       ; %bb.0:
35; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
36; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
37; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
38; VI-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
39; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
40; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
41; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
42; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
43;
44; SI-LABEL: store_inline_imm_neg_0.0_i16:
45; SI:       ; %bb.0:
46; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
47; SI-NEXT:    s_mov_b32 s3, 0xf000
48; SI-NEXT:    s_mov_b32 s2, -1
49; SI-NEXT:    v_mov_b32_e32 v0, 0x8000
50; SI-NEXT:    s_waitcnt lgkmcnt(0)
51; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
52; SI-NEXT:    s_waitcnt vmcnt(0)
53; SI-NEXT:    s_endpgm
54  store volatile i16 -32768, i16 addrspace(1)* %out
55  ret void
56}
57
; Stores the half constant 0.0 through %out.
; All four check prefixes expect the value to be materialized with
; `v_mov_b32_e32 v0, 0`; where encodings are checked, this is the 4-byte form
; with no trailing literal dword.
58define amdgpu_kernel void @store_inline_imm_0.0_f16(half addrspace(1)* %out) {
59; GFX10-LABEL: store_inline_imm_0.0_f16:
60; GFX10:       ; %bb.0:
61; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
62; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
63; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
64; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
65; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
66; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
67; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
68;
69; GFX11-LABEL: store_inline_imm_0.0_f16:
70; GFX11:       ; %bb.0:
71; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
72; GFX11-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
73; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
74; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
75; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
76; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
77; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
78; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
79;
80; VI-LABEL: store_inline_imm_0.0_f16:
81; VI:       ; %bb.0:
82; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
83; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
84; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
85; VI-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
86; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
87; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
88; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
89;
90; SI-LABEL: store_inline_imm_0.0_f16:
91; SI:       ; %bb.0:
92; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
93; SI-NEXT:    s_mov_b32 s3, 0xf000
94; SI-NEXT:    s_mov_b32 s2, -1
95; SI-NEXT:    v_mov_b32_e32 v0, 0
96; SI-NEXT:    s_waitcnt lgkmcnt(0)
97; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
98; SI-NEXT:    s_endpgm
99  store half 0.0, half addrspace(1)* %out
100  ret void
101}
102
; Stores the half constant -0.0 through %out.
; The GFX10, GFX11 and VI checks expect the value to be materialized as
; 0xffff8000 (8-byte v_mov encoding with a trailing dword), while the SI
; checks expect 0x8000.
103define amdgpu_kernel void @store_imm_neg_0.0_f16(half addrspace(1)* %out) {
104; GFX10-LABEL: store_imm_neg_0.0_f16:
105; GFX10:       ; %bb.0:
106; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
107; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
108; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
109; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
110; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
111; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
112; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
113;
114; GFX11-LABEL: store_imm_neg_0.0_f16:
115; GFX11:       ; %bb.0:
116; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
117; GFX11-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
118; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
119; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
120; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
121; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
122; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
123; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
124;
125; VI-LABEL: store_imm_neg_0.0_f16:
126; VI:       ; %bb.0:
127; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
128; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
129; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
130; VI-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
131; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
132; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
133; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
134;
135; SI-LABEL: store_imm_neg_0.0_f16:
136; SI:       ; %bb.0:
137; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
138; SI-NEXT:    s_mov_b32 s3, 0xf000
139; SI-NEXT:    s_mov_b32 s2, -1
140; SI-NEXT:    v_mov_b32_e32 v0, 0x8000
141; SI-NEXT:    s_waitcnt lgkmcnt(0)
142; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
143; SI-NEXT:    s_endpgm
144  store half -0.0, half addrspace(1)* %out
145  ret void
146}
147
; Stores the half constant 0.5 through %out.
; All four check prefixes expect the value to be materialized as 0x3800.
; NOTE(review): the checked v_mov encodings are 8 bytes (source byte 0xff plus
; a trailing dword), which looks like a 32-bit literal rather than an inline
; constant despite the "inline_imm" name -- confirm that this is intended.
148define amdgpu_kernel void @store_inline_imm_0.5_f16(half addrspace(1)* %out) {
149; GFX10-LABEL: store_inline_imm_0.5_f16:
150; GFX10:       ; %bb.0:
151; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
152; GFX10-NEXT:    v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00]
153; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
154; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
155; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
156; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
157; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
158;
159; GFX11-LABEL: store_inline_imm_0.5_f16:
160; GFX11:       ; %bb.0:
161; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
162; GFX11-NEXT:    v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00]
163; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
164; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
165; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
166; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
167; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
168; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
169;
170; VI-LABEL: store_inline_imm_0.5_f16:
171; VI:       ; %bb.0:
172; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
173; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
174; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
175; VI-NEXT:    v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00]
176; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
177; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
178; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
179;
180; SI-LABEL: store_inline_imm_0.5_f16:
181; SI:       ; %bb.0:
182; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
183; SI-NEXT:    s_mov_b32 s3, 0xf000
184; SI-NEXT:    s_mov_b32 s2, -1
185; SI-NEXT:    v_mov_b32_e32 v0, 0x3800
186; SI-NEXT:    s_waitcnt lgkmcnt(0)
187; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
188; SI-NEXT:    s_endpgm
189  store half 0.5, half addrspace(1)* %out
190  ret void
191}
192
; Stores the half constant -0.5 through %out.
; The GFX10, GFX11 and VI checks expect the value to be materialized as
; 0xffffb800 (8-byte v_mov encoding with a trailing dword), while the SI
; checks expect 0xb800.
193define amdgpu_kernel void @store_inline_imm_m_0.5_f16(half addrspace(1)* %out) {
194; GFX10-LABEL: store_inline_imm_m_0.5_f16:
195; GFX10:       ; %bb.0:
196; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
197; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff]
198; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
199; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
200; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
201; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
202; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
203;
204; GFX11-LABEL: store_inline_imm_m_0.5_f16:
205; GFX11:       ; %bb.0:
206; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
207; GFX11-NEXT:    v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff]
208; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
209; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
210; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
211; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
212; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
213; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
214;
215; VI-LABEL: store_inline_imm_m_0.5_f16:
216; VI:       ; %bb.0:
217; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
218; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
219; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
220; VI-NEXT:    v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff]
221; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
222; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
223; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
224;
225; SI-LABEL: store_inline_imm_m_0.5_f16:
226; SI:       ; %bb.0:
227; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
228; SI-NEXT:    s_mov_b32 s3, 0xf000
229; SI-NEXT:    s_mov_b32 s2, -1
230; SI-NEXT:    v_mov_b32_e32 v0, 0xb800
231; SI-NEXT:    s_waitcnt lgkmcnt(0)
232; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
233; SI-NEXT:    s_endpgm
234  store half -0.5, half addrspace(1)* %out
235  ret void
236}
237
; Stores the half constant 1.0 through %out.
; All four check prefixes expect the value to be materialized as 0x3c00; where
; encodings are checked, the v_mov is the 8-byte form with a trailing dword.
238define amdgpu_kernel void @store_inline_imm_1.0_f16(half addrspace(1)* %out) {
239; GFX10-LABEL: store_inline_imm_1.0_f16:
240; GFX10:       ; %bb.0:
241; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
242; GFX10-NEXT:    v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00]
243; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
244; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
245; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
246; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
247; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
248;
249; GFX11-LABEL: store_inline_imm_1.0_f16:
250; GFX11:       ; %bb.0:
251; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
252; GFX11-NEXT:    v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00]
253; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
254; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
255; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
256; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
257; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
258; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
259;
260; VI-LABEL: store_inline_imm_1.0_f16:
261; VI:       ; %bb.0:
262; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
263; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
264; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
265; VI-NEXT:    v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00]
266; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
267; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
268; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
269;
270; SI-LABEL: store_inline_imm_1.0_f16:
271; SI:       ; %bb.0:
272; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
273; SI-NEXT:    s_mov_b32 s3, 0xf000
274; SI-NEXT:    s_mov_b32 s2, -1
275; SI-NEXT:    v_mov_b32_e32 v0, 0x3c00
276; SI-NEXT:    s_waitcnt lgkmcnt(0)
277; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
278; SI-NEXT:    s_endpgm
279  store half 1.0, half addrspace(1)* %out
280  ret void
281}
282
; Stores the half constant -1.0 through %out.
; The GFX10, GFX11 and VI checks expect the value to be materialized as
; 0xffffbc00 (8-byte v_mov encoding with a trailing dword), while the SI
; checks expect 0xbc00.
283define amdgpu_kernel void @store_inline_imm_m_1.0_f16(half addrspace(1)* %out) {
284; GFX10-LABEL: store_inline_imm_m_1.0_f16:
285; GFX10:       ; %bb.0:
286; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
287; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff]
288; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
289; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
290; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
291; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
292; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
293;
294; GFX11-LABEL: store_inline_imm_m_1.0_f16:
295; GFX11:       ; %bb.0:
296; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
297; GFX11-NEXT:    v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff]
298; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
299; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
300; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
301; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
302; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
303; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
304;
305; VI-LABEL: store_inline_imm_m_1.0_f16:
306; VI:       ; %bb.0:
307; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
308; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
309; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
310; VI-NEXT:    v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff]
311; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
312; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
313; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
314;
315; SI-LABEL: store_inline_imm_m_1.0_f16:
316; SI:       ; %bb.0:
317; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
318; SI-NEXT:    s_mov_b32 s3, 0xf000
319; SI-NEXT:    s_mov_b32 s2, -1
320; SI-NEXT:    v_mov_b32_e32 v0, 0xbc00
321; SI-NEXT:    s_waitcnt lgkmcnt(0)
322; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
323; SI-NEXT:    s_endpgm
324  store half -1.0, half addrspace(1)* %out
325  ret void
326}
327
; Stores the half constant 2.0 through %out.
; All four check prefixes expect the value to be materialized as 0x4000; where
; encodings are checked, the v_mov is the 8-byte form with a trailing dword.
328define amdgpu_kernel void @store_inline_imm_2.0_f16(half addrspace(1)* %out) {
329; GFX10-LABEL: store_inline_imm_2.0_f16:
330; GFX10:       ; %bb.0:
331; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
332; GFX10-NEXT:    v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00]
333; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
334; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
335; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
336; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
337; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
338;
339; GFX11-LABEL: store_inline_imm_2.0_f16:
340; GFX11:       ; %bb.0:
341; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
342; GFX11-NEXT:    v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00]
343; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
344; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
345; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
346; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
347; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
348; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
349;
350; VI-LABEL: store_inline_imm_2.0_f16:
351; VI:       ; %bb.0:
352; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
353; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
354; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
355; VI-NEXT:    v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00]
356; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
357; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
358; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
359;
360; SI-LABEL: store_inline_imm_2.0_f16:
361; SI:       ; %bb.0:
362; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
363; SI-NEXT:    s_mov_b32 s3, 0xf000
364; SI-NEXT:    s_mov_b32 s2, -1
365; SI-NEXT:    v_mov_b32_e32 v0, 0x4000
366; SI-NEXT:    s_waitcnt lgkmcnt(0)
367; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
368; SI-NEXT:    s_endpgm
369  store half 2.0, half addrspace(1)* %out
370  ret void
371}
372
; Stores the half constant -2.0 through %out.
; The GFX10, GFX11 and VI checks expect the value to be materialized as
; 0xffffc000 (8-byte v_mov encoding with a trailing dword), while the SI
; checks expect 0xc000.
373define amdgpu_kernel void @store_inline_imm_m_2.0_f16(half addrspace(1)* %out) {
374; GFX10-LABEL: store_inline_imm_m_2.0_f16:
375; GFX10:       ; %bb.0:
376; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
377; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff]
378; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
379; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
380; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
381; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
382; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
383;
384; GFX11-LABEL: store_inline_imm_m_2.0_f16:
385; GFX11:       ; %bb.0:
386; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
387; GFX11-NEXT:    v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff]
388; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
389; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
390; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
391; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
392; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
393; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
394;
395; VI-LABEL: store_inline_imm_m_2.0_f16:
396; VI:       ; %bb.0:
397; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
398; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
399; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
400; VI-NEXT:    v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff]
401; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
402; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
403; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
404;
405; SI-LABEL: store_inline_imm_m_2.0_f16:
406; SI:       ; %bb.0:
407; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
408; SI-NEXT:    s_mov_b32 s3, 0xf000
409; SI-NEXT:    s_mov_b32 s2, -1
410; SI-NEXT:    v_mov_b32_e32 v0, 0xc000
411; SI-NEXT:    s_waitcnt lgkmcnt(0)
412; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
413; SI-NEXT:    s_endpgm
414  store half -2.0, half addrspace(1)* %out
415  ret void
416}
417
; Stores the half constant 4.0 through %out.
; All four check prefixes expect the value to be materialized as 0x4400; where
; encodings are checked, the v_mov is the 8-byte form with a trailing dword.
418define amdgpu_kernel void @store_inline_imm_4.0_f16(half addrspace(1)* %out) {
419; GFX10-LABEL: store_inline_imm_4.0_f16:
420; GFX10:       ; %bb.0:
421; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
422; GFX10-NEXT:    v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00]
423; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
424; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
425; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
426; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
427; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
428;
429; GFX11-LABEL: store_inline_imm_4.0_f16:
430; GFX11:       ; %bb.0:
431; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
432; GFX11-NEXT:    v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00]
433; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
434; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
435; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
436; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
437; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
438; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
439;
440; VI-LABEL: store_inline_imm_4.0_f16:
441; VI:       ; %bb.0:
442; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
443; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
444; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
445; VI-NEXT:    v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00]
446; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
447; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
448; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
449;
450; SI-LABEL: store_inline_imm_4.0_f16:
451; SI:       ; %bb.0:
452; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
453; SI-NEXT:    s_mov_b32 s3, 0xf000
454; SI-NEXT:    s_mov_b32 s2, -1
455; SI-NEXT:    v_mov_b32_e32 v0, 0x4400
456; SI-NEXT:    s_waitcnt lgkmcnt(0)
457; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
458; SI-NEXT:    s_endpgm
459  store half 4.0, half addrspace(1)* %out
460  ret void
461}
462
; Stores the half constant -4.0 through %out.
; The GFX10, GFX11 and VI checks expect the value to be materialized as
; 0xffffc400 (8-byte v_mov encoding with a trailing dword), while the SI
; checks expect 0xc400.
463define amdgpu_kernel void @store_inline_imm_m_4.0_f16(half addrspace(1)* %out) {
464; GFX10-LABEL: store_inline_imm_m_4.0_f16:
465; GFX10:       ; %bb.0:
466; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
467; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffc400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff]
468; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
469; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
470; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
471; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
472; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
473;
474; GFX11-LABEL: store_inline_imm_m_4.0_f16:
475; GFX11:       ; %bb.0:
476; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
477; GFX11-NEXT:    v_mov_b32_e32 v0, 0xffffc400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff]
478; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
479; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
480; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
481; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
482; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
483; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
484;
485; VI-LABEL: store_inline_imm_m_4.0_f16:
486; VI:       ; %bb.0:
487; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
488; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
489; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
490; VI-NEXT:    v_mov_b32_e32 v0, 0xffffc400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff]
491; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
492; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
493; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
494;
495; SI-LABEL: store_inline_imm_m_4.0_f16:
496; SI:       ; %bb.0:
497; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
498; SI-NEXT:    s_mov_b32 s3, 0xf000
499; SI-NEXT:    s_mov_b32 s2, -1
500; SI-NEXT:    v_mov_b32_e32 v0, 0xc400
501; SI-NEXT:    s_waitcnt lgkmcnt(0)
502; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
503; SI-NEXT:    s_endpgm
504  store half -4.0, half addrspace(1)* %out
505  ret void
506}
507
; Stores the half constant with bit pattern 0xH3118 through %out; going by the
; function name this is presumably the f16 encoding of 1/(2*pi) -- TODO confirm.
; All four check prefixes expect the value to be materialized as 0x3118; where
; encodings are checked, the v_mov is the 8-byte form with a trailing dword.
508define amdgpu_kernel void @store_inline_imm_inv_2pi_f16(half addrspace(1)* %out) {
509; GFX10-LABEL: store_inline_imm_inv_2pi_f16:
510; GFX10:       ; %bb.0:
511; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
512; GFX10-NEXT:    v_mov_b32_e32 v0, 0x3118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00]
513; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
514; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
515; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
516; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
517; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
518;
519; GFX11-LABEL: store_inline_imm_inv_2pi_f16:
520; GFX11:       ; %bb.0:
521; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
522; GFX11-NEXT:    v_mov_b32_e32 v0, 0x3118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00]
523; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
524; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
525; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
526; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
527; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
528; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
529;
530; VI-LABEL: store_inline_imm_inv_2pi_f16:
531; VI:       ; %bb.0:
532; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
533; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
534; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
535; VI-NEXT:    v_mov_b32_e32 v0, 0x3118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00]
536; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
537; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
538; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
539;
540; SI-LABEL: store_inline_imm_inv_2pi_f16:
541; SI:       ; %bb.0:
542; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
543; SI-NEXT:    s_mov_b32 s3, 0xf000
544; SI-NEXT:    s_mov_b32 s2, -1
545; SI-NEXT:    v_mov_b32_e32 v0, 0x3118
546; SI-NEXT:    s_waitcnt lgkmcnt(0)
547; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
548; SI-NEXT:    s_endpgm
549  store half 0xH3118, half addrspace(1)* %out
550  ret void
551}
552
; Stores the f16 bit pattern 0xHB118 (0xH3118 with the sign bit set, i.e. about
; -1/(2*pi)) to %out. The gfx1010, gfx1100 and tonga runs expect the v_mov
; literal to be printed as 0xffffb118, while the tahiti run expects 0xb118;
; every run then writes it with a 16-bit buffer store.
553define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f16(half addrspace(1)* %out) {
554; GFX10-LABEL: store_inline_imm_m_inv_2pi_f16:
555; GFX10:       ; %bb.0:
556; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
557; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff]
558; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
559; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
560; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
561; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
562; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
563;
564; GFX11-LABEL: store_inline_imm_m_inv_2pi_f16:
565; GFX11:       ; %bb.0:
566; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
567; GFX11-NEXT:    v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff]
568; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
569; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
570; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
571; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
572; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
573; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
574;
575; VI-LABEL: store_inline_imm_m_inv_2pi_f16:
576; VI:       ; %bb.0:
577; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
578; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
579; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
580; VI-NEXT:    v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff]
581; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
582; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
583; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
584;
585; SI-LABEL: store_inline_imm_m_inv_2pi_f16:
586; SI:       ; %bb.0:
587; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
588; SI-NEXT:    s_mov_b32 s3, 0xf000
589; SI-NEXT:    s_mov_b32 s2, -1
590; SI-NEXT:    v_mov_b32_e32 v0, 0xb118
591; SI-NEXT:    s_waitcnt lgkmcnt(0)
592; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
593; SI-NEXT:    s_endpgm
594  store half 0xHB118, half addrspace(1)* %out
595  ret void
596}
597
; Stores the f16 constant 4096.0 to %out. All four runs expect it to be moved
; into v0 as the literal 0x6c00 (the f16 bit pattern of 4096.0) and written
; with a 16-bit buffer store.
598define amdgpu_kernel void @store_literal_imm_f16(half addrspace(1)* %out) {
599; GFX10-LABEL: store_literal_imm_f16:
600; GFX10:       ; %bb.0:
601; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
602; GFX10-NEXT:    v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00]
603; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
604; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
605; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
606; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
607; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
608;
609; GFX11-LABEL: store_literal_imm_f16:
610; GFX11:       ; %bb.0:
611; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
612; GFX11-NEXT:    v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00]
613; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
614; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
615; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
616; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
617; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
618; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
619;
620; VI-LABEL: store_literal_imm_f16:
621; VI:       ; %bb.0:
622; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
623; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
624; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
625; VI-NEXT:    v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00]
626; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
627; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
628; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
629;
630; SI-LABEL: store_literal_imm_f16:
631; SI:       ; %bb.0:
632; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
633; SI-NEXT:    s_mov_b32 s3, 0xf000
634; SI-NEXT:    s_mov_b32 s2, -1
635; SI-NEXT:    v_mov_b32_e32 v0, 0x6c00
636; SI-NEXT:    s_waitcnt lgkmcnt(0)
637; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
638; SI-NEXT:    s_endpgm
639  store half 4096.0, half addrspace(1)* %out
640  ret void
641}
642
; Adds 0.0 to the f16 kernel argument %x and stores the sum to %out. The add is
; expected to remain in the output on every run: the gfx1010, gfx1100 and tonga
; runs expect one v_add_f16_e64 taking 0 directly as an operand, while the
; tahiti run expects the add to be done in f32 (v_cvt_f32_f16, v_add_f32 with
; 0, then v_cvt_f16_f32).
643define amdgpu_kernel void @add_inline_imm_0.0_f16(half addrspace(1)* %out, half %x) {
644; GFX10-LABEL: add_inline_imm_0.0_f16:
645; GFX10:       ; %bb.0:
646; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
647; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
648; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
649; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
650; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
651; GFX10-NEXT:    v_add_f16_e64 v0, s2, 0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x00,0x01,0x00]
652; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
653; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
654; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
655;
656; GFX11-LABEL: add_inline_imm_0.0_f16:
657; GFX11:       ; %bb.0:
658; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
659; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
660; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
661; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
662; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
663; GFX11-NEXT:    v_add_f16_e64 v0, s2, 0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x00,0x01,0x00]
664; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
665; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
666; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
667; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
668;
669; VI-LABEL: add_inline_imm_0.0_f16:
670; VI:       ; %bb.0:
671; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
672; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
673; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
674; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
675; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
676; VI-NEXT:    v_add_f16_e64 v0, s6, 0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x00,0x01,0x00]
677; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
678; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
679;
680; SI-LABEL: add_inline_imm_0.0_f16:
681; SI:       ; %bb.0:
682; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
683; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
684; SI-NEXT:    s_mov_b32 s3, 0xf000
685; SI-NEXT:    s_waitcnt lgkmcnt(0)
686; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
687; SI-NEXT:    s_mov_b32 s2, -1
688; SI-NEXT:    v_add_f32_e32 v0, 0, v0
689; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
690; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
691; SI-NEXT:    s_endpgm
692  %y = fadd half %x, 0.0
693  store half %y, half addrspace(1)* %out
694  ret void
695}
696
; Adds 0.5 to the f16 kernel argument %x and stores the sum to %out. The
; gfx1010, gfx1100 and tonga runs expect one v_add_f16_e64 taking 0.5 directly
; as an operand, while the tahiti run expects the add to be done in f32
; (v_cvt_f32_f16, v_add_f32 with 0.5, then v_cvt_f16_f32).
697define amdgpu_kernel void @add_inline_imm_0.5_f16(half addrspace(1)* %out, half %x) {
698; GFX10-LABEL: add_inline_imm_0.5_f16:
699; GFX10:       ; %bb.0:
700; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
701; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
702; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
703; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
704; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
705; GFX10-NEXT:    v_add_f16_e64 v0, s2, 0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe0,0x01,0x00]
706; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
707; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
708; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
709;
710; GFX11-LABEL: add_inline_imm_0.5_f16:
711; GFX11:       ; %bb.0:
712; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
713; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
714; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
715; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
716; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
717; GFX11-NEXT:    v_add_f16_e64 v0, s2, 0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe0,0x01,0x00]
718; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
719; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
720; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
721; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
722;
723; VI-LABEL: add_inline_imm_0.5_f16:
724; VI:       ; %bb.0:
725; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
726; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
727; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
728; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
729; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
730; VI-NEXT:    v_add_f16_e64 v0, s6, 0.5 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xe0,0x01,0x00]
731; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
732; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
733;
734; SI-LABEL: add_inline_imm_0.5_f16:
735; SI:       ; %bb.0:
736; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
737; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
738; SI-NEXT:    s_mov_b32 s3, 0xf000
739; SI-NEXT:    s_waitcnt lgkmcnt(0)
740; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
741; SI-NEXT:    s_mov_b32 s2, -1
742; SI-NEXT:    v_add_f32_e32 v0, 0.5, v0
743; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
744; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
745; SI-NEXT:    s_endpgm
746  %y = fadd half %x, 0.5
747  store half %y, half addrspace(1)* %out
748  ret void
749}
750
; Adds -0.5 to the f16 kernel argument %x and stores the sum to %out. The
; gfx1010, gfx1100 and tonga runs expect one v_add_f16_e64 taking -0.5 directly
; as an operand, while the tahiti run expects the add to be done in f32
; (v_cvt_f32_f16, v_add_f32 with -0.5, then v_cvt_f16_f32).
751define amdgpu_kernel void @add_inline_imm_neg_0.5_f16(half addrspace(1)* %out, half %x) {
752; GFX10-LABEL: add_inline_imm_neg_0.5_f16:
753; GFX10:       ; %bb.0:
754; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
755; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
756; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
757; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
758; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
759; GFX10-NEXT:    v_add_f16_e64 v0, s2, -0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe2,0x01,0x00]
760; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
761; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
762; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
763;
764; GFX11-LABEL: add_inline_imm_neg_0.5_f16:
765; GFX11:       ; %bb.0:
766; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
767; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
768; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
769; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
770; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
771; GFX11-NEXT:    v_add_f16_e64 v0, s2, -0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe2,0x01,0x00]
772; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
773; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
774; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
775; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
776;
777; VI-LABEL: add_inline_imm_neg_0.5_f16:
778; VI:       ; %bb.0:
779; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
780; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
781; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
782; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
783; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
784; VI-NEXT:    v_add_f16_e64 v0, s6, -0.5 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xe2,0x01,0x00]
785; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
786; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
787;
788; SI-LABEL: add_inline_imm_neg_0.5_f16:
789; SI:       ; %bb.0:
790; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
791; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
792; SI-NEXT:    s_mov_b32 s3, 0xf000
793; SI-NEXT:    s_waitcnt lgkmcnt(0)
794; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
795; SI-NEXT:    s_mov_b32 s2, -1
796; SI-NEXT:    v_add_f32_e32 v0, -0.5, v0
797; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
798; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
799; SI-NEXT:    s_endpgm
800  %y = fadd half %x, -0.5
801  store half %y, half addrspace(1)* %out
802  ret void
803}
804
; Adds 1.0 to the f16 kernel argument %x and stores the sum to %out. The
; gfx1010, gfx1100 and tonga runs expect one v_add_f16_e64 taking 1.0 directly
; as an operand, while the tahiti run expects the add to be done in f32
; (v_cvt_f32_f16, v_add_f32 with 1.0, then v_cvt_f16_f32).
805define amdgpu_kernel void @add_inline_imm_1.0_f16(half addrspace(1)* %out, half %x) {
806; GFX10-LABEL: add_inline_imm_1.0_f16:
807; GFX10:       ; %bb.0:
808; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
809; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
810; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
811; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
812; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
813; GFX10-NEXT:    v_add_f16_e64 v0, s2, 1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe4,0x01,0x00]
814; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
815; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
816; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
817;
818; GFX11-LABEL: add_inline_imm_1.0_f16:
819; GFX11:       ; %bb.0:
820; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
821; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
822; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
823; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
824; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
825; GFX11-NEXT:    v_add_f16_e64 v0, s2, 1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe4,0x01,0x00]
826; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
827; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
828; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
829; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
830;
831; VI-LABEL: add_inline_imm_1.0_f16:
832; VI:       ; %bb.0:
833; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
834; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
835; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
836; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
837; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
838; VI-NEXT:    v_add_f16_e64 v0, s6, 1.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xe4,0x01,0x00]
839; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
840; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
841;
842; SI-LABEL: add_inline_imm_1.0_f16:
843; SI:       ; %bb.0:
844; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
845; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
846; SI-NEXT:    s_mov_b32 s3, 0xf000
847; SI-NEXT:    s_waitcnt lgkmcnt(0)
848; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
849; SI-NEXT:    s_mov_b32 s2, -1
850; SI-NEXT:    v_add_f32_e32 v0, 1.0, v0
851; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
852; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
853; SI-NEXT:    s_endpgm
854  %y = fadd half %x, 1.0
855  store half %y, half addrspace(1)* %out
856  ret void
857}
858
; Adds -1.0 to the f16 kernel argument %x and stores the sum to %out. The
; gfx1010, gfx1100 and tonga runs expect one v_add_f16_e64 taking -1.0 directly
; as an operand, while the tahiti run expects the add to be done in f32
; (v_cvt_f32_f16, v_add_f32 with -1.0, then v_cvt_f16_f32).
859define amdgpu_kernel void @add_inline_imm_neg_1.0_f16(half addrspace(1)* %out, half %x) {
860; GFX10-LABEL: add_inline_imm_neg_1.0_f16:
861; GFX10:       ; %bb.0:
862; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
863; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
864; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
865; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
866; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
867; GFX10-NEXT:    v_add_f16_e64 v0, s2, -1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe6,0x01,0x00]
868; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
869; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
870; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
871;
872; GFX11-LABEL: add_inline_imm_neg_1.0_f16:
873; GFX11:       ; %bb.0:
874; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
875; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
876; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
877; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
878; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
879; GFX11-NEXT:    v_add_f16_e64 v0, s2, -1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe6,0x01,0x00]
880; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
881; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
882; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
883; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
884;
885; VI-LABEL: add_inline_imm_neg_1.0_f16:
886; VI:       ; %bb.0:
887; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
888; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
889; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
890; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
891; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
892; VI-NEXT:    v_add_f16_e64 v0, s6, -1.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xe6,0x01,0x00]
893; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
894; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
895;
896; SI-LABEL: add_inline_imm_neg_1.0_f16:
897; SI:       ; %bb.0:
898; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
899; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
900; SI-NEXT:    s_mov_b32 s3, 0xf000
901; SI-NEXT:    s_waitcnt lgkmcnt(0)
902; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
903; SI-NEXT:    s_mov_b32 s2, -1
904; SI-NEXT:    v_add_f32_e32 v0, -1.0, v0
905; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
906; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
907; SI-NEXT:    s_endpgm
908  %y = fadd half %x, -1.0
909  store half %y, half addrspace(1)* %out
910  ret void
911}
912
; Adds 2.0 to the f16 kernel argument %x and stores the sum to %out. The
; gfx1010, gfx1100 and tonga runs expect one v_add_f16_e64 taking 2.0 directly
; as an operand, while the tahiti run expects the add to be done in f32
; (v_cvt_f32_f16, v_add_f32 with 2.0, then v_cvt_f16_f32).
913define amdgpu_kernel void @add_inline_imm_2.0_f16(half addrspace(1)* %out, half %x) {
914; GFX10-LABEL: add_inline_imm_2.0_f16:
915; GFX10:       ; %bb.0:
916; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
917; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
918; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
919; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
920; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
921; GFX10-NEXT:    v_add_f16_e64 v0, s2, 2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe8,0x01,0x00]
922; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
923; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
924; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
925;
926; GFX11-LABEL: add_inline_imm_2.0_f16:
927; GFX11:       ; %bb.0:
928; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
929; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
930; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
931; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
932; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
933; GFX11-NEXT:    v_add_f16_e64 v0, s2, 2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe8,0x01,0x00]
934; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
935; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
936; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
937; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
938;
939; VI-LABEL: add_inline_imm_2.0_f16:
940; VI:       ; %bb.0:
941; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
942; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
943; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
944; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
945; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
946; VI-NEXT:    v_add_f16_e64 v0, s6, 2.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xe8,0x01,0x00]
947; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
948; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
949;
950; SI-LABEL: add_inline_imm_2.0_f16:
951; SI:       ; %bb.0:
952; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
953; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
954; SI-NEXT:    s_mov_b32 s3, 0xf000
955; SI-NEXT:    s_waitcnt lgkmcnt(0)
956; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
957; SI-NEXT:    s_mov_b32 s2, -1
958; SI-NEXT:    v_add_f32_e32 v0, 2.0, v0
959; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
960; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
961; SI-NEXT:    s_endpgm
962  %y = fadd half %x, 2.0
963  store half %y, half addrspace(1)* %out
964  ret void
965}
966
; Adds -2.0 to the f16 kernel argument %x and stores the sum to %out. The
; gfx1010, gfx1100 and tonga runs expect one v_add_f16_e64 taking -2.0 directly
; as an operand, while the tahiti run expects the add to be done in f32
; (v_cvt_f32_f16, v_add_f32 with -2.0, then v_cvt_f16_f32).
967define amdgpu_kernel void @add_inline_imm_neg_2.0_f16(half addrspace(1)* %out, half %x) {
968; GFX10-LABEL: add_inline_imm_neg_2.0_f16:
969; GFX10:       ; %bb.0:
970; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
971; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
972; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
973; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
974; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
975; GFX10-NEXT:    v_add_f16_e64 v0, s2, -2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xea,0x01,0x00]
976; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
977; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
978; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
979;
980; GFX11-LABEL: add_inline_imm_neg_2.0_f16:
981; GFX11:       ; %bb.0:
982; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
983; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
984; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
985; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
986; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
987; GFX11-NEXT:    v_add_f16_e64 v0, s2, -2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xea,0x01,0x00]
988; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
989; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
990; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
991; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
992;
993; VI-LABEL: add_inline_imm_neg_2.0_f16:
994; VI:       ; %bb.0:
995; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
996; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
997; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
998; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
999; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1000; VI-NEXT:    v_add_f16_e64 v0, s6, -2.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xea,0x01,0x00]
1001; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1002; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1003;
1004; SI-LABEL: add_inline_imm_neg_2.0_f16:
1005; SI:       ; %bb.0:
1006; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
1007; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
1008; SI-NEXT:    s_mov_b32 s3, 0xf000
1009; SI-NEXT:    s_waitcnt lgkmcnt(0)
1010; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
1011; SI-NEXT:    s_mov_b32 s2, -1
1012; SI-NEXT:    v_add_f32_e32 v0, -2.0, v0
1013; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1014; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1015; SI-NEXT:    s_endpgm
1016  %y = fadd half %x, -2.0
1017  store half %y, half addrspace(1)* %out
1018  ret void
1019}
1020
; Adds 4.0 to the f16 kernel argument %x and stores the sum to %out. The
; gfx1010, gfx1100 and tonga runs expect one v_add_f16_e64 taking 4.0 directly
; as an operand, while the tahiti run expects the add to be done in f32
; (v_cvt_f32_f16, v_add_f32 with 4.0, then v_cvt_f16_f32).
1021define amdgpu_kernel void @add_inline_imm_4.0_f16(half addrspace(1)* %out, half %x) {
1022; GFX10-LABEL: add_inline_imm_4.0_f16:
1023; GFX10:       ; %bb.0:
1024; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1025; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1026; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1027; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1028; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1029; GFX10-NEXT:    v_add_f16_e64 v0, s2, 4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xec,0x01,0x00]
1030; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1031; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1032; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1033;
1034; GFX11-LABEL: add_inline_imm_4.0_f16:
1035; GFX11:       ; %bb.0:
1036; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
1037; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
1038; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
1039; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
1040; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1041; GFX11-NEXT:    v_add_f16_e64 v0, s2, 4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xec,0x01,0x00]
1042; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1043; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
1044; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
1045; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1046;
1047; VI-LABEL: add_inline_imm_4.0_f16:
1048; VI:       ; %bb.0:
1049; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1050; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1051; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1052; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1053; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1054; VI-NEXT:    v_add_f16_e64 v0, s6, 4.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xec,0x01,0x00]
1055; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1056; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1057;
1058; SI-LABEL: add_inline_imm_4.0_f16:
1059; SI:       ; %bb.0:
1060; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
1061; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
1062; SI-NEXT:    s_mov_b32 s3, 0xf000
1063; SI-NEXT:    s_waitcnt lgkmcnt(0)
1064; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
1065; SI-NEXT:    s_mov_b32 s2, -1
1066; SI-NEXT:    v_add_f32_e32 v0, 4.0, v0
1067; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1068; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1069; SI-NEXT:    s_endpgm
1070  %y = fadd half %x, 4.0
1071  store half %y, half addrspace(1)* %out
1072  ret void
1073}
1074
; Test: fadd of a half kernel argument %x with the constant -4.0; the result
; is stored through %out.
; The gfx1010, gfx1100 and tonga checks expect -4.0 to appear directly as a
; source operand of v_add_f16_e64.
; The tahiti checks expect the add to be carried out in f32: the input is
; converted with v_cvt_f32_f16, -4.0 is the v_add_f32 operand, and the result
; is converted back with v_cvt_f16_f32 before the store.
1075define amdgpu_kernel void @add_inline_imm_neg_4.0_f16(half addrspace(1)* %out, half %x) {
1076; GFX10-LABEL: add_inline_imm_neg_4.0_f16:
1077; GFX10:       ; %bb.0:
1078; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1079; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1080; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1081; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1082; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1083; GFX10-NEXT:    v_add_f16_e64 v0, s2, -4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xee,0x01,0x00]
1084; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1085; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1086; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1087;
1088; GFX11-LABEL: add_inline_imm_neg_4.0_f16:
1089; GFX11:       ; %bb.0:
1090; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
1091; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
1092; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
1093; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
1094; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1095; GFX11-NEXT:    v_add_f16_e64 v0, s2, -4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xee,0x01,0x00]
1096; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1097; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
1098; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
1099; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1100;
1101; VI-LABEL: add_inline_imm_neg_4.0_f16:
1102; VI:       ; %bb.0:
1103; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1104; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1105; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1106; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1107; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1108; VI-NEXT:    v_add_f16_e64 v0, s6, -4.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xee,0x01,0x00]
1109; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1110; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1111;
1112; SI-LABEL: add_inline_imm_neg_4.0_f16:
1113; SI:       ; %bb.0:
1114; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
1115; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
1116; SI-NEXT:    s_mov_b32 s3, 0xf000
1117; SI-NEXT:    s_waitcnt lgkmcnt(0)
1118; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
1119; SI-NEXT:    s_mov_b32 s2, -1
1120; SI-NEXT:    v_add_f32_e32 v0, -4.0, v0
1121; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1122; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1123; SI-NEXT:    s_endpgm
1124  %y = fadd half %x, -4.0
1125  store half %y, half addrspace(1)* %out
1126  ret void
1127}
1128
; Test: loads a half from %in, adds the constant 0.5 and stores the result
; through %out.
; In the IR the constant is the second fadd operand; the gfx1010, gfx1100 and
; tonga checks expect it as the first source operand of a 4-byte
; v_add_f16_e32, with the loaded VGPR as the second operand.
; The tahiti checks expect the add to be done in f32 between a
; v_cvt_f32_f16 / v_cvt_f16_f32 pair, with 0.5 as the v_add_f32 operand.
1129define amdgpu_kernel void @commute_add_inline_imm_0.5_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
1130; GFX10-LABEL: commute_add_inline_imm_0.5_f16:
1131; GFX10:       ; %bb.0:
1132; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
1133; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
1134; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
1135; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
1136; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
1137; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1138; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
1139; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
1140; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
1141; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
1142; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
1143; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
1144; GFX10-NEXT:    v_add_f16_e32 v0, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x64]
1145; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1146; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1147;
1148; GFX11-LABEL: commute_add_inline_imm_0.5_f16:
1149; GFX11:       ; %bb.0:
1150; GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x0 ; encoding: [0x00,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8]
1151; GFX11-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1152; GFX11-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31]
1153; GFX11-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1154; GFX11-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1155; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1156; GFX11-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1157; GFX11-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1158; GFX11-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1159; GFX11-NEXT:    buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1160; GFX11-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1161; GFX11-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
1162; GFX11-NEXT:    v_add_f16_e32 v0, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x64]
1163; GFX11-NEXT:    buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80]
1164; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
1165; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1166;
1167; VI-LABEL: commute_add_inline_imm_0.5_f16:
1168; VI:       ; %bb.0:
1169; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00]
1170; VI-NEXT:    s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11]
1171; VI-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1172; VI-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1173; VI-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1174; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1175; VI-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1176; VI-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1177; VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1178; VI-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1179; VI-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1180; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1181; VI-NEXT:    v_add_f16_e32 v0, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x3e]
1182; VI-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1183; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1184;
1185; SI-LABEL: commute_add_inline_imm_0.5_f16:
1186; SI:       ; %bb.0:
1187; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1188; SI-NEXT:    s_mov_b32 s7, 0xf000
1189; SI-NEXT:    s_mov_b32 s6, -1
1190; SI-NEXT:    s_mov_b32 s10, s6
1191; SI-NEXT:    s_mov_b32 s11, s7
1192; SI-NEXT:    s_waitcnt lgkmcnt(0)
1193; SI-NEXT:    s_mov_b32 s8, s2
1194; SI-NEXT:    s_mov_b32 s9, s3
1195; SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
1196; SI-NEXT:    s_mov_b32 s4, s0
1197; SI-NEXT:    s_mov_b32 s5, s1
1198; SI-NEXT:    s_waitcnt vmcnt(0)
1199; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1200; SI-NEXT:    v_add_f32_e32 v0, 0.5, v0
1201; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1202; SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
1203; SI-NEXT:    s_endpgm
1204  %x = load half, half addrspace(1)* %in
1205  %y = fadd half %x, 0.5
1206  store half %y, half addrspace(1)* %out
1207  ret void
1208}
1209
; Test: loads a half from %in, adds the constant 1024.0 and stores the result
; through %out.
; Unlike the neighbouring inline-immediate tests, the gfx1010, gfx1100 and
; tonga checks expect the constant as the literal 0x6400 (the half bit pattern
; of 1024.0) in the first source operand of v_add_f16_e32; the expected
; encoding is 8 bytes, the last four holding the literal.
; The tahiti checks expect the add in f32 with the literal 0x44800000
; (1024.0 as f32) between a v_cvt_f32_f16 / v_cvt_f16_f32 pair.
1210define amdgpu_kernel void @commute_add_literal_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
1211; GFX10-LABEL: commute_add_literal_f16:
1212; GFX10:       ; %bb.0:
1213; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
1214; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
1215; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
1216; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
1217; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
1218; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1219; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
1220; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
1221; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
1222; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
1223; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
1224; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
1225; GFX10-NEXT:    v_add_f16_e32 v0, 0x6400, v0 ; encoding: [0xff,0x00,0x00,0x64,0x00,0x64,0x00,0x00]
1226; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1227; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1228;
1229; GFX11-LABEL: commute_add_literal_f16:
1230; GFX11:       ; %bb.0:
1231; GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x0 ; encoding: [0x00,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8]
1232; GFX11-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1233; GFX11-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31]
1234; GFX11-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1235; GFX11-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1236; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1237; GFX11-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1238; GFX11-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1239; GFX11-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1240; GFX11-NEXT:    buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1241; GFX11-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1242; GFX11-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
1243; GFX11-NEXT:    v_add_f16_e32 v0, 0x6400, v0 ; encoding: [0xff,0x00,0x00,0x64,0x00,0x64,0x00,0x00]
1244; GFX11-NEXT:    buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80]
1245; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
1246; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1247;
1248; VI-LABEL: commute_add_literal_f16:
1249; VI:       ; %bb.0:
1250; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00]
1251; VI-NEXT:    s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11]
1252; VI-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1253; VI-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1254; VI-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1255; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1256; VI-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1257; VI-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1258; VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1259; VI-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1260; VI-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1261; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1262; VI-NEXT:    v_add_f16_e32 v0, 0x6400, v0 ; encoding: [0xff,0x00,0x00,0x3e,0x00,0x64,0x00,0x00]
1263; VI-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1264; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1265;
1266; SI-LABEL: commute_add_literal_f16:
1267; SI:       ; %bb.0:
1268; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1269; SI-NEXT:    s_mov_b32 s7, 0xf000
1270; SI-NEXT:    s_mov_b32 s6, -1
1271; SI-NEXT:    s_mov_b32 s10, s6
1272; SI-NEXT:    s_mov_b32 s11, s7
1273; SI-NEXT:    s_waitcnt lgkmcnt(0)
1274; SI-NEXT:    s_mov_b32 s8, s2
1275; SI-NEXT:    s_mov_b32 s9, s3
1276; SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
1277; SI-NEXT:    s_mov_b32 s4, s0
1278; SI-NEXT:    s_mov_b32 s5, s1
1279; SI-NEXT:    s_waitcnt vmcnt(0)
1280; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1281; SI-NEXT:    v_add_f32_e32 v0, 0x44800000, v0
1282; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1283; SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
1284; SI-NEXT:    s_endpgm
1285  %x = load half, half addrspace(1)* %in
1286  %y = fadd half %x, 1024.0
1287  store half %y, half addrspace(1)* %out
1288  ret void
1289}
1290
; Test: fadd of a half kernel argument %x with the half constant whose bit
; pattern is 0x0001 (written 0xH0001 in the IR); the result is stored through
; %out.
; The gfx1010, gfx1100 and tonga checks expect that constant to be printed as
; the operand `1` of v_add_f16_e64.
; The tahiti checks expect the add in f32 with the literal 0x33800000 between
; a v_cvt_f32_f16 / v_cvt_f16_f32 pair.
1291define amdgpu_kernel void @add_inline_imm_1_f16(half addrspace(1)* %out, half %x) {
1292; GFX10-LABEL: add_inline_imm_1_f16:
1293; GFX10:       ; %bb.0:
1294; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1295; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1296; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1297; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1298; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1299; GFX10-NEXT:    v_add_f16_e64 v0, s2, 1 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x02,0x01,0x00]
1300; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1301; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1302; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1303;
1304; GFX11-LABEL: add_inline_imm_1_f16:
1305; GFX11:       ; %bb.0:
1306; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
1307; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
1308; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
1309; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
1310; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1311; GFX11-NEXT:    v_add_f16_e64 v0, s2, 1 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x02,0x01,0x00]
1312; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1313; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
1314; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
1315; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1316;
1317; VI-LABEL: add_inline_imm_1_f16:
1318; VI:       ; %bb.0:
1319; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1320; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1321; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1322; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1323; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1324; VI-NEXT:    v_add_f16_e64 v0, s6, 1 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x02,0x01,0x00]
1325; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1326; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1327;
1328; SI-LABEL: add_inline_imm_1_f16:
1329; SI:       ; %bb.0:
1330; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
1331; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
1332; SI-NEXT:    s_mov_b32 s3, 0xf000
1333; SI-NEXT:    s_waitcnt lgkmcnt(0)
1334; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
1335; SI-NEXT:    s_mov_b32 s2, -1
1336; SI-NEXT:    v_add_f32_e32 v0, 0x33800000, v0
1337; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1338; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1339; SI-NEXT:    s_endpgm
1340  %y = fadd half %x, 0xH0001
1341  store half %y, half addrspace(1)* %out
1342  ret void
1343}
1344
; Test: fadd of a half kernel argument %x with the half constant whose bit
; pattern is 0x0002 (written 0xH0002 in the IR); the result is stored through
; %out.
; The gfx1010, gfx1100 and tonga checks expect that constant to be printed as
; the operand `2` of v_add_f16_e64.
; The tahiti checks expect the add in f32 with the literal 0x34000000 between
; a v_cvt_f32_f16 / v_cvt_f16_f32 pair.
1345define amdgpu_kernel void @add_inline_imm_2_f16(half addrspace(1)* %out, half %x) {
1346; GFX10-LABEL: add_inline_imm_2_f16:
1347; GFX10:       ; %bb.0:
1348; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1349; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1350; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1351; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1352; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1353; GFX10-NEXT:    v_add_f16_e64 v0, s2, 2 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x04,0x01,0x00]
1354; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1355; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1356; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1357;
1358; GFX11-LABEL: add_inline_imm_2_f16:
1359; GFX11:       ; %bb.0:
1360; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
1361; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
1362; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
1363; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
1364; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1365; GFX11-NEXT:    v_add_f16_e64 v0, s2, 2 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x04,0x01,0x00]
1366; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1367; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
1368; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
1369; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1370;
1371; VI-LABEL: add_inline_imm_2_f16:
1372; VI:       ; %bb.0:
1373; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1374; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1375; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1376; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1377; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1378; VI-NEXT:    v_add_f16_e64 v0, s6, 2 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x04,0x01,0x00]
1379; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1380; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1381;
1382; SI-LABEL: add_inline_imm_2_f16:
1383; SI:       ; %bb.0:
1384; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
1385; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
1386; SI-NEXT:    s_mov_b32 s3, 0xf000
1387; SI-NEXT:    s_waitcnt lgkmcnt(0)
1388; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
1389; SI-NEXT:    s_mov_b32 s2, -1
1390; SI-NEXT:    v_add_f32_e32 v0, 0x34000000, v0
1391; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1392; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1393; SI-NEXT:    s_endpgm
1394  %y = fadd half %x, 0xH0002
1395  store half %y, half addrspace(1)* %out
1396  ret void
1397}
1398
; Test: fadd of a half kernel argument %x with the half constant whose bit
; pattern is 0x0010 (written 0xH0010 in the IR); the result is stored through
; %out.
; The gfx1010, gfx1100 and tonga checks expect that constant to be printed as
; the operand `16` of v_add_f16_e64.
; The tahiti checks expect the add in f32 with the literal 0x35800000 between
; a v_cvt_f32_f16 / v_cvt_f16_f32 pair.
1399define amdgpu_kernel void @add_inline_imm_16_f16(half addrspace(1)* %out, half %x) {
1400; GFX10-LABEL: add_inline_imm_16_f16:
1401; GFX10:       ; %bb.0:
1402; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1403; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1404; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1405; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1406; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1407; GFX10-NEXT:    v_add_f16_e64 v0, s2, 16 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x20,0x01,0x00]
1408; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1409; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1410; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1411;
1412; GFX11-LABEL: add_inline_imm_16_f16:
1413; GFX11:       ; %bb.0:
1414; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
1415; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
1416; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
1417; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
1418; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1419; GFX11-NEXT:    v_add_f16_e64 v0, s2, 16 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x20,0x01,0x00]
1420; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1421; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
1422; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
1423; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1424;
1425; VI-LABEL: add_inline_imm_16_f16:
1426; VI:       ; %bb.0:
1427; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1428; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1429; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1430; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1431; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1432; VI-NEXT:    v_add_f16_e64 v0, s6, 16 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x20,0x01,0x00]
1433; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1434; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1435;
1436; SI-LABEL: add_inline_imm_16_f16:
1437; SI:       ; %bb.0:
1438; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
1439; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
1440; SI-NEXT:    s_mov_b32 s3, 0xf000
1441; SI-NEXT:    s_waitcnt lgkmcnt(0)
1442; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
1443; SI-NEXT:    s_mov_b32 s2, -1
1444; SI-NEXT:    v_add_f32_e32 v0, 0x35800000, v0
1445; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1446; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1447; SI-NEXT:    s_endpgm
1448  %y = fadd half %x, 0xH0010
1449  store half %y, half addrspace(1)* %out
1450  ret void
1451}
1452
; Test: despite the _f16 suffix, the arithmetic here is an integer add. An i16
; is loaded from %in, -1 is added with `add i16`, and the sum is bitcast to
; half only so it can be stored through the half pointer %out.
; The gfx1010 and gfx1100 checks expect v_add_nc_u16 with -1 as the last
; operand; the tonga checks expect v_add_u16_e32 with -1 as the first source
; operand; the tahiti checks expect a 32-bit v_add_i32_e32 with -1, followed
; by a 16-bit store.
1453define amdgpu_kernel void @add_inline_imm_neg_1_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) {
1454; GFX10-LABEL: add_inline_imm_neg_1_f16:
1455; GFX10:       ; %bb.0:
1456; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
1457; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
1458; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
1459; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
1460; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
1461; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1462; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
1463; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
1464; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
1465; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
1466; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
1467; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
1468; GFX10-NEXT:    v_add_nc_u16 v0, v0, -1 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0x83,0x01,0x00]
1469; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1470; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1471;
1472; GFX11-LABEL: add_inline_imm_neg_1_f16:
1473; GFX11:       ; %bb.0:
1474; GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x0 ; encoding: [0x00,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8]
1475; GFX11-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1476; GFX11-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31]
1477; GFX11-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1478; GFX11-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1479; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1480; GFX11-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1481; GFX11-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1482; GFX11-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1483; GFX11-NEXT:    buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1484; GFX11-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1485; GFX11-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
1486; GFX11-NEXT:    v_add_nc_u16 v0, v0, -1 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0x83,0x01,0x00]
1487; GFX11-NEXT:    buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80]
1488; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
1489; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1490;
1491; VI-LABEL: add_inline_imm_neg_1_f16:
1492; VI:       ; %bb.0:
1493; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00]
1494; VI-NEXT:    s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11]
1495; VI-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1496; VI-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1497; VI-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1498; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1499; VI-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1500; VI-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1501; VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1502; VI-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1503; VI-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1504; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1505; VI-NEXT:    v_add_u16_e32 v0, -1, v0 ; encoding: [0xc1,0x00,0x00,0x4c]
1506; VI-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1507; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1508;
1509; SI-LABEL: add_inline_imm_neg_1_f16:
1510; SI:       ; %bb.0:
1511; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1512; SI-NEXT:    s_mov_b32 s7, 0xf000
1513; SI-NEXT:    s_mov_b32 s6, -1
1514; SI-NEXT:    s_mov_b32 s10, s6
1515; SI-NEXT:    s_mov_b32 s11, s7
1516; SI-NEXT:    s_waitcnt lgkmcnt(0)
1517; SI-NEXT:    s_mov_b32 s8, s2
1518; SI-NEXT:    s_mov_b32 s9, s3
1519; SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
1520; SI-NEXT:    s_mov_b32 s4, s0
1521; SI-NEXT:    s_mov_b32 s5, s1
1522; SI-NEXT:    s_waitcnt vmcnt(0)
1523; SI-NEXT:    v_add_i32_e32 v0, vcc, -1, v0
1524; SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
1525; SI-NEXT:    s_endpgm
1526  %x = load i16, i16 addrspace(1)* %in
1527  %y = add i16 %x, -1
1528  %ybc = bitcast i16 %y to half
1529  store half %ybc, half addrspace(1)* %out
1530  ret void
1531}
1532
; Test: despite the _f16 suffix, the arithmetic here is an integer add. An i16
; is loaded from %in, -2 is added with `add i16`, and the sum is bitcast to
; half only so it can be stored through the half pointer %out.
; The gfx1010 and gfx1100 checks expect v_add_nc_u16 with -2 as the last
; operand; the tonga checks expect v_add_u16_e32 with -2 as the first source
; operand; the tahiti checks expect a 32-bit v_add_i32_e32 with -2, followed
; by a 16-bit store.
1533define amdgpu_kernel void @add_inline_imm_neg_2_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) {
1534; GFX10-LABEL: add_inline_imm_neg_2_f16:
1535; GFX10:       ; %bb.0:
1536; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
1537; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
1538; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
1539; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
1540; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
1541; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1542; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
1543; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
1544; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
1545; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
1546; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
1547; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
1548; GFX10-NEXT:    v_add_nc_u16 v0, v0, -2 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0x85,0x01,0x00]
1549; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1550; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1551;
1552; GFX11-LABEL: add_inline_imm_neg_2_f16:
1553; GFX11:       ; %bb.0:
1554; GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x0 ; encoding: [0x00,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8]
1555; GFX11-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1556; GFX11-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31]
1557; GFX11-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1558; GFX11-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1559; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1560; GFX11-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1561; GFX11-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1562; GFX11-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1563; GFX11-NEXT:    buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1564; GFX11-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1565; GFX11-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
1566; GFX11-NEXT:    v_add_nc_u16 v0, v0, -2 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0x85,0x01,0x00]
1567; GFX11-NEXT:    buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80]
1568; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
1569; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1570;
1571; VI-LABEL: add_inline_imm_neg_2_f16:
1572; VI:       ; %bb.0:
1573; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00]
1574; VI-NEXT:    s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11]
1575; VI-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1576; VI-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1577; VI-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1578; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1579; VI-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1580; VI-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1581; VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1582; VI-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1583; VI-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1584; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1585; VI-NEXT:    v_add_u16_e32 v0, -2, v0 ; encoding: [0xc2,0x00,0x00,0x4c]
1586; VI-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1587; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1588;
1589; SI-LABEL: add_inline_imm_neg_2_f16:
1590; SI:       ; %bb.0:
1591; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1592; SI-NEXT:    s_mov_b32 s7, 0xf000
1593; SI-NEXT:    s_mov_b32 s6, -1
1594; SI-NEXT:    s_mov_b32 s10, s6
1595; SI-NEXT:    s_mov_b32 s11, s7
1596; SI-NEXT:    s_waitcnt lgkmcnt(0)
1597; SI-NEXT:    s_mov_b32 s8, s2
1598; SI-NEXT:    s_mov_b32 s9, s3
1599; SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
1600; SI-NEXT:    s_mov_b32 s4, s0
1601; SI-NEXT:    s_mov_b32 s5, s1
1602; SI-NEXT:    s_waitcnt vmcnt(0)
1603; SI-NEXT:    v_add_i32_e32 v0, vcc, -2, v0
1604; SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
1605; SI-NEXT:    s_endpgm
1606  %x = load i16, i16 addrspace(1)* %in
1607  %y = add i16 %x, -2
1608  %ybc = bitcast i16 %y to half
1609  store half %ybc, half addrspace(1)* %out
1610  ret void
1611}
1612
; Kernel test: loads an i16 from %in, adds the integer constant -16, and stores
; the sum to %out after bitcasting it to half. The add itself is an integer
; add; only the stored type is half. The expected output for every run line
; shows -16 written directly as an operand of the add instruction.
1613define amdgpu_kernel void @add_inline_imm_neg_16_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) {
1614; GFX10-LABEL: add_inline_imm_neg_16_f16:
1615; GFX10:       ; %bb.0:
1616; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
1617; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
1618; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
1619; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
1620; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
1621; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1622; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
1623; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
1624; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
1625; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
1626; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
1627; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
1628; GFX10-NEXT:    v_add_nc_u16 v0, v0, -16 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0xa1,0x01,0x00]
1629; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1630; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1631;
1632; GFX11-LABEL: add_inline_imm_neg_16_f16:
1633; GFX11:       ; %bb.0:
1634; GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x0 ; encoding: [0x00,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8]
1635; GFX11-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1636; GFX11-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31]
1637; GFX11-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1638; GFX11-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1639; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1640; GFX11-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1641; GFX11-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1642; GFX11-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1643; GFX11-NEXT:    buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1644; GFX11-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1645; GFX11-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
1646; GFX11-NEXT:    v_add_nc_u16 v0, v0, -16 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0xa1,0x01,0x00]
1647; GFX11-NEXT:    buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80]
1648; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
1649; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1650;
1651; VI-LABEL: add_inline_imm_neg_16_f16:
1652; VI:       ; %bb.0:
1653; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00]
1654; VI-NEXT:    s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11]
1655; VI-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1656; VI-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1657; VI-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1658; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1659; VI-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1660; VI-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1661; VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1662; VI-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1663; VI-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1664; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1665; VI-NEXT:    v_add_u16_e32 v0, -16, v0 ; encoding: [0xd0,0x00,0x00,0x4c]
1666; VI-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1667; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1668;
1669; SI-LABEL: add_inline_imm_neg_16_f16:
1670; SI:       ; %bb.0:
1671; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1672; SI-NEXT:    s_mov_b32 s7, 0xf000
1673; SI-NEXT:    s_mov_b32 s6, -1
1674; SI-NEXT:    s_mov_b32 s10, s6
1675; SI-NEXT:    s_mov_b32 s11, s7
1676; SI-NEXT:    s_waitcnt lgkmcnt(0)
1677; SI-NEXT:    s_mov_b32 s8, s2
1678; SI-NEXT:    s_mov_b32 s9, s3
1679; SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
1680; SI-NEXT:    s_mov_b32 s4, s0
1681; SI-NEXT:    s_mov_b32 s5, s1
1682; SI-NEXT:    s_waitcnt vmcnt(0)
1683; SI-NEXT:    v_add_i32_e32 v0, vcc, -16, v0
1684; SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
1685; SI-NEXT:    s_endpgm
1686  %x = load i16, i16 addrspace(1)* %in
1687  %y = add i16 %x, -16 ; integer add; -16 is the constant under test
1688  %ybc = bitcast i16 %y to half ; reinterprets the bits; no numeric conversion
1689  store half %ybc, half addrspace(1)* %out
1690  ret void
1691}
1692
; Kernel test: computes fadd of the half argument %x with the half constant
; whose bit pattern is 0x003F (63 when the bits are read as an integer) and
; stores the result to %out. The expected gfx1010, gfx1100 and tonga output
; passes the constant to v_add_f16_e64 as the operand 63. The expected tahiti
; output converts %x to f32, adds the f32 literal 0x367c0000, and converts the
; sum back to f16.
1693define amdgpu_kernel void @add_inline_imm_63_f16(half addrspace(1)* %out, half %x) {
1694; GFX10-LABEL: add_inline_imm_63_f16:
1695; GFX10:       ; %bb.0:
1696; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1697; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1698; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1699; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1700; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1701; GFX10-NEXT:    v_add_f16_e64 v0, s2, 63 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x7e,0x01,0x00]
1702; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1703; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1704; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1705;
1706; GFX11-LABEL: add_inline_imm_63_f16:
1707; GFX11:       ; %bb.0:
1708; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
1709; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
1710; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
1711; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
1712; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1713; GFX11-NEXT:    v_add_f16_e64 v0, s2, 63 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x7e,0x01,0x00]
1714; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1715; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
1716; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
1717; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1718;
1719; VI-LABEL: add_inline_imm_63_f16:
1720; VI:       ; %bb.0:
1721; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1722; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1723; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1724; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1725; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1726; VI-NEXT:    v_add_f16_e64 v0, s6, 63 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x7e,0x01,0x00]
1727; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1728; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1729;
1730; SI-LABEL: add_inline_imm_63_f16:
1731; SI:       ; %bb.0:
1732; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
1733; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
1734; SI-NEXT:    s_mov_b32 s3, 0xf000
1735; SI-NEXT:    s_waitcnt lgkmcnt(0)
1736; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
1737; SI-NEXT:    s_mov_b32 s2, -1
1738; SI-NEXT:    v_add_f32_e32 v0, 0x367c0000, v0
1739; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1740; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1741; SI-NEXT:    s_endpgm
1742  %y = fadd half %x, 0xH003F ; half constant given by its raw bits, 0x003F
1743  store half %y, half addrspace(1)* %out
1744  ret void
1745}
1746
; Kernel test: computes fadd of the half argument %x with the half constant
; whose bit pattern is 0x0040 (64 when the bits are read as an integer) and
; stores the result to %out. The expected gfx1010, gfx1100 and tonga output
; passes the constant to v_add_f16_e64 as the operand 64. The expected tahiti
; output converts %x to f32, adds the f32 literal 0x36800000, and converts the
; sum back to f16.
1747define amdgpu_kernel void @add_inline_imm_64_f16(half addrspace(1)* %out, half %x) {
1748; GFX10-LABEL: add_inline_imm_64_f16:
1749; GFX10:       ; %bb.0:
1750; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1751; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1752; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1753; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1754; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1755; GFX10-NEXT:    v_add_f16_e64 v0, s2, 64 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x80,0x01,0x00]
1756; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1757; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1758; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1759;
1760; GFX11-LABEL: add_inline_imm_64_f16:
1761; GFX11:       ; %bb.0:
1762; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
1763; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
1764; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
1765; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
1766; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1767; GFX11-NEXT:    v_add_f16_e64 v0, s2, 64 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x80,0x01,0x00]
1768; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1769; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
1770; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
1771; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1772;
1773; VI-LABEL: add_inline_imm_64_f16:
1774; VI:       ; %bb.0:
1775; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1776; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1777; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1778; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1779; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1780; VI-NEXT:    v_add_f16_e64 v0, s6, 64 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x80,0x01,0x00]
1781; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1782; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1783;
1784; SI-LABEL: add_inline_imm_64_f16:
1785; SI:       ; %bb.0:
1786; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
1787; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
1788; SI-NEXT:    s_mov_b32 s3, 0xf000
1789; SI-NEXT:    s_waitcnt lgkmcnt(0)
1790; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
1791; SI-NEXT:    s_mov_b32 s2, -1
1792; SI-NEXT:    v_add_f32_e32 v0, 0x36800000, v0
1793; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1794; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1795; SI-NEXT:    s_endpgm
1796  %y = fadd half %x, 0xH0040 ; half constant given by its raw bits, 0x0040
1797  store half %y, half addrspace(1)* %out
1798  ret void
1799}
1800
1801; This needs to be emitted as a literal constant since the 16-bit
1802; float values do not work for 16-bit integer operations.
; Function test: multiplies the i16 argument %x by the bit pattern of half 0.5
; reinterpreted as an i16, and stores the product to %out. The expected output
; for every run line passes the constant to the multiply as the hexadecimal
; operand 0x3800. The expected tahiti output first ANDs %x with 0xffff and
; then uses v_mul_u32_u24.
1803define void @mul_inline_imm_0.5_i16(i16 addrspace(1)* %out, i16 %x) {
1804; GFX10-LABEL: mul_inline_imm_0.5_i16:
1805; GFX10:       ; %bb.0:
1806; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1807; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
1808; GFX10-NEXT:    v_mul_lo_u16 v2, 0x3800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00]
1809; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1810; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
1811; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1812;
1813; GFX11-LABEL: mul_inline_imm_0.5_i16:
1814; GFX11:       ; %bb.0:
1815; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
1816; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
1817; GFX11-NEXT:    v_mul_lo_u16 v2, 0x3800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00]
1818; GFX11-NEXT:    global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
1819; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
1820; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
1821;
1822; VI-LABEL: mul_inline_imm_0.5_i16:
1823; VI:       ; %bb.0:
1824; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1825; VI-NEXT:    v_mul_lo_u16_e32 v2, 0x3800, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x38,0x00,0x00]
1826; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
1827; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1828; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
1829;
1830; SI-LABEL: mul_inline_imm_0.5_i16:
1831; SI:       ; %bb.0:
1832; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1833; SI-NEXT:    s_mov_b32 s6, 0
1834; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1835; SI-NEXT:    s_mov_b32 s7, 0xf000
1836; SI-NEXT:    s_mov_b32 s4, s6
1837; SI-NEXT:    s_mov_b32 s5, s6
1838; SI-NEXT:    v_mul_u32_u24_e32 v2, 0x3800, v2
1839; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
1840; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1841; SI-NEXT:    s_setpc_b64 s[30:31]
1842  %y = mul i16 %x, bitcast (half 0.5 to i16) ; integer multiply by the bits of half 0.5
1843  store i16 %y, i16 addrspace(1)* %out
1844  ret void
1845}
1846
; Function test: multiplies the i16 argument %x by the bit pattern of half
; -0.5 reinterpreted as an i16, and stores the product to %out. The expected
; output for every run line passes the constant to the multiply as the
; hexadecimal operand 0xb800. The expected tahiti output first ANDs %x with
; 0xffff and then uses v_mul_u32_u24.
1847define void @mul_inline_imm_neg_0.5_i16(i16 addrspace(1)* %out, i16 %x) {
1848; GFX10-LABEL: mul_inline_imm_neg_0.5_i16:
1849; GFX10:       ; %bb.0:
1850; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1851; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
1852; GFX10-NEXT:    v_mul_lo_u16 v2, 0xb800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xb8,0xff,0xff]
1853; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1854; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
1855; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1856;
1857; GFX11-LABEL: mul_inline_imm_neg_0.5_i16:
1858; GFX11:       ; %bb.0:
1859; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
1860; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
1861; GFX11-NEXT:    v_mul_lo_u16 v2, 0xb800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xb8,0xff,0xff]
1862; GFX11-NEXT:    global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
1863; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
1864; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
1865;
1866; VI-LABEL: mul_inline_imm_neg_0.5_i16:
1867; VI:       ; %bb.0:
1868; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1869; VI-NEXT:    v_mul_lo_u16_e32 v2, 0xb800, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xb8,0xff,0xff]
1870; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
1871; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1872; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
1873;
1874; SI-LABEL: mul_inline_imm_neg_0.5_i16:
1875; SI:       ; %bb.0:
1876; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1877; SI-NEXT:    s_mov_b32 s6, 0
1878; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1879; SI-NEXT:    s_mov_b32 s7, 0xf000
1880; SI-NEXT:    s_mov_b32 s4, s6
1881; SI-NEXT:    s_mov_b32 s5, s6
1882; SI-NEXT:    v_mul_u32_u24_e32 v2, 0xb800, v2
1883; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
1884; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1885; SI-NEXT:    s_setpc_b64 s[30:31]
1886  %y = mul i16 %x, bitcast (half -0.5 to i16) ; integer multiply by the bits of half -0.5
1887  store i16 %y, i16 addrspace(1)* %out
1888  ret void
1889}
1890
; Function test: multiplies the i16 argument %x by the bit pattern of half 1.0
; reinterpreted as an i16, and stores the product to %out. The expected output
; for every run line passes the constant to the multiply as the hexadecimal
; operand 0x3c00. The expected tahiti output first ANDs %x with 0xffff and
; then uses v_mul_u32_u24.
1891define void @mul_inline_imm_1.0_i16(i16 addrspace(1)* %out, i16 %x) {
1892; GFX10-LABEL: mul_inline_imm_1.0_i16:
1893; GFX10:       ; %bb.0:
1894; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1895; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
1896; GFX10-NEXT:    v_mul_lo_u16 v2, 0x3c00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x3c,0x00,0x00]
1897; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1898; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
1899; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1900;
1901; GFX11-LABEL: mul_inline_imm_1.0_i16:
1902; GFX11:       ; %bb.0:
1903; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
1904; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
1905; GFX11-NEXT:    v_mul_lo_u16 v2, 0x3c00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x3c,0x00,0x00]
1906; GFX11-NEXT:    global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
1907; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
1908; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
1909;
1910; VI-LABEL: mul_inline_imm_1.0_i16:
1911; VI:       ; %bb.0:
1912; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1913; VI-NEXT:    v_mul_lo_u16_e32 v2, 0x3c00, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x3c,0x00,0x00]
1914; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
1915; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1916; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
1917;
1918; SI-LABEL: mul_inline_imm_1.0_i16:
1919; SI:       ; %bb.0:
1920; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1921; SI-NEXT:    s_mov_b32 s6, 0
1922; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1923; SI-NEXT:    s_mov_b32 s7, 0xf000
1924; SI-NEXT:    s_mov_b32 s4, s6
1925; SI-NEXT:    s_mov_b32 s5, s6
1926; SI-NEXT:    v_mul_u32_u24_e32 v2, 0x3c00, v2
1927; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
1928; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1929; SI-NEXT:    s_setpc_b64 s[30:31]
1930  %y = mul i16 %x, bitcast (half 1.0 to i16) ; integer multiply by the bits of half 1.0
1931  store i16 %y, i16 addrspace(1)* %out
1932  ret void
1933}
1934
; Function test: multiplies the i16 argument %x by the bit pattern of half
; -1.0 reinterpreted as an i16, and stores the product to %out. The expected
; output for every run line passes the constant to the multiply as the
; hexadecimal operand 0xbc00. The expected tahiti output first ANDs %x with
; 0xffff and then uses v_mul_u32_u24.
1935define void @mul_inline_imm_neg_1.0_i16(i16 addrspace(1)* %out, i16 %x) {
1936; GFX10-LABEL: mul_inline_imm_neg_1.0_i16:
1937; GFX10:       ; %bb.0:
1938; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1939; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
1940; GFX10-NEXT:    v_mul_lo_u16 v2, 0xbc00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xbc,0xff,0xff]
1941; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1942; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
1943; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1944;
1945; GFX11-LABEL: mul_inline_imm_neg_1.0_i16:
1946; GFX11:       ; %bb.0:
1947; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
1948; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
1949; GFX11-NEXT:    v_mul_lo_u16 v2, 0xbc00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xbc,0xff,0xff]
1950; GFX11-NEXT:    global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
1951; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
1952; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
1953;
1954; VI-LABEL: mul_inline_imm_neg_1.0_i16:
1955; VI:       ; %bb.0:
1956; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1957; VI-NEXT:    v_mul_lo_u16_e32 v2, 0xbc00, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xbc,0xff,0xff]
1958; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
1959; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1960; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
1961;
1962; SI-LABEL: mul_inline_imm_neg_1.0_i16:
1963; SI:       ; %bb.0:
1964; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1965; SI-NEXT:    s_mov_b32 s6, 0
1966; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1967; SI-NEXT:    s_mov_b32 s7, 0xf000
1968; SI-NEXT:    s_mov_b32 s4, s6
1969; SI-NEXT:    s_mov_b32 s5, s6
1970; SI-NEXT:    v_mul_u32_u24_e32 v2, 0xbc00, v2
1971; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
1972; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1973; SI-NEXT:    s_setpc_b64 s[30:31]
1974  %y = mul i16 %x, bitcast (half -1.0 to i16) ; integer multiply by the bits of half -1.0
1975  store i16 %y, i16 addrspace(1)* %out
1976  ret void
1977}
1978
; Function test: shifts the bit pattern of half 2.0 (reinterpreted as an i16)
; left by the i16 argument %x and stores the result to %out. The constant is
; the value being shifted; %x is the shift amount. The expected gfx1010 and
; gfx1100 output passes 0x4000 directly to v_lshlrev_b16. The expected tonga
; output first moves 0x4000 into s4 with s_movk_i32 and shifts that register.
; The expected tahiti output passes 0x4000 directly to v_lshl_b32.
1979define void @shl_inline_imm_2.0_i16(i16 addrspace(1)* %out, i16 %x) {
1980; GFX10-LABEL: shl_inline_imm_2.0_i16:
1981; GFX10:       ; %bb.0:
1982; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1983; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
1984; GFX10-NEXT:    v_lshlrev_b16 v2, v2, 0x4000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0x40,0x00,0x00]
1985; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1986; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
1987; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1988;
1989; GFX11-LABEL: shl_inline_imm_2.0_i16:
1990; GFX11:       ; %bb.0:
1991; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
1992; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
1993; GFX11-NEXT:    v_lshlrev_b16 v2, v2, 0x4000 ; encoding: [0x02,0x00,0x38,0xd7,0x02,0xff,0x01,0x00,0x00,0x40,0x00,0x00]
1994; GFX11-NEXT:    global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
1995; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
1996; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
1997;
1998; VI-LABEL: shl_inline_imm_2.0_i16:
1999; VI:       ; %bb.0:
2000; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2001; VI-NEXT:    s_movk_i32 s4, 0x4000 ; encoding: [0x00,0x40,0x04,0xb0]
2002; VI-NEXT:    v_lshlrev_b16_e64 v2, v2, s4 ; encoding: [0x02,0x00,0x2a,0xd1,0x02,0x09,0x00,0x00]
2003; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
2004; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
2005; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
2006;
2007; SI-LABEL: shl_inline_imm_2.0_i16:
2008; SI:       ; %bb.0:
2009; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2010; SI-NEXT:    s_mov_b32 s6, 0
2011; SI-NEXT:    s_mov_b32 s7, 0xf000
2012; SI-NEXT:    s_mov_b32 s4, s6
2013; SI-NEXT:    s_mov_b32 s5, s6
2014; SI-NEXT:    v_lshl_b32_e32 v2, 0x4000, v2
2015; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
2016; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2017; SI-NEXT:    s_setpc_b64 s[30:31]
2018  %y = shl i16 bitcast (half 2.0 to i16), %x ; constant is the shifted value; %x is the shift amount
2019  store i16 %y, i16 addrspace(1)* %out
2020  ret void
2021}
2022
; Function test: shifts the bit pattern of half -2.0 (reinterpreted as an i16)
; left by the i16 argument %x and stores the result to %out. The constant is
; the value being shifted; %x is the shift amount. The expected gfx1010 and
; gfx1100 output passes 0xc000 directly to v_lshlrev_b16. The expected tonga
; output first moves 0xc000 into s4 with s_movk_i32 and shifts that register.
; The expected tahiti output passes the constant to v_lshl_b32 as 0xffffc000.
2023define void @shl_inline_imm_neg_2.0_i16(i16 addrspace(1)* %out, i16 %x) {
2024; GFX10-LABEL: shl_inline_imm_neg_2.0_i16:
2025; GFX10:       ; %bb.0:
2026; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2027; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
2028; GFX10-NEXT:    v_lshlrev_b16 v2, v2, 0xc000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0xc0,0xff,0xff]
2029; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
2030; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
2031; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
2032;
2033; GFX11-LABEL: shl_inline_imm_neg_2.0_i16:
2034; GFX11:       ; %bb.0:
2035; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
2036; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
2037; GFX11-NEXT:    v_lshlrev_b16 v2, v2, 0xc000 ; encoding: [0x02,0x00,0x38,0xd7,0x02,0xff,0x01,0x00,0x00,0xc0,0xff,0xff]
2038; GFX11-NEXT:    global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
2039; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
2040; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
2041;
2042; VI-LABEL: shl_inline_imm_neg_2.0_i16:
2043; VI:       ; %bb.0:
2044; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2045; VI-NEXT:    s_movk_i32 s4, 0xc000 ; encoding: [0x00,0xc0,0x04,0xb0]
2046; VI-NEXT:    v_lshlrev_b16_e64 v2, v2, s4 ; encoding: [0x02,0x00,0x2a,0xd1,0x02,0x09,0x00,0x00]
2047; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
2048; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
2049; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
2050;
2051; SI-LABEL: shl_inline_imm_neg_2.0_i16:
2052; SI:       ; %bb.0:
2053; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2054; SI-NEXT:    s_mov_b32 s6, 0
2055; SI-NEXT:    s_mov_b32 s7, 0xf000
2056; SI-NEXT:    s_mov_b32 s4, s6
2057; SI-NEXT:    s_mov_b32 s5, s6
2058; SI-NEXT:    v_lshl_b32_e32 v2, 0xffffc000, v2
2059; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
2060; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2061; SI-NEXT:    s_setpc_b64 s[30:31]
2062  %y = shl i16 bitcast (half -2.0 to i16), %x ; constant is the shifted value; %x is the shift amount
2063  store i16 %y, i16 addrspace(1)* %out
2064  ret void
2065}
2066
; Function test: multiplies the i16 argument %x by the bit pattern of half 4.0
; reinterpreted as an i16, and stores the product to %out. The expected output
; for every run line passes the constant to the multiply as the hexadecimal
; operand 0x4400. The expected tahiti output first ANDs %x with 0xffff and
; then uses v_mul_u32_u24.
2067define void @mul_inline_imm_4.0_i16(i16 addrspace(1)* %out, i16 %x) {
2068; GFX10-LABEL: mul_inline_imm_4.0_i16:
2069; GFX10:       ; %bb.0:
2070; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2071; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
2072; GFX10-NEXT:    v_mul_lo_u16 v2, 0x4400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x44,0x00,0x00]
2073; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
2074; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
2075; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
2076;
2077; GFX11-LABEL: mul_inline_imm_4.0_i16:
2078; GFX11:       ; %bb.0:
2079; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
2080; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
2081; GFX11-NEXT:    v_mul_lo_u16 v2, 0x4400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x44,0x00,0x00]
2082; GFX11-NEXT:    global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
2083; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
2084; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
2085;
2086; VI-LABEL: mul_inline_imm_4.0_i16:
2087; VI:       ; %bb.0:
2088; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2089; VI-NEXT:    v_mul_lo_u16_e32 v2, 0x4400, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x44,0x00,0x00]
2090; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
2091; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
2092; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
2093;
2094; SI-LABEL: mul_inline_imm_4.0_i16:
2095; SI:       ; %bb.0:
2096; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2097; SI-NEXT:    s_mov_b32 s6, 0
2098; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
2099; SI-NEXT:    s_mov_b32 s7, 0xf000
2100; SI-NEXT:    s_mov_b32 s4, s6
2101; SI-NEXT:    s_mov_b32 s5, s6
2102; SI-NEXT:    v_mul_u32_u24_e32 v2, 0x4400, v2
2103; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
2104; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2105; SI-NEXT:    s_setpc_b64 s[30:31]
2106  %y = mul i16 %x, bitcast (half 4.0 to i16) ; integer multiply by the bits of half 4.0
2107  store i16 %y, i16 addrspace(1)* %out
2108  ret void
2109}
2110
; Function test: multiplies the i16 argument %x by the bit pattern of half
; -4.0 reinterpreted as an i16, and stores the product to %out. The expected
; output for every run line passes the constant to the multiply as the
; hexadecimal operand 0xc400. The expected tahiti output first ANDs %x with
; 0xffff and then uses v_mul_u32_u24.
2111define void @mul_inline_imm_neg_4.0_i16(i16 addrspace(1)* %out, i16 %x) {
2112; GFX10-LABEL: mul_inline_imm_neg_4.0_i16:
2113; GFX10:       ; %bb.0:
2114; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2115; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
2116; GFX10-NEXT:    v_mul_lo_u16 v2, 0xc400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0xff,0xff]
2117; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
2118; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
2119; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
2120;
2121; GFX11-LABEL: mul_inline_imm_neg_4.0_i16:
2122; GFX11:       ; %bb.0:
2123; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
2124; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
2125; GFX11-NEXT:    v_mul_lo_u16 v2, 0xc400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0xff,0xff]
2126; GFX11-NEXT:    global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
2127; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
2128; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
2129;
2130; VI-LABEL: mul_inline_imm_neg_4.0_i16:
2131; VI:       ; %bb.0:
2132; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2133; VI-NEXT:    v_mul_lo_u16_e32 v2, 0xc400, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xc4,0xff,0xff]
2134; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
2135; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
2136; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
2137;
2138; SI-LABEL: mul_inline_imm_neg_4.0_i16:
2139; SI:       ; %bb.0:
2140; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2141; SI-NEXT:    s_mov_b32 s6, 0
2142; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
2143; SI-NEXT:    s_mov_b32 s7, 0xf000
2144; SI-NEXT:    s_mov_b32 s4, s6
2145; SI-NEXT:    s_mov_b32 s5, s6
2146; SI-NEXT:    v_mul_u32_u24_e32 v2, 0xc400, v2
2147; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
2148; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2149; SI-NEXT:    s_setpc_b64 s[30:31]
2150  %y = mul i16 %x, bitcast (half -4.0 to i16) ; integer multiply by the bits of half -4.0
2151  store i16 %y, i16 addrspace(1)* %out
2152  ret void
2153}
2154
; Plain (non-amdgpu_kernel) function: multiplies the i16 argument %x by the
; i16 bit pattern of half 0xH3118 (the "inv2pi" value this test is named
; after) and stores the product through the addrspace(1) pointer %out.
; In the expected output for all four check prefixes the constant shows up as
; the operand 0x3118 of the multiply. The GFX10, GFX11 and VI expectations use
; a 16-bit multiply (v_mul_lo_u16); the SI expectations first mask %x with
; 0xffff and then use v_mul_u32_u24.
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
2155define void @mul_inline_imm_inv2pi_i16(i16 addrspace(1)* %out, i16 %x) {
2156; GFX10-LABEL: mul_inline_imm_inv2pi_i16:
2157; GFX10:       ; %bb.0:
2158; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2159; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
2160; GFX10-NEXT:    v_mul_lo_u16 v2, 0x3118, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x18,0x31,0x00,0x00]
2161; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
2162; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
2163; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
2164;
2165; GFX11-LABEL: mul_inline_imm_inv2pi_i16:
2166; GFX11:       ; %bb.0:
2167; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
2168; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
2169; GFX11-NEXT:    v_mul_lo_u16 v2, 0x3118, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x18,0x31,0x00,0x00]
2170; GFX11-NEXT:    global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
2171; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
2172; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
2173;
2174; VI-LABEL: mul_inline_imm_inv2pi_i16:
2175; VI:       ; %bb.0:
2176; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2177; VI-NEXT:    v_mul_lo_u16_e32 v2, 0x3118, v2 ; encoding: [0xff,0x04,0x04,0x52,0x18,0x31,0x00,0x00]
2178; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
2179; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
2180; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
2181;
2182; SI-LABEL: mul_inline_imm_inv2pi_i16:
2183; SI:       ; %bb.0:
2184; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2185; SI-NEXT:    s_mov_b32 s6, 0
2186; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
2187; SI-NEXT:    s_mov_b32 s7, 0xf000
2188; SI-NEXT:    s_mov_b32 s4, s6
2189; SI-NEXT:    s_mov_b32 s5, s6
2190; SI-NEXT:    v_mul_u32_u24_e32 v2, 0x3118, v2
2191; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
2192; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2193; SI-NEXT:    s_setpc_b64 s[30:31]
  ; The multiplier is a constant-expression bitcast of the half value given in
  ; hex form, so the operation is an integer multiply by the raw pattern 0x3118.
2194  %y = mul i16 %x, bitcast (half 0xH3118 to i16)
2195  store i16 %y, i16 addrspace(1)* %out
2196  ret void
2197}
2198