; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s

; Inline assembly tests for the AMDGPU backend. Every function below is
; compiled by both llc invocations above and the emitted assembly is matched
; against the FileCheck directives that precede the function.

; Kernel with an explicit inline-asm s_endpgm; s_endpgm is expected twice in
; the output.
; CHECK-LABEL: {{^}}inline_asm:
; CHECK: s_endpgm
; CHECK: s_endpgm
define amdgpu_kernel void @inline_asm(i32 addrspace(1)* %out) {
entry:
  store i32 5, i32 addrspace(1)* %out
  call void asm sideeffect "s_endpgm", ""()
  ret void
}

; Same as above, but for a pixel shader (amdgpu_ps) entry point.
; CHECK-LABEL: {{^}}inline_asm_shader:
; CHECK: s_endpgm
; CHECK: s_endpgm
define amdgpu_ps void @inline_asm_shader() {
entry:
  call void asm sideeffect "s_endpgm", ""()
  ret void
}

; Make sure inline assembly is treated as divergent: branching on the asm
; result is expected to go through s_and_saveexec_b64.
; CHECK-LABEL: {{^}}branch_on_asm:
; CHECK: s_mov_b32 s{{[0-9]+}}, 0
; CHECK: s_and_saveexec_b64
define amdgpu_kernel void @branch_on_asm(i32 addrspace(1)* %out) {
  %zero = call i32 asm "s_mov_b32 $0, 0", "=s"()
  %cmp = icmp eq i32 %zero, 0
  br i1 %cmp, label %if, label %endif

if:
  store i32 0, i32 addrspace(1)* %out
  br label %endif

endif:
  ret void
}

; An i64 "=s" output of a VALU compare: the SGPR pair written by the asm is
; expected to be copied into VGPRs and stored as a dwordx2.
; CHECK-LABEL: {{^}}v_cmp_asm:
; CHECK: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; CHECK: v_cmp_ne_u32_e64 s{{\[}}[[MASK_LO:[0-9]+]]:[[MASK_HI:[0-9]+]]{{\]}}, 0, [[SRC]]
; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[MASK_LO]]
; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[MASK_HI]]
; CHECK: buffer_store_dwordx2 v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
define amdgpu_kernel void @v_cmp_asm(i64 addrspace(1)* %out, i32 %in) {
  %sgpr = tail call i64 asm "v_cmp_ne_u32_e64 $0, 0, $1", "=s,v"(i32 %in)
  store i64 %sgpr, i64 addrspace(1)* %out
  ret void
}

; The code_size_* tests below pin the codeLenInByte value reported for kernels
; whose body is only inline asm. Line breaks inside the asm strings are part
; of what is being tested, so they must be preserved exactly.

; CHECK-LABEL: {{^}}code_size_inline_asm:
; CHECK: codeLenInByte = 12
define amdgpu_kernel void @code_size_inline_asm(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "v_nop_e64", ""()
  ret void
}

; All inlineasm instructions are assumed to be the maximum size
; CHECK-LABEL: {{^}}code_size_inline_asm_small_inst:
; CHECK: codeLenInByte = 12
define amdgpu_kernel void @code_size_inline_asm_small_inst(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "v_nop_e32", ""()
  ret void
}

; Two instructions on separate lines of one asm string.
; CHECK-LABEL: {{^}}code_size_inline_asm_2_inst:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_2_inst(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "
    v_nop_e64
    v_nop_e64
   ", ""()
  ret void
}

; A blank line between the two instructions is expected not to change the
; reported size.
; CHECK-LABEL: {{^}}code_size_inline_asm_2_inst_extra_newline:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_2_inst_extra_newline(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "
    v_nop_e64

    v_nop_e64
   ", ""()
  ret void
}

; Empty asm string.
; CHECK-LABEL: {{^}}code_size_inline_asm_0_inst:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_0_inst(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "", ""()
  ret void
}

; The comment-only asm strings below are expected to report the same size as
; the empty asm string above.
; CHECK-LABEL: {{^}}code_size_inline_asm_1_comment:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_1_comment(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; comment", ""()
  ret void
}

; CHECK-LABEL: {{^}}code_size_inline_asm_newline_1_comment:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_newline_1_comment(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "
; comment", ""()
  ret void
}

; CHECK-LABEL: {{^}}code_size_inline_asm_1_comment_newline:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_1_comment_newline(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; comment
", ""()
  ret void
}

; CHECK-LABEL: {{^}}code_size_inline_asm_2_comments_line:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_2_comments_line(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; first comment ; second comment", ""()
  ret void
}

; CHECK-LABEL: {{^}}code_size_inline_asm_2_comments_line_nospace:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_2_comments_line_nospace(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; first comment;second comment", ""()
  ret void
}

; Two instructions interleaved with leading, inline, separate and trailing
; comments; the expected size matches the plain two-instruction tests.
; CHECK-LABEL: {{^}}code_size_inline_asm_mixed_comments0:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_mixed_comments0(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; comment
    v_nop_e64 ; inline comment
; separate comment
    v_nop_e64

    ; trailing comment
    ; extra comment
  ", ""()
  ret void
}

; As mixed_comments0, but the string starts directly with an instruction.
; CHECK-LABEL: {{^}}code_size_inline_asm_mixed_comments1:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_mixed_comments1(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "v_nop_e64 ; inline comment
; separate comment
    v_nop_e64

    ; trailing comment
    ; extra comment
  ", ""()
  ret void
}

; As mixed_comments0, but with instructions that take operands.
; CHECK-LABEL: {{^}}code_size_inline_asm_mixed_comments_operands:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_mixed_comments_operands(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; comment
    v_add_i32_e32 v0, vcc, v1, v2 ; inline comment
; separate comment
    v_bfrev_b32_e32 v0, 1

    ; trailing comment
    ; extra comment
  ", ""()
  ret void
}

; An i64 immediate passed in the physical register pair v[0:1].
; FIXME: Should not have intermediate sgprs
; CHECK-LABEL: {{^}}i64_imm_input_phys_vgpr:
; CHECK-DAG: s_mov_b32 s1, 0
; CHECK-DAG: s_mov_b32 s0, 0x1e240
; CHECK: v_mov_b32_e32 v0, s0
; CHECK: v_mov_b32_e32 v1, s1
; CHECK: use v[0:1]
define amdgpu_kernel void @i64_imm_input_phys_vgpr() {
entry:
  call void asm sideeffect "; use $0 ", "{v[0:1]}"(i64 123456)
  ret void
}

; An i1 immediate passed in physical register v0; it is expected to be
; materialized with v_cndmask as 0 / -1.
; CHECK-LABEL: {{^}}i1_imm_input_phys_vgpr:
; CHECK: s_mov_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], -1
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, [[MASK]]
; CHECK: ; use v0
define amdgpu_kernel void @i1_imm_input_phys_vgpr() {
entry:
  call void asm sideeffect "; use $0 ", "{v0}"(i1 true)
  ret void
}

; A loaded i1 passed in v0 with an i1 result returned in v1.
; CHECK-LABEL: {{^}}i1_input_phys_vgpr:
; CHECK: {{buffer|flat}}_load_ubyte [[LOAD:v[0-9]+]]
; CHECK: v_and_b32_e32 [[LOAD]], 1, [[LOAD]]
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, [[LOAD]]
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; CHECK: ; use v0
; CHECK: v_cmp_ne_u32_e32 vcc, 0, v1
; CHECK: v_cndmask_b32_e64 [[STORE:v[0-9]+]], 0, 1, vcc
; CHECK: {{buffer|flat}}_store_byte [[STORE]],
define amdgpu_kernel void @i1_input_phys_vgpr() {
entry:
  %val = load i1, i1 addrspace(1)* undef
  %cc = call i1 asm sideeffect "; use $1, def $0 ", "={v1}, {v0}"(i1 %val)
  store i1 %cc, i1 addrspace(1)* undef
  ret void
}

; Two loaded i1 values passed in v0 and v1.
; FIXME: Should be scheduled to shrink vcc
; CHECK-LABEL: {{^}}i1_input_phys_vgpr_x2:
; CHECK: v_cmp_eq_u32_e32 vcc, 1, v0
; CHECK: v_cndmask_b32_e64 v0, 0, -1, vcc
; CHECK: v_cmp_eq_u32_e32 vcc, 1, v1
; CHECK: v_cndmask_b32_e64 v1, 0, -1, vcc
define amdgpu_kernel void @i1_input_phys_vgpr_x2() {
entry:
  %val0 = load volatile i1, i1 addrspace(1)* undef
  %val1 = load volatile i1, i1 addrspace(1)* undef
  call void asm sideeffect "; use $0 $1 ", "{v0}, {v1}"(i1 %val0, i1 %val1)
  ret void
}

; Two asm statements both define physical register v0; the first result is
; expected to be copied to v1 before the second definition.
; CHECK-LABEL: {{^}}muliple_def_phys_vgpr:
; CHECK: ; def v0
; CHECK: v_mov_b32_e32 v1, v0
; CHECK: ; def v0
; CHECK: v_lshlrev_b32_e32 v{{[0-9]+}}, v0, v1
define amdgpu_kernel void @muliple_def_phys_vgpr() {
entry:
  %def0 = call i32 asm sideeffect "; def $0 ", "={v0}"()
  %def1 = call i32 asm sideeffect "; def $0 ", "={v0}"()
  %add = shl i32 %def0, %def1
  store i32 %add, i32 addrspace(1)* undef
  ret void
}

; ${0:c} on an "n" operand is expected to print the immediate as a bare 10.
; CHECK-LABEL: {{^}}asm_constraint_c_n:
; CHECK: s_trap 10{{$}}
define amdgpu_kernel void @asm_constraint_c_n()  {
entry:
  tail call void asm sideeffect "s_trap ${0:c}", "n"(i32 10) #1
  ret void
}

; ${0:n} on an "n" operand is expected to print the negated immediate, -10.
; CHECK-LABEL: {{^}}asm_constraint_n_n:
; CHECK: s_trap -10{{$}}
define amdgpu_kernel void @asm_constraint_n_n()  {
entry:
  tail call void asm sideeffect "s_trap ${0:n}", "n"(i32 10) #1
  ret void
}

; Make sure tuples of 3 SGPRs are printed with the [] syntax instead
; of the tablegen default.
; CHECK-LABEL: {{^}}sgpr96_name_format:
; CHECK: ; sgpr96 s[0:2]
define amdgpu_kernel void @sgpr96_name_format()  {
entry:
  tail call void asm sideeffect "; sgpr96 $0", "s"(<3 x i32> <i32 10, i32 11, i32 12>) #1
  ret void
}

; Check aggregate types are handled properly.
; CHECK-LABEL: {{^}}mad_u64:
; CHECK: v_mad_u64_u32
define void @mad_u64(i32 %x) {
entry:
  br i1 undef, label %exit, label %false

false:
  %s0 = tail call { i64, i64 } asm sideeffect "v_mad_u64_u32 $0, $1, $2, $3, $4", "=v,=s,v,v,v"(i32 -766435501, i32 %x, i64 0)
  br label %exit

exit:
  %s1 = phi { i64, i64} [ undef, %entry ], [ %s0, %false]
  %v0 = extractvalue { i64, i64 } %s1, 0
  %v1 = extractvalue { i64, i64 } %s1, 1
  tail call void asm sideeffect "; use $0", "v"(i64 %v0)
  tail call void asm sideeffect "; use $0", "v"(i64 %v1)
  ret void
}