; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,WAVE64,SI %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,WAVE64,GFX10-WAVE64 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,GFX10-WAVE32 %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,GFX11 %s

; Kill with a constant true condition; the checks expect nothing but s_endpgm.
define amdgpu_ps void @test_kill_depth_0_imm_pos() #0 {
; GCN-LABEL: test_kill_depth_0_imm_pos:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_endpgm
  call void @llvm.amdgcn.kill(i1 true)
  ret void
}

; Kill with a constant false condition; exec is cleared against itself and an
; early-exit block (export followed by s_endpgm) is expected.
define amdgpu_ps void @test_kill_depth_0_imm_neg() #0 {
; WAVE64-LABEL: test_kill_depth_0_imm_neg:
; WAVE64:       ; %bb.0:
; WAVE64-NEXT:    s_andn2_b64 exec, exec, exec
; WAVE64-NEXT:    s_cbranch_scc0 .LBB1_1
; WAVE64-NEXT:    s_endpgm
; WAVE64-NEXT:  .LBB1_1:
; WAVE64-NEXT:    s_mov_b64 exec, 0
; WAVE64-NEXT:    exp null off, off, off, off done vm
; WAVE64-NEXT:    s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_0_imm_neg:
; GFX10-WAVE32:       ; %bb.0:
; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, exec_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB1_1
; GFX10-WAVE32-NEXT:    s_endpgm
; GFX10-WAVE32-NEXT:  .LBB1_1:
; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT:    s_endpgm
;
; GFX11-LABEL: test_kill_depth_0_imm_neg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_and_not1_b64 exec, exec, exec
; GFX11-NEXT:    s_cbranch_scc0 .LBB1_1
; GFX11-NEXT:    s_endpgm
; GFX11-NEXT:  .LBB1_1:
; GFX11-NEXT:    s_mov_b64 exec, 0
; GFX11-NEXT:    exp mrt0 off, off, off, off done
; GFX11-NEXT:    s_endpgm
  call void @llvm.amdgcn.kill(i1 false)
  ret void
}

; Two back-to-back kills, both with a constant false condition.
; FIXME: Ideally only one early-exit would be emitted
define amdgpu_ps void @test_kill_depth_0_imm_neg_x2() #0 {
; WAVE64-LABEL: test_kill_depth_0_imm_neg_x2:
; WAVE64:       ; %bb.0:
; WAVE64-NEXT:    s_mov_b64 s[0:1], exec
; WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
; WAVE64-NEXT:    s_cbranch_scc0 .LBB2_2
; WAVE64-NEXT:  ; %bb.1:
; WAVE64-NEXT:    s_mov_b64 exec, 0
; WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
; WAVE64-NEXT:    s_cbranch_scc0 .LBB2_2
; WAVE64-NEXT:    s_endpgm
; WAVE64-NEXT:  .LBB2_2:
; WAVE64-NEXT:    s_mov_b64 exec, 0
; WAVE64-NEXT:    exp null off, off, off, off done vm
; WAVE64-NEXT:    s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_0_imm_neg_x2:
; GFX10-WAVE32:       ; %bb.0:
; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, exec_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB2_2
; GFX10-WAVE32-NEXT:  ; %bb.1:
; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, exec_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB2_2
; GFX10-WAVE32-NEXT:    s_endpgm
; GFX10-WAVE32-NEXT:  .LBB2_2:
; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT:    s_endpgm
;
; GFX11-LABEL: test_kill_depth_0_imm_neg_x2:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_mov_b64 s[0:1], exec
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], exec
; GFX11-NEXT:    s_cbranch_scc0 .LBB2_2
; GFX11-NEXT:  ; %bb.1:
; GFX11-NEXT:    s_mov_b64 exec, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], exec
; GFX11-NEXT:    s_cbranch_scc0 .LBB2_2
; GFX11-NEXT:    s_endpgm
; GFX11-NEXT:  .LBB2_2:
; GFX11-NEXT:    s_mov_b64 exec, 0
; GFX11-NEXT:    exp mrt0 off, off, off, off done
; GFX11-NEXT:    s_endpgm
  call void @llvm.amdgcn.kill(i1 false)
  call void @llvm.amdgcn.kill(i1 false)
  ret void
}

; Kill whose condition is computed at run time by comparing %x against 0.0.
define amdgpu_ps void @test_kill_depth_var(float %x) #0 {
; WAVE64-LABEL: test_kill_depth_var:
; WAVE64:       ; %bb.0:
; WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
; WAVE64-NEXT:    s_cbranch_scc0 .LBB3_1
; WAVE64-NEXT:    s_endpgm
; WAVE64-NEXT:  .LBB3_1:
; WAVE64-NEXT:    s_mov_b64 exec, 0
; WAVE64-NEXT:    exp null off, off, off, off done vm
; WAVE64-NEXT:    s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_var:
; GFX10-WAVE32:       ; %bb.0:
; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB3_1
; GFX10-WAVE32-NEXT:    s_endpgm
; GFX10-WAVE32-NEXT:  .LBB3_1:
; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT:    s_endpgm
;
; GFX11-LABEL: test_kill_depth_var:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT:    s_cbranch_scc0 .LBB3_1
; GFX11-NEXT:    s_endpgm
; GFX11-NEXT:  .LBB3_1:
; GFX11-NEXT:    s_mov_b64 exec, 0
; GFX11-NEXT:    exp mrt0 off, off, off, off done
; GFX11-NEXT:    s_endpgm
  %cmp = fcmp olt float %x, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp)
  ret void
}

; Two kills that reuse the very same condition value.
; FIXME: Ideally only one early-exit would be emitted
define amdgpu_ps void @test_kill_depth_var_x2_same(float %x) #0 {
; SI-LABEL: test_kill_depth_var_x2_same:
; SI:       ; %bb.0:
; SI-NEXT:    s_mov_b64 s[0:1], exec
; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT:    s_cbranch_scc0 .LBB4_2
; SI-NEXT:  ; %bb.1:
; SI-NEXT:    s_andn2_b64 exec, exec, vcc
; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT:    s_cbranch_scc0 .LBB4_2
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB4_2:
; SI-NEXT:    s_mov_b64 exec, 0
; SI-NEXT:    exp null off, off, off, off done vm
; SI-NEXT:    s_endpgm
;
; GFX10-WAVE64-LABEL: test_kill_depth_var_x2_same:
; GFX10-WAVE64:       ; %bb.0:
; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB4_2
; GFX10-WAVE64-NEXT:  ; %bb.1:
; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB4_2
; GFX10-WAVE64-NEXT:    s_endpgm
; GFX10-WAVE64-NEXT:  .LBB4_2:
; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT:    s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_var_x2_same:
; GFX10-WAVE32:       ; %bb.0:
; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB4_2
; GFX10-WAVE32-NEXT:  ; %bb.1:
; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB4_2
; GFX10-WAVE32-NEXT:    s_endpgm
; GFX10-WAVE32-NEXT:  .LBB4_2:
; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT:    s_endpgm
;
; GFX11-LABEL: test_kill_depth_var_x2_same:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX11-NEXT:    s_mov_b64 s[0:1], exec
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
; GFX11-NEXT:    s_cbranch_scc0 .LBB4_2
; GFX11-NEXT:  ; %bb.1:
; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
; GFX11-NEXT:    s_cbranch_scc0 .LBB4_2
; GFX11-NEXT:    s_endpgm
; GFX11-NEXT:  .LBB4_2:
; GFX11-NEXT:    s_mov_b64 exec, 0
; GFX11-NEXT:    exp mrt0 off, off, off, off done
; GFX11-NEXT:    s_endpgm
  %cmp = fcmp olt float %x, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp)
  call void @llvm.amdgcn.kill(i1 %cmp)
  ret void
}

; Two kills on separate conditions, one derived from %x and one from %y.
; FIXME: Ideally only one early-exit would be emitted
define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
; SI-LABEL: test_kill_depth_var_x2:
; SI:       ; %bb.0:
; SI-NEXT:    s_mov_b64 s[0:1], exec
; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT:    s_cbranch_scc0 .LBB5_2
; SI-NEXT:  ; %bb.1:
; SI-NEXT:    s_andn2_b64 exec, exec, vcc
; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT:    s_cbranch_scc0 .LBB5_2
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB5_2:
; SI-NEXT:    s_mov_b64 exec, 0
; SI-NEXT:    exp null off, off, off, off done vm
; SI-NEXT:    s_endpgm
;
; GFX10-WAVE64-LABEL: test_kill_depth_var_x2:
; GFX10-WAVE64:       ; %bb.0:
; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB5_2
; GFX10-WAVE64-NEXT:  ; %bb.1:
; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB5_2
; GFX10-WAVE64-NEXT:    s_endpgm
; GFX10-WAVE64-NEXT:  .LBB5_2:
; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT:    s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_var_x2:
; GFX10-WAVE32:       ; %bb.0:
; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB5_2
; GFX10-WAVE32-NEXT:  ; %bb.1:
; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v1
; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB5_2
; GFX10-WAVE32-NEXT:    s_endpgm
; GFX10-WAVE32-NEXT:  .LBB5_2:
; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT:    s_endpgm
;
; GFX11-LABEL: test_kill_depth_var_x2:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX11-NEXT:    s_mov_b64 s[0:1], exec
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
; GFX11-NEXT:    s_cbranch_scc0 .LBB5_2
; GFX11-NEXT:  ; %bb.1:
; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
; GFX11-NEXT:    s_cbranch_scc0 .LBB5_2
; GFX11-NEXT:    s_endpgm
; GFX11-NEXT:  .LBB5_2:
; GFX11-NEXT:    s_mov_b64 exec, 0
; GFX11-NEXT:    exp mrt0 off, off, off, off done
; GFX11-NEXT:    s_endpgm
  %cmp.x = fcmp olt float %x, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.x)
  %cmp.y = fcmp olt float %y, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.y)
  ret void
}

; Two kills separated by inline asm; the asm writes v7, which feeds the second
; kill's compare.
define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
; SI-LABEL: test_kill_depth_var_x2_instructions:
; SI:       ; %bb.0:
; SI-NEXT:    s_mov_b64 s[0:1], exec
; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT:    s_cbranch_scc0 .LBB6_2
; SI-NEXT:  ; %bb.1:
; SI-NEXT:    s_andn2_b64 exec, exec, vcc
; SI-NEXT:    ;;#ASMSTART
; SI-NEXT:    v_mov_b32_e64 v7, -1
; SI-NEXT:    ;;#ASMEND
; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT:    s_cbranch_scc0 .LBB6_2
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB6_2:
; SI-NEXT:    s_mov_b64 exec, 0
; SI-NEXT:    exp null off, off, off, off done vm
; SI-NEXT:    s_endpgm
;
; GFX10-WAVE64-LABEL: test_kill_depth_var_x2_instructions:
; GFX10-WAVE64:       ; %bb.0:
; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB6_2
; GFX10-WAVE64-NEXT:  ; %bb.1:
; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT:    ;;#ASMSTART
; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
; GFX10-WAVE64-NEXT:    ;;#ASMEND
; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB6_2
; GFX10-WAVE64-NEXT:    s_endpgm
; GFX10-WAVE64-NEXT:  .LBB6_2:
; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT:    s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_var_x2_instructions:
; GFX10-WAVE32:       ; %bb.0:
; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB6_2
; GFX10-WAVE32-NEXT:  ; %bb.1:
; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT:    ;;#ASMSTART
; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
; GFX10-WAVE32-NEXT:    ;;#ASMEND
; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v7
; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB6_2
; GFX10-WAVE32-NEXT:    s_endpgm
; GFX10-WAVE32-NEXT:  .LBB6_2:
; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT:    s_endpgm
;
; GFX11-LABEL: test_kill_depth_var_x2_instructions:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX11-NEXT:    s_mov_b64 s[0:1], exec
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
; GFX11-NEXT:    s_cbranch_scc0 .LBB6_2
; GFX11-NEXT:  ; %bb.1:
; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    v_mov_b32_e64 v7, -1
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
; GFX11-NEXT:    s_cbranch_scc0 .LBB6_2
; GFX11-NEXT:    s_endpgm
; GFX11-NEXT:  .LBB6_2:
; GFX11-NEXT:    s_mov_b64 exec, 0
; GFX11-NEXT:    exp mrt0 off, off, off, off done
; GFX11-NEXT:    s_endpgm
  %cmp.x = fcmp olt float %x, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.x)
  %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={v7}"()
  %cmp.y = fcmp olt float %y, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.y)
  ret void
}

; Kill inside a conditionally executed block that is padded with inline-asm
; nops; both paths return 1.0.
; FIXME: why does the skip depend on the asm length in the same block?
define amdgpu_ps float @test_kill_control_flow(i32 inreg %arg) #0 {
; SI-LABEL: test_kill_control_flow:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_cmp_lg_u32 s0, 0
; SI-NEXT:    s_cbranch_scc0 .LBB7_2
; SI-NEXT:  ; %bb.1: ; %exit
; SI-NEXT:    v_mov_b32_e32 v0, 1.0
; SI-NEXT:    s_branch .LBB7_5
; SI-NEXT:  .LBB7_2: ; %bb
; SI-NEXT:    s_mov_b64 s[2:3], exec
; SI-NEXT:    ;;#ASMSTART
; SI-NEXT:    v_mov_b32_e64 v7, -1
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    ;;#ASMEND
; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
; SI-NEXT:    s_cbranch_scc0 .LBB7_4
; SI-NEXT:  ; %bb.3: ; %bb
; SI-NEXT:    s_andn2_b64 exec, exec, vcc
; SI-NEXT:    v_mov_b32_e32 v0, 1.0
; SI-NEXT:    s_branch .LBB7_5
; SI-NEXT:  .LBB7_4:
; SI-NEXT:    s_mov_b64 exec, 0
; SI-NEXT:    exp null off, off, off, off done vm
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB7_5:
;
; GFX10-WAVE64-LABEL: test_kill_control_flow:
; GFX10-WAVE64:       ; %bb.0: ; %entry
; GFX10-WAVE64-NEXT:    s_cmp_lg_u32 s0, 0
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB7_2
; GFX10-WAVE64-NEXT:  ; %bb.1: ; %exit
; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 1.0
; GFX10-WAVE64-NEXT:    s_branch .LBB7_5
; GFX10-WAVE64-NEXT:  .LBB7_2: ; %bb
; GFX10-WAVE64-NEXT:    ;;#ASMSTART
; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    ;;#ASMEND
; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB7_4
; GFX10-WAVE64-NEXT:  ; %bb.3: ; %bb
; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 1.0
; GFX10-WAVE64-NEXT:    s_branch .LBB7_5
; GFX10-WAVE64-NEXT:  .LBB7_4:
; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT:    s_endpgm
; GFX10-WAVE64-NEXT:  .LBB7_5:
;
; GFX10-WAVE32-LABEL: test_kill_control_flow:
; GFX10-WAVE32:       ; %bb.0: ; %entry
; GFX10-WAVE32-NEXT:    s_cmp_lg_u32 s0, 0
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB7_2
; GFX10-WAVE32-NEXT:  ; %bb.1: ; %exit
; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 1.0
; GFX10-WAVE32-NEXT:    s_branch .LBB7_5
; GFX10-WAVE32-NEXT:  .LBB7_2: ; %bb
; GFX10-WAVE32-NEXT:    ;;#ASMSTART
; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    ;;#ASMEND
; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v7
; GFX10-WAVE32-NEXT:    s_mov_b32 s1, exec_lo
; GFX10-WAVE32-NEXT:    s_andn2_b32 s1, s1, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB7_4
; GFX10-WAVE32-NEXT:  ; %bb.3: ; %bb
; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 1.0
; GFX10-WAVE32-NEXT:    s_branch .LBB7_5
; GFX10-WAVE32-NEXT:  .LBB7_4:
; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT:    s_endpgm
; GFX10-WAVE32-NEXT:  .LBB7_5:
;
; GFX11-LABEL: test_kill_control_flow:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_cmp_lg_u32 s0, 0
; GFX11-NEXT:    s_cbranch_scc0 .LBB7_2
; GFX11-NEXT:  ; %bb.1: ; %exit
; GFX11-NEXT:    v_mov_b32_e32 v0, 1.0
; GFX11-NEXT:    s_branch .LBB7_5
; GFX11-NEXT:  .LBB7_2: ; %bb
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    v_mov_b32_e64 v7, -1
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX11-NEXT:    s_mov_b64 s[2:3], exec
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_not1_b64 s[2:3], s[2:3], vcc
; GFX11-NEXT:    s_cbranch_scc0 .LBB7_4
; GFX11-NEXT:  ; %bb.3: ; %bb
; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT:    v_mov_b32_e32 v0, 1.0
; GFX11-NEXT:    s_branch .LBB7_5
; GFX11-NEXT:  .LBB7_4:
; GFX11-NEXT:    s_mov_b64 exec, 0
; GFX11-NEXT:    exp mrt0 off, off, off, off done
; GFX11-NEXT:    s_endpgm
; GFX11-NEXT:  .LBB7_5:
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb, label %exit

bb:
  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={v7}"()
  %cmp.var = fcmp olt float %var, 0.0
  ; TODO: We could do an early-exit here (the branch above is uniform!)
  call void @llvm.amdgcn.kill(i1 %cmp.var)
  br label %exit

exit:
  ret float 1.0
}

; Kill inside a conditionally executed block, with one value (v8) stored after
; the kill and another (v9) carried into the exit block through a phi.
define amdgpu_ps void @test_kill_control_flow_remainder(i32 inreg %arg) #0 {
; SI-LABEL: test_kill_control_flow_remainder:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_cmp_lg_u32 s0, 0
; SI-NEXT:    v_mov_b32_e32 v9, 0
; SI-NEXT:    s_cbranch_scc1 .LBB8_3
; SI-NEXT:  ; %bb.1: ; %bb
; SI-NEXT:    s_mov_b64 s[2:3], exec
; SI-NEXT:    ;;#ASMSTART
; SI-NEXT:    v_mov_b32_e64 v7, -1
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    ;;#ASMEND
; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
; SI-NEXT:    ;;#ASMSTART
; SI-NEXT:    v_mov_b32_e64 v8, -1
; SI-NEXT:    ;;#ASMEND
; SI-NEXT:    s_cbranch_scc0 .LBB8_4
; SI-NEXT:  ; %bb.2: ; %bb
; SI-NEXT:    s_andn2_b64 exec, exec, vcc
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    buffer_store_dword v8, off, s[0:3], 0
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    ;;#ASMSTART
; SI-NEXT:    v_mov_b32_e64 v9, -2
; SI-NEXT:    ;;#ASMEND
; SI-NEXT:  .LBB8_3: ; %exit
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB8_4:
; SI-NEXT:    s_mov_b64 exec, 0
; SI-NEXT:    exp null off, off, off, off done vm
; SI-NEXT:    s_endpgm
;
; GFX10-WAVE64-LABEL: test_kill_control_flow_remainder:
; GFX10-WAVE64:       ; %bb.0: ; %entry
; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v9, 0
; GFX10-WAVE64-NEXT:    s_cmp_lg_u32 s0, 0
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB8_2
; GFX10-WAVE64-NEXT:  ; %bb.1: ; %exit
; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v9, off
; GFX10-WAVE64-NEXT:    s_endpgm
; GFX10-WAVE64-NEXT:  .LBB8_2: ; %bb
; GFX10-WAVE64-NEXT:    ;;#ASMSTART
; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    ;;#ASMEND
; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
; GFX10-WAVE64-NEXT:    ;;#ASMSTART
; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v8, -1
; GFX10-WAVE64-NEXT:    ;;#ASMEND
; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB8_4
; GFX10-WAVE64-NEXT:  ; %bb.3: ; %bb
; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v8, off
; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WAVE64-NEXT:    ;;#ASMSTART
; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v9, -2
; GFX10-WAVE64-NEXT:    ;;#ASMEND
; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v9, off
; GFX10-WAVE64-NEXT:    s_endpgm
; GFX10-WAVE64-NEXT:  .LBB8_4:
; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT:    s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_control_flow_remainder:
; GFX10-WAVE32:       ; %bb.0: ; %entry
; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v9, 0
; GFX10-WAVE32-NEXT:    s_cmp_lg_u32 s0, 0
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB8_2
; GFX10-WAVE32-NEXT:  ; %bb.1: ; %exit
; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v9, off
; GFX10-WAVE32-NEXT:    s_endpgm
; GFX10-WAVE32-NEXT:  .LBB8_2: ; %bb
; GFX10-WAVE32-NEXT:    ;;#ASMSTART
; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    ;;#ASMEND
; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v7
; GFX10-WAVE32-NEXT:    s_mov_b32 s1, exec_lo
; GFX10-WAVE32-NEXT:    ;;#ASMSTART
; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v8, -1
; GFX10-WAVE32-NEXT:    ;;#ASMEND
; GFX10-WAVE32-NEXT:    s_andn2_b32 s1, s1, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB8_4
; GFX10-WAVE32-NEXT:  ; %bb.3: ; %bb
; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v8, off
; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WAVE32-NEXT:    ;;#ASMSTART
; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v9, -2
; GFX10-WAVE32-NEXT:    ;;#ASMEND
; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v9, off
; GFX10-WAVE32-NEXT:    s_endpgm
; GFX10-WAVE32-NEXT:  .LBB8_4:
; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT:    s_endpgm
;
; GFX11-LABEL: test_kill_control_flow_remainder:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    v_mov_b32_e32 v9, 0
; GFX11-NEXT:    s_cmp_lg_u32 s0, 0
; GFX11-NEXT:    s_cbranch_scc0 .LBB8_2
; GFX11-NEXT:  ; %bb.1: ; %exit
; GFX11-NEXT:    global_store_b32 v[0:1], v9, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
; GFX11-NEXT:  .LBB8_2: ; %bb
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    v_mov_b32_e64 v7, -1
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX11-NEXT:    s_mov_b64 s[2:3], exec
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    v_mov_b32_e64 v8, -1
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    s_and_not1_b64 s[2:3], s[2:3], vcc
; GFX11-NEXT:    s_cbranch_scc0 .LBB8_4
; GFX11-NEXT:  ; %bb.3: ; %bb
; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT:    global_store_b32 v[0:1], v8, off dlc
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    v_mov_b32_e64 v9, -2
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    global_store_b32 v[0:1], v9, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
; GFX11-NEXT:  .LBB8_4:
; GFX11-NEXT:    s_mov_b64 exec, 0
; GFX11-NEXT:    exp mrt0 off, off, off, off done
; GFX11-NEXT:    s_endpgm
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb, label %exit

bb:
  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={v7}"()
  %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={v8}"()
  %cmp.var = fcmp olt float %var, 0.0
  ; TODO: We could do an early-exit here (the branch above is uniform!)
  call void @llvm.amdgcn.kill(i1 %cmp.var)
  store volatile float %live.across, float addrspace(1)* undef
  %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={v9}"()
  br label %exit

exit:
  %phi = phi float [ 0.0, %entry ], [ %live.out, %bb ]
  store float %phi, float addrspace(1)* undef
  ret void
}

; Kill in the entry block ahead of a branch; the float returned comes from
; either the inline asm in %bb or the constant 0.0.
define amdgpu_ps float @test_kill_control_flow_return(i32 inreg %arg) #0 {
; SI-LABEL: test_kill_control_flow_return:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_cmp_eq_u32 s0, 1
; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
; SI-NEXT:    s_mov_b64 s[2:3], exec
; SI-NEXT:    s_xor_b64 s[4:5], s[4:5], exec
; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], s[4:5]
; SI-NEXT:    s_cbranch_scc0 .LBB9_4
; SI-NEXT:  ; %bb.1: ; %entry
; SI-NEXT:    s_and_b64 exec, exec, s[2:3]
; SI-NEXT:    s_cmp_lg_u32 s0, 0
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_cbranch_scc0 .LBB9_3
; SI-NEXT:  ; %bb.2: ; %exit
; SI-NEXT:    s_branch .LBB9_5
; SI-NEXT:  .LBB9_3: ; %bb
; SI-NEXT:    ;;#ASMSTART
; SI-NEXT:    v_mov_b32_e64 v7, -1
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    ;;#ASMEND
; SI-NEXT:    v_mov_b32_e32 v0, v7
; SI-NEXT:    s_branch .LBB9_5
; SI-NEXT:  .LBB9_4:
; SI-NEXT:    s_mov_b64 exec, 0
; SI-NEXT:    exp null off, off, off, off done vm
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB9_5:
;
; GFX10-WAVE64-LABEL: test_kill_control_flow_return:
; GFX10-WAVE64:       ; %bb.0: ; %entry
; GFX10-WAVE64-NEXT:    s_cmp_eq_u32 s0, 1
; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
; GFX10-WAVE64-NEXT:    s_cselect_b64 s[4:5], -1, 0
; GFX10-WAVE64-NEXT:    s_xor_b64 s[4:5], s[4:5], exec
; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], s[4:5]
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB9_4
; GFX10-WAVE64-NEXT:  ; %bb.1: ; %entry
; GFX10-WAVE64-NEXT:    s_and_b64 exec, exec, s[2:3]
; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-WAVE64-NEXT:    s_cmp_lg_u32 s0, 0
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB9_3
; GFX10-WAVE64-NEXT:  ; %bb.2: ; %exit
; GFX10-WAVE64-NEXT:    s_branch .LBB9_5
; GFX10-WAVE64-NEXT:  .LBB9_3: ; %bb
; GFX10-WAVE64-NEXT:    ;;#ASMSTART
; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    ;;#ASMEND
; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, v7
; GFX10-WAVE64-NEXT:    s_branch .LBB9_5
; GFX10-WAVE64-NEXT:  .LBB9_4:
; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT:    s_endpgm
; GFX10-WAVE64-NEXT:  .LBB9_5:
;
; GFX10-WAVE32-LABEL: test_kill_control_flow_return:
; GFX10-WAVE32:       ; %bb.0: ; %entry
; GFX10-WAVE32-NEXT:    s_cmp_eq_u32 s0, 1
; GFX10-WAVE32-NEXT:    s_mov_b32 s1, exec_lo
; GFX10-WAVE32-NEXT:    s_cselect_b32 s2, -1, 0
; GFX10-WAVE32-NEXT:    s_xor_b32 s2, s2, exec_lo
; GFX10-WAVE32-NEXT:    s_andn2_b32 s1, s1, s2
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB9_4
; GFX10-WAVE32-NEXT:  ; %bb.1: ; %entry
; GFX10-WAVE32-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-WAVE32-NEXT:    s_cmp_lg_u32 s0, 0
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB9_3
; GFX10-WAVE32-NEXT:  ; %bb.2: ; %exit
; GFX10-WAVE32-NEXT:    s_branch .LBB9_5
; GFX10-WAVE32-NEXT:  .LBB9_3: ; %bb
; GFX10-WAVE32-NEXT:    ;;#ASMSTART
; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    ;;#ASMEND
; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, v7
; GFX10-WAVE32-NEXT:    s_branch .LBB9_5
; GFX10-WAVE32-NEXT:  .LBB9_4:
; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT:    s_endpgm
; GFX10-WAVE32-NEXT:  .LBB9_5:
;
; GFX11-LABEL: test_kill_control_flow_return:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_cmp_eq_u32 s0, 1
; GFX11-NEXT:    s_mov_b64 s[2:3], exec
; GFX11-NEXT:    s_cselect_b64 s[4:5], -1, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_xor_b64 s[4:5], s[4:5], exec
; GFX11-NEXT:    s_and_not1_b64 s[2:3], s[2:3], s[4:5]
; GFX11-NEXT:    s_cbranch_scc0 .LBB9_4
; GFX11-NEXT:  ; %bb.1: ; %entry
; GFX11-NEXT:    s_and_b64 exec, exec, s[2:3]
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_cmp_lg_u32 s0, 0
; GFX11-NEXT:    s_cbranch_scc0 .LBB9_3
; GFX11-NEXT:  ; %bb.2: ; %exit
; GFX11-NEXT:    s_branch .LBB9_5
; GFX11-NEXT:  .LBB9_3: ; %bb
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    v_mov_b32_e64 v7, -1
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    v_nop_e64
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    v_mov_b32_e32 v0, v7
; GFX11-NEXT:    s_branch .LBB9_5
; GFX11-NEXT:  .LBB9_4:
; GFX11-NEXT:    s_mov_b64 exec, 0
; GFX11-NEXT:    exp mrt0 off, off, off, off done
; GFX11-NEXT:    s_endpgm
; GFX11-NEXT:  .LBB9_5:
entry:
  %kill = icmp eq i32 %arg, 1
  %cmp = icmp eq i32 %arg, 0
  call void @llvm.amdgcn.kill(i1 %kill)
  br i1 %cmp, label %bb, label %exit

bb:
  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={v7}"()
  br label %exit

exit:
  %ret = phi float [ %var, %bb ], [ 0.0, %entry ]
  ret float %ret
}

define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 {
; SI-LABEL: test_kill_divergent_loop:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_mov_b64 s[0:1], exec
; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; SI-NEXT:    s_and_saveexec_b64 s[2:3], vcc
; SI-NEXT:    s_xor_b64 s[4:5], exec, s[2:3]
; SI-NEXT:    s_cbranch_execz .LBB10_4
; SI-NEXT:  ; %bb.1: ; %bb.preheader
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:  .LBB10_2: ; %bb
; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
; SI-NEXT:    ;;#ASMSTART
; SI-NEXT:    v_mov_b32_e64 v7, -1
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    ;;#ASMEND
; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT:    s_cbranch_scc0 .LBB10_5
; SI-NEXT:  ; %bb.3: ; %bb
; SI-NEXT:    ; in Loop: Header=BB10_2 Depth=1
; SI-NEXT:    s_andn2_b64 exec, exec, vcc
; SI-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; SI-NEXT:    s_cbranch_vccnz .LBB10_2
; SI-NEXT:  .LBB10_4: ; %Flow1
; SI-NEXT:    s_or_b64 exec, exec, s[4:5]
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 8
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB10_5:
; SI-NEXT:    s_mov_b64 exec, 0
; SI-NEXT:    exp null off, off, off, off done vm
; SI-NEXT:    s_endpgm
;
; GFX10-WAVE64-LABEL: test_kill_divergent_loop:
; GFX10-WAVE64:       ; %bb.0: ; %entry
; GFX10-WAVE64-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[2:3], vcc
; GFX10-WAVE64-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB10_3
; GFX10-WAVE64-NEXT:  .LBB10_1: ; %bb
; GFX10-WAVE64-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX10-WAVE64-NEXT:    ;;#ASMSTART
; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    ;;#ASMEND
; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB10_4
; GFX10-WAVE64-NEXT:  ; %bb.2: ; %bb
; GFX10-WAVE64-NEXT:    ; in Loop: Header=BB10_1 Depth=1
; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT:    global_load_dword v0, v[0:1], off glc dlc
; GFX10-WAVE64-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WAVE64-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT:    s_cbranch_vccnz .LBB10_1
; GFX10-WAVE64-NEXT:  .LBB10_3: ; %Flow1
; GFX10-WAVE64-NEXT:    s_or_b64 exec, exec, s[2:3]
; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 8
; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v0, off
; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WAVE64-NEXT:    s_endpgm
; GFX10-WAVE64-NEXT:  .LBB10_4:
; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT:    s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_divergent_loop:
; GFX10-WAVE32:
; %bb.0: ; %entry 1028; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1029; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo 1030; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s1, vcc_lo 1031; GFX10-WAVE32-NEXT: s_xor_b32 s1, exec_lo, s1 1032; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB10_3 1033; GFX10-WAVE32-NEXT: .LBB10_1: ; %bb 1034; GFX10-WAVE32-NEXT: ; =>This Inner Loop Header: Depth=1 1035; GFX10-WAVE32-NEXT: ;;#ASMSTART 1036; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1 1037; GFX10-WAVE32-NEXT: v_nop_e64 1038; GFX10-WAVE32-NEXT: v_nop_e64 1039; GFX10-WAVE32-NEXT: v_nop_e64 1040; GFX10-WAVE32-NEXT: v_nop_e64 1041; GFX10-WAVE32-NEXT: v_nop_e64 1042; GFX10-WAVE32-NEXT: v_nop_e64 1043; GFX10-WAVE32-NEXT: v_nop_e64 1044; GFX10-WAVE32-NEXT: v_nop_e64 1045; GFX10-WAVE32-NEXT: v_nop_e64 1046; GFX10-WAVE32-NEXT: v_nop_e64 1047; GFX10-WAVE32-NEXT: ;;#ASMEND 1048; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v7 1049; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo 1050; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB10_4 1051; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb 1052; GFX10-WAVE32-NEXT: ; in Loop: Header=BB10_1 Depth=1 1053; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo 1054; GFX10-WAVE32-NEXT: global_load_dword v0, v[0:1], off glc dlc 1055; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0) 1056; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1057; GFX10-WAVE32-NEXT: s_cbranch_vccnz .LBB10_1 1058; GFX10-WAVE32-NEXT: .LBB10_3: ; %Flow1 1059; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s1 1060; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 8 1061; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v0, off 1062; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0 1063; GFX10-WAVE32-NEXT: s_endpgm 1064; GFX10-WAVE32-NEXT: .LBB10_4: 1065; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1066; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm 1067; GFX10-WAVE32-NEXT: s_endpgm 1068; 1069; GFX11-LABEL: test_kill_divergent_loop: 1070; GFX11: ; %bb.0: ; %entry 1071; GFX11-NEXT: s_mov_b64 s[0:1], exec 1072; GFX11-NEXT: s_mov_b64 
s[2:3], exec 1073; GFX11-NEXT: v_cmpx_eq_u32_e32 0, v0 1074; GFX11-NEXT: s_xor_b64 s[2:3], exec, s[2:3] 1075; GFX11-NEXT: s_cbranch_execz .LBB10_3 1076; GFX11-NEXT: .LBB10_1: ; %bb 1077; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 1078; GFX11-NEXT: ;;#ASMSTART 1079; GFX11-NEXT: v_mov_b32_e64 v7, -1 1080; GFX11-NEXT: v_nop_e64 1081; GFX11-NEXT: v_nop_e64 1082; GFX11-NEXT: v_nop_e64 1083; GFX11-NEXT: v_nop_e64 1084; GFX11-NEXT: v_nop_e64 1085; GFX11-NEXT: v_nop_e64 1086; GFX11-NEXT: v_nop_e64 1087; GFX11-NEXT: v_nop_e64 1088; GFX11-NEXT: v_nop_e64 1089; GFX11-NEXT: v_nop_e64 1090; GFX11-NEXT: ;;#ASMEND 1091; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7 1092; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc 1093; GFX11-NEXT: s_cbranch_scc0 .LBB10_4 1094; GFX11-NEXT: ; %bb.2: ; %bb 1095; GFX11-NEXT: ; in Loop: Header=BB10_1 Depth=1 1096; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc 1097; GFX11-NEXT: global_load_b32 v0, v[0:1], off glc dlc 1098; GFX11-NEXT: s_waitcnt vmcnt(0) 1099; GFX11-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1100; GFX11-NEXT: s_cbranch_vccnz .LBB10_1 1101; GFX11-NEXT: .LBB10_3: ; %Flow1 1102; GFX11-NEXT: s_or_b64 exec, exec, s[2:3] 1103; GFX11-NEXT: v_mov_b32_e32 v0, 8 1104; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc 1105; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1106; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1107; GFX11-NEXT: s_endpgm 1108; GFX11-NEXT: .LBB10_4: 1109; GFX11-NEXT: s_mov_b64 exec, 0 1110; GFX11-NEXT: exp mrt0 off, off, off, off done 1111; GFX11-NEXT: s_endpgm 1112entry: 1113 %cmp = icmp eq i32 %arg, 0 1114 br i1 %cmp, label %bb, label %exit 1115 1116bb: 1117 %var = call float asm sideeffect "v_mov_b32_e64 v7, -1 1118 v_nop_e64 1119 v_nop_e64 1120 v_nop_e64 1121 v_nop_e64 1122 v_nop_e64 1123 v_nop_e64 1124 v_nop_e64 1125 v_nop_e64 1126 v_nop_e64 1127 v_nop_e64", "={v7}"() 1128 %cmp.var = fcmp olt float %var, 0.0 1129 call void @llvm.amdgcn.kill(i1 %cmp.var) 1130 %vgpr = load volatile i32, i32 addrspace(1)* undef 1131 %loop.cond = icmp 
eq i32 %vgpr, 0 1132 br i1 %loop.cond, label %bb, label %exit 1133 1134exit: 1135 store volatile i32 8, i32 addrspace(1)* undef 1136 ret void 1137} 1138 1139; bug 28550 1140define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 { 1141; SI-LABEL: phi_use_def_before_kill: 1142; SI: ; %bb.0: ; %bb 1143; SI-NEXT: v_add_f32_e64 v1, s0, 1.0 1144; SI-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1 1145; SI-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc 1146; SI-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1 1147; SI-NEXT: s_andn2_b64 exec, exec, vcc 1148; SI-NEXT: s_cbranch_scc0 .LBB11_6 1149; SI-NEXT: ; %bb.1: ; %bb 1150; SI-NEXT: s_andn2_b64 exec, exec, vcc 1151; SI-NEXT: s_cbranch_scc0 .LBB11_3 1152; SI-NEXT: ; %bb.2: ; %bb8 1153; SI-NEXT: s_mov_b32 s3, 0xf000 1154; SI-NEXT: s_mov_b32 s2, -1 1155; SI-NEXT: v_mov_b32_e32 v0, 8 1156; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 1157; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1158; SI-NEXT: v_mov_b32_e32 v0, 4.0 1159; SI-NEXT: .LBB11_3: ; %phibb 1160; SI-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0 1161; SI-NEXT: s_cbranch_vccz .LBB11_5 1162; SI-NEXT: ; %bb.4: ; %bb10 1163; SI-NEXT: s_mov_b32 s3, 0xf000 1164; SI-NEXT: s_mov_b32 s2, -1 1165; SI-NEXT: v_mov_b32_e32 v0, 9 1166; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 1167; SI-NEXT: s_waitcnt vmcnt(0) 1168; SI-NEXT: .LBB11_5: ; %end 1169; SI-NEXT: s_endpgm 1170; SI-NEXT: .LBB11_6: 1171; SI-NEXT: s_mov_b64 exec, 0 1172; SI-NEXT: exp null off, off, off, off done vm 1173; SI-NEXT: s_endpgm 1174; 1175; GFX10-WAVE64-LABEL: phi_use_def_before_kill: 1176; GFX10-WAVE64: ; %bb.0: ; %bb 1177; GFX10-WAVE64-NEXT: v_add_f32_e64 v1, s0, 1.0 1178; GFX10-WAVE64-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1 1179; GFX10-WAVE64-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc 1180; GFX10-WAVE64-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1 1181; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc 1182; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB11_6 1183; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb 1184; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc 1185; 
GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB11_3 1186; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb8 1187; GFX10-WAVE64-NEXT: v_mov_b32_e32 v1, 8 1188; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 4.0 1189; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v1, off 1190; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0 1191; GFX10-WAVE64-NEXT: .LBB11_3: ; %phibb 1192; GFX10-WAVE64-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0 1193; GFX10-WAVE64-NEXT: s_cbranch_vccz .LBB11_5 1194; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb10 1195; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 9 1196; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v0, off 1197; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0 1198; GFX10-WAVE64-NEXT: .LBB11_5: ; %end 1199; GFX10-WAVE64-NEXT: s_endpgm 1200; GFX10-WAVE64-NEXT: .LBB11_6: 1201; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 1202; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm 1203; GFX10-WAVE64-NEXT: s_endpgm 1204; 1205; GFX10-WAVE32-LABEL: phi_use_def_before_kill: 1206; GFX10-WAVE32: ; %bb.0: ; %bb 1207; GFX10-WAVE32-NEXT: v_add_f32_e64 v1, s0, 1.0 1208; GFX10-WAVE32-NEXT: v_cmp_lt_f32_e32 vcc_lo, 0, v1 1209; GFX10-WAVE32-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc_lo 1210; GFX10-WAVE32-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 0, v1 1211; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo 1212; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB11_6 1213; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb 1214; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo 1215; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB11_3 1216; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb8 1217; GFX10-WAVE32-NEXT: v_mov_b32_e32 v1, 8 1218; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 4.0 1219; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v1, off 1220; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0 1221; GFX10-WAVE32-NEXT: .LBB11_3: ; %phibb 1222; GFX10-WAVE32-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0 1223; GFX10-WAVE32-NEXT: s_cbranch_vccz .LBB11_5 1224; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb10 1225; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 9 1226; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v0, 
off 1227; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0 1228; GFX10-WAVE32-NEXT: .LBB11_5: ; %end 1229; GFX10-WAVE32-NEXT: s_endpgm 1230; GFX10-WAVE32-NEXT: .LBB11_6: 1231; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1232; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm 1233; GFX10-WAVE32-NEXT: s_endpgm 1234; 1235; GFX11-LABEL: phi_use_def_before_kill: 1236; GFX11: ; %bb.0: ; %bb 1237; GFX11-NEXT: v_add_f32_e64 v1, s0, 1.0 1238; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1239; GFX11-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1 1240; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc 1241; GFX11-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1 1242; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc 1243; GFX11-NEXT: s_cbranch_scc0 .LBB11_6 1244; GFX11-NEXT: ; %bb.1: ; %bb 1245; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc 1246; GFX11-NEXT: s_cbranch_scc0 .LBB11_3 1247; GFX11-NEXT: ; %bb.2: ; %bb8 1248; GFX11-NEXT: v_mov_b32_e32 v1, 8 1249; GFX11-NEXT: v_mov_b32_e32 v0, 4.0 1250; GFX11-NEXT: global_store_b32 v[0:1], v1, off dlc 1251; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1252; GFX11-NEXT: .LBB11_3: ; %phibb 1253; GFX11-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0 1254; GFX11-NEXT: s_cbranch_vccz .LBB11_5 1255; GFX11-NEXT: ; %bb.4: ; %bb10 1256; GFX11-NEXT: v_mov_b32_e32 v0, 9 1257; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc 1258; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1259; GFX11-NEXT: .LBB11_5: ; %end 1260; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1261; GFX11-NEXT: s_endpgm 1262; GFX11-NEXT: .LBB11_6: 1263; GFX11-NEXT: s_mov_b64 exec, 0 1264; GFX11-NEXT: exp mrt0 off, off, off, off done 1265; GFX11-NEXT: s_endpgm 1266bb: 1267 %tmp = fadd float %x, 1.000000e+00 1268 %tmp1 = fcmp olt float 0.000000e+00, %tmp 1269 %tmp2 = select i1 %tmp1, float -1.000000e+00, float 0.000000e+00 1270 %cmp.tmp2 = fcmp olt float %tmp2, 0.0 1271 call void @llvm.amdgcn.kill(i1 %cmp.tmp2) 1272 br i1 undef, label %phibb, label %bb8 1273 1274phibb: 1275 %tmp5 = phi float [ %tmp2, %bb ], [ 4.0, %bb8 ] 1276 %tmp6 = fcmp oeq 
float %tmp5, 0.000000e+00 1277 br i1 %tmp6, label %bb10, label %end 1278 1279bb8: 1280 store volatile i32 8, i32 addrspace(1)* undef 1281 br label %phibb 1282 1283bb10: 1284 store volatile i32 9, i32 addrspace(1)* undef 1285 br label %end 1286 1287end: 1288 ret void 1289} 1290 1291define amdgpu_ps void @no_skip_no_successors(float inreg %arg, float inreg %arg1) #0 { 1292; SI-LABEL: no_skip_no_successors: 1293; SI: ; %bb.0: ; %bb 1294; SI-NEXT: v_cmp_nge_f32_e64 s[4:5], s1, 0 1295; SI-NEXT: s_and_b64 vcc, exec, s[4:5] 1296; SI-NEXT: s_cbranch_vccz .LBB12_3 1297; SI-NEXT: ; %bb.1: ; %bb6 1298; SI-NEXT: s_mov_b64 s[2:3], exec 1299; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], exec 1300; SI-NEXT: s_cbranch_scc0 .LBB12_5 1301; SI-NEXT: ; %bb.2: ; %bb6 1302; SI-NEXT: s_mov_b64 exec, 0 1303; SI-NEXT: .LBB12_3: ; %bb3 1304; SI-NEXT: v_mov_b32_e32 v0, 0x3e7ae148 1305; SI-NEXT: v_cmp_nge_f32_e32 vcc, s0, v0 1306; SI-NEXT: s_and_b64 vcc, exec, vcc 1307; SI-NEXT: ; %bb.4: ; %bb5 1308; SI-NEXT: .LBB12_5: 1309; SI-NEXT: s_mov_b64 exec, 0 1310; SI-NEXT: exp null off, off, off, off done vm 1311; SI-NEXT: s_endpgm 1312; 1313; GFX10-WAVE64-LABEL: no_skip_no_successors: 1314; GFX10-WAVE64: ; %bb.0: ; %bb 1315; GFX10-WAVE64-NEXT: v_cmp_nge_f32_e64 s[4:5], s1, 0 1316; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, s[4:5] 1317; GFX10-WAVE64-NEXT: s_cbranch_vccz .LBB12_3 1318; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb6 1319; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec 1320; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], exec 1321; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB12_5 1322; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb6 1323; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 1324; GFX10-WAVE64-NEXT: .LBB12_3: ; %bb3 1325; GFX10-WAVE64-NEXT: v_cmp_nle_f32_e64 s[0:1], 0x3e7ae148, s0 1326; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, s[0:1] 1327; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb5 1328; GFX10-WAVE64-NEXT: .LBB12_5: 1329; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 1330; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm 1331; 
GFX10-WAVE64-NEXT: s_endpgm 1332; 1333; GFX10-WAVE32-LABEL: no_skip_no_successors: 1334; GFX10-WAVE32: ; %bb.0: ; %bb 1335; GFX10-WAVE32-NEXT: v_cmp_nge_f32_e64 s1, s1, 0 1336; GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, s1 1337; GFX10-WAVE32-NEXT: s_cbranch_vccz .LBB12_3 1338; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb6 1339; GFX10-WAVE32-NEXT: s_mov_b32 s2, exec_lo 1340; GFX10-WAVE32-NEXT: s_andn2_b32 s2, s2, exec_lo 1341; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB12_5 1342; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb6 1343; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1344; GFX10-WAVE32-NEXT: .LBB12_3: ; %bb3 1345; GFX10-WAVE32-NEXT: v_cmp_nle_f32_e64 s0, 0x3e7ae148, s0 1346; GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, s0 1347; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb5 1348; GFX10-WAVE32-NEXT: .LBB12_5: 1349; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1350; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm 1351; GFX10-WAVE32-NEXT: s_endpgm 1352; 1353; GFX11-LABEL: no_skip_no_successors: 1354; GFX11: ; %bb.0: ; %bb 1355; GFX11-NEXT: v_cmp_nge_f32_e64 s[4:5], s1, 0 1356; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1357; GFX11-NEXT: s_and_b64 vcc, exec, s[4:5] 1358; GFX11-NEXT: s_cbranch_vccz .LBB12_3 1359; GFX11-NEXT: ; %bb.1: ; %bb6 1360; GFX11-NEXT: s_mov_b64 s[2:3], exec 1361; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1362; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], exec 1363; GFX11-NEXT: s_cbranch_scc0 .LBB12_5 1364; GFX11-NEXT: ; %bb.2: ; %bb6 1365; GFX11-NEXT: s_mov_b64 exec, 0 1366; GFX11-NEXT: .LBB12_3: ; %bb3 1367; GFX11-NEXT: v_cmp_nle_f32_e64 s[0:1], 0x3e7ae148, s0 1368; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1369; GFX11-NEXT: s_and_b64 vcc, exec, s[0:1] 1370; GFX11-NEXT: ; %bb.4: ; %bb5 1371; GFX11-NEXT: .LBB12_5: 1372; GFX11-NEXT: s_mov_b64 exec, 0 1373; GFX11-NEXT: exp mrt0 off, off, off, off done 1374; GFX11-NEXT: s_endpgm 1375bb: 1376 %tmp = fcmp ult float %arg1, 0.000000e+00 1377 br i1 %tmp, label %bb6, label %bb3 1378 1379bb3: ; preds = %bb 1380 %tmp2 = fcmp ult 
float %arg, 0x3FCF5C2900000000 1381 br i1 %tmp2, label %bb5, label %bb4 1382 1383bb4: ; preds = %bb3 1384 br i1 true, label %bb5, label %bb7 1385 1386bb5: ; preds = %bb4, %bb3 1387 unreachable 1388 1389bb6: ; preds = %bb 1390 call void @llvm.amdgcn.kill(i1 false) 1391 unreachable 1392 1393bb7: ; preds = %bb4 1394 ret void 1395} 1396 1397define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2, float %arg3) #0 { 1398; SI-LABEL: if_after_kill_block: 1399; SI: ; %bb.0: ; %bb 1400; SI-NEXT: s_mov_b64 s[0:1], exec 1401; SI-NEXT: s_wqm_b64 exec, exec 1402; SI-NEXT: v_cmp_nle_f32_e32 vcc, 0, v1 1403; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc 1404; SI-NEXT: s_xor_b64 s[2:3], exec, s[2:3] 1405; SI-NEXT: s_cbranch_execz .LBB13_3 1406; SI-NEXT: ; %bb.1: ; %bb3 1407; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0 1408; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc 1409; SI-NEXT: s_cbranch_scc0 .LBB13_6 1410; SI-NEXT: ; %bb.2: ; %bb3 1411; SI-NEXT: s_andn2_b64 exec, exec, vcc 1412; SI-NEXT: .LBB13_3: ; %bb4 1413; SI-NEXT: s_or_b64 exec, exec, s[2:3] 1414; SI-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 1415; SI-NEXT: s_waitcnt vmcnt(0) 1416; SI-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 1417; SI-NEXT: s_and_saveexec_b64 s[0:1], vcc 1418; SI-NEXT: s_cbranch_execz .LBB13_5 1419; SI-NEXT: ; %bb.4: ; %bb8 1420; SI-NEXT: s_mov_b32 s3, 0xf000 1421; SI-NEXT: s_mov_b32 s2, -1 1422; SI-NEXT: v_mov_b32_e32 v0, 9 1423; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 1424; SI-NEXT: s_waitcnt vmcnt(0) 1425; SI-NEXT: .LBB13_5: ; %UnifiedReturnBlock 1426; SI-NEXT: s_endpgm 1427; SI-NEXT: .LBB13_6: 1428; SI-NEXT: s_mov_b64 exec, 0 1429; SI-NEXT: exp null off, off, off, off done vm 1430; SI-NEXT: s_endpgm 1431; 1432; GFX10-WAVE64-LABEL: if_after_kill_block: 1433; GFX10-WAVE64: ; %bb.0: ; %bb 1434; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec 1435; GFX10-WAVE64-NEXT: s_wqm_b64 exec, exec 1436; GFX10-WAVE64-NEXT: v_cmp_nle_f32_e32 vcc, 0, v1 1437; GFX10-WAVE64-NEXT: s_and_saveexec_b64 
s[2:3], vcc 1438; GFX10-WAVE64-NEXT: s_xor_b64 s[2:3], exec, s[2:3] 1439; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB13_3 1440; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb3 1441; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0 1442; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc 1443; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB13_6 1444; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb3 1445; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc 1446; GFX10-WAVE64-NEXT: .LBB13_3: ; %bb4 1447; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[2:3] 1448; GFX10-WAVE64-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D 1449; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0) 1450; GFX10-WAVE64-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 1451; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[0:1], vcc 1452; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB13_5 1453; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb8 1454; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 9 1455; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v0, off 1456; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0 1457; GFX10-WAVE64-NEXT: .LBB13_5: ; %UnifiedReturnBlock 1458; GFX10-WAVE64-NEXT: s_endpgm 1459; GFX10-WAVE64-NEXT: .LBB13_6: 1460; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 1461; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm 1462; GFX10-WAVE64-NEXT: s_endpgm 1463; 1464; GFX10-WAVE32-LABEL: if_after_kill_block: 1465; GFX10-WAVE32: ; %bb.0: ; %bb 1466; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo 1467; GFX10-WAVE32-NEXT: s_wqm_b32 exec_lo, exec_lo 1468; GFX10-WAVE32-NEXT: v_cmp_nle_f32_e32 vcc_lo, 0, v1 1469; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s1, vcc_lo 1470; GFX10-WAVE32-NEXT: s_xor_b32 s1, exec_lo, s1 1471; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB13_3 1472; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb3 1473; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0 1474; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo 1475; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB13_6 1476; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb3 1477; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo 1478; GFX10-WAVE32-NEXT: 
.LBB13_3: ; %bb4 1479; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s1 1480; GFX10-WAVE32-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D 1481; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0) 1482; GFX10-WAVE32-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0, v0 1483; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s0, vcc_lo 1484; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB13_5 1485; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb8 1486; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 9 1487; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v0, off 1488; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0 1489; GFX10-WAVE32-NEXT: .LBB13_5: ; %UnifiedReturnBlock 1490; GFX10-WAVE32-NEXT: s_endpgm 1491; GFX10-WAVE32-NEXT: .LBB13_6: 1492; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1493; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm 1494; GFX10-WAVE32-NEXT: s_endpgm 1495; 1496; GFX11-LABEL: if_after_kill_block: 1497; GFX11: ; %bb.0: ; %bb 1498; GFX11-NEXT: s_mov_b64 s[0:1], exec 1499; GFX11-NEXT: s_wqm_b64 exec, exec 1500; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1501; GFX11-NEXT: s_mov_b64 s[2:3], exec 1502; GFX11-NEXT: v_cmpx_nle_f32_e32 0, v1 1503; GFX11-NEXT: s_xor_b64 s[2:3], exec, s[2:3] 1504; GFX11-NEXT: s_cbranch_execz .LBB13_3 1505; GFX11-NEXT: ; %bb.1: ; %bb3 1506; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0 1507; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc 1508; GFX11-NEXT: s_cbranch_scc0 .LBB13_6 1509; GFX11-NEXT: ; %bb.2: ; %bb3 1510; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc 1511; GFX11-NEXT: .LBB13_3: ; %bb4 1512; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1513; GFX11-NEXT: s_or_b64 exec, exec, s[2:3] 1514; GFX11-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D 1515; GFX11-NEXT: s_mov_b64 s[0:1], exec 1516; GFX11-NEXT: s_waitcnt vmcnt(0) 1517; GFX11-NEXT: v_cmpx_neq_f32_e32 0, v0 1518; GFX11-NEXT: s_cbranch_execz .LBB13_5 1519; GFX11-NEXT: ; %bb.4: ; %bb8 1520; GFX11-NEXT: v_mov_b32_e32 v0, 9 1521; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc 1522; 
GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1523; GFX11-NEXT: .LBB13_5: ; %UnifiedReturnBlock 1524; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1525; GFX11-NEXT: s_endpgm 1526; GFX11-NEXT: .LBB13_6: 1527; GFX11-NEXT: s_mov_b64 exec, 0 1528; GFX11-NEXT: exp mrt0 off, off, off, off done 1529; GFX11-NEXT: s_endpgm 1530bb: 1531 %tmp = fcmp ult float %arg1, 0.000000e+00 1532 br i1 %tmp, label %bb3, label %bb4 1533 1534bb3: ; preds = %bb 1535 %cmp.arg = fcmp olt float %arg, 0.0 1536 call void @llvm.amdgcn.kill(i1 %cmp.arg) 1537 br label %bb4 1538 1539bb4: ; preds = %bb3, %bb 1540 %tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 16, float %arg2, float %arg3, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) 1541 %tmp6 = extractelement <4 x float> %tmp5, i32 0 1542 %tmp7 = fcmp une float %tmp6, 0.000000e+00 1543 br i1 %tmp7, label %bb8, label %bb9 1544 1545bb8: ; preds = %bb4 1546 store volatile i32 9, i32 addrspace(1)* undef 1547 ret void 1548 1549bb9: ; preds = %bb4 1550 ret void 1551} 1552 1553define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) { 1554; SI-LABEL: cbranch_kill: 1555; SI: ; %bb.0: ; %.entry 1556; SI-NEXT: s_mov_b64 s[0:1], exec 1557; SI-NEXT: v_mov_b32_e32 v4, 0 1558; SI-NEXT: v_mov_b32_e32 v2, v1 1559; SI-NEXT: v_mov_b32_e32 v3, v1 1560; SI-NEXT: image_sample_l v1, v[1:4], s[0:7], s[0:3] dmask:0x1 da 1561; SI-NEXT: s_waitcnt vmcnt(0) 1562; SI-NEXT: v_cmp_ge_f32_e32 vcc, 0, v1 1563; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc 1564; SI-NEXT: s_xor_b64 s[2:3], exec, s[2:3] 1565; SI-NEXT: s_cbranch_execz .LBB14_3 1566; SI-NEXT: ; %bb.1: ; %kill 1567; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec 1568; SI-NEXT: ; implicit-def: $vgpr0 1569; SI-NEXT: ; implicit-def: $vgpr1 1570; SI-NEXT: s_cbranch_scc0 .LBB14_6 1571; SI-NEXT: ; %bb.2: ; %kill 1572; SI-NEXT: s_mov_b64 exec, 0 1573; SI-NEXT: .LBB14_3: ; %Flow 1574; SI-NEXT: s_or_saveexec_b64 s[0:1], s[2:3] 1575; SI-NEXT: ; implicit-def: $vgpr2 1576; SI-NEXT: s_xor_b64 
exec, exec, s[0:1] 1577; SI-NEXT: ; %bb.4: ; %live 1578; SI-NEXT: v_mul_f32_e32 v2, v0, v1 1579; SI-NEXT: ; %bb.5: ; %export 1580; SI-NEXT: s_or_b64 exec, exec, s[0:1] 1581; SI-NEXT: exp mrt0 v2, v2, v2, v2 done vm 1582; SI-NEXT: s_endpgm 1583; SI-NEXT: .LBB14_6: 1584; SI-NEXT: s_mov_b64 exec, 0 1585; SI-NEXT: exp null off, off, off, off done vm 1586; SI-NEXT: s_endpgm 1587; 1588; GFX10-WAVE64-LABEL: cbranch_kill: 1589; GFX10-WAVE64: ; %bb.0: ; %.entry 1590; GFX10-WAVE64-NEXT: v_mov_b32_e32 v2, 0 1591; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec 1592; GFX10-WAVE64-NEXT: image_sample_l v1, [v1, v1, v1, v2], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY 1593; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0) 1594; GFX10-WAVE64-NEXT: v_cmp_ge_f32_e32 vcc, 0, v1 1595; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[2:3], vcc 1596; GFX10-WAVE64-NEXT: s_xor_b64 s[2:3], exec, s[2:3] 1597; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB14_3 1598; GFX10-WAVE64-NEXT: ; %bb.1: ; %kill 1599; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec 1600; GFX10-WAVE64-NEXT: ; implicit-def: $vgpr0 1601; GFX10-WAVE64-NEXT: ; implicit-def: $vgpr1 1602; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB14_6 1603; GFX10-WAVE64-NEXT: ; %bb.2: ; %kill 1604; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 1605; GFX10-WAVE64-NEXT: .LBB14_3: ; %Flow 1606; GFX10-WAVE64-NEXT: s_or_saveexec_b64 s[0:1], s[2:3] 1607; GFX10-WAVE64-NEXT: ; implicit-def: $vgpr2 1608; GFX10-WAVE64-NEXT: s_xor_b64 exec, exec, s[0:1] 1609; GFX10-WAVE64-NEXT: ; %bb.4: ; %live 1610; GFX10-WAVE64-NEXT: v_mul_f32_e32 v2, v0, v1 1611; GFX10-WAVE64-NEXT: ; %bb.5: ; %export 1612; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[0:1] 1613; GFX10-WAVE64-NEXT: exp mrt0 v2, v2, v2, v2 done vm 1614; GFX10-WAVE64-NEXT: s_endpgm 1615; GFX10-WAVE64-NEXT: .LBB14_6: 1616; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 1617; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm 1618; GFX10-WAVE64-NEXT: s_endpgm 1619; 1620; GFX10-WAVE32-LABEL: cbranch_kill: 1621; GFX10-WAVE32: ; %bb.0: ; %.entry 1622; 
GFX10-WAVE32-NEXT: v_mov_b32_e32 v2, 0 1623; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo 1624; GFX10-WAVE32-NEXT: image_sample_l v1, [v1, v1, v1, v2], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY 1625; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0) 1626; GFX10-WAVE32-NEXT: v_cmp_ge_f32_e32 vcc_lo, 0, v1 1627; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s1, vcc_lo 1628; GFX10-WAVE32-NEXT: s_xor_b32 s1, exec_lo, s1 1629; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB14_3 1630; GFX10-WAVE32-NEXT: ; %bb.1: ; %kill 1631; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, exec_lo 1632; GFX10-WAVE32-NEXT: ; implicit-def: $vgpr0 1633; GFX10-WAVE32-NEXT: ; implicit-def: $vgpr1 1634; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB14_6 1635; GFX10-WAVE32-NEXT: ; %bb.2: ; %kill 1636; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1637; GFX10-WAVE32-NEXT: .LBB14_3: ; %Flow 1638; GFX10-WAVE32-NEXT: s_or_saveexec_b32 s0, s1 1639; GFX10-WAVE32-NEXT: ; implicit-def: $vgpr2 1640; GFX10-WAVE32-NEXT: s_xor_b32 exec_lo, exec_lo, s0 1641; GFX10-WAVE32-NEXT: ; %bb.4: ; %live 1642; GFX10-WAVE32-NEXT: v_mul_f32_e32 v2, v0, v1 1643; GFX10-WAVE32-NEXT: ; %bb.5: ; %export 1644; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s0 1645; GFX10-WAVE32-NEXT: exp mrt0 v2, v2, v2, v2 done vm 1646; GFX10-WAVE32-NEXT: s_endpgm 1647; GFX10-WAVE32-NEXT: .LBB14_6: 1648; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1649; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm 1650; GFX10-WAVE32-NEXT: s_endpgm 1651; 1652; GFX11-LABEL: cbranch_kill: 1653; GFX11: ; %bb.0: ; %.entry 1654; GFX11-NEXT: v_mov_b32_e32 v2, 0 1655; GFX11-NEXT: s_mov_b64 s[0:1], exec 1656; GFX11-NEXT: image_sample_l v1, [v1, v1, v1, v2], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY 1657; GFX11-NEXT: s_mov_b64 s[2:3], exec 1658; GFX11-NEXT: s_waitcnt vmcnt(0) 1659; GFX11-NEXT: v_cmpx_ge_f32_e32 0, v1 1660; GFX11-NEXT: s_xor_b64 s[2:3], exec, s[2:3] 1661; GFX11-NEXT: s_cbranch_execz .LBB14_3 1662; GFX11-NEXT: ; %bb.1: ; %kill 1663; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], exec 1664; 
GFX11-NEXT: ; implicit-def: $vgpr0 1665; GFX11-NEXT: ; implicit-def: $vgpr1 1666; GFX11-NEXT: s_cbranch_scc0 .LBB14_6 1667; GFX11-NEXT: ; %bb.2: ; %kill 1668; GFX11-NEXT: s_mov_b64 exec, 0 1669; GFX11-NEXT: .LBB14_3: ; %Flow 1670; GFX11-NEXT: s_or_saveexec_b64 s[0:1], s[2:3] 1671; GFX11-NEXT: ; implicit-def: $vgpr2 1672; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1673; GFX11-NEXT: s_xor_b64 exec, exec, s[0:1] 1674; GFX11-NEXT: ; %bb.4: ; %live 1675; GFX11-NEXT: v_mul_f32_e32 v2, v0, v1 1676; GFX11-NEXT: ; %bb.5: ; %export 1677; GFX11-NEXT: s_or_b64 exec, exec, s[0:1] 1678; GFX11-NEXT: exp mrt0 v2, v2, v2, v2 done 1679; GFX11-NEXT: s_endpgm 1680; GFX11-NEXT: .LBB14_6: 1681; GFX11-NEXT: s_mov_b64 exec, 0 1682; GFX11-NEXT: exp mrt0 off, off, off, off done 1683; GFX11-NEXT: s_endpgm 1684.entry: 1685 %sample = call float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 1, float %val1, float %val1, float %val1, float 0.000000e+00, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0) 1686 %cond0 = fcmp ugt float %sample, 0.000000e+00 1687 br i1 %cond0, label %live, label %kill 1688 1689kill: 1690 call void @llvm.amdgcn.kill(i1 false) 1691 br label %export 1692 1693live: 1694 %scale = fmul reassoc nnan nsz arcp contract float %val0, %sample 1695 br label %export 1696 1697export: 1698 %proxy = phi float [ undef, %kill ], [ %scale, %live ] 1699 call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 15, float %proxy, float %proxy, float %proxy, float %proxy, i1 immarg true, i1 immarg true) #3 1700 ret void 1701} 1702 1703 1704define amdgpu_ps void @complex_loop(i32 inreg %cmpa, i32 %cmpb, i32 %cmpc) { 1705; SI-LABEL: complex_loop: 1706; SI: ; %bb.0: ; %.entry 1707; SI-NEXT: s_cmp_lt_i32 s0, 1 1708; SI-NEXT: s_cbranch_scc1 .LBB15_7 1709; SI-NEXT: ; %bb.1: ; %.lr.ph 1710; SI-NEXT: s_mov_b64 s[2:3], exec 1711; SI-NEXT: s_mov_b32 s6, 0 1712; SI-NEXT: s_mov_b64 s[0:1], 0 1713; SI-NEXT: s_branch .LBB15_3 1714; SI-NEXT: .LBB15_2: ; %latch 1715; SI-NEXT: ; in Loop: 
Header=BB15_3 Depth=1 1716; SI-NEXT: s_or_b64 exec, exec, s[4:5] 1717; SI-NEXT: s_add_i32 s6, s6, 1 1718; SI-NEXT: v_cmp_ge_i32_e32 vcc, s6, v1 1719; SI-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 1720; SI-NEXT: v_mov_b32_e32 v2, s6 1721; SI-NEXT: s_andn2_b64 exec, exec, s[0:1] 1722; SI-NEXT: s_cbranch_execz .LBB15_6 1723; SI-NEXT: .LBB15_3: ; %hdr 1724; SI-NEXT: ; =>This Inner Loop Header: Depth=1 1725; SI-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0 1726; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc 1727; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] 1728; SI-NEXT: s_cbranch_execz .LBB15_2 1729; SI-NEXT: ; %bb.4: ; %kill 1730; SI-NEXT: ; in Loop: Header=BB15_3 Depth=1 1731; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], exec 1732; SI-NEXT: s_cbranch_scc0 .LBB15_8 1733; SI-NEXT: ; %bb.5: ; %kill 1734; SI-NEXT: ; in Loop: Header=BB15_3 Depth=1 1735; SI-NEXT: s_mov_b64 exec, 0 1736; SI-NEXT: s_branch .LBB15_2 1737; SI-NEXT: .LBB15_6: ; %Flow 1738; SI-NEXT: s_or_b64 exec, exec, s[0:1] 1739; SI-NEXT: exp mrt0 v2, v2, v0, v0 done vm 1740; SI-NEXT: s_endpgm 1741; SI-NEXT: .LBB15_7: 1742; SI-NEXT: v_mov_b32_e32 v2, -1 1743; SI-NEXT: exp mrt0 v2, v2, v0, v0 done vm 1744; SI-NEXT: s_endpgm 1745; SI-NEXT: .LBB15_8: 1746; SI-NEXT: s_mov_b64 exec, 0 1747; SI-NEXT: exp null off, off, off, off done vm 1748; SI-NEXT: s_endpgm 1749; 1750; GFX10-WAVE64-LABEL: complex_loop: 1751; GFX10-WAVE64: ; %bb.0: ; %.entry 1752; GFX10-WAVE64-NEXT: s_cmp_lt_i32 s0, 1 1753; GFX10-WAVE64-NEXT: s_cbranch_scc1 .LBB15_7 1754; GFX10-WAVE64-NEXT: ; %bb.1: ; %.lr.ph 1755; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec 1756; GFX10-WAVE64-NEXT: s_mov_b32 s6, 0 1757; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], 0 1758; GFX10-WAVE64-NEXT: s_branch .LBB15_3 1759; GFX10-WAVE64-NEXT: .LBB15_2: ; %latch 1760; GFX10-WAVE64-NEXT: ; in Loop: Header=BB15_3 Depth=1 1761; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[4:5] 1762; GFX10-WAVE64-NEXT: s_add_i32 s6, s6, 1 1763; GFX10-WAVE64-NEXT: v_cmp_ge_i32_e32 vcc, s6, v1 1764; GFX10-WAVE64-NEXT: v_mov_b32_e32 v2, s6 1765; 
GFX10-WAVE64-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 1766; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, s[0:1] 1767; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB15_6 1768; GFX10-WAVE64-NEXT: .LBB15_3: ; %hdr 1769; GFX10-WAVE64-NEXT: ; =>This Inner Loop Header: Depth=1 1770; GFX10-WAVE64-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0 1771; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[4:5], vcc 1772; GFX10-WAVE64-NEXT: s_xor_b64 s[4:5], exec, s[4:5] 1773; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB15_2 1774; GFX10-WAVE64-NEXT: ; %bb.4: ; %kill 1775; GFX10-WAVE64-NEXT: ; in Loop: Header=BB15_3 Depth=1 1776; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], exec 1777; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB15_8 1778; GFX10-WAVE64-NEXT: ; %bb.5: ; %kill 1779; GFX10-WAVE64-NEXT: ; in Loop: Header=BB15_3 Depth=1 1780; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 1781; GFX10-WAVE64-NEXT: s_branch .LBB15_2 1782; GFX10-WAVE64-NEXT: .LBB15_6: ; %Flow 1783; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[0:1] 1784; GFX10-WAVE64-NEXT: exp mrt0 v2, v2, v0, v0 done vm 1785; GFX10-WAVE64-NEXT: s_endpgm 1786; GFX10-WAVE64-NEXT: .LBB15_7: 1787; GFX10-WAVE64-NEXT: v_mov_b32_e32 v2, -1 1788; GFX10-WAVE64-NEXT: exp mrt0 v2, v2, v0, v0 done vm 1789; GFX10-WAVE64-NEXT: s_endpgm 1790; GFX10-WAVE64-NEXT: .LBB15_8: 1791; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 1792; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm 1793; GFX10-WAVE64-NEXT: s_endpgm 1794; 1795; GFX10-WAVE32-LABEL: complex_loop: 1796; GFX10-WAVE32: ; %bb.0: ; %.entry 1797; GFX10-WAVE32-NEXT: s_cmp_lt_i32 s0, 1 1798; GFX10-WAVE32-NEXT: s_cbranch_scc1 .LBB15_7 1799; GFX10-WAVE32-NEXT: ; %bb.1: ; %.lr.ph 1800; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo 1801; GFX10-WAVE32-NEXT: s_mov_b32 s0, 0 1802; GFX10-WAVE32-NEXT: s_mov_b32 s2, 0 1803; GFX10-WAVE32-NEXT: s_branch .LBB15_3 1804; GFX10-WAVE32-NEXT: .LBB15_2: ; %latch 1805; GFX10-WAVE32-NEXT: ; in Loop: Header=BB15_3 Depth=1 1806; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s3 1807; GFX10-WAVE32-NEXT: s_add_i32 s2, s2, 1 1808; 
GFX10-WAVE32-NEXT: v_cmp_ge_i32_e32 vcc_lo, s2, v1 1809; GFX10-WAVE32-NEXT: v_mov_b32_e32 v2, s2 1810; GFX10-WAVE32-NEXT: s_or_b32 s0, vcc_lo, s0 1811; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, s0 1812; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB15_6 1813; GFX10-WAVE32-NEXT: .LBB15_3: ; %hdr 1814; GFX10-WAVE32-NEXT: ; =>This Inner Loop Header: Depth=1 1815; GFX10-WAVE32-NEXT: v_cmp_gt_u32_e32 vcc_lo, s2, v0 1816; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s3, vcc_lo 1817; GFX10-WAVE32-NEXT: s_xor_b32 s3, exec_lo, s3 1818; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB15_2 1819; GFX10-WAVE32-NEXT: ; %bb.4: ; %kill 1820; GFX10-WAVE32-NEXT: ; in Loop: Header=BB15_3 Depth=1 1821; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, exec_lo 1822; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB15_8 1823; GFX10-WAVE32-NEXT: ; %bb.5: ; %kill 1824; GFX10-WAVE32-NEXT: ; in Loop: Header=BB15_3 Depth=1 1825; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1826; GFX10-WAVE32-NEXT: s_branch .LBB15_2 1827; GFX10-WAVE32-NEXT: .LBB15_6: ; %Flow 1828; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s0 1829; GFX10-WAVE32-NEXT: exp mrt0 v2, v2, v0, v0 done vm 1830; GFX10-WAVE32-NEXT: s_endpgm 1831; GFX10-WAVE32-NEXT: .LBB15_7: 1832; GFX10-WAVE32-NEXT: v_mov_b32_e32 v2, -1 1833; GFX10-WAVE32-NEXT: exp mrt0 v2, v2, v0, v0 done vm 1834; GFX10-WAVE32-NEXT: s_endpgm 1835; GFX10-WAVE32-NEXT: .LBB15_8: 1836; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1837; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm 1838; GFX10-WAVE32-NEXT: s_endpgm 1839; 1840; GFX11-LABEL: complex_loop: 1841; GFX11: ; %bb.0: ; %.entry 1842; GFX11-NEXT: s_cmp_lt_i32 s0, 1 1843; GFX11-NEXT: s_cbranch_scc1 .LBB15_7 1844; GFX11-NEXT: ; %bb.1: ; %.lr.ph 1845; GFX11-NEXT: s_mov_b64 s[2:3], exec 1846; GFX11-NEXT: s_mov_b32 s6, 0 1847; GFX11-NEXT: s_mov_b64 s[0:1], 0 1848; GFX11-NEXT: s_branch .LBB15_3 1849; GFX11-NEXT: .LBB15_2: ; %latch 1850; GFX11-NEXT: ; in Loop: Header=BB15_3 Depth=1 1851; GFX11-NEXT: s_or_b64 exec, exec, s[4:5] 1852; GFX11-NEXT: s_add_i32 s6, 
s6, 1 1853; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) 1854; GFX11-NEXT: v_cmp_ge_i32_e32 vcc, s6, v1 1855; GFX11-NEXT: v_mov_b32_e32 v2, s6 1856; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 1857; GFX11-NEXT: s_and_not1_b64 exec, exec, s[0:1] 1858; GFX11-NEXT: s_cbranch_execz .LBB15_6 1859; GFX11-NEXT: .LBB15_3: ; %hdr 1860; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 1861; GFX11-NEXT: s_mov_b64 s[4:5], exec 1862; GFX11-NEXT: v_cmpx_gt_u32_e64 s6, v0 1863; GFX11-NEXT: s_xor_b64 s[4:5], exec, s[4:5] 1864; GFX11-NEXT: s_cbranch_execz .LBB15_2 1865; GFX11-NEXT: ; %bb.4: ; %kill 1866; GFX11-NEXT: ; in Loop: Header=BB15_3 Depth=1 1867; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], exec 1868; GFX11-NEXT: s_cbranch_scc0 .LBB15_8 1869; GFX11-NEXT: ; %bb.5: ; %kill 1870; GFX11-NEXT: ; in Loop: Header=BB15_3 Depth=1 1871; GFX11-NEXT: s_mov_b64 exec, 0 1872; GFX11-NEXT: s_branch .LBB15_2 1873; GFX11-NEXT: .LBB15_6: ; %Flow 1874; GFX11-NEXT: s_or_b64 exec, exec, s[0:1] 1875; GFX11-NEXT: exp mrt0 v2, v2, v0, v0 done 1876; GFX11-NEXT: s_endpgm 1877; GFX11-NEXT: .LBB15_7: 1878; GFX11-NEXT: v_mov_b32_e32 v2, -1 1879; GFX11-NEXT: exp mrt0 v2, v2, v0, v0 done 1880; GFX11-NEXT: s_endpgm 1881; GFX11-NEXT: .LBB15_8: 1882; GFX11-NEXT: s_mov_b64 exec, 0 1883; GFX11-NEXT: exp mrt0 off, off, off, off done 1884; GFX11-NEXT: s_endpgm 1885.entry: 1886 %flaga = icmp sgt i32 %cmpa, 0 1887 br i1 %flaga, label %.lr.ph, label %._crit_edge 1888 1889.lr.ph: 1890 br label %hdr 1891 1892hdr: 1893 %ctr = phi i32 [ 0, %.lr.ph ], [ %ctr.next, %latch ] 1894 %flagb = icmp ugt i32 %ctr, %cmpb 1895 br i1 %flagb, label %kill, label %latch 1896 1897kill: 1898 call void @llvm.amdgcn.kill(i1 false) 1899 br label %latch 1900 1901latch: 1902 %ctr.next = add nuw nsw i32 %ctr, 1 1903 %flagc = icmp slt i32 %ctr.next, %cmpc 1904 br i1 %flagc, label %hdr, label %._crit_edge 1905 1906._crit_edge: 1907 %tmp = phi i32 [ -1, %.entry ], [ %ctr.next, %latch ] 1908 %out = bitcast 
i32 %tmp to float
  call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 15, float %out, float %out, float undef, float undef, i1 immarg true, i1 immarg true)
  ret void
}

; skip_mode_switch compares %arg with 0 and calls llvm.amdgcn.s.setreg only on
; the equal path (IR block %bb.0); both paths then return from IR block %bb.1.
; In all four checked configurations the expected code keeps an
; s_cbranch_execz over the block holding s_setreg_imm32_b32, and the function
; returns with s_setpc_b64.
; NOTE(review): judging by the test name, the point is that the branch around
; the mode write must not be dropped -- confirm against the pass under test.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define void @skip_mode_switch(i32 %arg) {
; SI-LABEL: skip_mode_switch:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SI-NEXT: s_cbranch_execz .LBB16_2
; SI-NEXT: ; %bb.1: ; %bb.0
; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
; SI-NEXT: .LBB16_2: ; %bb.1
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
; SI-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-WAVE64-LABEL: skip_mode_switch:
; GFX10-WAVE64: ; %bb.0: ; %entry
; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB16_2
; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb.0
; GFX10-WAVE64-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
; GFX10-WAVE64-NEXT: .LBB16_2: ; %bb.1
; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX10-WAVE64-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-WAVE32-LABEL: skip_mode_switch:
; GFX10-WAVE32: ; %bb.0: ; %entry
; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s4, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB16_2
; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb.0
; GFX10-WAVE32-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
; GFX10-WAVE32-NEXT: .LBB16_2: ; %bb.1
; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-WAVE32-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: skip_mode_switch:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b64 s[0:1], exec
; GFX11-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX11-NEXT: s_cbranch_execz .LBB16_2
; GFX11-NEXT: ; %bb.1: ; %bb.0
; GFX11-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
; GFX11-NEXT: .LBB16_2: ; %bb.1
; GFX11-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb.0, label %bb.1

bb.0:
  ; The first operand (2049) is the packed register selector; the expected
  ; assembly above prints it as hwreg(HW_REG_MODE, 0, 2), with 3 as the value.
  call void @llvm.amdgcn.s.setreg(i32 2049, i32 3)
  br label %bb.1

bb.1:
  ret void
}

; Intrinsic declarations. exp.f32, kill and s.setreg are called by the tests
; visible in this part of the file; the two image.sample declarations have no
; call site here and are presumably used by tests earlier in the file.
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #3
declare float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare void @llvm.amdgcn.kill(i1) #0

declare void @llvm.amdgcn.s.setreg(i32 immarg, i32)

; Attribute groups referenced by the definitions and declarations in this file.
; NOTE(review): #2 has no user in this part of the file -- confirm it is still
; referenced elsewhere before removing it.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone speculatable }
attributes #3 = { inaccessiblememonly nounwind writeonly }