; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,WAVE64,SI %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,WAVE64,GFX10-WAVE64 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,GFX10-WAVE32 %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,GFX11 %s

; Overview of this part of the file
; --------------------------------
; The llc invocations above compile the same IR four times: for tahiti, for
; gfx1010 with wavefrontsize64 forced on, for gfx1010 with no extra attributes
; (its expected output uses 32-bit exec_lo operations), and for gfx1100 with
; wavefrontsize64 forced on. Each invocation selects its own set of check
; prefixes, so a test below carries one expected-assembly listing per prefix
; group that applies to it.
;
; Every test calls @llvm.amdgcn.kill on an i1 condition. Judging from the
; expected output, lanes whose condition is false are removed from exec, and a
; branch on scc0 leads to a shared tail block that zeroes exec, issues an
; export with the "done" bit and ends the program.
; NOTE(review): the declaration of @llvm.amdgcn.kill and the definition of
; attribute group #0 are not visible in this part of the file - confirm there.
;
; The expected-assembly comments are generated (see the first line of the
; file); after changing any IR, regenerate them with that script rather than
; editing them by hand.

; Kill with a constant-true condition: the expected output contains no exec
; update and no early-exit block, only the program end.
define amdgpu_ps void @test_kill_depth_0_imm_pos() #0 {
; GCN-LABEL: test_kill_depth_0_imm_pos:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_endpgm
  call void @llvm.amdgcn.kill(i1 true)
  ret void
}

; Kill with a constant-false condition: exec is and-not'ed with itself, and
; the scc0 branch targets the export-and-end tail block (.LBB1_1).
define amdgpu_ps void @test_kill_depth_0_imm_neg() #0 {
; WAVE64-LABEL: test_kill_depth_0_imm_neg:
; WAVE64:       ; %bb.0:
; WAVE64-NEXT:    s_andn2_b64 exec, exec, exec
; WAVE64-NEXT:    s_cbranch_scc0 .LBB1_1
; WAVE64-NEXT:    s_endpgm
; WAVE64-NEXT:  .LBB1_1:
; WAVE64-NEXT:    s_mov_b64 exec, 0
; WAVE64-NEXT:    exp null off, off, off, off done vm
; WAVE64-NEXT:    s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_0_imm_neg:
; GFX10-WAVE32:       ; %bb.0:
; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, exec_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB1_1
; GFX10-WAVE32-NEXT:    s_endpgm
; GFX10-WAVE32-NEXT:  .LBB1_1:
; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT:    s_endpgm
;
; GFX11-LABEL: test_kill_depth_0_imm_neg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_and_not1_b64 exec, exec, exec
; GFX11-NEXT:    s_cbranch_scc0 .LBB1_1
; GFX11-NEXT:    s_endpgm
; GFX11-NEXT:  .LBB1_1:
; GFX11-NEXT:    s_mov_b64 exec, 0
; GFX11-NEXT:    exp mrt0 off, off, off, off done
; GFX11-NEXT:    s_endpgm
  call void @llvm.amdgcn.kill(i1 false)
  ret void
}

; Two back-to-back constant-false kills. The expected output tracks the live
; mask in a scalar register copy of exec and emits one scc0 branch per kill,
; both targeting the same tail block (.LBB2_2).
; FIXME: Ideally only one early-exit would be emitted
define amdgpu_ps void @test_kill_depth_0_imm_neg_x2() #0 {
; WAVE64-LABEL: test_kill_depth_0_imm_neg_x2:
; WAVE64:       ; %bb.0:
; WAVE64-NEXT:    s_mov_b64 s[0:1], exec
; WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
; WAVE64-NEXT:    s_cbranch_scc0 .LBB2_2
; WAVE64-NEXT:  ; %bb.1:
; WAVE64-NEXT:    s_mov_b64 exec, 0
; WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
; WAVE64-NEXT:    s_cbranch_scc0 .LBB2_2
; WAVE64-NEXT:    s_endpgm
; WAVE64-NEXT:  .LBB2_2:
; WAVE64-NEXT:    s_mov_b64 exec, 0
; WAVE64-NEXT:    exp null off, off, off, off done vm
; WAVE64-NEXT:    s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_0_imm_neg_x2:
; GFX10-WAVE32:       ; %bb.0:
; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, exec_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB2_2
; GFX10-WAVE32-NEXT:  ; %bb.1:
; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, exec_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB2_2
; GFX10-WAVE32-NEXT:    s_endpgm
; GFX10-WAVE32-NEXT:  .LBB2_2:
; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT:    s_endpgm
;
; GFX11-LABEL: test_kill_depth_0_imm_neg_x2:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_mov_b64 s[0:1], exec
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], exec
; GFX11-NEXT:    s_cbranch_scc0 .LBB2_2
; GFX11-NEXT:  ; %bb.1:
; GFX11-NEXT:    s_mov_b64 exec, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], exec
; GFX11-NEXT:    s_cbranch_scc0 .LBB2_2
; GFX11-NEXT:    s_endpgm
; GFX11-NEXT:  .LBB2_2:
; GFX11-NEXT:    s_mov_b64 exec, 0
; GFX11-NEXT:    exp mrt0 off, off, off, off done
; GFX11-NEXT:    s_endpgm
  call void @llvm.amdgcn.kill(i1 false)
  call void @llvm.amdgcn.kill(i1 false)
  ret void
}

; Kill on a per-lane condition (%x < 0.0). The expected output computes the
; inverse compare (v_cmp_ngt_f32 with 0 as the first operand) into vcc and
; and-not's that mask out of exec directly.
define amdgpu_ps void @test_kill_depth_var(float %x) #0 {
; WAVE64-LABEL: test_kill_depth_var:
; WAVE64:       ; %bb.0:
; WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
; WAVE64-NEXT:    s_cbranch_scc0 .LBB3_1
; WAVE64-NEXT:    s_endpgm
; WAVE64-NEXT:  .LBB3_1:
; WAVE64-NEXT:    s_mov_b64 exec, 0
; WAVE64-NEXT:    exp null off, off, off, off done vm
; WAVE64-NEXT:    s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_var:
; GFX10-WAVE32:       ; %bb.0:
; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB3_1
; GFX10-WAVE32-NEXT:    s_endpgm
; GFX10-WAVE32-NEXT:  .LBB3_1:
; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT:    s_endpgm
;
; GFX11-LABEL: test_kill_depth_var:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT:    s_cbranch_scc0 .LBB3_1
; GFX11-NEXT:    s_endpgm
; GFX11-NEXT:  .LBB3_1:
; GFX11-NEXT:    s_mov_b64 exec, 0
; GFX11-NEXT:    exp mrt0 off, off, off, off done
; GFX11-NEXT:    s_endpgm
  %cmp = fcmp olt float %x, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp)
  ret void
}

; The same %cmp value feeds two consecutive kills. The expected output still
; contains the compare twice and one scc0 branch per kill.
; FIXME: Ideally only one early-exit would be emitted
define amdgpu_ps void @test_kill_depth_var_x2_same(float %x) #0 {
; SI-LABEL: test_kill_depth_var_x2_same:
; SI:       ; %bb.0:
; SI-NEXT:    s_mov_b64 s[0:1], exec
; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT:    s_cbranch_scc0 .LBB4_2
; SI-NEXT:  ; %bb.1:
; SI-NEXT:    s_andn2_b64 exec, exec, vcc
; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT:    s_cbranch_scc0 .LBB4_2
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB4_2:
; SI-NEXT:    s_mov_b64 exec, 0
; SI-NEXT:    exp null off, off, off, off done vm
; SI-NEXT:    s_endpgm
;
; GFX10-WAVE64-LABEL: test_kill_depth_var_x2_same:
; GFX10-WAVE64:       ; %bb.0:
; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB4_2
; GFX10-WAVE64-NEXT:  ; %bb.1:
; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB4_2
; GFX10-WAVE64-NEXT:    s_endpgm
; GFX10-WAVE64-NEXT:  .LBB4_2:
; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT:    s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_var_x2_same:
; GFX10-WAVE32:       ; %bb.0:
; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB4_2
; GFX10-WAVE32-NEXT:  ; %bb.1:
; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB4_2
; GFX10-WAVE32-NEXT:    s_endpgm
; GFX10-WAVE32-NEXT:  .LBB4_2:
; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT:    s_endpgm
;
; GFX11-LABEL: test_kill_depth_var_x2_same:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX11-NEXT:    s_mov_b64 s[0:1], exec
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
; GFX11-NEXT:    s_cbranch_scc0 .LBB4_2
; GFX11-NEXT:  ; %bb.1:
; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
; GFX11-NEXT:    s_cbranch_scc0 .LBB4_2
; GFX11-NEXT:    s_endpgm
; GFX11-NEXT:  .LBB4_2:
; GFX11-NEXT:    s_mov_b64 exec, 0
; GFX11-NEXT:    exp mrt0 off, off, off, off done
; GFX11-NEXT:    s_endpgm
  %cmp = fcmp olt float %x, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp)
  call void @llvm.amdgcn.kill(i1 %cmp)
  ret void
}

; Two kills on independent conditions: the first compare reads %x (v0 in the
; expected output), the second reads %y (v1).
; FIXME: Ideally only one early-exit would be emitted
define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
; SI-LABEL: test_kill_depth_var_x2:
; SI:       ; %bb.0:
; SI-NEXT:    s_mov_b64 s[0:1], exec
; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT:    s_cbranch_scc0 .LBB5_2
; SI-NEXT:  ; %bb.1:
; SI-NEXT:    s_andn2_b64 exec, exec, vcc
; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT:    s_cbranch_scc0 .LBB5_2
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB5_2:
; SI-NEXT:    s_mov_b64 exec, 0
; SI-NEXT:    exp null off, off, off, off done vm
; SI-NEXT:    s_endpgm
;
; GFX10-WAVE64-LABEL: test_kill_depth_var_x2:
; GFX10-WAVE64:       ; %bb.0:
; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB5_2
; GFX10-WAVE64-NEXT:  ; %bb.1:
; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB5_2
; GFX10-WAVE64-NEXT:    s_endpgm
; GFX10-WAVE64-NEXT:  .LBB5_2:
; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT:    s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_var_x2:
; GFX10-WAVE32:       ; %bb.0:
; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB5_2
; GFX10-WAVE32-NEXT:  ; %bb.1:
; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v1
; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB5_2
; GFX10-WAVE32-NEXT:    s_endpgm
; GFX10-WAVE32-NEXT:  .LBB5_2:
; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT:    s_endpgm
;
; GFX11-LABEL: test_kill_depth_var_x2:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX11-NEXT:    s_mov_b64 s[0:1], exec
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
; GFX11-NEXT:    s_cbranch_scc0 .LBB5_2
; GFX11-NEXT:  ; %bb.1:
; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
; GFX11-NEXT:    s_cbranch_scc0 .LBB5_2
; GFX11-NEXT:    s_endpgm
; GFX11-NEXT:  .LBB5_2:
; GFX11-NEXT:    s_mov_b64 exec, 0
; GFX11-NEXT:    exp mrt0 off, off, off, off done
; GFX11-NEXT:    s_endpgm
  %cmp.x = fcmp olt float %x, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.x)
  %cmp.y = fcmp olt float %y, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.y)
  ret void
}

; Two kills separated by a side-effecting inline asm that defines v7; the
; second kill's condition is computed from that asm result, so the expected
; output has the asm between the first exec update and the second compare.
define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
; SI-LABEL: test_kill_depth_var_x2_instructions:
; SI:       ; %bb.0:
; SI-NEXT:    s_mov_b64 s[0:1], exec
; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT:    s_cbranch_scc0 .LBB6_2
; SI-NEXT:  ; %bb.1:
; SI-NEXT:    s_andn2_b64 exec, exec, vcc
; SI-NEXT:    ;;#ASMSTART
; SI-NEXT:    v_mov_b32_e64 v7, -1
; SI-NEXT:    ;;#ASMEND
; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT:    s_cbranch_scc0 .LBB6_2
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB6_2:
; SI-NEXT:    s_mov_b64 exec, 0
; SI-NEXT:    exp null off, off, off, off done vm
; SI-NEXT:    s_endpgm
;
; GFX10-WAVE64-LABEL: test_kill_depth_var_x2_instructions:
; GFX10-WAVE64:       ; %bb.0:
; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB6_2
; GFX10-WAVE64-NEXT:  ; %bb.1:
; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT:    ;;#ASMSTART
; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
; GFX10-WAVE64-NEXT:    ;;#ASMEND
; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB6_2
; GFX10-WAVE64-NEXT:    s_endpgm
; GFX10-WAVE64-NEXT:  .LBB6_2:
; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT:    s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_var_x2_instructions:
; GFX10-WAVE32:       ; %bb.0:
; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB6_2
; GFX10-WAVE32-NEXT:  ; %bb.1:
; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT:    ;;#ASMSTART
; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
; GFX10-WAVE32-NEXT:    ;;#ASMEND
; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v7
; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB6_2
; GFX10-WAVE32-NEXT:    s_endpgm
; GFX10-WAVE32-NEXT:  .LBB6_2:
; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT:    s_endpgm
;
; GFX11-LABEL: test_kill_depth_var_x2_instructions:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX11-NEXT:    s_mov_b64 s[0:1], exec
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
; GFX11-NEXT:    s_cbranch_scc0 .LBB6_2
; GFX11-NEXT:  ; %bb.1:
; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    v_mov_b32_e64 v7, -1
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
; GFX11-NEXT:    s_cbranch_scc0 .LBB6_2
; GFX11-NEXT:    s_endpgm
; GFX11-NEXT:  .LBB6_2:
; GFX11-NEXT:    s_mov_b64 exec, 0
; GFX11-NEXT:    exp mrt0 off, off, off, off done
; GFX11-NEXT:    s_endpgm
  %cmp.x = fcmp olt float %x, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.x)
  %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={v7}"()
  %cmp.y = fcmp olt float %y, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.y)
  ret void
}

; FIXME: why does the skip depend on the asm length in the same block?
383define amdgpu_ps float @test_kill_control_flow(i32 inreg %arg) #0 { 384; SI-LABEL: test_kill_control_flow: 385; SI: ; %bb.0: ; %entry 386; SI-NEXT: s_cmp_lg_u32 s0, 0 387; SI-NEXT: s_cbranch_scc0 .LBB7_2 388; SI-NEXT: ; %bb.1: ; %exit 389; SI-NEXT: v_mov_b32_e32 v0, 1.0 390; SI-NEXT: s_branch .LBB7_5 391; SI-NEXT: .LBB7_2: ; %bb 392; SI-NEXT: s_mov_b64 s[2:3], exec 393; SI-NEXT: ;;#ASMSTART 394; SI-NEXT: v_mov_b32_e64 v7, -1 395; SI-NEXT: v_nop_e64 396; SI-NEXT: v_nop_e64 397; SI-NEXT: v_nop_e64 398; SI-NEXT: v_nop_e64 399; SI-NEXT: v_nop_e64 400; SI-NEXT: v_nop_e64 401; SI-NEXT: v_nop_e64 402; SI-NEXT: v_nop_e64 403; SI-NEXT: v_nop_e64 404; SI-NEXT: v_nop_e64 405; SI-NEXT: ;;#ASMEND 406; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7 407; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc 408; SI-NEXT: s_cbranch_scc0 .LBB7_4 409; SI-NEXT: ; %bb.3: ; %bb 410; SI-NEXT: s_andn2_b64 exec, exec, vcc 411; SI-NEXT: v_mov_b32_e32 v0, 1.0 412; SI-NEXT: s_branch .LBB7_5 413; SI-NEXT: .LBB7_4: 414; SI-NEXT: s_mov_b64 exec, 0 415; SI-NEXT: exp null off, off, off, off done vm 416; SI-NEXT: s_endpgm 417; SI-NEXT: .LBB7_5: 418; 419; GFX10-WAVE64-LABEL: test_kill_control_flow: 420; GFX10-WAVE64: ; %bb.0: ; %entry 421; GFX10-WAVE64-NEXT: s_cmp_lg_u32 s0, 0 422; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB7_2 423; GFX10-WAVE64-NEXT: ; %bb.1: ; %exit 424; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 1.0 425; GFX10-WAVE64-NEXT: s_branch .LBB7_5 426; GFX10-WAVE64-NEXT: .LBB7_2: ; %bb 427; GFX10-WAVE64-NEXT: ;;#ASMSTART 428; GFX10-WAVE64-NEXT: v_mov_b32_e64 v7, -1 429; GFX10-WAVE64-NEXT: v_nop_e64 430; GFX10-WAVE64-NEXT: v_nop_e64 431; GFX10-WAVE64-NEXT: v_nop_e64 432; GFX10-WAVE64-NEXT: v_nop_e64 433; GFX10-WAVE64-NEXT: v_nop_e64 434; GFX10-WAVE64-NEXT: v_nop_e64 435; GFX10-WAVE64-NEXT: v_nop_e64 436; GFX10-WAVE64-NEXT: v_nop_e64 437; GFX10-WAVE64-NEXT: v_nop_e64 438; GFX10-WAVE64-NEXT: v_nop_e64 439; GFX10-WAVE64-NEXT: ;;#ASMEND 440; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7 441; GFX10-WAVE64-NEXT: 
s_mov_b64 s[2:3], exec 442; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc 443; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB7_4 444; GFX10-WAVE64-NEXT: ; %bb.3: ; %bb 445; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc 446; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 1.0 447; GFX10-WAVE64-NEXT: s_branch .LBB7_5 448; GFX10-WAVE64-NEXT: .LBB7_4: 449; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 450; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm 451; GFX10-WAVE64-NEXT: s_endpgm 452; GFX10-WAVE64-NEXT: .LBB7_5: 453; 454; GFX10-WAVE32-LABEL: test_kill_control_flow: 455; GFX10-WAVE32: ; %bb.0: ; %entry 456; GFX10-WAVE32-NEXT: s_cmp_lg_u32 s0, 0 457; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB7_2 458; GFX10-WAVE32-NEXT: ; %bb.1: ; %exit 459; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 1.0 460; GFX10-WAVE32-NEXT: s_branch .LBB7_5 461; GFX10-WAVE32-NEXT: .LBB7_2: ; %bb 462; GFX10-WAVE32-NEXT: ;;#ASMSTART 463; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1 464; GFX10-WAVE32-NEXT: v_nop_e64 465; GFX10-WAVE32-NEXT: v_nop_e64 466; GFX10-WAVE32-NEXT: v_nop_e64 467; GFX10-WAVE32-NEXT: v_nop_e64 468; GFX10-WAVE32-NEXT: v_nop_e64 469; GFX10-WAVE32-NEXT: v_nop_e64 470; GFX10-WAVE32-NEXT: v_nop_e64 471; GFX10-WAVE32-NEXT: v_nop_e64 472; GFX10-WAVE32-NEXT: v_nop_e64 473; GFX10-WAVE32-NEXT: v_nop_e64 474; GFX10-WAVE32-NEXT: ;;#ASMEND 475; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v7 476; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo 477; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, vcc_lo 478; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB7_4 479; GFX10-WAVE32-NEXT: ; %bb.3: ; %bb 480; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo 481; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 1.0 482; GFX10-WAVE32-NEXT: s_branch .LBB7_5 483; GFX10-WAVE32-NEXT: .LBB7_4: 484; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 485; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm 486; GFX10-WAVE32-NEXT: s_endpgm 487; GFX10-WAVE32-NEXT: .LBB7_5: 488; 489; GFX11-LABEL: test_kill_control_flow: 490; GFX11: ; %bb.0: ; %entry 491; GFX11-NEXT: 
s_cmp_lg_u32 s0, 0 492; GFX11-NEXT: s_cbranch_scc0 .LBB7_2 493; GFX11-NEXT: ; %bb.1: ; %exit 494; GFX11-NEXT: v_mov_b32_e32 v0, 1.0 495; GFX11-NEXT: s_branch .LBB7_5 496; GFX11-NEXT: .LBB7_2: ; %bb 497; GFX11-NEXT: ;;#ASMSTART 498; GFX11-NEXT: v_mov_b32_e64 v7, -1 499; GFX11-NEXT: v_nop_e64 500; GFX11-NEXT: v_nop_e64 501; GFX11-NEXT: v_nop_e64 502; GFX11-NEXT: v_nop_e64 503; GFX11-NEXT: v_nop_e64 504; GFX11-NEXT: v_nop_e64 505; GFX11-NEXT: v_nop_e64 506; GFX11-NEXT: v_nop_e64 507; GFX11-NEXT: v_nop_e64 508; GFX11-NEXT: v_nop_e64 509; GFX11-NEXT: ;;#ASMEND 510; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7 511; GFX11-NEXT: s_mov_b64 s[2:3], exec 512; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 513; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], vcc 514; GFX11-NEXT: s_cbranch_scc0 .LBB7_4 515; GFX11-NEXT: ; %bb.3: ; %bb 516; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc 517; GFX11-NEXT: v_mov_b32_e32 v0, 1.0 518; GFX11-NEXT: s_branch .LBB7_5 519; GFX11-NEXT: .LBB7_4: 520; GFX11-NEXT: s_mov_b64 exec, 0 521; GFX11-NEXT: exp mrt0 off, off, off, off done 522; GFX11-NEXT: s_endpgm 523; GFX11-NEXT: .LBB7_5: 524entry: 525 %cmp = icmp eq i32 %arg, 0 526 br i1 %cmp, label %bb, label %exit 527 528bb: 529 %var = call float asm sideeffect "v_mov_b32_e64 v7, -1 530 v_nop_e64 531 v_nop_e64 532 v_nop_e64 533 v_nop_e64 534 v_nop_e64 535 v_nop_e64 536 v_nop_e64 537 v_nop_e64 538 v_nop_e64 539 v_nop_e64", "={v7}"() 540 %cmp.var = fcmp olt float %var, 0.0 541 ; TODO: We could do an early-exit here (the branch above is uniform!) 
542 call void @llvm.amdgcn.kill(i1 %cmp.var) 543 br label %exit 544 545exit: 546 ret float 1.0 547} 548 549define amdgpu_ps void @test_kill_control_flow_remainder(i32 inreg %arg) #0 { 550; SI-LABEL: test_kill_control_flow_remainder: 551; SI: ; %bb.0: ; %entry 552; SI-NEXT: s_cmp_lg_u32 s0, 0 553; SI-NEXT: v_mov_b32_e32 v9, 0 554; SI-NEXT: s_cbranch_scc1 .LBB8_3 555; SI-NEXT: ; %bb.1: ; %bb 556; SI-NEXT: s_mov_b64 s[2:3], exec 557; SI-NEXT: ;;#ASMSTART 558; SI-NEXT: v_mov_b32_e64 v7, -1 559; SI-NEXT: v_nop_e64 560; SI-NEXT: v_nop_e64 561; SI-NEXT: v_nop_e64 562; SI-NEXT: v_nop_e64 563; SI-NEXT: v_nop_e64 564; SI-NEXT: v_nop_e64 565; SI-NEXT: v_nop_e64 566; SI-NEXT: v_nop_e64 567; SI-NEXT: v_nop_e64 568; SI-NEXT: v_nop_e64 569; SI-NEXT: v_nop_e64 570; SI-NEXT: ;;#ASMEND 571; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7 572; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc 573; SI-NEXT: ;;#ASMSTART 574; SI-NEXT: v_mov_b32_e64 v8, -1 575; SI-NEXT: ;;#ASMEND 576; SI-NEXT: s_cbranch_scc0 .LBB8_4 577; SI-NEXT: ; %bb.2: ; %bb 578; SI-NEXT: s_andn2_b64 exec, exec, vcc 579; SI-NEXT: s_mov_b32 s3, 0xf000 580; SI-NEXT: s_mov_b32 s2, -1 581; SI-NEXT: buffer_store_dword v8, off, s[0:3], 0 582; SI-NEXT: s_waitcnt vmcnt(0) 583; SI-NEXT: ;;#ASMSTART 584; SI-NEXT: v_mov_b32_e64 v9, -2 585; SI-NEXT: ;;#ASMEND 586; SI-NEXT: .LBB8_3: ; %exit 587; SI-NEXT: s_mov_b32 s3, 0xf000 588; SI-NEXT: s_mov_b32 s2, -1 589; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 590; SI-NEXT: s_endpgm 591; SI-NEXT: .LBB8_4: 592; SI-NEXT: s_mov_b64 exec, 0 593; SI-NEXT: exp null off, off, off, off done vm 594; SI-NEXT: s_endpgm 595; 596; GFX10-WAVE64-LABEL: test_kill_control_flow_remainder: 597; GFX10-WAVE64: ; %bb.0: ; %entry 598; GFX10-WAVE64-NEXT: v_mov_b32_e32 v9, 0 599; GFX10-WAVE64-NEXT: s_cmp_lg_u32 s0, 0 600; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB8_2 601; GFX10-WAVE64-NEXT: ; %bb.1: ; %exit 602; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v9, off 603; GFX10-WAVE64-NEXT: s_endpgm 604; GFX10-WAVE64-NEXT: .LBB8_2: ; 
%bb 605; GFX10-WAVE64-NEXT: ;;#ASMSTART 606; GFX10-WAVE64-NEXT: v_mov_b32_e64 v7, -1 607; GFX10-WAVE64-NEXT: v_nop_e64 608; GFX10-WAVE64-NEXT: v_nop_e64 609; GFX10-WAVE64-NEXT: v_nop_e64 610; GFX10-WAVE64-NEXT: v_nop_e64 611; GFX10-WAVE64-NEXT: v_nop_e64 612; GFX10-WAVE64-NEXT: v_nop_e64 613; GFX10-WAVE64-NEXT: v_nop_e64 614; GFX10-WAVE64-NEXT: v_nop_e64 615; GFX10-WAVE64-NEXT: v_nop_e64 616; GFX10-WAVE64-NEXT: v_nop_e64 617; GFX10-WAVE64-NEXT: v_nop_e64 618; GFX10-WAVE64-NEXT: ;;#ASMEND 619; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7 620; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec 621; GFX10-WAVE64-NEXT: ;;#ASMSTART 622; GFX10-WAVE64-NEXT: v_mov_b32_e64 v8, -1 623; GFX10-WAVE64-NEXT: ;;#ASMEND 624; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc 625; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB8_4 626; GFX10-WAVE64-NEXT: ; %bb.3: ; %bb 627; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc 628; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v8, off 629; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0 630; GFX10-WAVE64-NEXT: ;;#ASMSTART 631; GFX10-WAVE64-NEXT: v_mov_b32_e64 v9, -2 632; GFX10-WAVE64-NEXT: ;;#ASMEND 633; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v9, off 634; GFX10-WAVE64-NEXT: s_endpgm 635; GFX10-WAVE64-NEXT: .LBB8_4: 636; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 637; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm 638; GFX10-WAVE64-NEXT: s_endpgm 639; 640; GFX10-WAVE32-LABEL: test_kill_control_flow_remainder: 641; GFX10-WAVE32: ; %bb.0: ; %entry 642; GFX10-WAVE32-NEXT: v_mov_b32_e32 v9, 0 643; GFX10-WAVE32-NEXT: s_cmp_lg_u32 s0, 0 644; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB8_2 645; GFX10-WAVE32-NEXT: ; %bb.1: ; %exit 646; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v9, off 647; GFX10-WAVE32-NEXT: s_endpgm 648; GFX10-WAVE32-NEXT: .LBB8_2: ; %bb 649; GFX10-WAVE32-NEXT: ;;#ASMSTART 650; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1 651; GFX10-WAVE32-NEXT: v_nop_e64 652; GFX10-WAVE32-NEXT: v_nop_e64 653; GFX10-WAVE32-NEXT: v_nop_e64 654; GFX10-WAVE32-NEXT: 
v_nop_e64 655; GFX10-WAVE32-NEXT: v_nop_e64 656; GFX10-WAVE32-NEXT: v_nop_e64 657; GFX10-WAVE32-NEXT: v_nop_e64 658; GFX10-WAVE32-NEXT: v_nop_e64 659; GFX10-WAVE32-NEXT: v_nop_e64 660; GFX10-WAVE32-NEXT: v_nop_e64 661; GFX10-WAVE32-NEXT: v_nop_e64 662; GFX10-WAVE32-NEXT: ;;#ASMEND 663; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v7 664; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo 665; GFX10-WAVE32-NEXT: ;;#ASMSTART 666; GFX10-WAVE32-NEXT: v_mov_b32_e64 v8, -1 667; GFX10-WAVE32-NEXT: ;;#ASMEND 668; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, vcc_lo 669; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB8_4 670; GFX10-WAVE32-NEXT: ; %bb.3: ; %bb 671; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo 672; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v8, off 673; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0 674; GFX10-WAVE32-NEXT: ;;#ASMSTART 675; GFX10-WAVE32-NEXT: v_mov_b32_e64 v9, -2 676; GFX10-WAVE32-NEXT: ;;#ASMEND 677; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v9, off 678; GFX10-WAVE32-NEXT: s_endpgm 679; GFX10-WAVE32-NEXT: .LBB8_4: 680; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 681; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm 682; GFX10-WAVE32-NEXT: s_endpgm 683; 684; GFX11-LABEL: test_kill_control_flow_remainder: 685; GFX11: ; %bb.0: ; %entry 686; GFX11-NEXT: v_mov_b32_e32 v9, 0 687; GFX11-NEXT: s_cmp_lg_u32 s0, 0 688; GFX11-NEXT: s_cbranch_scc0 .LBB8_2 689; GFX11-NEXT: ; %bb.1: ; %exit 690; GFX11-NEXT: global_store_b32 v[0:1], v9, off 691; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 692; GFX11-NEXT: s_endpgm 693; GFX11-NEXT: .LBB8_2: ; %bb 694; GFX11-NEXT: ;;#ASMSTART 695; GFX11-NEXT: v_mov_b32_e64 v7, -1 696; GFX11-NEXT: v_nop_e64 697; GFX11-NEXT: v_nop_e64 698; GFX11-NEXT: v_nop_e64 699; GFX11-NEXT: v_nop_e64 700; GFX11-NEXT: v_nop_e64 701; GFX11-NEXT: v_nop_e64 702; GFX11-NEXT: v_nop_e64 703; GFX11-NEXT: v_nop_e64 704; GFX11-NEXT: v_nop_e64 705; GFX11-NEXT: v_nop_e64 706; GFX11-NEXT: v_nop_e64 707; GFX11-NEXT: ;;#ASMEND 708; GFX11-NEXT: 
v_cmp_ngt_f32_e32 vcc, 0, v7 709; GFX11-NEXT: s_mov_b64 s[2:3], exec 710; GFX11-NEXT: ;;#ASMSTART 711; GFX11-NEXT: v_mov_b32_e64 v8, -1 712; GFX11-NEXT: ;;#ASMEND 713; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], vcc 714; GFX11-NEXT: s_cbranch_scc0 .LBB8_4 715; GFX11-NEXT: ; %bb.3: ; %bb 716; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc 717; GFX11-NEXT: global_store_b32 v[0:1], v8, off dlc 718; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 719; GFX11-NEXT: ;;#ASMSTART 720; GFX11-NEXT: v_mov_b32_e64 v9, -2 721; GFX11-NEXT: ;;#ASMEND 722; GFX11-NEXT: global_store_b32 v[0:1], v9, off 723; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 724; GFX11-NEXT: s_endpgm 725; GFX11-NEXT: .LBB8_4: 726; GFX11-NEXT: s_mov_b64 exec, 0 727; GFX11-NEXT: exp mrt0 off, off, off, off done 728; GFX11-NEXT: s_endpgm 729entry: 730 %cmp = icmp eq i32 %arg, 0 731 br i1 %cmp, label %bb, label %exit 732 733bb: 734 %var = call float asm sideeffect "v_mov_b32_e64 v7, -1 735 v_nop_e64 736 v_nop_e64 737 v_nop_e64 738 v_nop_e64 739 v_nop_e64 740 v_nop_e64 741 v_nop_e64 742 v_nop_e64 743 v_nop_e64 744 v_nop_e64 745 v_nop_e64", "={v7}"() 746 %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={v8}"() 747 %cmp.var = fcmp olt float %var, 0.0 748 ; TODO: We could do an early-exit here (the branch above is uniform!) 
749 call void @llvm.amdgcn.kill(i1 %cmp.var) 750 store volatile float %live.across, float addrspace(1)* undef 751 %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={v9}"() 752 br label %exit 753 754exit: 755 %phi = phi float [ 0.0, %entry ], [ %live.out, %bb ] 756 store float %phi, float addrspace(1)* undef 757 ret void 758} 759 760define amdgpu_ps float @test_kill_control_flow_return(i32 inreg %arg) #0 { 761; SI-LABEL: test_kill_control_flow_return: 762; SI: ; %bb.0: ; %entry 763; SI-NEXT: s_cmp_eq_u32 s0, 1 764; SI-NEXT: s_cselect_b64 s[4:5], -1, 0 765; SI-NEXT: s_mov_b64 s[2:3], exec 766; SI-NEXT: s_xor_b64 s[4:5], s[4:5], exec 767; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5] 768; SI-NEXT: s_cbranch_scc0 .LBB9_4 769; SI-NEXT: ; %bb.1: ; %entry 770; SI-NEXT: s_and_b64 exec, exec, s[2:3] 771; SI-NEXT: s_cmp_lg_u32 s0, 0 772; SI-NEXT: v_mov_b32_e32 v0, 0 773; SI-NEXT: s_cbranch_scc0 .LBB9_3 774; SI-NEXT: ; %bb.2: ; %exit 775; SI-NEXT: s_branch .LBB9_5 776; SI-NEXT: .LBB9_3: ; %bb 777; SI-NEXT: ;;#ASMSTART 778; SI-NEXT: v_mov_b32_e64 v7, -1 779; SI-NEXT: v_nop_e64 780; SI-NEXT: v_nop_e64 781; SI-NEXT: v_nop_e64 782; SI-NEXT: v_nop_e64 783; SI-NEXT: v_nop_e64 784; SI-NEXT: v_nop_e64 785; SI-NEXT: v_nop_e64 786; SI-NEXT: v_nop_e64 787; SI-NEXT: v_nop_e64 788; SI-NEXT: v_nop_e64 789; SI-NEXT: ;;#ASMEND 790; SI-NEXT: v_mov_b32_e32 v0, v7 791; SI-NEXT: s_branch .LBB9_5 792; SI-NEXT: .LBB9_4: 793; SI-NEXT: s_mov_b64 exec, 0 794; SI-NEXT: exp null off, off, off, off done vm 795; SI-NEXT: s_endpgm 796; SI-NEXT: .LBB9_5: 797; 798; GFX10-WAVE64-LABEL: test_kill_control_flow_return: 799; GFX10-WAVE64: ; %bb.0: ; %entry 800; GFX10-WAVE64-NEXT: s_cmp_eq_u32 s0, 1 801; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec 802; GFX10-WAVE64-NEXT: s_cselect_b64 s[4:5], -1, 0 803; GFX10-WAVE64-NEXT: s_xor_b64 s[4:5], s[4:5], exec 804; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5] 805; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB9_4 806; GFX10-WAVE64-NEXT: ; %bb.1: ; %entry 807; 
GFX10-WAVE64-NEXT: s_and_b64 exec, exec, s[2:3] 808; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 0 809; GFX10-WAVE64-NEXT: s_cmp_lg_u32 s0, 0 810; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB9_3 811; GFX10-WAVE64-NEXT: ; %bb.2: ; %exit 812; GFX10-WAVE64-NEXT: s_branch .LBB9_5 813; GFX10-WAVE64-NEXT: .LBB9_3: ; %bb 814; GFX10-WAVE64-NEXT: ;;#ASMSTART 815; GFX10-WAVE64-NEXT: v_mov_b32_e64 v7, -1 816; GFX10-WAVE64-NEXT: v_nop_e64 817; GFX10-WAVE64-NEXT: v_nop_e64 818; GFX10-WAVE64-NEXT: v_nop_e64 819; GFX10-WAVE64-NEXT: v_nop_e64 820; GFX10-WAVE64-NEXT: v_nop_e64 821; GFX10-WAVE64-NEXT: v_nop_e64 822; GFX10-WAVE64-NEXT: v_nop_e64 823; GFX10-WAVE64-NEXT: v_nop_e64 824; GFX10-WAVE64-NEXT: v_nop_e64 825; GFX10-WAVE64-NEXT: v_nop_e64 826; GFX10-WAVE64-NEXT: ;;#ASMEND 827; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, v7 828; GFX10-WAVE64-NEXT: s_branch .LBB9_5 829; GFX10-WAVE64-NEXT: .LBB9_4: 830; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 831; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm 832; GFX10-WAVE64-NEXT: s_endpgm 833; GFX10-WAVE64-NEXT: .LBB9_5: 834; 835; GFX10-WAVE32-LABEL: test_kill_control_flow_return: 836; GFX10-WAVE32: ; %bb.0: ; %entry 837; GFX10-WAVE32-NEXT: s_cmp_eq_u32 s0, 1 838; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo 839; GFX10-WAVE32-NEXT: s_cselect_b32 s2, -1, 0 840; GFX10-WAVE32-NEXT: s_xor_b32 s2, s2, exec_lo 841; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, s2 842; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB9_4 843; GFX10-WAVE32-NEXT: ; %bb.1: ; %entry 844; GFX10-WAVE32-NEXT: s_and_b32 exec_lo, exec_lo, s1 845; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 0 846; GFX10-WAVE32-NEXT: s_cmp_lg_u32 s0, 0 847; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB9_3 848; GFX10-WAVE32-NEXT: ; %bb.2: ; %exit 849; GFX10-WAVE32-NEXT: s_branch .LBB9_5 850; GFX10-WAVE32-NEXT: .LBB9_3: ; %bb 851; GFX10-WAVE32-NEXT: ;;#ASMSTART 852; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1 853; GFX10-WAVE32-NEXT: v_nop_e64 854; GFX10-WAVE32-NEXT: v_nop_e64 855; GFX10-WAVE32-NEXT: v_nop_e64 856; GFX10-WAVE32-NEXT: v_nop_e64 857; 
GFX10-WAVE32-NEXT: v_nop_e64 858; GFX10-WAVE32-NEXT: v_nop_e64 859; GFX10-WAVE32-NEXT: v_nop_e64 860; GFX10-WAVE32-NEXT: v_nop_e64 861; GFX10-WAVE32-NEXT: v_nop_e64 862; GFX10-WAVE32-NEXT: v_nop_e64 863; GFX10-WAVE32-NEXT: ;;#ASMEND 864; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, v7 865; GFX10-WAVE32-NEXT: s_branch .LBB9_5 866; GFX10-WAVE32-NEXT: .LBB9_4: 867; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 868; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm 869; GFX10-WAVE32-NEXT: s_endpgm 870; GFX10-WAVE32-NEXT: .LBB9_5: 871; 872; GFX11-LABEL: test_kill_control_flow_return: 873; GFX11: ; %bb.0: ; %entry 874; GFX11-NEXT: s_cmp_eq_u32 s0, 1 875; GFX11-NEXT: s_mov_b64 s[2:3], exec 876; GFX11-NEXT: s_cselect_b64 s[4:5], -1, 0 877; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 878; GFX11-NEXT: s_xor_b64 s[4:5], s[4:5], exec 879; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], s[4:5] 880; GFX11-NEXT: s_cbranch_scc0 .LBB9_4 881; GFX11-NEXT: ; %bb.1: ; %entry 882; GFX11-NEXT: s_and_b64 exec, exec, s[2:3] 883; GFX11-NEXT: v_mov_b32_e32 v0, 0 884; GFX11-NEXT: s_cmp_lg_u32 s0, 0 885; GFX11-NEXT: s_cbranch_scc0 .LBB9_3 886; GFX11-NEXT: ; %bb.2: ; %exit 887; GFX11-NEXT: s_branch .LBB9_5 888; GFX11-NEXT: .LBB9_3: ; %bb 889; GFX11-NEXT: ;;#ASMSTART 890; GFX11-NEXT: v_mov_b32_e64 v7, -1 891; GFX11-NEXT: v_nop_e64 892; GFX11-NEXT: v_nop_e64 893; GFX11-NEXT: v_nop_e64 894; GFX11-NEXT: v_nop_e64 895; GFX11-NEXT: v_nop_e64 896; GFX11-NEXT: v_nop_e64 897; GFX11-NEXT: v_nop_e64 898; GFX11-NEXT: v_nop_e64 899; GFX11-NEXT: v_nop_e64 900; GFX11-NEXT: v_nop_e64 901; GFX11-NEXT: ;;#ASMEND 902; GFX11-NEXT: v_mov_b32_e32 v0, v7 903; GFX11-NEXT: s_branch .LBB9_5 904; GFX11-NEXT: .LBB9_4: 905; GFX11-NEXT: s_mov_b64 exec, 0 906; GFX11-NEXT: exp mrt0 off, off, off, off done 907; GFX11-NEXT: s_endpgm 908; GFX11-NEXT: .LBB9_5: 909entry: 910 %kill = icmp eq i32 %arg, 1 911 %cmp = icmp eq i32 %arg, 0 912 call void @llvm.amdgcn.kill(i1 %kill) 913 br i1 %cmp, label %bb, 
label %exit 914 915bb: 916 %var = call float asm sideeffect "v_mov_b32_e64 v7, -1 917 v_nop_e64 918 v_nop_e64 919 v_nop_e64 920 v_nop_e64 921 v_nop_e64 922 v_nop_e64 923 v_nop_e64 924 v_nop_e64 925 v_nop_e64 926 v_nop_e64", "={v7}"() 927 br label %exit 928 929exit: 930 %ret = phi float [ %var, %bb ], [ 0.0, %entry ] 931 ret float %ret 932} 933 934define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 { 935; SI-LABEL: test_kill_divergent_loop: 936; SI: ; %bb.0: ; %entry 937; SI-NEXT: s_mov_b64 s[0:1], exec 938; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 939; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc 940; SI-NEXT: s_xor_b64 s[4:5], exec, s[2:3] 941; SI-NEXT: s_cbranch_execz .LBB10_4 942; SI-NEXT: ; %bb.1: ; %bb.preheader 943; SI-NEXT: s_mov_b32 s3, 0xf000 944; SI-NEXT: s_mov_b32 s2, -1 945; SI-NEXT: .LBB10_2: ; %bb 946; SI-NEXT: ; =>This Inner Loop Header: Depth=1 947; SI-NEXT: ;;#ASMSTART 948; SI-NEXT: v_mov_b32_e64 v7, -1 949; SI-NEXT: v_nop_e64 950; SI-NEXT: v_nop_e64 951; SI-NEXT: v_nop_e64 952; SI-NEXT: v_nop_e64 953; SI-NEXT: v_nop_e64 954; SI-NEXT: v_nop_e64 955; SI-NEXT: v_nop_e64 956; SI-NEXT: v_nop_e64 957; SI-NEXT: v_nop_e64 958; SI-NEXT: v_nop_e64 959; SI-NEXT: ;;#ASMEND 960; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7 961; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc 962; SI-NEXT: s_cbranch_scc0 .LBB10_5 963; SI-NEXT: ; %bb.3: ; %bb 964; SI-NEXT: ; in Loop: Header=BB10_2 Depth=1 965; SI-NEXT: s_andn2_b64 exec, exec, vcc 966; SI-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc 967; SI-NEXT: s_waitcnt vmcnt(0) 968; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 969; SI-NEXT: s_cbranch_vccnz .LBB10_2 970; SI-NEXT: .LBB10_4: ; %Flow1 971; SI-NEXT: s_or_b64 exec, exec, s[4:5] 972; SI-NEXT: s_mov_b32 s3, 0xf000 973; SI-NEXT: s_mov_b32 s2, -1 974; SI-NEXT: v_mov_b32_e32 v0, 8 975; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 976; SI-NEXT: s_waitcnt vmcnt(0) 977; SI-NEXT: s_endpgm 978; SI-NEXT: .LBB10_5: 979; SI-NEXT: s_mov_b64 exec, 0 980; SI-NEXT: exp null off, off, off, off done 
vm 981; SI-NEXT: s_endpgm 982; 983; GFX10-WAVE64-LABEL: test_kill_divergent_loop: 984; GFX10-WAVE64: ; %bb.0: ; %entry 985; GFX10-WAVE64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 986; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec 987; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[2:3], vcc 988; GFX10-WAVE64-NEXT: s_xor_b64 s[2:3], exec, s[2:3] 989; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB10_3 990; GFX10-WAVE64-NEXT: .LBB10_1: ; %bb 991; GFX10-WAVE64-NEXT: ; =>This Inner Loop Header: Depth=1 992; GFX10-WAVE64-NEXT: ;;#ASMSTART 993; GFX10-WAVE64-NEXT: v_mov_b32_e64 v7, -1 994; GFX10-WAVE64-NEXT: v_nop_e64 995; GFX10-WAVE64-NEXT: v_nop_e64 996; GFX10-WAVE64-NEXT: v_nop_e64 997; GFX10-WAVE64-NEXT: v_nop_e64 998; GFX10-WAVE64-NEXT: v_nop_e64 999; GFX10-WAVE64-NEXT: v_nop_e64 1000; GFX10-WAVE64-NEXT: v_nop_e64 1001; GFX10-WAVE64-NEXT: v_nop_e64 1002; GFX10-WAVE64-NEXT: v_nop_e64 1003; GFX10-WAVE64-NEXT: v_nop_e64 1004; GFX10-WAVE64-NEXT: ;;#ASMEND 1005; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7 1006; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc 1007; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB10_4 1008; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb 1009; GFX10-WAVE64-NEXT: ; in Loop: Header=BB10_1 Depth=1 1010; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc 1011; GFX10-WAVE64-NEXT: global_load_dword v0, v[0:1], off glc dlc 1012; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0) 1013; GFX10-WAVE64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1014; GFX10-WAVE64-NEXT: s_cbranch_vccnz .LBB10_1 1015; GFX10-WAVE64-NEXT: .LBB10_3: ; %Flow1 1016; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[2:3] 1017; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 8 1018; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v0, off 1019; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0 1020; GFX10-WAVE64-NEXT: s_endpgm 1021; GFX10-WAVE64-NEXT: .LBB10_4: 1022; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 1023; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm 1024; GFX10-WAVE64-NEXT: s_endpgm 1025; 1026; GFX10-WAVE32-LABEL: test_kill_divergent_loop: 1027; GFX10-WAVE32: 
; %bb.0: ; %entry 1028; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1029; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo 1030; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s1, vcc_lo 1031; GFX10-WAVE32-NEXT: s_xor_b32 s1, exec_lo, s1 1032; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB10_3 1033; GFX10-WAVE32-NEXT: .LBB10_1: ; %bb 1034; GFX10-WAVE32-NEXT: ; =>This Inner Loop Header: Depth=1 1035; GFX10-WAVE32-NEXT: ;;#ASMSTART 1036; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1 1037; GFX10-WAVE32-NEXT: v_nop_e64 1038; GFX10-WAVE32-NEXT: v_nop_e64 1039; GFX10-WAVE32-NEXT: v_nop_e64 1040; GFX10-WAVE32-NEXT: v_nop_e64 1041; GFX10-WAVE32-NEXT: v_nop_e64 1042; GFX10-WAVE32-NEXT: v_nop_e64 1043; GFX10-WAVE32-NEXT: v_nop_e64 1044; GFX10-WAVE32-NEXT: v_nop_e64 1045; GFX10-WAVE32-NEXT: v_nop_e64 1046; GFX10-WAVE32-NEXT: v_nop_e64 1047; GFX10-WAVE32-NEXT: ;;#ASMEND 1048; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v7 1049; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo 1050; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB10_4 1051; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb 1052; GFX10-WAVE32-NEXT: ; in Loop: Header=BB10_1 Depth=1 1053; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo 1054; GFX10-WAVE32-NEXT: global_load_dword v0, v[0:1], off glc dlc 1055; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0) 1056; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1057; GFX10-WAVE32-NEXT: s_cbranch_vccnz .LBB10_1 1058; GFX10-WAVE32-NEXT: .LBB10_3: ; %Flow1 1059; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s1 1060; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 8 1061; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v0, off 1062; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0 1063; GFX10-WAVE32-NEXT: s_endpgm 1064; GFX10-WAVE32-NEXT: .LBB10_4: 1065; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1066; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm 1067; GFX10-WAVE32-NEXT: s_endpgm 1068; 1069; GFX11-LABEL: test_kill_divergent_loop: 1070; GFX11: ; %bb.0: ; %entry 1071; GFX11-NEXT: s_mov_b64 s[0:1], exec 1072; GFX11-NEXT: s_mov_b64 
s[2:3], exec 1073; GFX11-NEXT: v_cmpx_eq_u32_e32 0, v0 1074; GFX11-NEXT: s_xor_b64 s[2:3], exec, s[2:3] 1075; GFX11-NEXT: s_cbranch_execz .LBB10_3 1076; GFX11-NEXT: .LBB10_1: ; %bb 1077; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 1078; GFX11-NEXT: ;;#ASMSTART 1079; GFX11-NEXT: v_mov_b32_e64 v7, -1 1080; GFX11-NEXT: v_nop_e64 1081; GFX11-NEXT: v_nop_e64 1082; GFX11-NEXT: v_nop_e64 1083; GFX11-NEXT: v_nop_e64 1084; GFX11-NEXT: v_nop_e64 1085; GFX11-NEXT: v_nop_e64 1086; GFX11-NEXT: v_nop_e64 1087; GFX11-NEXT: v_nop_e64 1088; GFX11-NEXT: v_nop_e64 1089; GFX11-NEXT: v_nop_e64 1090; GFX11-NEXT: ;;#ASMEND 1091; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7 1092; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc 1093; GFX11-NEXT: s_cbranch_scc0 .LBB10_4 1094; GFX11-NEXT: ; %bb.2: ; %bb 1095; GFX11-NEXT: ; in Loop: Header=BB10_1 Depth=1 1096; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc 1097; GFX11-NEXT: global_load_b32 v0, v[0:1], off glc dlc 1098; GFX11-NEXT: s_waitcnt vmcnt(0) 1099; GFX11-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1100; GFX11-NEXT: s_cbranch_vccnz .LBB10_1 1101; GFX11-NEXT: .LBB10_3: ; %Flow1 1102; GFX11-NEXT: s_or_b64 exec, exec, s[2:3] 1103; GFX11-NEXT: v_mov_b32_e32 v0, 8 1104; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc 1105; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1106; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1107; GFX11-NEXT: s_endpgm 1108; GFX11-NEXT: .LBB10_4: 1109; GFX11-NEXT: s_mov_b64 exec, 0 1110; GFX11-NEXT: exp mrt0 off, off, off, off done 1111; GFX11-NEXT: s_endpgm 1112entry: 1113 %cmp = icmp eq i32 %arg, 0 1114 br i1 %cmp, label %bb, label %exit 1115 1116bb: 1117 %var = call float asm sideeffect "v_mov_b32_e64 v7, -1 1118 v_nop_e64 1119 v_nop_e64 1120 v_nop_e64 1121 v_nop_e64 1122 v_nop_e64 1123 v_nop_e64 1124 v_nop_e64 1125 v_nop_e64 1126 v_nop_e64 1127 v_nop_e64", "={v7}"() 1128 %cmp.var = fcmp olt float %var, 0.0 1129 call void @llvm.amdgcn.kill(i1 %cmp.var) 1130 %vgpr = load volatile i32, i32 addrspace(1)* undef 1131 %loop.cond = icmp 
eq i32 %vgpr, 0 1132 br i1 %loop.cond, label %bb, label %exit 1133 1134exit: 1135 store volatile i32 8, i32 addrspace(1)* undef 1136 ret void 1137} 1138 1139; bug 28550 1140define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 { 1141; SI-LABEL: phi_use_def_before_kill: 1142; SI: ; %bb.0: ; %bb 1143; SI-NEXT: v_add_f32_e64 v1, s0, 1.0 1144; SI-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1 1145; SI-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc 1146; SI-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1 1147; SI-NEXT: s_andn2_b64 exec, exec, vcc 1148; SI-NEXT: s_cbranch_scc0 .LBB11_6 1149; SI-NEXT: ; %bb.1: ; %bb 1150; SI-NEXT: s_andn2_b64 exec, exec, vcc 1151; SI-NEXT: s_cbranch_scc0 .LBB11_3 1152; SI-NEXT: ; %bb.2: ; %bb8 1153; SI-NEXT: s_mov_b32 s3, 0xf000 1154; SI-NEXT: s_mov_b32 s2, -1 1155; SI-NEXT: v_mov_b32_e32 v0, 8 1156; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 1157; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1158; SI-NEXT: v_mov_b32_e32 v0, 4.0 1159; SI-NEXT: .LBB11_3: ; %phibb 1160; SI-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0 1161; SI-NEXT: s_cbranch_vccz .LBB11_5 1162; SI-NEXT: ; %bb.4: ; %bb10 1163; SI-NEXT: s_mov_b32 s3, 0xf000 1164; SI-NEXT: s_mov_b32 s2, -1 1165; SI-NEXT: v_mov_b32_e32 v0, 9 1166; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 1167; SI-NEXT: s_waitcnt vmcnt(0) 1168; SI-NEXT: .LBB11_5: ; %end 1169; SI-NEXT: s_endpgm 1170; SI-NEXT: .LBB11_6: 1171; SI-NEXT: s_mov_b64 exec, 0 1172; SI-NEXT: exp null off, off, off, off done vm 1173; SI-NEXT: s_endpgm 1174; 1175; GFX10-WAVE64-LABEL: phi_use_def_before_kill: 1176; GFX10-WAVE64: ; %bb.0: ; %bb 1177; GFX10-WAVE64-NEXT: v_add_f32_e64 v1, s0, 1.0 1178; GFX10-WAVE64-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1 1179; GFX10-WAVE64-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc 1180; GFX10-WAVE64-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1 1181; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc 1182; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB11_6 1183; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb 1184; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc 1185; 
GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB11_3 1186; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb8 1187; GFX10-WAVE64-NEXT: v_mov_b32_e32 v1, 8 1188; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 4.0 1189; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v1, off 1190; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0 1191; GFX10-WAVE64-NEXT: .LBB11_3: ; %phibb 1192; GFX10-WAVE64-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0 1193; GFX10-WAVE64-NEXT: s_cbranch_vccz .LBB11_5 1194; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb10 1195; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 9 1196; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v0, off 1197; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0 1198; GFX10-WAVE64-NEXT: .LBB11_5: ; %end 1199; GFX10-WAVE64-NEXT: s_endpgm 1200; GFX10-WAVE64-NEXT: .LBB11_6: 1201; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 1202; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm 1203; GFX10-WAVE64-NEXT: s_endpgm 1204; 1205; GFX10-WAVE32-LABEL: phi_use_def_before_kill: 1206; GFX10-WAVE32: ; %bb.0: ; %bb 1207; GFX10-WAVE32-NEXT: v_add_f32_e64 v1, s0, 1.0 1208; GFX10-WAVE32-NEXT: v_cmp_lt_f32_e32 vcc_lo, 0, v1 1209; GFX10-WAVE32-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc_lo 1210; GFX10-WAVE32-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 0, v1 1211; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo 1212; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB11_6 1213; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb 1214; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo 1215; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB11_3 1216; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb8 1217; GFX10-WAVE32-NEXT: v_mov_b32_e32 v1, 8 1218; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 4.0 1219; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v1, off 1220; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0 1221; GFX10-WAVE32-NEXT: .LBB11_3: ; %phibb 1222; GFX10-WAVE32-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0 1223; GFX10-WAVE32-NEXT: s_cbranch_vccz .LBB11_5 1224; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb10 1225; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 9 1226; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v0, 
off 1227; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0 1228; GFX10-WAVE32-NEXT: .LBB11_5: ; %end 1229; GFX10-WAVE32-NEXT: s_endpgm 1230; GFX10-WAVE32-NEXT: .LBB11_6: 1231; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1232; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm 1233; GFX10-WAVE32-NEXT: s_endpgm 1234; 1235; GFX11-LABEL: phi_use_def_before_kill: 1236; GFX11: ; %bb.0: ; %bb 1237; GFX11-NEXT: v_add_f32_e64 v1, s0, 1.0 1238; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1239; GFX11-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1 1240; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc 1241; GFX11-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1 1242; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc 1243; GFX11-NEXT: s_cbranch_scc0 .LBB11_6 1244; GFX11-NEXT: ; %bb.1: ; %bb 1245; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc 1246; GFX11-NEXT: s_cbranch_scc0 .LBB11_3 1247; GFX11-NEXT: ; %bb.2: ; %bb8 1248; GFX11-NEXT: v_mov_b32_e32 v1, 8 1249; GFX11-NEXT: v_mov_b32_e32 v0, 4.0 1250; GFX11-NEXT: global_store_b32 v[0:1], v1, off dlc 1251; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1252; GFX11-NEXT: .LBB11_3: ; %phibb 1253; GFX11-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0 1254; GFX11-NEXT: s_cbranch_vccz .LBB11_5 1255; GFX11-NEXT: ; %bb.4: ; %bb10 1256; GFX11-NEXT: v_mov_b32_e32 v0, 9 1257; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc 1258; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1259; GFX11-NEXT: .LBB11_5: ; %end 1260; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1261; GFX11-NEXT: s_endpgm 1262; GFX11-NEXT: .LBB11_6: 1263; GFX11-NEXT: s_mov_b64 exec, 0 1264; GFX11-NEXT: exp mrt0 off, off, off, off done 1265; GFX11-NEXT: s_endpgm 1266bb: 1267 %tmp = fadd float %x, 1.000000e+00 1268 %tmp1 = fcmp olt float 0.000000e+00, %tmp 1269 %tmp2 = select i1 %tmp1, float -1.000000e+00, float 0.000000e+00 1270 %cmp.tmp2 = fcmp olt float %tmp2, 0.0 1271 call void @llvm.amdgcn.kill(i1 %cmp.tmp2) 1272 br i1 undef, label %phibb, label %bb8 1273 1274phibb: 1275 %tmp5 = phi float [ %tmp2, %bb ], [ 4.0, %bb8 ] 1276 %tmp6 = fcmp oeq 
float %tmp5, 0.000000e+00 1277 br i1 %tmp6, label %bb10, label %end 1278 1279bb8: 1280 store volatile i32 8, i32 addrspace(1)* undef 1281 br label %phibb 1282 1283bb10: 1284 store volatile i32 9, i32 addrspace(1)* undef 1285 br label %end 1286 1287end: 1288 ret void 1289} 1290 1291define amdgpu_ps void @no_skip_no_successors(float inreg %arg, float inreg %arg1) #0 { 1292; SI-LABEL: no_skip_no_successors: 1293; SI: ; %bb.0: ; %bb 1294; SI-NEXT: v_cmp_nge_f32_e64 s[4:5], s1, 0 1295; SI-NEXT: s_and_b64 vcc, exec, s[4:5] 1296; SI-NEXT: s_cbranch_vccz .LBB12_3 1297; SI-NEXT: ; %bb.1: ; %bb6 1298; SI-NEXT: s_mov_b64 s[2:3], exec 1299; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], exec 1300; SI-NEXT: s_cbranch_scc0 .LBB12_5 1301; SI-NEXT: ; %bb.2: ; %bb6 1302; SI-NEXT: s_mov_b64 exec, 0 1303; SI-NEXT: .LBB12_3: ; %bb3 1304; SI-NEXT: v_mov_b32_e32 v0, 0x3e7ae148 1305; SI-NEXT: v_cmp_nge_f32_e32 vcc, s0, v0 1306; SI-NEXT: s_and_b64 vcc, exec, vcc 1307; SI-NEXT: ; %bb.4: ; %bb5 1308; SI-NEXT: .LBB12_5: 1309; SI-NEXT: s_mov_b64 exec, 0 1310; SI-NEXT: exp null off, off, off, off done vm 1311; SI-NEXT: s_endpgm 1312; 1313; GFX10-WAVE64-LABEL: no_skip_no_successors: 1314; GFX10-WAVE64: ; %bb.0: ; %bb 1315; GFX10-WAVE64-NEXT: v_cmp_nge_f32_e64 s[4:5], s1, 0 1316; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, s[4:5] 1317; GFX10-WAVE64-NEXT: s_cbranch_vccz .LBB12_3 1318; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb6 1319; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec 1320; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], exec 1321; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB12_5 1322; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb6 1323; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 1324; GFX10-WAVE64-NEXT: .LBB12_3: ; %bb3 1325; GFX10-WAVE64-NEXT: v_cmp_nle_f32_e64 s[0:1], 0x3e7ae148, s0 1326; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, s[0:1] 1327; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb5 1328; GFX10-WAVE64-NEXT: .LBB12_5: 1329; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 1330; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm 1331; 
GFX10-WAVE64-NEXT: s_endpgm 1332; 1333; GFX10-WAVE32-LABEL: no_skip_no_successors: 1334; GFX10-WAVE32: ; %bb.0: ; %bb 1335; GFX10-WAVE32-NEXT: v_cmp_nge_f32_e64 s1, s1, 0 1336; GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, s1 1337; GFX10-WAVE32-NEXT: s_cbranch_vccz .LBB12_3 1338; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb6 1339; GFX10-WAVE32-NEXT: s_mov_b32 s2, exec_lo 1340; GFX10-WAVE32-NEXT: s_andn2_b32 s2, s2, exec_lo 1341; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB12_5 1342; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb6 1343; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1344; GFX10-WAVE32-NEXT: .LBB12_3: ; %bb3 1345; GFX10-WAVE32-NEXT: v_cmp_nle_f32_e64 s0, 0x3e7ae148, s0 1346; GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, s0 1347; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb5 1348; GFX10-WAVE32-NEXT: .LBB12_5: 1349; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1350; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm 1351; GFX10-WAVE32-NEXT: s_endpgm 1352; 1353; GFX11-LABEL: no_skip_no_successors: 1354; GFX11: ; %bb.0: ; %bb 1355; GFX11-NEXT: v_cmp_nge_f32_e64 s[4:5], s1, 0 1356; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1357; GFX11-NEXT: s_and_b64 vcc, exec, s[4:5] 1358; GFX11-NEXT: s_cbranch_vccz .LBB12_3 1359; GFX11-NEXT: ; %bb.1: ; %bb6 1360; GFX11-NEXT: s_mov_b64 s[2:3], exec 1361; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1362; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], exec 1363; GFX11-NEXT: s_cbranch_scc0 .LBB12_5 1364; GFX11-NEXT: ; %bb.2: ; %bb6 1365; GFX11-NEXT: s_mov_b64 exec, 0 1366; GFX11-NEXT: .LBB12_3: ; %bb3 1367; GFX11-NEXT: v_cmp_nle_f32_e64 s[0:1], 0x3e7ae148, s0 1368; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1369; GFX11-NEXT: s_and_b64 vcc, exec, s[0:1] 1370; GFX11-NEXT: ; %bb.4: ; %bb5 1371; GFX11-NEXT: .LBB12_5: 1372; GFX11-NEXT: s_mov_b64 exec, 0 1373; GFX11-NEXT: exp mrt0 off, off, off, off done 1374; GFX11-NEXT: s_endpgm 1375bb: 1376 %tmp = fcmp ult float %arg1, 0.000000e+00 1377 %tmp2 = fcmp ult float %arg, 0x3FCF5C2900000000 1378 br i1 %tmp, label %bb6, label %bb3 
1379 1380bb3: ; preds = %bb 1381 br i1 %tmp2, label %bb5, label %bb4 1382 1383bb4: ; preds = %bb3 1384 br i1 true, label %bb5, label %bb7 1385 1386bb5: ; preds = %bb4, %bb3 1387 unreachable 1388 1389bb6: ; preds = %bb 1390 call void @llvm.amdgcn.kill(i1 false) 1391 unreachable 1392 1393bb7: ; preds = %bb4 1394 ret void 1395} 1396 1397define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2, float %arg3) #0 { 1398; SI-LABEL: if_after_kill_block: 1399; SI: ; %bb.0: ; %bb 1400; SI-NEXT: s_mov_b64 s[2:3], exec 1401; SI-NEXT: s_wqm_b64 exec, exec 1402; SI-NEXT: s_mov_b32 s0, 0 1403; SI-NEXT: v_cmp_nle_f32_e32 vcc, 0, v1 1404; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc 1405; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] 1406; SI-NEXT: s_cbranch_execz .LBB13_3 1407; SI-NEXT: ; %bb.1: ; %bb3 1408; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0 1409; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc 1410; SI-NEXT: s_cbranch_scc0 .LBB13_6 1411; SI-NEXT: ; %bb.2: ; %bb3 1412; SI-NEXT: s_andn2_b64 exec, exec, vcc 1413; SI-NEXT: .LBB13_3: ; %bb4 1414; SI-NEXT: s_or_b64 exec, exec, s[4:5] 1415; SI-NEXT: s_mov_b32 s1, s0 1416; SI-NEXT: s_mov_b32 s2, s0 1417; SI-NEXT: s_mov_b32 s3, s0 1418; SI-NEXT: s_mov_b32 s4, s0 1419; SI-NEXT: s_mov_b32 s5, s0 1420; SI-NEXT: s_mov_b32 s6, s0 1421; SI-NEXT: s_mov_b32 s7, s0 1422; SI-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 1423; SI-NEXT: s_waitcnt vmcnt(0) 1424; SI-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 1425; SI-NEXT: s_and_saveexec_b64 s[0:1], vcc 1426; SI-NEXT: s_cbranch_execz .LBB13_5 1427; SI-NEXT: ; %bb.4: ; %bb8 1428; SI-NEXT: s_mov_b32 s3, 0xf000 1429; SI-NEXT: s_mov_b32 s2, -1 1430; SI-NEXT: v_mov_b32_e32 v0, 9 1431; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 1432; SI-NEXT: s_waitcnt vmcnt(0) 1433; SI-NEXT: .LBB13_5: ; %UnifiedReturnBlock 1434; SI-NEXT: s_endpgm 1435; SI-NEXT: .LBB13_6: 1436; SI-NEXT: s_mov_b64 exec, 0 1437; SI-NEXT: exp null off, off, off, off done vm 1438; SI-NEXT: s_endpgm 1439; 1440; 
GFX10-WAVE64-LABEL: if_after_kill_block: 1441; GFX10-WAVE64: ; %bb.0: ; %bb 1442; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec 1443; GFX10-WAVE64-NEXT: s_wqm_b64 exec, exec 1444; GFX10-WAVE64-NEXT: v_cmp_nle_f32_e32 vcc, 0, v1 1445; GFX10-WAVE64-NEXT: s_mov_b32 s0, 0 1446; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[4:5], vcc 1447; GFX10-WAVE64-NEXT: s_xor_b64 s[4:5], exec, s[4:5] 1448; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB13_3 1449; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb3 1450; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0 1451; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc 1452; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB13_6 1453; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb3 1454; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc 1455; GFX10-WAVE64-NEXT: .LBB13_3: ; %bb4 1456; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[4:5] 1457; GFX10-WAVE64-NEXT: s_mov_b32 s1, s0 1458; GFX10-WAVE64-NEXT: s_mov_b32 s2, s0 1459; GFX10-WAVE64-NEXT: s_mov_b32 s3, s0 1460; GFX10-WAVE64-NEXT: s_mov_b32 s4, s0 1461; GFX10-WAVE64-NEXT: s_mov_b32 s5, s0 1462; GFX10-WAVE64-NEXT: s_mov_b32 s6, s0 1463; GFX10-WAVE64-NEXT: s_mov_b32 s7, s0 1464; GFX10-WAVE64-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D 1465; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0) 1466; GFX10-WAVE64-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 1467; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[0:1], vcc 1468; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB13_5 1469; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb8 1470; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 9 1471; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v0, off 1472; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0 1473; GFX10-WAVE64-NEXT: .LBB13_5: ; %UnifiedReturnBlock 1474; GFX10-WAVE64-NEXT: s_endpgm 1475; GFX10-WAVE64-NEXT: .LBB13_6: 1476; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 1477; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm 1478; GFX10-WAVE64-NEXT: s_endpgm 1479; 1480; GFX10-WAVE32-LABEL: if_after_kill_block: 1481; GFX10-WAVE32: ; %bb.0: ; %bb 1482; GFX10-WAVE32-NEXT: s_mov_b32 s1, 
exec_lo 1483; GFX10-WAVE32-NEXT: s_wqm_b32 exec_lo, exec_lo 1484; GFX10-WAVE32-NEXT: v_cmp_nle_f32_e32 vcc_lo, 0, v1 1485; GFX10-WAVE32-NEXT: s_mov_b32 s0, 0 1486; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s2, vcc_lo 1487; GFX10-WAVE32-NEXT: s_xor_b32 s2, exec_lo, s2 1488; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB13_3 1489; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb3 1490; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0 1491; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, vcc_lo 1492; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB13_6 1493; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb3 1494; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo 1495; GFX10-WAVE32-NEXT: .LBB13_3: ; %bb4 1496; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s2 1497; GFX10-WAVE32-NEXT: s_mov_b32 s1, s0 1498; GFX10-WAVE32-NEXT: s_mov_b32 s2, s0 1499; GFX10-WAVE32-NEXT: s_mov_b32 s3, s0 1500; GFX10-WAVE32-NEXT: s_mov_b32 s4, s0 1501; GFX10-WAVE32-NEXT: s_mov_b32 s5, s0 1502; GFX10-WAVE32-NEXT: s_mov_b32 s6, s0 1503; GFX10-WAVE32-NEXT: s_mov_b32 s7, s0 1504; GFX10-WAVE32-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D 1505; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0) 1506; GFX10-WAVE32-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0, v0 1507; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s0, vcc_lo 1508; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB13_5 1509; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb8 1510; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 9 1511; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v0, off 1512; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0 1513; GFX10-WAVE32-NEXT: .LBB13_5: ; %UnifiedReturnBlock 1514; GFX10-WAVE32-NEXT: s_endpgm 1515; GFX10-WAVE32-NEXT: .LBB13_6: 1516; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1517; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm 1518; GFX10-WAVE32-NEXT: s_endpgm 1519; 1520; GFX11-LABEL: if_after_kill_block: 1521; GFX11: ; %bb.0: ; %bb 1522; GFX11-NEXT: s_mov_b64 s[2:3], exec 1523; GFX11-NEXT: s_wqm_b64 exec, exec 1524; GFX11-NEXT: s_mov_b32 s0, 0 1525; GFX11-NEXT: s_mov_b64 s[4:5], 
exec 1526; GFX11-NEXT: v_cmpx_nle_f32_e32 0, v1 1527; GFX11-NEXT: s_xor_b64 s[4:5], exec, s[4:5] 1528; GFX11-NEXT: s_cbranch_execz .LBB13_3 1529; GFX11-NEXT: ; %bb.1: ; %bb3 1530; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0 1531; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], vcc 1532; GFX11-NEXT: s_cbranch_scc0 .LBB13_6 1533; GFX11-NEXT: ; %bb.2: ; %bb3 1534; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc 1535; GFX11-NEXT: .LBB13_3: ; %bb4 1536; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1537; GFX11-NEXT: s_or_b64 exec, exec, s[4:5] 1538; GFX11-NEXT: s_mov_b32 s1, s0 1539; GFX11-NEXT: s_mov_b32 s2, s0 1540; GFX11-NEXT: s_mov_b32 s3, s0 1541; GFX11-NEXT: s_mov_b32 s4, s0 1542; GFX11-NEXT: s_mov_b32 s5, s0 1543; GFX11-NEXT: s_mov_b32 s6, s0 1544; GFX11-NEXT: s_mov_b32 s7, s0 1545; GFX11-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D 1546; GFX11-NEXT: s_mov_b64 s[0:1], exec 1547; GFX11-NEXT: s_waitcnt vmcnt(0) 1548; GFX11-NEXT: v_cmpx_neq_f32_e32 0, v0 1549; GFX11-NEXT: s_cbranch_execz .LBB13_5 1550; GFX11-NEXT: ; %bb.4: ; %bb8 1551; GFX11-NEXT: v_mov_b32_e32 v0, 9 1552; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc 1553; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1554; GFX11-NEXT: .LBB13_5: ; %UnifiedReturnBlock 1555; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1556; GFX11-NEXT: s_endpgm 1557; GFX11-NEXT: .LBB13_6: 1558; GFX11-NEXT: s_mov_b64 exec, 0 1559; GFX11-NEXT: exp mrt0 off, off, off, off done 1560; GFX11-NEXT: s_endpgm 1561bb: 1562 %tmp = fcmp ult float %arg1, 0.000000e+00 1563 br i1 %tmp, label %bb3, label %bb4 1564 1565bb3: ; preds = %bb 1566 %cmp.arg = fcmp olt float %arg, 0.0 1567 call void @llvm.amdgcn.kill(i1 %cmp.arg) 1568 br label %bb4 1569 1570bb4: ; preds = %bb3, %bb 1571 %tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 16, float %arg2, float %arg3, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) 1572 %tmp6 = extractelement <4 x float> %tmp5, i32 0 1573 %tmp7 = fcmp une float %tmp6, 0.000000e+00 
1574 br i1 %tmp7, label %bb8, label %bb9 1575 1576bb8: ; preds = %bb4 1577 store volatile i32 9, i32 addrspace(1)* undef 1578 ret void 1579 1580bb9: ; preds = %bb4 1581 ret void 1582} 1583 1584define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) { 1585; SI-LABEL: cbranch_kill: 1586; SI: ; %bb.0: ; %.entry 1587; SI-NEXT: s_mov_b32 s4, 0 1588; SI-NEXT: s_mov_b64 s[0:1], exec 1589; SI-NEXT: v_mov_b32_e32 v4, 0 1590; SI-NEXT: v_mov_b32_e32 v2, v1 1591; SI-NEXT: v_mov_b32_e32 v3, v1 1592; SI-NEXT: s_mov_b32 s5, s4 1593; SI-NEXT: s_mov_b32 s6, s4 1594; SI-NEXT: s_mov_b32 s7, s4 1595; SI-NEXT: s_mov_b32 s8, s4 1596; SI-NEXT: s_mov_b32 s9, s4 1597; SI-NEXT: s_mov_b32 s10, s4 1598; SI-NEXT: s_mov_b32 s11, s4 1599; SI-NEXT: image_sample_l v1, v[1:4], s[4:11], s[0:3] dmask:0x1 da 1600; SI-NEXT: s_waitcnt vmcnt(0) 1601; SI-NEXT: v_cmp_ge_f32_e32 vcc, 0, v1 1602; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc 1603; SI-NEXT: s_xor_b64 s[2:3], exec, s[2:3] 1604; SI-NEXT: s_cbranch_execz .LBB14_3 1605; SI-NEXT: ; %bb.1: ; %kill 1606; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec 1607; SI-NEXT: ; implicit-def: $vgpr0 1608; SI-NEXT: ; implicit-def: $vgpr1 1609; SI-NEXT: s_cbranch_scc0 .LBB14_6 1610; SI-NEXT: ; %bb.2: ; %kill 1611; SI-NEXT: s_mov_b64 exec, 0 1612; SI-NEXT: .LBB14_3: ; %Flow 1613; SI-NEXT: s_or_saveexec_b64 s[0:1], s[2:3] 1614; SI-NEXT: ; implicit-def: $vgpr2 1615; SI-NEXT: s_xor_b64 exec, exec, s[0:1] 1616; SI-NEXT: ; %bb.4: ; %live 1617; SI-NEXT: v_mul_f32_e32 v2, v0, v1 1618; SI-NEXT: ; %bb.5: ; %export 1619; SI-NEXT: s_or_b64 exec, exec, s[0:1] 1620; SI-NEXT: exp mrt0 v2, v2, v2, v2 done vm 1621; SI-NEXT: s_endpgm 1622; SI-NEXT: .LBB14_6: 1623; SI-NEXT: s_mov_b64 exec, 0 1624; SI-NEXT: exp null off, off, off, off done vm 1625; SI-NEXT: s_endpgm 1626; 1627; GFX10-WAVE64-LABEL: cbranch_kill: 1628; GFX10-WAVE64: ; %bb.0: ; %.entry 1629; GFX10-WAVE64-NEXT: v_mov_b32_e32 v2, 0 1630; GFX10-WAVE64-NEXT: s_mov_b32 s4, 0 1631; GFX10-WAVE64-NEXT: s_mov_b64 
s[0:1], exec 1632; GFX10-WAVE64-NEXT: s_mov_b32 s5, s4 1633; GFX10-WAVE64-NEXT: s_mov_b32 s6, s4 1634; GFX10-WAVE64-NEXT: s_mov_b32 s7, s4 1635; GFX10-WAVE64-NEXT: s_mov_b32 s8, s4 1636; GFX10-WAVE64-NEXT: s_mov_b32 s9, s4 1637; GFX10-WAVE64-NEXT: s_mov_b32 s10, s4 1638; GFX10-WAVE64-NEXT: s_mov_b32 s11, s4 1639; GFX10-WAVE64-NEXT: image_sample_l v1, [v1, v1, v1, v2], s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY 1640; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0) 1641; GFX10-WAVE64-NEXT: v_cmp_ge_f32_e32 vcc, 0, v1 1642; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[2:3], vcc 1643; GFX10-WAVE64-NEXT: s_xor_b64 s[2:3], exec, s[2:3] 1644; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB14_3 1645; GFX10-WAVE64-NEXT: ; %bb.1: ; %kill 1646; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec 1647; GFX10-WAVE64-NEXT: ; implicit-def: $vgpr0 1648; GFX10-WAVE64-NEXT: ; implicit-def: $vgpr1 1649; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB14_6 1650; GFX10-WAVE64-NEXT: ; %bb.2: ; %kill 1651; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 1652; GFX10-WAVE64-NEXT: .LBB14_3: ; %Flow 1653; GFX10-WAVE64-NEXT: s_or_saveexec_b64 s[0:1], s[2:3] 1654; GFX10-WAVE64-NEXT: ; implicit-def: $vgpr2 1655; GFX10-WAVE64-NEXT: s_xor_b64 exec, exec, s[0:1] 1656; GFX10-WAVE64-NEXT: ; %bb.4: ; %live 1657; GFX10-WAVE64-NEXT: v_mul_f32_e32 v2, v0, v1 1658; GFX10-WAVE64-NEXT: ; %bb.5: ; %export 1659; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[0:1] 1660; GFX10-WAVE64-NEXT: exp mrt0 v2, v2, v2, v2 done vm 1661; GFX10-WAVE64-NEXT: s_endpgm 1662; GFX10-WAVE64-NEXT: .LBB14_6: 1663; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 1664; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm 1665; GFX10-WAVE64-NEXT: s_endpgm 1666; 1667; GFX10-WAVE32-LABEL: cbranch_kill: 1668; GFX10-WAVE32: ; %bb.0: ; %.entry 1669; GFX10-WAVE32-NEXT: v_mov_b32_e32 v2, 0 1670; GFX10-WAVE32-NEXT: s_mov_b32 s4, 0 1671; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo 1672; GFX10-WAVE32-NEXT: s_mov_b32 s5, s4 1673; GFX10-WAVE32-NEXT: s_mov_b32 s6, s4 1674; GFX10-WAVE32-NEXT: 
s_mov_b32 s7, s4 1675; GFX10-WAVE32-NEXT: s_mov_b32 s8, s4 1676; GFX10-WAVE32-NEXT: s_mov_b32 s9, s4 1677; GFX10-WAVE32-NEXT: s_mov_b32 s10, s4 1678; GFX10-WAVE32-NEXT: s_mov_b32 s11, s4 1679; GFX10-WAVE32-NEXT: image_sample_l v1, [v1, v1, v1, v2], s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY 1680; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0) 1681; GFX10-WAVE32-NEXT: v_cmp_ge_f32_e32 vcc_lo, 0, v1 1682; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s1, vcc_lo 1683; GFX10-WAVE32-NEXT: s_xor_b32 s1, exec_lo, s1 1684; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB14_3 1685; GFX10-WAVE32-NEXT: ; %bb.1: ; %kill 1686; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, exec_lo 1687; GFX10-WAVE32-NEXT: ; implicit-def: $vgpr0 1688; GFX10-WAVE32-NEXT: ; implicit-def: $vgpr1 1689; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB14_6 1690; GFX10-WAVE32-NEXT: ; %bb.2: ; %kill 1691; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1692; GFX10-WAVE32-NEXT: .LBB14_3: ; %Flow 1693; GFX10-WAVE32-NEXT: s_or_saveexec_b32 s0, s1 1694; GFX10-WAVE32-NEXT: ; implicit-def: $vgpr2 1695; GFX10-WAVE32-NEXT: s_xor_b32 exec_lo, exec_lo, s0 1696; GFX10-WAVE32-NEXT: ; %bb.4: ; %live 1697; GFX10-WAVE32-NEXT: v_mul_f32_e32 v2, v0, v1 1698; GFX10-WAVE32-NEXT: ; %bb.5: ; %export 1699; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s0 1700; GFX10-WAVE32-NEXT: exp mrt0 v2, v2, v2, v2 done vm 1701; GFX10-WAVE32-NEXT: s_endpgm 1702; GFX10-WAVE32-NEXT: .LBB14_6: 1703; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1704; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm 1705; GFX10-WAVE32-NEXT: s_endpgm 1706; 1707; GFX11-LABEL: cbranch_kill: 1708; GFX11: ; %bb.0: ; %.entry 1709; GFX11-NEXT: v_mov_b32_e32 v2, 0 1710; GFX11-NEXT: s_mov_b32 s4, 0 1711; GFX11-NEXT: s_mov_b64 s[0:1], exec 1712; GFX11-NEXT: s_mov_b32 s5, s4 1713; GFX11-NEXT: s_mov_b32 s6, s4 1714; GFX11-NEXT: s_mov_b32 s7, s4 1715; GFX11-NEXT: s_mov_b32 s8, s4 1716; GFX11-NEXT: s_mov_b32 s9, s4 1717; GFX11-NEXT: s_mov_b32 s10, s4 1718; GFX11-NEXT: s_mov_b32 s11, s4 1719; GFX11-NEXT: 
image_sample_l v1, [v1, v1, v1, v2], s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX11-NEXT: s_mov_b64 s[2:3], exec
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cmpx_ge_f32_e32 0, v1
; GFX11-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
; GFX11-NEXT: s_cbranch_execz .LBB14_3
; GFX11-NEXT: ; %bb.1: ; %kill
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], exec
; GFX11-NEXT: ; implicit-def: $vgpr0
; GFX11-NEXT: ; implicit-def: $vgpr1
; GFX11-NEXT: s_cbranch_scc0 .LBB14_6
; GFX11-NEXT: ; %bb.2: ; %kill
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: .LBB14_3: ; %Flow
; GFX11-NEXT: s_or_saveexec_b64 s[0:1], s[2:3]
; GFX11-NEXT: ; implicit-def: $vgpr2
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_xor_b64 exec, exec, s[0:1]
; GFX11-NEXT: ; %bb.4: ; %live
; GFX11-NEXT: v_mul_f32_e32 v2, v0, v1
; GFX11-NEXT: ; %bb.5: ; %export
; GFX11-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX11-NEXT: exp mrt0 v2, v2, v2, v2 done
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB14_6:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: exp mrt0 off, off, off, off done
; GFX11-NEXT: s_endpgm
.entry:
  %sample = call float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 1, float %val1, float %val1, float %val1, float 0.000000e+00, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
  %cond0 = fcmp ugt float %sample, 0.000000e+00
  br i1 %cond0, label %live, label %kill

kill:
  call void @llvm.amdgcn.kill(i1 false)
  br label %export

live:
  %scale = fmul reassoc nnan nsz arcp contract float %val0, %sample
  br label %export

export:
  %proxy = phi float [ undef, %kill ], [ %scale, %live ]
  call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 15, float %proxy, float %proxy, float %proxy, float %proxy, i1 immarg true, i1 immarg true) #3
  ret void
}


; Pixel shader with an unconditional kill inside a loop body.
;
; The loop is entered only when %cmpa is greater than zero. On each iteration
; %hdr sends lanes whose counter is unsigned-greater than %cmpb to %kill, which
; calls llvm.amdgcn.kill(i1 false) and then rejoins %latch. The loop repeats
; while the incremented counter is signed-less-than %cmpc. The exit block
; exports the final counter (or -1 when the loop was never entered), bitcast
; to float.
;
; The expected code for %kill on every target removes the current exec lanes
; from a mask that was copied from exec in %.lr.ph, branches with
; s_cbranch_scc0 to a trailing block that zeroes exec, issues an export and
; ends the program, and otherwise zeroes exec and branches back to the latch.
define amdgpu_ps void @complex_loop(i32 inreg %cmpa, i32 %cmpb, i32 %cmpc) {
; SI-LABEL: complex_loop:
; SI: ; %bb.0: ; %.entry
; SI-NEXT: s_cmp_lt_i32 s0, 1
; SI-NEXT: s_cbranch_scc1 .LBB15_7
; SI-NEXT: ; %bb.1: ; %.lr.ph
; SI-NEXT: s_mov_b64 s[2:3], exec
; SI-NEXT: s_mov_b32 s6, 0
; SI-NEXT: s_mov_b64 s[0:1], 0
; SI-NEXT: s_branch .LBB15_3
; SI-NEXT: .LBB15_2: ; %latch
; SI-NEXT: ; in Loop: Header=BB15_3 Depth=1
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
; SI-NEXT: s_add_i32 s6, s6, 1
; SI-NEXT: v_cmp_ge_i32_e32 vcc, s6, v1
; SI-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; SI-NEXT: v_mov_b32_e32 v2, s6
; SI-NEXT: s_andn2_b64 exec, exec, s[0:1]
; SI-NEXT: s_cbranch_execz .LBB15_6
; SI-NEXT: .LBB15_3: ; %hdr
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0
; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; SI-NEXT: s_cbranch_execz .LBB15_2
; SI-NEXT: ; %bb.4: ; %kill
; SI-NEXT: ; in Loop: Header=BB15_3 Depth=1
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
; SI-NEXT: s_cbranch_scc0 .LBB15_8
; SI-NEXT: ; %bb.5: ; %kill
; SI-NEXT: ; in Loop: Header=BB15_3 Depth=1
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: s_branch .LBB15_2
; SI-NEXT: .LBB15_6: ; %Flow
; SI-NEXT: s_or_b64 exec, exec, s[0:1]
; SI-NEXT: exp mrt0 v2, v2, v0, v0 done vm
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB15_7:
; SI-NEXT: v_mov_b32_e32 v2, -1
; SI-NEXT: exp mrt0 v2, v2, v0, v0 done vm
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB15_8:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
;
; GFX10-WAVE64-LABEL: complex_loop:
; GFX10-WAVE64: ; %bb.0: ; %.entry
; GFX10-WAVE64-NEXT: s_cmp_lt_i32 s0, 1
; GFX10-WAVE64-NEXT: s_cbranch_scc1 .LBB15_7
; GFX10-WAVE64-NEXT: ; %bb.1: ; %.lr.ph
; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
; GFX10-WAVE64-NEXT: s_mov_b32 s6, 0
; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], 0
; GFX10-WAVE64-NEXT: s_branch .LBB15_3
; GFX10-WAVE64-NEXT: .LBB15_2: ; %latch
; GFX10-WAVE64-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX10-WAVE64-NEXT: s_add_i32 s6, s6, 1
; GFX10-WAVE64-NEXT: v_cmp_ge_i32_e32 vcc, s6, v1
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v2, s6
; GFX10-WAVE64-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB15_6
; GFX10-WAVE64-NEXT: .LBB15_3: ; %hdr
; GFX10-WAVE64-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-WAVE64-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0
; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX10-WAVE64-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB15_2
; GFX10-WAVE64-NEXT: ; %bb.4: ; %kill
; GFX10-WAVE64-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB15_8
; GFX10-WAVE64-NEXT: ; %bb.5: ; %kill
; GFX10-WAVE64-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: s_branch .LBB15_2
; GFX10-WAVE64-NEXT: .LBB15_6: ; %Flow
; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX10-WAVE64-NEXT: exp mrt0 v2, v2, v0, v0 done vm
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB15_7:
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v2, -1
; GFX10-WAVE64-NEXT: exp mrt0 v2, v2, v0, v0 done vm
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB15_8:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: complex_loop:
; GFX10-WAVE32: ; %bb.0: ; %.entry
; GFX10-WAVE32-NEXT: s_cmp_lt_i32 s0, 1
; GFX10-WAVE32-NEXT: s_cbranch_scc1 .LBB15_7
; GFX10-WAVE32-NEXT: ; %bb.1: ; %.lr.ph
; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo
; GFX10-WAVE32-NEXT: s_mov_b32 s0, 0
; GFX10-WAVE32-NEXT: s_mov_b32 s2, 0
; GFX10-WAVE32-NEXT: s_branch .LBB15_3
; GFX10-WAVE32-NEXT: .LBB15_2: ; %latch
; GFX10-WAVE32-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GFX10-WAVE32-NEXT: s_add_i32 s2, s2, 1
; GFX10-WAVE32-NEXT: v_cmp_ge_i32_e32 vcc_lo, s2, v1
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v2, s2
; GFX10-WAVE32-NEXT: s_or_b32 s0, vcc_lo, s0
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB15_6
; GFX10-WAVE32-NEXT: .LBB15_3: ; %hdr
; GFX10-WAVE32-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-WAVE32-NEXT: v_cmp_gt_u32_e32 vcc_lo, s2, v0
; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX10-WAVE32-NEXT: s_xor_b32 s3, exec_lo, s3
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB15_2
; GFX10-WAVE32-NEXT: ; %bb.4: ; %kill
; GFX10-WAVE32-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, exec_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB15_8
; GFX10-WAVE32-NEXT: ; %bb.5: ; %kill
; GFX10-WAVE32-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: s_branch .LBB15_2
; GFX10-WAVE32-NEXT: .LBB15_6: ; %Flow
; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX10-WAVE32-NEXT: exp mrt0 v2, v2, v0, v0 done vm
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB15_7:
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v2, -1
; GFX10-WAVE32-NEXT: exp mrt0 v2, v2, v0, v0 done vm
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB15_8:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
;
; GFX11-LABEL: complex_loop:
; GFX11: ; %bb.0: ; %.entry
; GFX11-NEXT: s_cmp_lt_i32 s0, 1
; GFX11-NEXT: s_cbranch_scc1 .LBB15_7
; GFX11-NEXT: ; %bb.1: ; %.lr.ph
; GFX11-NEXT: s_mov_b64 s[2:3], exec
; GFX11-NEXT: s_mov_b32 s6, 0
; GFX11-NEXT: s_mov_b64 s[0:1], 0
; GFX11-NEXT: s_branch .LBB15_3
; GFX11-NEXT: .LBB15_2: ; %latch
; GFX11-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX11-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX11-NEXT: s_add_i32 s6, s6, 1
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_cmp_ge_i32_e32 vcc, s6, v1
; GFX11-NEXT: v_mov_b32_e32 v2, s6
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX11-NEXT: s_and_not1_b64 exec, exec, s[0:1]
; GFX11-NEXT: s_cbranch_execz .LBB15_6
; GFX11-NEXT: .LBB15_3: ; %hdr
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX11-NEXT: s_mov_b64 s[4:5], exec
; GFX11-NEXT: v_cmpx_gt_u32_e64 s6, v0
; GFX11-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GFX11-NEXT: s_cbranch_execz .LBB15_2
; GFX11-NEXT: ; %bb.4: ; %kill
; GFX11-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], exec
; GFX11-NEXT: s_cbranch_scc0 .LBB15_8
; GFX11-NEXT: ; %bb.5: ; %kill
; GFX11-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: s_branch .LBB15_2
; GFX11-NEXT: .LBB15_6: ; %Flow
; GFX11-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX11-NEXT: exp mrt0 v2, v2, v0, v0 done
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB15_7:
; GFX11-NEXT: v_mov_b32_e32 v2, -1
; GFX11-NEXT: exp mrt0 v2, v2, v0, v0 done
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB15_8:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: exp mrt0 off, off, off, off done
; GFX11-NEXT: s_endpgm
.entry:
  %flaga = icmp sgt i32 %cmpa, 0
  br i1 %flaga, label %.lr.ph, label %._crit_edge

.lr.ph:
  br label %hdr

hdr:
  %ctr = phi i32 [ 0, %.lr.ph ], [ %ctr.next, %latch ]
  %flagb = icmp ugt i32 %ctr, %cmpb
  br i1 %flagb, label %kill, label %latch

kill:
  ; Constant-false condition: every lane that reaches this block is killed.
  call void @llvm.amdgcn.kill(i1 false)
  br label %latch

latch:
  %ctr.next = add nuw nsw i32 %ctr, 1
  %flagc = icmp slt i32 %ctr.next, %cmpc
  br i1 %flagc, label %hdr, label %._crit_edge

._crit_edge:
  %tmp = phi i32 [ -1, %.entry ], [ %ctr.next, %latch ]
  %out = bitcast i32 %tmp to float
  call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 15, float %out, float %out, float undef, float undef, i1 immarg true, i1 immarg true)
  ret void
}

; Function with the default calling convention (it returns via s_setpc_b64)
; whose conditional block %bb.0 contains only a write to the MODE hardware
; register through llvm.amdgcn.s.setreg. The expected code on every target
; keeps the s_cbranch_execz that jumps over s_setreg_imm32_b32 when no lane
; takes the branch.
; NOTE(review): presumably this guards against the skip branch being dropped
; around a mode switch -- inferred from the function name; confirm.
define void @skip_mode_switch(i32 %arg) {
; SI-LABEL: skip_mode_switch:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SI-NEXT: s_cbranch_execz .LBB16_2
; SI-NEXT: ; %bb.1: ; %bb.0
; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
; SI-NEXT: .LBB16_2: ; %bb.1
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
; SI-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-WAVE64-LABEL: skip_mode_switch:
; GFX10-WAVE64: ; %bb.0: ; %entry
; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB16_2
; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb.0
; GFX10-WAVE64-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
; GFX10-WAVE64-NEXT: .LBB16_2: ; %bb.1
; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX10-WAVE64-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-WAVE32-LABEL: skip_mode_switch:
; GFX10-WAVE32: ; %bb.0: ; %entry
; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s4, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB16_2
; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb.0
; GFX10-WAVE32-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
; GFX10-WAVE32-NEXT: .LBB16_2: ; %bb.1
; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-WAVE32-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: skip_mode_switch:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b64 s[0:1], exec
; GFX11-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX11-NEXT: s_cbranch_execz .LBB16_2
; GFX11-NEXT: ; %bb.1: ; %bb.0
; GFX11-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
; GFX11-NEXT: .LBB16_2: ; %bb.1
; GFX11-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb.0, label %bb.1

bb.0:
  call void @llvm.amdgcn.s.setreg(i32 2049, i32 3)
  br label %bb.1

bb.1:
  ret void
}

; AMDGPU intrinsic declarations, followed by the attribute groups.
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #3
declare float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare void @llvm.amdgcn.kill(i1) #0

declare void @llvm.amdgcn.s.setreg(i32 immarg, i32)

attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone speculatable }
attributes #3 = { inaccessiblememonly nounwind writeonly }