; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,WAVE64,SI %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,WAVE64,GFX10-WAVE64 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,GFX10-WAVE32 %s

; Codegen tests for calls to @llvm.amdgcn.kill in amdgpu_ps functions.
; Three configurations are exercised: tahiti, gfx1010 with wavefrontsize64
; forced on, and gfx1010 with its default attributes (whose expected output
; uses the 32-bit exec_lo forms). The assertion lines are generated; after
; changing any IR below, regenerate them with the script named in the first
; line instead of editing them by hand.

; Kill with a constant true condition: the expected output is a bare s_endpgm
; with no update of exec.
define amdgpu_ps void @test_kill_depth_0_imm_pos() #0 {
; GCN-LABEL: test_kill_depth_0_imm_pos:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_endpgm
  call void @llvm.amdgcn.kill(i1 true)
  ret void
}

; Kill with a constant false condition: exec is and-not'ed with itself, and the
; scc0 branch leads to a block that zeroes exec and issues a null export
; before ending the program.
define amdgpu_ps void @test_kill_depth_0_imm_neg() #0 {
; WAVE64-LABEL: test_kill_depth_0_imm_neg:
; WAVE64:       ; %bb.0:
; WAVE64-NEXT:    s_andn2_b64 exec, exec, exec
; WAVE64-NEXT:    s_cbranch_scc0 .LBB1_1
; WAVE64-NEXT:    s_endpgm
; WAVE64-NEXT:  .LBB1_1:
; WAVE64-NEXT:    s_mov_b64 exec, 0
; WAVE64-NEXT:    exp null off, off, off, off done vm
; WAVE64-NEXT:    s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_0_imm_neg:
; GFX10-WAVE32:       ; %bb.0:
; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, exec_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB1_1
; GFX10-WAVE32-NEXT:    s_endpgm
; GFX10-WAVE32-NEXT:  .LBB1_1:
; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT:    s_endpgm
  call void @llvm.amdgcn.kill(i1 false)
  ret void
}

; FIXME: Ideally only one early-exit would be emitted
; Two consecutive constant-false kills. Both conditional branches in the
; expected output target the same exit block.
define amdgpu_ps void @test_kill_depth_0_imm_neg_x2() #0 {
; WAVE64-LABEL: test_kill_depth_0_imm_neg_x2:
; WAVE64:       ; %bb.0:
; WAVE64-NEXT:    s_mov_b64 s[0:1], exec
; WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
; WAVE64-NEXT:    s_cbranch_scc0 .LBB2_2
; WAVE64-NEXT:  ; %bb.1:
; WAVE64-NEXT:    s_mov_b64 exec, 0
; WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
; WAVE64-NEXT:    s_cbranch_scc0 .LBB2_2
; WAVE64-NEXT:    s_endpgm
; WAVE64-NEXT:  .LBB2_2:
; WAVE64-NEXT:    s_mov_b64 exec, 0
; WAVE64-NEXT:    exp null off, off, off, off done vm
; WAVE64-NEXT:    s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_0_imm_neg_x2:
; GFX10-WAVE32:       ; %bb.0:
; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, exec_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB2_2
; GFX10-WAVE32-NEXT:  ; %bb.1:
; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, exec_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB2_2
; GFX10-WAVE32-NEXT:    s_endpgm
; GFX10-WAVE32-NEXT:  .LBB2_2:
; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT:    s_endpgm
  call void @llvm.amdgcn.kill(i1 false)
  call void @llvm.amdgcn.kill(i1 false)
  ret void
}

; Kill on a runtime condition (ordered %x < 0.0). The expected output computes
; the inverted compare (v_cmp_ngt) into vcc and removes those lanes from exec.
define amdgpu_ps void @test_kill_depth_var(float %x) #0 {
; WAVE64-LABEL: test_kill_depth_var:
; WAVE64:       ; %bb.0:
; WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
; WAVE64-NEXT:    s_cbranch_scc0 .LBB3_1
; WAVE64-NEXT:    s_endpgm
; WAVE64-NEXT:  .LBB3_1:
; WAVE64-NEXT:    s_mov_b64 exec, 0
; WAVE64-NEXT:    exp null off, off, off, off done vm
; WAVE64-NEXT:    s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_var:
; GFX10-WAVE32:       ; %bb.0:
; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB3_1
; GFX10-WAVE32-NEXT:    s_endpgm
; GFX10-WAVE32-NEXT:  .LBB3_1:
; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT:    s_endpgm
  %cmp = fcmp olt float %x, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp)
  ret void
}

; FIXME: Ideally only one early-exit would be emitted
; The same condition value feeds two kills. The expected output still emits
; the compare once per kill.
define amdgpu_ps void @test_kill_depth_var_x2_same(float %x) #0 {
; SI-LABEL: test_kill_depth_var_x2_same:
; SI:       ; %bb.0:
; SI-NEXT:    s_mov_b64 s[0:1], exec
; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT:    s_cbranch_scc0 .LBB4_2
; SI-NEXT:  ; %bb.1:
; SI-NEXT:    s_andn2_b64 exec, exec, vcc
; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT:    s_cbranch_scc0 .LBB4_2
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB4_2:
; SI-NEXT:    s_mov_b64 exec, 0
; SI-NEXT:    exp null off, off, off, off done vm
; SI-NEXT:    s_endpgm
;
; GFX10-WAVE64-LABEL: test_kill_depth_var_x2_same:
; GFX10-WAVE64:       ; %bb.0:
; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB4_2
; GFX10-WAVE64-NEXT:  ; %bb.1:
; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB4_2
; GFX10-WAVE64-NEXT:    s_endpgm
; GFX10-WAVE64-NEXT:  .LBB4_2:
; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT:    s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_var_x2_same:
; GFX10-WAVE32:       ; %bb.0:
; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB4_2
; GFX10-WAVE32-NEXT:  ; %bb.1:
; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB4_2
; GFX10-WAVE32-NEXT:    s_endpgm
; GFX10-WAVE32-NEXT:  .LBB4_2:
; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT:    s_endpgm
  %cmp = fcmp olt float %x, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp)
  call void @llvm.amdgcn.kill(i1 %cmp)
  ret void
}

; FIXME: Ideally only one early-exit would be emitted
; Two kills on independent runtime conditions, one derived from %x and one
; from %y.
define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
; SI-LABEL: test_kill_depth_var_x2:
; SI:       ; %bb.0:
; SI-NEXT:    s_mov_b64 s[0:1], exec
; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT:    s_cbranch_scc0 .LBB5_2
; SI-NEXT:  ; %bb.1:
; SI-NEXT:    s_andn2_b64 exec, exec, vcc
; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT:    s_cbranch_scc0 .LBB5_2
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB5_2:
; SI-NEXT:    s_mov_b64 exec, 0
; SI-NEXT:    exp null off, off, off, off done vm
; SI-NEXT:    s_endpgm
;
; GFX10-WAVE64-LABEL: test_kill_depth_var_x2:
; GFX10-WAVE64:       ; %bb.0:
; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB5_2
; GFX10-WAVE64-NEXT:  ; %bb.1:
; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB5_2
; GFX10-WAVE64-NEXT:    s_endpgm
; GFX10-WAVE64-NEXT:  .LBB5_2:
; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT:    s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_var_x2:
; GFX10-WAVE32:       ; %bb.0:
; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB5_2
; GFX10-WAVE32-NEXT:  ; %bb.1:
; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v1
; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB5_2
; GFX10-WAVE32-NEXT:    s_endpgm
; GFX10-WAVE32-NEXT:  .LBB5_2:
; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT:    s_endpgm
  %cmp.x = fcmp olt float %x, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.x)
  %cmp.y = fcmp olt float %y, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.y)
  ret void
}

; Two kills separated by a side-effecting inline asm; the asm defines v7,
; which is the operand of the second kill's compare.
define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
; SI-LABEL: test_kill_depth_var_x2_instructions:
; SI:       ; %bb.0:
; SI-NEXT:    s_mov_b64 s[0:1], exec
; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT:    s_cbranch_scc0 .LBB6_2
; SI-NEXT:  ; %bb.1:
; SI-NEXT:    s_andn2_b64 exec, exec, vcc
; SI-NEXT:    ;;#ASMSTART
; SI-NEXT:    v_mov_b32_e64 v7, -1
; SI-NEXT:    ;;#ASMEND
; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT:    s_cbranch_scc0 .LBB6_2
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB6_2:
; SI-NEXT:    s_mov_b64 exec, 0
; SI-NEXT:    exp null off, off, off, off done vm
; SI-NEXT:    s_endpgm
;
; GFX10-WAVE64-LABEL: test_kill_depth_var_x2_instructions:
; GFX10-WAVE64:       ; %bb.0:
; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB6_2
; GFX10-WAVE64-NEXT:  ; %bb.1:
; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT:    ;;#ASMSTART
; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
; GFX10-WAVE64-NEXT:    ;;#ASMEND
; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB6_2
; GFX10-WAVE64-NEXT:    s_endpgm
; GFX10-WAVE64-NEXT:  .LBB6_2:
; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT:    s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_var_x2_instructions:
; GFX10-WAVE32:       ; %bb.0:
; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB6_2
; GFX10-WAVE32-NEXT:  ; %bb.1:
; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT:    ;;#ASMSTART
; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
; GFX10-WAVE32-NEXT:    ;;#ASMEND
; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v7
; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB6_2
; GFX10-WAVE32-NEXT:    s_endpgm
; GFX10-WAVE32-NEXT:  .LBB6_2:
; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT:    s_endpgm
  %cmp.x = fcmp olt float %x, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.x)
  %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={v7}"()
  %cmp.y = fcmp olt float %y, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.y)
  ret void
}

; FIXME: why does the skip depend on the asm length in the same block?
; Kill inside a conditionally executed block (%bb). The branch into it tests
; an inreg argument, which the expected output compares with a scalar compare
; on s0. The function returns the constant 1.0 on both paths.
define amdgpu_ps float @test_kill_control_flow(i32 inreg %arg) #0 {
; SI-LABEL: test_kill_control_flow:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_cmp_lg_u32 s0, 0
; SI-NEXT:    s_cbranch_scc0 .LBB7_2
; SI-NEXT:  ; %bb.1: ; %exit
; SI-NEXT:    v_mov_b32_e32 v0, 1.0
; SI-NEXT:    s_branch .LBB7_5
; SI-NEXT:  .LBB7_2: ; %bb
; SI-NEXT:    s_mov_b64 s[2:3], exec
; SI-NEXT:    ;;#ASMSTART
; SI-NEXT:    v_mov_b32_e64 v7, -1
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    ;;#ASMEND
; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
; SI-NEXT:    s_cbranch_scc0 .LBB7_4
; SI-NEXT:  ; %bb.3: ; %bb
; SI-NEXT:    s_andn2_b64 exec, exec, vcc
; SI-NEXT:    v_mov_b32_e32 v0, 1.0
; SI-NEXT:    s_branch .LBB7_5
; SI-NEXT:  .LBB7_4:
; SI-NEXT:    s_mov_b64 exec, 0
; SI-NEXT:    exp null off, off, off, off done vm
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB7_5:
;
; GFX10-WAVE64-LABEL: test_kill_control_flow:
; GFX10-WAVE64:       ; %bb.0: ; %entry
; GFX10-WAVE64-NEXT:    s_cmp_lg_u32 s0, 0
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB7_2
; GFX10-WAVE64-NEXT:  ; %bb.1: ; %exit
; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 1.0
; GFX10-WAVE64-NEXT:    s_branch .LBB7_5
; GFX10-WAVE64-NEXT:  .LBB7_2: ; %bb
; GFX10-WAVE64-NEXT:    ;;#ASMSTART
; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    ;;#ASMEND
; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB7_4
; GFX10-WAVE64-NEXT:  ; %bb.3: ; %bb
; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 1.0
; GFX10-WAVE64-NEXT:    s_branch .LBB7_5
; GFX10-WAVE64-NEXT:  .LBB7_4:
; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT:    s_endpgm
; GFX10-WAVE64-NEXT:  .LBB7_5:
;
; GFX10-WAVE32-LABEL: test_kill_control_flow:
; GFX10-WAVE32:       ; %bb.0: ; %entry
; GFX10-WAVE32-NEXT:    s_cmp_lg_u32 s0, 0
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB7_2
; GFX10-WAVE32-NEXT:  ; %bb.1: ; %exit
; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 1.0
; GFX10-WAVE32-NEXT:    s_branch .LBB7_5
; GFX10-WAVE32-NEXT:  .LBB7_2: ; %bb
; GFX10-WAVE32-NEXT:    ;;#ASMSTART
; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    ;;#ASMEND
; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v7
; GFX10-WAVE32-NEXT:    s_mov_b32 s1, exec_lo
; GFX10-WAVE32-NEXT:    s_andn2_b32 s1, s1, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB7_4
; GFX10-WAVE32-NEXT:  ; %bb.3: ; %bb
; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 1.0
; GFX10-WAVE32-NEXT:    s_branch .LBB7_5
; GFX10-WAVE32-NEXT:  .LBB7_4:
; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT:    s_endpgm
; GFX10-WAVE32-NEXT:  .LBB7_5:
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb, label %exit

bb:
  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={v7}"()
  %cmp.var = fcmp olt float %var, 0.0
  ; TODO: We could do an early-exit here (the branch above is uniform!)
  call void @llvm.amdgcn.kill(i1 %cmp.var)
  br label %exit

exit:
  ret float 1.0
}

; Kill inside a conditionally executed block with work remaining after it:
; %live.across is defined before the kill and stored after it, and %live.out
; is defined after the kill and consumed by the phi in %exit.
define amdgpu_ps void @test_kill_control_flow_remainder(i32 inreg %arg) #0 {
; SI-LABEL: test_kill_control_flow_remainder:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_cmp_lg_u32 s0, 0
; SI-NEXT:    v_mov_b32_e32 v9, 0
; SI-NEXT:    s_cbranch_scc1 .LBB8_3
; SI-NEXT:  ; %bb.1: ; %bb
; SI-NEXT:    s_mov_b64 s[2:3], exec
; SI-NEXT:    ;;#ASMSTART
; SI-NEXT:    v_mov_b32_e64 v7, -1
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    ;;#ASMEND
; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
; SI-NEXT:    ;;#ASMSTART
; SI-NEXT:    v_mov_b32_e64 v8, -1
; SI-NEXT:    ;;#ASMEND
; SI-NEXT:    s_cbranch_scc0 .LBB8_4
; SI-NEXT:  ; %bb.2: ; %bb
; SI-NEXT:    s_andn2_b64 exec, exec, vcc
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    buffer_store_dword v8, off, s[0:3], 0
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    ;;#ASMSTART
; SI-NEXT:    v_mov_b32_e64 v9, -2
; SI-NEXT:    ;;#ASMEND
; SI-NEXT:  .LBB8_3: ; %exit
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB8_4:
; SI-NEXT:    s_mov_b64 exec, 0
; SI-NEXT:    exp null off, off, off, off done vm
; SI-NEXT:    s_endpgm
;
; GFX10-WAVE64-LABEL: test_kill_control_flow_remainder:
; GFX10-WAVE64:       ; %bb.0: ; %entry
; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v9, 0
; GFX10-WAVE64-NEXT:    s_cmp_lg_u32 s0, 0
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB8_2
; GFX10-WAVE64-NEXT:  ; %bb.1: ; %exit
; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v9, off
; GFX10-WAVE64-NEXT:    s_endpgm
; GFX10-WAVE64-NEXT:  .LBB8_2: ; %bb
; GFX10-WAVE64-NEXT:    ;;#ASMSTART
; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    ;;#ASMEND
; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
; GFX10-WAVE64-NEXT:    ;;#ASMSTART
; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v8, -1
; GFX10-WAVE64-NEXT:    ;;#ASMEND
; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB8_4
; GFX10-WAVE64-NEXT:  ; %bb.3: ; %bb
; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v8, off
; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WAVE64-NEXT:    ;;#ASMSTART
; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v9, -2
; GFX10-WAVE64-NEXT:    ;;#ASMEND
; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v9, off
; GFX10-WAVE64-NEXT:    s_endpgm
; GFX10-WAVE64-NEXT:  .LBB8_4:
; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT:    s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_control_flow_remainder:
; GFX10-WAVE32:       ; %bb.0: ; %entry
; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v9, 0
; GFX10-WAVE32-NEXT:    s_cmp_lg_u32 s0, 0
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB8_2
; GFX10-WAVE32-NEXT:  ; %bb.1: ; %exit
; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v9, off
; GFX10-WAVE32-NEXT:    s_endpgm
; GFX10-WAVE32-NEXT:  .LBB8_2: ; %bb
; GFX10-WAVE32-NEXT:    ;;#ASMSTART
; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    ;;#ASMEND
; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v7
; GFX10-WAVE32-NEXT:    s_mov_b32 s1, exec_lo
; GFX10-WAVE32-NEXT:    ;;#ASMSTART
; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v8, -1
; GFX10-WAVE32-NEXT:    ;;#ASMEND
; GFX10-WAVE32-NEXT:    s_andn2_b32 s1, s1, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB8_4
; GFX10-WAVE32-NEXT:  ; %bb.3: ; %bb
; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v8, off
; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WAVE32-NEXT:    ;;#ASMSTART
; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v9, -2
; GFX10-WAVE32-NEXT:    ;;#ASMEND
; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v9, off
; GFX10-WAVE32-NEXT:    s_endpgm
; GFX10-WAVE32-NEXT:  .LBB8_4:
; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT:    s_endpgm
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb, label %exit

bb:
  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={v7}"()
  %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={v8}"()
  %cmp.var = fcmp olt float %var, 0.0
  ; TODO: We could do an early-exit here (the branch above is uniform!)
  call void @llvm.amdgcn.kill(i1 %cmp.var)
  store volatile float %live.across, float addrspace(1)* undef
  %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={v9}"()
  br label %exit

exit:
  %phi = phi float [ 0.0, %entry ], [ %live.out, %bb ]
  store float %phi, float addrspace(1)* undef
  ret void
}

; Kill in the entry block, on an integer compare of the inreg argument,
; followed by a conditional branch. The returned float is chosen by a phi
; between the inline-asm result from %bb and 0.0 from %entry.
define amdgpu_ps float @test_kill_control_flow_return(i32 inreg %arg) #0 {
; SI-LABEL: test_kill_control_flow_return:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_cmp_eq_u32 s0, 1
; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
; SI-NEXT:    s_mov_b64 s[2:3], exec
; SI-NEXT:    s_xor_b64 s[4:5], s[4:5], exec
; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], s[4:5]
; SI-NEXT:    s_cbranch_scc0 .LBB9_4
; SI-NEXT:  ; %bb.1: ; %entry
; SI-NEXT:    s_and_b64 exec, exec, s[2:3]
; SI-NEXT:    s_cmp_lg_u32 s0, 0
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_cbranch_scc0 .LBB9_3
; SI-NEXT:  ; %bb.2: ; %exit
; SI-NEXT:    s_branch .LBB9_5
; SI-NEXT:  .LBB9_3: ; %bb
; SI-NEXT:    ;;#ASMSTART
; SI-NEXT:    v_mov_b32_e64 v7, -1
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    ;;#ASMEND
; SI-NEXT:    v_mov_b32_e32 v0, v7
; SI-NEXT:    s_branch .LBB9_5
; SI-NEXT:  .LBB9_4:
; SI-NEXT:    s_mov_b64 exec, 0
; SI-NEXT:    exp null off, off, off, off done vm
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB9_5:
;
; GFX10-WAVE64-LABEL: test_kill_control_flow_return:
; GFX10-WAVE64:       ; %bb.0: ; %entry
; GFX10-WAVE64-NEXT:    s_cmp_eq_u32 s0, 1
; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
; GFX10-WAVE64-NEXT:    s_cselect_b64 s[4:5], -1, 0
; GFX10-WAVE64-NEXT:    s_xor_b64 s[4:5], s[4:5], exec
; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], s[4:5]
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB9_4
; GFX10-WAVE64-NEXT:  ; %bb.1: ; %entry
; GFX10-WAVE64-NEXT:    s_and_b64 exec, exec, s[2:3]
; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-WAVE64-NEXT:    s_cmp_lg_u32 s0, 0
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB9_3
; GFX10-WAVE64-NEXT:  ; %bb.2: ; %exit
; GFX10-WAVE64-NEXT:    s_branch .LBB9_5
; GFX10-WAVE64-NEXT:  .LBB9_3: ; %bb
; GFX10-WAVE64-NEXT:    ;;#ASMSTART
; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    ;;#ASMEND
; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, v7
; GFX10-WAVE64-NEXT:    s_branch .LBB9_5
; GFX10-WAVE64-NEXT:  .LBB9_4:
; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT:    s_endpgm
; GFX10-WAVE64-NEXT:  .LBB9_5:
;
; GFX10-WAVE32-LABEL: test_kill_control_flow_return:
; GFX10-WAVE32:       ; %bb.0: ; %entry
; GFX10-WAVE32-NEXT:    s_cmp_eq_u32 s0, 1
; GFX10-WAVE32-NEXT:    s_mov_b32 s1, exec_lo
; GFX10-WAVE32-NEXT:    s_cselect_b32 s2, -1, 0
; GFX10-WAVE32-NEXT:    s_xor_b32 s2, s2, exec_lo
; GFX10-WAVE32-NEXT:    s_andn2_b32 s1, s1, s2
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB9_4
; GFX10-WAVE32-NEXT:  ; %bb.1: ; %entry
; GFX10-WAVE32-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-WAVE32-NEXT:    s_cmp_lg_u32 s0, 0
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB9_3
; GFX10-WAVE32-NEXT:  ; %bb.2: ; %exit
; GFX10-WAVE32-NEXT:    s_branch .LBB9_5
; GFX10-WAVE32-NEXT:  .LBB9_3: ; %bb
; GFX10-WAVE32-NEXT:    ;;#ASMSTART
; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    ;;#ASMEND
; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, v7
; GFX10-WAVE32-NEXT:    s_branch .LBB9_5
; GFX10-WAVE32-NEXT:  .LBB9_4:
; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT:    s_endpgm
; GFX10-WAVE32-NEXT:  .LBB9_5:
entry:
  %kill = icmp eq i32 %arg, 1
  %cmp = icmp eq i32 %arg, 0
  call void @llvm.amdgcn.kill(i1 %kill)
  br i1 %cmp, label %bb, label %exit

bb:
  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={v7}"()
  br label %exit

exit:
  %ret = phi float [ %var, %bb ], [ 0.0, %entry ]
  ret float %ret
}

; Kill inside a loop. The loop is entered on a compare of a non-inreg argument
; (a vector compare plus s_and_saveexec in the expected output), and the
; back-edge condition comes from a volatile load.
define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 {
; SI-LABEL: test_kill_divergent_loop:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_mov_b64 s[0:1], exec
; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; SI-NEXT:    s_and_saveexec_b64 s[2:3], vcc
; SI-NEXT:    s_xor_b64 s[4:5], exec, s[2:3]
; SI-NEXT:    s_cbranch_execz .LBB10_4
; SI-NEXT:  ; %bb.1: ; %bb.preheader
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:  .LBB10_2: ; %bb
; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
; SI-NEXT:    ;;#ASMSTART
; SI-NEXT:    v_mov_b32_e64 v7, -1
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    v_nop_e64
; SI-NEXT:    ;;#ASMEND
; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT:    s_cbranch_scc0 .LBB10_5
; SI-NEXT:  ; %bb.3: ; %bb
; SI-NEXT:    ; in Loop: Header=BB10_2 Depth=1
; SI-NEXT:    s_andn2_b64 exec, exec, vcc
; SI-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; SI-NEXT:    s_and_b64 vcc, exec, vcc
; SI-NEXT:    s_cbranch_vccnz .LBB10_2
; SI-NEXT:  .LBB10_4: ; %Flow1
; SI-NEXT:    s_or_b64 exec, exec, s[4:5]
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 8
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB10_5:
; SI-NEXT:    s_mov_b64 exec, 0
; SI-NEXT:    exp null off, off, off, off done vm
; SI-NEXT:    s_endpgm
;
; GFX10-WAVE64-LABEL: test_kill_divergent_loop:
; GFX10-WAVE64:       ; %bb.0: ; %entry
; GFX10-WAVE64-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[2:3], vcc
; GFX10-WAVE64-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB10_3
; GFX10-WAVE64-NEXT:  .LBB10_1: ; %bb
; GFX10-WAVE64-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX10-WAVE64-NEXT:    ;;#ASMSTART
; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    v_nop_e64
; GFX10-WAVE64-NEXT:    ;;#ASMEND
; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB10_4
; GFX10-WAVE64-NEXT:  ; %bb.2: ; %bb
; GFX10-WAVE64-NEXT:    ; in Loop: Header=BB10_1 Depth=1
; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT:    global_load_dword v0, v[0:1], off glc dlc
; GFX10-WAVE64-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WAVE64-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT:    s_and_b64 vcc, exec, vcc
; GFX10-WAVE64-NEXT:    s_cbranch_vccnz .LBB10_1
; GFX10-WAVE64-NEXT:  .LBB10_3: ; %Flow1
; GFX10-WAVE64-NEXT:    s_or_b64 exec, exec, s[2:3]
; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 8
; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v0, off
; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WAVE64-NEXT:    s_endpgm
; GFX10-WAVE64-NEXT:  .LBB10_4:
; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT:    s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_divergent_loop:
; GFX10-WAVE32:       ; %bb.0: ; %entry
; GFX10-WAVE32-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s1, vcc_lo
; GFX10-WAVE32-NEXT:    s_xor_b32 s1, exec_lo, s1
; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB10_3
; GFX10-WAVE32-NEXT:  .LBB10_1: ; %bb
; GFX10-WAVE32-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX10-WAVE32-NEXT:    ;;#ASMSTART
; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    v_nop_e64
; GFX10-WAVE32-NEXT:    ;;#ASMEND
; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v7
; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB10_4
; GFX10-WAVE32-NEXT:  ; %bb.2: ; %bb
; GFX10-WAVE32-NEXT:    ; in Loop: Header=BB10_1 Depth=1
; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT:    global_load_dword v0, v[0:1], off glc dlc
; GFX10-WAVE32-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WAVE32-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT:    s_and_b32 vcc_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_vccnz .LBB10_1
; GFX10-WAVE32-NEXT:  .LBB10_3: ; %Flow1
; GFX10-WAVE32-NEXT:    s_or_b32 exec_lo, exec_lo, s1
; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 8
; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v0, off
; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WAVE32-NEXT:    s_endpgm
; GFX10-WAVE32-NEXT:  .LBB10_4:
; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT:    s_endpgm
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb, label %exit

bb:
  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={v7}"()
  %cmp.var = fcmp olt float %var, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.var)
  %vgpr = load volatile i32, i32 addrspace(1)* undef
  %loop.cond = icmp eq i32 %vgpr, 0
  br i1 %loop.cond, label %bb, label %exit

exit:
  store volatile i32 8, i32 addrspace(1)* undef
  ret void
}

; bug 28550
define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 {
; SI-LABEL: phi_use_def_before_kill:
; SI:       ; %bb.0: ; %bb
; SI-NEXT:    v_add_f32_e64 v1, s0, 1.0
; SI-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, -1.0, vcc
; SI-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v1
; SI-NEXT:    s_andn2_b64 exec, exec, vcc
; SI-NEXT:    s_cbranch_scc0 .LBB11_6
; SI-NEXT:  ; %bb.1: ; %bb
; SI-NEXT:    s_andn2_b64 exec, exec, vcc
; SI-NEXT:    s_cbranch_scc0 .LBB11_3
; SI-NEXT:  ; %bb.2: ; %bb8
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 8
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, 4.0
; SI-NEXT:  .LBB11_3: ; %phibb
; SI-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v0
; SI-NEXT:    s_and_b64 vcc, exec, vcc
; SI-NEXT:    s_cbranch_vccz .LBB11_5
; SI-NEXT:  ; %bb.4: ; %bb10
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 9
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:  .LBB11_5: ; %end
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB11_6:
; SI-NEXT:    s_mov_b64 exec, 0
; SI-NEXT:    exp null off, off, off, off done vm
; SI-NEXT:    s_endpgm
;
; GFX10-WAVE64-LABEL: phi_use_def_before_kill:
; GFX10-WAVE64:       ; %bb.0: ; %bb
; GFX10-WAVE64-NEXT:    v_add_f32_e64 v1, s0, 1.0
; GFX10-WAVE64-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
; GFX10-WAVE64-NEXT:    v_cndmask_b32_e64 v0, 0, -1.0, vcc
; GFX10-WAVE64-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v1
; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB11_6
; GFX10-WAVE64-NEXT:  ; %bb.1: ; %bb
; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB11_3
; GFX10-WAVE64-NEXT:  ; %bb.2: ; %bb8
; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v1, 8
; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 4.0
; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v1, off
; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WAVE64-NEXT:  .LBB11_3: ; %phibb
; GFX10-WAVE64-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT:    s_and_b64 vcc, exec, vcc
; GFX10-WAVE64-NEXT:    s_cbranch_vccz .LBB11_5
; GFX10-WAVE64-NEXT:  ; %bb.4: ; %bb10
; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 9
; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v0, off
; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WAVE64-NEXT:  .LBB11_5: ; %end
; GFX10-WAVE64-NEXT:    s_endpgm
; GFX10-WAVE64-NEXT:  .LBB11_6:
; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT:    s_endpgm
;
; GFX10-WAVE32-LABEL: phi_use_def_before_kill:
; GFX10-WAVE32:       ; %bb.0: ; %bb
; GFX10-WAVE32-NEXT:    v_add_f32_e64 v1, s0, 1.0
; GFX10-WAVE32-NEXT:    v_cmp_lt_f32_e32 vcc_lo, 0, v1
; GFX10-WAVE32-NEXT:    v_cndmask_b32_e64 v0, 0, -1.0, vcc_lo
; GFX10-WAVE32-NEXT:    v_cmp_nlt_f32_e32 vcc_lo, 0, v1
; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB11_6
; GFX10-WAVE32-NEXT:  ; %bb.1: ; %bb
; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB11_3
; GFX10-WAVE32-NEXT:  ; %bb.2: ; %bb8
; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v1, 8
; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 4.0
; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v1, off
; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WAVE32-NEXT:  .LBB11_3: ; %phibb
; GFX10-WAVE32-NEXT:    v_cmp_eq_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT:    s_and_b32 vcc_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT:    s_cbranch_vccz .LBB11_5
; GFX10-WAVE32-NEXT:  ; %bb.4: ; %bb10
; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 9
; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v0, off
; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WAVE32-NEXT:  .LBB11_5: ; %end
; GFX10-WAVE32-NEXT:    s_endpgm
; GFX10-WAVE32-NEXT:  .LBB11_6:
; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT:    s_endpgm
bb:
  %tmp = fadd float %x, 1.000000e+00
  %tmp1 = fcmp olt float 0.000000e+00, %tmp
  %tmp2 = select i1 %tmp1, float -1.000000e+00, float 0.000000e+00
  %cmp.tmp2 = fcmp olt float %tmp2, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.tmp2)
  br i1 undef, label %phibb, label %bb8

phibb:
  %tmp5 = phi float [ %tmp2, %bb ], [ 4.0, %bb8 ]
  %tmp6 = fcmp oeq float %tmp5, 0.000000e+00
  br i1 %tmp6, label %bb10, label %end

bb8:
  store volatile i32 8, i32 addrspace(1)* undef
  br label %phibb

997bb10: 998 store volatile i32 9, i32 addrspace(1)* undef 999 br label %end 1000 1001end: 1002 ret void 1003} 1004 1005define amdgpu_ps void @no_skip_no_successors(float inreg %arg, float inreg %arg1) #0 { 1006; SI-LABEL: no_skip_no_successors: 1007; SI: ; %bb.0: ; %bb 1008; SI-NEXT: v_cmp_nge_f32_e64 s[4:5], s1, 0 1009; SI-NEXT: s_and_b64 vcc, exec, s[4:5] 1010; SI-NEXT: s_cbranch_vccz .LBB12_3 1011; SI-NEXT: ; %bb.1: ; %bb6 1012; SI-NEXT: s_mov_b64 s[2:3], exec 1013; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], exec 1014; SI-NEXT: s_cbranch_scc0 .LBB12_5 1015; SI-NEXT: ; %bb.2: ; %bb6 1016; SI-NEXT: s_mov_b64 exec, 0 1017; SI-NEXT: .LBB12_3: ; %bb3 1018; SI-NEXT: v_mov_b32_e32 v0, 0x3e7ae148 1019; SI-NEXT: v_cmp_nge_f32_e32 vcc, s0, v0 1020; SI-NEXT: s_and_b64 vcc, exec, vcc 1021; SI-NEXT: ; %bb.4: ; %bb5 1022; SI-NEXT: .LBB12_5: 1023; SI-NEXT: s_mov_b64 exec, 0 1024; SI-NEXT: exp null off, off, off, off done vm 1025; SI-NEXT: s_endpgm 1026; 1027; GFX10-WAVE64-LABEL: no_skip_no_successors: 1028; GFX10-WAVE64: ; %bb.0: ; %bb 1029; GFX10-WAVE64-NEXT: v_cmp_nge_f32_e64 s[4:5], s1, 0 1030; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, s[4:5] 1031; GFX10-WAVE64-NEXT: s_cbranch_vccz .LBB12_3 1032; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb6 1033; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec 1034; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], exec 1035; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB12_5 1036; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb6 1037; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 1038; GFX10-WAVE64-NEXT: .LBB12_3: ; %bb3 1039; GFX10-WAVE64-NEXT: v_cmp_nle_f32_e64 s[0:1], 0x3e7ae148, s0 1040; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, s[0:1] 1041; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb5 1042; GFX10-WAVE64-NEXT: .LBB12_5: 1043; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 1044; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm 1045; GFX10-WAVE64-NEXT: s_endpgm 1046; 1047; GFX10-WAVE32-LABEL: no_skip_no_successors: 1048; GFX10-WAVE32: ; %bb.0: ; %bb 1049; GFX10-WAVE32-NEXT: v_cmp_nge_f32_e64 s1, s1, 0 1050; 
GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, s1 1051; GFX10-WAVE32-NEXT: s_cbranch_vccz .LBB12_3 1052; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb6 1053; GFX10-WAVE32-NEXT: s_mov_b32 s2, exec_lo 1054; GFX10-WAVE32-NEXT: s_andn2_b32 s2, s2, exec_lo 1055; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB12_5 1056; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb6 1057; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1058; GFX10-WAVE32-NEXT: .LBB12_3: ; %bb3 1059; GFX10-WAVE32-NEXT: v_cmp_nle_f32_e64 s0, 0x3e7ae148, s0 1060; GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, s0 1061; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb5 1062; GFX10-WAVE32-NEXT: .LBB12_5: 1063; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1064; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm 1065; GFX10-WAVE32-NEXT: s_endpgm 1066bb: 1067 %tmp = fcmp ult float %arg1, 0.000000e+00 1068 %tmp2 = fcmp ult float %arg, 0x3FCF5C2900000000 1069 br i1 %tmp, label %bb6, label %bb3 1070 1071bb3: ; preds = %bb 1072 br i1 %tmp2, label %bb5, label %bb4 1073 1074bb4: ; preds = %bb3 1075 br i1 true, label %bb5, label %bb7 1076 1077bb5: ; preds = %bb4, %bb3 1078 unreachable 1079 1080bb6: ; preds = %bb 1081 call void @llvm.amdgcn.kill(i1 false) 1082 unreachable 1083 1084bb7: ; preds = %bb4 1085 ret void 1086} 1087 1088define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2, float %arg3) #0 { 1089; SI-LABEL: if_after_kill_block: 1090; SI: ; %bb.0: ; %bb 1091; SI-NEXT: s_mov_b64 s[2:3], exec 1092; SI-NEXT: s_wqm_b64 exec, exec 1093; SI-NEXT: s_mov_b32 s0, 0 1094; SI-NEXT: v_cmp_nle_f32_e32 vcc, 0, v1 1095; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc 1096; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] 1097; SI-NEXT: s_cbranch_execz .LBB13_3 1098; SI-NEXT: ; %bb.1: ; %bb3 1099; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0 1100; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc 1101; SI-NEXT: s_cbranch_scc0 .LBB13_6 1102; SI-NEXT: ; %bb.2: ; %bb3 1103; SI-NEXT: s_andn2_b64 exec, exec, vcc 1104; SI-NEXT: .LBB13_3: ; %bb4 1105; SI-NEXT: s_or_b64 exec, exec, s[4:5] 1106; 
SI-NEXT: s_mov_b32 s1, s0 1107; SI-NEXT: s_mov_b32 s2, s0 1108; SI-NEXT: s_mov_b32 s3, s0 1109; SI-NEXT: s_mov_b32 s4, s0 1110; SI-NEXT: s_mov_b32 s5, s0 1111; SI-NEXT: s_mov_b32 s6, s0 1112; SI-NEXT: s_mov_b32 s7, s0 1113; SI-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 1114; SI-NEXT: s_waitcnt vmcnt(0) 1115; SI-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 1116; SI-NEXT: s_and_saveexec_b64 s[0:1], vcc 1117; SI-NEXT: s_cbranch_execz .LBB13_5 1118; SI-NEXT: ; %bb.4: ; %bb8 1119; SI-NEXT: s_mov_b32 s3, 0xf000 1120; SI-NEXT: s_mov_b32 s2, -1 1121; SI-NEXT: v_mov_b32_e32 v0, 9 1122; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 1123; SI-NEXT: s_waitcnt vmcnt(0) 1124; SI-NEXT: .LBB13_5: ; %UnifiedReturnBlock 1125; SI-NEXT: s_endpgm 1126; SI-NEXT: .LBB13_6: 1127; SI-NEXT: s_mov_b64 exec, 0 1128; SI-NEXT: exp null off, off, off, off done vm 1129; SI-NEXT: s_endpgm 1130; 1131; GFX10-WAVE64-LABEL: if_after_kill_block: 1132; GFX10-WAVE64: ; %bb.0: ; %bb 1133; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec 1134; GFX10-WAVE64-NEXT: s_wqm_b64 exec, exec 1135; GFX10-WAVE64-NEXT: v_cmp_nle_f32_e32 vcc, 0, v1 1136; GFX10-WAVE64-NEXT: s_mov_b32 s0, 0 1137; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[4:5], vcc 1138; GFX10-WAVE64-NEXT: s_xor_b64 s[4:5], exec, s[4:5] 1139; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB13_3 1140; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb3 1141; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0 1142; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc 1143; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB13_6 1144; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb3 1145; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc 1146; GFX10-WAVE64-NEXT: .LBB13_3: ; %bb4 1147; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[4:5] 1148; GFX10-WAVE64-NEXT: s_mov_b32 s1, s0 1149; GFX10-WAVE64-NEXT: s_mov_b32 s2, s0 1150; GFX10-WAVE64-NEXT: s_mov_b32 s3, s0 1151; GFX10-WAVE64-NEXT: s_mov_b32 s4, s0 1152; GFX10-WAVE64-NEXT: s_mov_b32 s5, s0 1153; GFX10-WAVE64-NEXT: s_mov_b32 s6, s0 1154; GFX10-WAVE64-NEXT: s_mov_b32 s7, s0 
1155; GFX10-WAVE64-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D 1156; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0) 1157; GFX10-WAVE64-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 1158; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[0:1], vcc 1159; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB13_5 1160; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb8 1161; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 9 1162; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v0, off 1163; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0 1164; GFX10-WAVE64-NEXT: .LBB13_5: ; %UnifiedReturnBlock 1165; GFX10-WAVE64-NEXT: s_endpgm 1166; GFX10-WAVE64-NEXT: .LBB13_6: 1167; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 1168; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm 1169; GFX10-WAVE64-NEXT: s_endpgm 1170; 1171; GFX10-WAVE32-LABEL: if_after_kill_block: 1172; GFX10-WAVE32: ; %bb.0: ; %bb 1173; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo 1174; GFX10-WAVE32-NEXT: s_wqm_b32 exec_lo, exec_lo 1175; GFX10-WAVE32-NEXT: v_cmp_nle_f32_e32 vcc_lo, 0, v1 1176; GFX10-WAVE32-NEXT: s_mov_b32 s0, 0 1177; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s2, vcc_lo 1178; GFX10-WAVE32-NEXT: s_xor_b32 s2, exec_lo, s2 1179; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB13_3 1180; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb3 1181; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0 1182; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, vcc_lo 1183; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB13_6 1184; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb3 1185; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo 1186; GFX10-WAVE32-NEXT: .LBB13_3: ; %bb4 1187; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s2 1188; GFX10-WAVE32-NEXT: s_mov_b32 s1, s0 1189; GFX10-WAVE32-NEXT: s_mov_b32 s2, s0 1190; GFX10-WAVE32-NEXT: s_mov_b32 s3, s0 1191; GFX10-WAVE32-NEXT: s_mov_b32 s4, s0 1192; GFX10-WAVE32-NEXT: s_mov_b32 s5, s0 1193; GFX10-WAVE32-NEXT: s_mov_b32 s6, s0 1194; GFX10-WAVE32-NEXT: s_mov_b32 s7, s0 1195; GFX10-WAVE32-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D 1196; 
GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0) 1197; GFX10-WAVE32-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0, v0 1198; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s0, vcc_lo 1199; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB13_5 1200; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb8 1201; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 9 1202; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v0, off 1203; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0 1204; GFX10-WAVE32-NEXT: .LBB13_5: ; %UnifiedReturnBlock 1205; GFX10-WAVE32-NEXT: s_endpgm 1206; GFX10-WAVE32-NEXT: .LBB13_6: 1207; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1208; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm 1209; GFX10-WAVE32-NEXT: s_endpgm 1210bb: 1211 %tmp = fcmp ult float %arg1, 0.000000e+00 1212 br i1 %tmp, label %bb3, label %bb4 1213 1214bb3: ; preds = %bb 1215 %cmp.arg = fcmp olt float %arg, 0.0 1216 call void @llvm.amdgcn.kill(i1 %cmp.arg) 1217 br label %bb4 1218 1219bb4: ; preds = %bb3, %bb 1220 %tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 16, float %arg2, float %arg3, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) 1221 %tmp6 = extractelement <4 x float> %tmp5, i32 0 1222 %tmp7 = fcmp une float %tmp6, 0.000000e+00 1223 br i1 %tmp7, label %bb8, label %bb9 1224 1225bb8: ; preds = %bb4 1226 store volatile i32 9, i32 addrspace(1)* undef 1227 ret void 1228 1229bb9: ; preds = %bb4 1230 ret void 1231} 1232 1233define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) { 1234; SI-LABEL: cbranch_kill: 1235; SI: ; %bb.0: ; %.entry 1236; SI-NEXT: s_mov_b32 s4, 0 1237; SI-NEXT: s_mov_b64 s[0:1], exec 1238; SI-NEXT: v_mov_b32_e32 v2, v1 1239; SI-NEXT: v_mov_b32_e32 v3, v1 1240; SI-NEXT: s_mov_b32 s5, s4 1241; SI-NEXT: s_mov_b32 s6, s4 1242; SI-NEXT: s_mov_b32 s7, s4 1243; SI-NEXT: s_mov_b32 s8, s4 1244; SI-NEXT: s_mov_b32 s9, s4 1245; SI-NEXT: s_mov_b32 s10, s4 1246; SI-NEXT: s_mov_b32 s11, s4 1247; SI-NEXT: image_sample_lz v1, v[1:3], s[4:11], s[0:3] dmask:0x1 da 1248; SI-NEXT: s_waitcnt vmcnt(0)
1249; SI-NEXT: v_cmp_ge_f32_e32 vcc, 0, v1 1250; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc 1251; SI-NEXT: s_xor_b64 s[2:3], exec, s[2:3] 1252; SI-NEXT: s_cbranch_execz .LBB14_3 1253; SI-NEXT: ; %bb.1: ; %kill 1254; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec 1255; SI-NEXT: ; implicit-def: $vgpr0 1256; SI-NEXT: ; implicit-def: $vgpr1 1257; SI-NEXT: s_cbranch_scc0 .LBB14_6 1258; SI-NEXT: ; %bb.2: ; %kill 1259; SI-NEXT: s_mov_b64 exec, 0 1260; SI-NEXT: .LBB14_3: ; %Flow 1261; SI-NEXT: s_or_saveexec_b64 s[0:1], s[2:3] 1262; SI-NEXT: ; implicit-def: $vgpr2 1263; SI-NEXT: s_xor_b64 exec, exec, s[0:1] 1264; SI-NEXT: ; %bb.4: ; %live 1265; SI-NEXT: v_mul_f32_e32 v2, v0, v1 1266; SI-NEXT: ; %bb.5: ; %export 1267; SI-NEXT: s_or_b64 exec, exec, s[0:1] 1268; SI-NEXT: exp mrt0 v2, v2, v2, v2 done vm 1269; SI-NEXT: s_endpgm 1270; SI-NEXT: .LBB14_6: 1271; SI-NEXT: s_mov_b64 exec, 0 1272; SI-NEXT: exp null off, off, off, off done vm 1273; SI-NEXT: s_endpgm 1274; 1275; GFX10-WAVE64-LABEL: cbranch_kill: 1276; GFX10-WAVE64: ; %bb.0: ; %.entry 1277; GFX10-WAVE64-NEXT: s_mov_b32 s4, 0 1278; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec 1279; GFX10-WAVE64-NEXT: s_mov_b32 s5, s4 1280; GFX10-WAVE64-NEXT: s_mov_b32 s6, s4 1281; GFX10-WAVE64-NEXT: s_mov_b32 s7, s4 1282; GFX10-WAVE64-NEXT: s_mov_b32 s8, s4 1283; GFX10-WAVE64-NEXT: s_mov_b32 s9, s4 1284; GFX10-WAVE64-NEXT: s_mov_b32 s10, s4 1285; GFX10-WAVE64-NEXT: s_mov_b32 s11, s4 1286; GFX10-WAVE64-NEXT: image_sample_lz v1, [v1, v1, v1], s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY 1287; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0) 1288; GFX10-WAVE64-NEXT: v_cmp_ge_f32_e32 vcc, 0, v1 1289; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[2:3], vcc 1290; GFX10-WAVE64-NEXT: s_xor_b64 s[2:3], exec, s[2:3] 1291; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB14_3 1292; GFX10-WAVE64-NEXT: ; %bb.1: ; %kill 1293; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec 1294; GFX10-WAVE64-NEXT: ; implicit-def: $vgpr0 1295; GFX10-WAVE64-NEXT: ; implicit-def: $vgpr1 1296; 
GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB14_6 1297; GFX10-WAVE64-NEXT: ; %bb.2: ; %kill 1298; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 1299; GFX10-WAVE64-NEXT: .LBB14_3: ; %Flow 1300; GFX10-WAVE64-NEXT: s_or_saveexec_b64 s[0:1], s[2:3] 1301; GFX10-WAVE64-NEXT: ; implicit-def: $vgpr2 1302; GFX10-WAVE64-NEXT: s_xor_b64 exec, exec, s[0:1] 1303; GFX10-WAVE64-NEXT: ; %bb.4: ; %live 1304; GFX10-WAVE64-NEXT: v_mul_f32_e32 v2, v0, v1 1305; GFX10-WAVE64-NEXT: ; %bb.5: ; %export 1306; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[0:1] 1307; GFX10-WAVE64-NEXT: exp mrt0 v2, v2, v2, v2 done vm 1308; GFX10-WAVE64-NEXT: s_endpgm 1309; GFX10-WAVE64-NEXT: .LBB14_6: 1310; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 1311; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm 1312; GFX10-WAVE64-NEXT: s_endpgm 1313; 1314; GFX10-WAVE32-LABEL: cbranch_kill: 1315; GFX10-WAVE32: ; %bb.0: ; %.entry 1316; GFX10-WAVE32-NEXT: s_mov_b32 s4, 0 1317; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo 1318; GFX10-WAVE32-NEXT: s_mov_b32 s5, s4 1319; GFX10-WAVE32-NEXT: s_mov_b32 s6, s4 1320; GFX10-WAVE32-NEXT: s_mov_b32 s7, s4 1321; GFX10-WAVE32-NEXT: s_mov_b32 s8, s4 1322; GFX10-WAVE32-NEXT: s_mov_b32 s9, s4 1323; GFX10-WAVE32-NEXT: s_mov_b32 s10, s4 1324; GFX10-WAVE32-NEXT: s_mov_b32 s11, s4 1325; GFX10-WAVE32-NEXT: image_sample_lz v1, [v1, v1, v1], s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY 1326; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0) 1327; GFX10-WAVE32-NEXT: v_cmp_ge_f32_e32 vcc_lo, 0, v1 1328; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s1, vcc_lo 1329; GFX10-WAVE32-NEXT: s_xor_b32 s1, exec_lo, s1 1330; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB14_3 1331; GFX10-WAVE32-NEXT: ; %bb.1: ; %kill 1332; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, exec_lo 1333; GFX10-WAVE32-NEXT: ; implicit-def: $vgpr0 1334; GFX10-WAVE32-NEXT: ; implicit-def: $vgpr1 1335; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB14_6 1336; GFX10-WAVE32-NEXT: ; %bb.2: ; %kill 1337; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1338; GFX10-WAVE32-NEXT: .LBB14_3: ; %Flow 1339; 
GFX10-WAVE32-NEXT: s_or_saveexec_b32 s0, s1 1340; GFX10-WAVE32-NEXT: ; implicit-def: $vgpr2 1341; GFX10-WAVE32-NEXT: s_xor_b32 exec_lo, exec_lo, s0 1342; GFX10-WAVE32-NEXT: ; %bb.4: ; %live 1343; GFX10-WAVE32-NEXT: v_mul_f32_e32 v2, v0, v1 1344; GFX10-WAVE32-NEXT: ; %bb.5: ; %export 1345; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s0 1346; GFX10-WAVE32-NEXT: exp mrt0 v2, v2, v2, v2 done vm 1347; GFX10-WAVE32-NEXT: s_endpgm 1348; GFX10-WAVE32-NEXT: .LBB14_6: 1349; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1350; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm 1351; GFX10-WAVE32-NEXT: s_endpgm 1352.entry: 1353 %sample = call float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 1, float %val1, float %val1, float %val1, float 0.000000e+00, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0) 1354 %cond0 = fcmp ugt float %sample, 0.000000e+00 1355 br i1 %cond0, label %live, label %kill 1356 1357kill: 1358 call void @llvm.amdgcn.kill(i1 false) 1359 br label %export 1360 1361live: 1362 %scale = fmul reassoc nnan nsz arcp contract float %val0, %sample 1363 br label %export 1364 1365export: 1366 %proxy = phi float [ undef, %kill ], [ %scale, %live ] 1367 call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 15, float %proxy, float %proxy, float %proxy, float %proxy, i1 immarg true, i1 immarg true) #3 1368 ret void 1369} 1370 1371 1372define amdgpu_ps void @complex_loop(i32 inreg %cmpa, i32 %cmpb, i32 %cmpc) { 1373; SI-LABEL: complex_loop: 1374; SI: ; %bb.0: ; %.entry 1375; SI-NEXT: s_cmp_lt_i32 s0, 1 1376; SI-NEXT: s_cbranch_scc1 .LBB15_7 1377; SI-NEXT: ; %bb.1: ; %.lr.ph 1378; SI-NEXT: s_mov_b64 s[2:3], exec 1379; SI-NEXT: s_mov_b32 s6, 0 1380; SI-NEXT: s_mov_b64 s[0:1], 0 1381; SI-NEXT: s_branch .LBB15_3 1382; SI-NEXT: .LBB15_2: ; %latch 1383; SI-NEXT: ; in Loop: Header=BB15_3 Depth=1 1384; SI-NEXT: s_or_b64 exec, exec, s[4:5] 1385; SI-NEXT: s_add_i32 s6, s6, 1 1386; SI-NEXT: v_cmp_ge_i32_e32 vcc, s6, v1 1387; SI-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 1388; 
SI-NEXT: v_mov_b32_e32 v2, s6 1389; SI-NEXT: s_andn2_b64 exec, exec, s[0:1] 1390; SI-NEXT: s_cbranch_execz .LBB15_6 1391; SI-NEXT: .LBB15_3: ; %hdr 1392; SI-NEXT: ; =>This Inner Loop Header: Depth=1 1393; SI-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0 1394; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc 1395; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] 1396; SI-NEXT: s_cbranch_execz .LBB15_2 1397; SI-NEXT: ; %bb.4: ; %kill 1398; SI-NEXT: ; in Loop: Header=BB15_3 Depth=1 1399; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], exec 1400; SI-NEXT: s_cbranch_scc0 .LBB15_8 1401; SI-NEXT: ; %bb.5: ; %kill 1402; SI-NEXT: ; in Loop: Header=BB15_3 Depth=1 1403; SI-NEXT: s_mov_b64 exec, 0 1404; SI-NEXT: s_branch .LBB15_2 1405; SI-NEXT: .LBB15_6: ; %Flow 1406; SI-NEXT: s_or_b64 exec, exec, s[0:1] 1407; SI-NEXT: exp mrt0 v2, v2, v0, v0 done vm 1408; SI-NEXT: s_endpgm 1409; SI-NEXT: .LBB15_7: 1410; SI-NEXT: v_mov_b32_e32 v2, -1 1411; SI-NEXT: exp mrt0 v2, v2, v0, v0 done vm 1412; SI-NEXT: s_endpgm 1413; SI-NEXT: .LBB15_8: 1414; SI-NEXT: s_mov_b64 exec, 0 1415; SI-NEXT: exp null off, off, off, off done vm 1416; SI-NEXT: s_endpgm 1417; 1418; GFX10-WAVE64-LABEL: complex_loop: 1419; GFX10-WAVE64: ; %bb.0: ; %.entry 1420; GFX10-WAVE64-NEXT: s_cmp_lt_i32 s0, 1 1421; GFX10-WAVE64-NEXT: s_cbranch_scc1 .LBB15_7 1422; GFX10-WAVE64-NEXT: ; %bb.1: ; %.lr.ph 1423; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec 1424; GFX10-WAVE64-NEXT: s_mov_b32 s6, 0 1425; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], 0 1426; GFX10-WAVE64-NEXT: s_branch .LBB15_3 1427; GFX10-WAVE64-NEXT: .LBB15_2: ; %latch 1428; GFX10-WAVE64-NEXT: ; in Loop: Header=BB15_3 Depth=1 1429; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[4:5] 1430; GFX10-WAVE64-NEXT: s_add_i32 s6, s6, 1 1431; GFX10-WAVE64-NEXT: v_cmp_ge_i32_e32 vcc, s6, v1 1432; GFX10-WAVE64-NEXT: v_mov_b32_e32 v2, s6 1433; GFX10-WAVE64-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 1434; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, s[0:1] 1435; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB15_6 1436; GFX10-WAVE64-NEXT: .LBB15_3: ; 
%hdr 1437; GFX10-WAVE64-NEXT: ; =>This Inner Loop Header: Depth=1 1438; GFX10-WAVE64-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0 1439; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[4:5], vcc 1440; GFX10-WAVE64-NEXT: s_xor_b64 s[4:5], exec, s[4:5] 1441; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB15_2 1442; GFX10-WAVE64-NEXT: ; %bb.4: ; %kill 1443; GFX10-WAVE64-NEXT: ; in Loop: Header=BB15_3 Depth=1 1444; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], exec 1445; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB15_8 1446; GFX10-WAVE64-NEXT: ; %bb.5: ; %kill 1447; GFX10-WAVE64-NEXT: ; in Loop: Header=BB15_3 Depth=1 1448; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 1449; GFX10-WAVE64-NEXT: s_branch .LBB15_2 1450; GFX10-WAVE64-NEXT: .LBB15_6: ; %Flow 1451; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[0:1] 1452; GFX10-WAVE64-NEXT: exp mrt0 v2, v2, v0, v0 done vm 1453; GFX10-WAVE64-NEXT: s_endpgm 1454; GFX10-WAVE64-NEXT: .LBB15_7: 1455; GFX10-WAVE64-NEXT: v_mov_b32_e32 v2, -1 1456; GFX10-WAVE64-NEXT: exp mrt0 v2, v2, v0, v0 done vm 1457; GFX10-WAVE64-NEXT: s_endpgm 1458; GFX10-WAVE64-NEXT: .LBB15_8: 1459; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 1460; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm 1461; GFX10-WAVE64-NEXT: s_endpgm 1462; 1463; GFX10-WAVE32-LABEL: complex_loop: 1464; GFX10-WAVE32: ; %bb.0: ; %.entry 1465; GFX10-WAVE32-NEXT: s_cmp_lt_i32 s0, 1 1466; GFX10-WAVE32-NEXT: s_cbranch_scc1 .LBB15_7 1467; GFX10-WAVE32-NEXT: ; %bb.1: ; %.lr.ph 1468; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo 1469; GFX10-WAVE32-NEXT: s_mov_b32 s0, 0 1470; GFX10-WAVE32-NEXT: s_mov_b32 s2, 0 1471; GFX10-WAVE32-NEXT: s_branch .LBB15_3 1472; GFX10-WAVE32-NEXT: .LBB15_2: ; %latch 1473; GFX10-WAVE32-NEXT: ; in Loop: Header=BB15_3 Depth=1 1474; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s3 1475; GFX10-WAVE32-NEXT: s_add_i32 s2, s2, 1 1476; GFX10-WAVE32-NEXT: v_cmp_ge_i32_e32 vcc_lo, s2, v1 1477; GFX10-WAVE32-NEXT: v_mov_b32_e32 v2, s2 1478; GFX10-WAVE32-NEXT: s_or_b32 s0, vcc_lo, s0 1479; GFX10-WAVE32-NEXT: s_andn2_b32 
exec_lo, exec_lo, s0 1480; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB15_6 1481; GFX10-WAVE32-NEXT: .LBB15_3: ; %hdr 1482; GFX10-WAVE32-NEXT: ; =>This Inner Loop Header: Depth=1 1483; GFX10-WAVE32-NEXT: v_cmp_gt_u32_e32 vcc_lo, s2, v0 1484; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s3, vcc_lo 1485; GFX10-WAVE32-NEXT: s_xor_b32 s3, exec_lo, s3 1486; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB15_2 1487; GFX10-WAVE32-NEXT: ; %bb.4: ; %kill 1488; GFX10-WAVE32-NEXT: ; in Loop: Header=BB15_3 Depth=1 1489; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, exec_lo 1490; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB15_8 1491; GFX10-WAVE32-NEXT: ; %bb.5: ; %kill 1492; GFX10-WAVE32-NEXT: ; in Loop: Header=BB15_3 Depth=1 1493; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1494; GFX10-WAVE32-NEXT: s_branch .LBB15_2 1495; GFX10-WAVE32-NEXT: .LBB15_6: ; %Flow 1496; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s0 1497; GFX10-WAVE32-NEXT: exp mrt0 v2, v2, v0, v0 done vm 1498; GFX10-WAVE32-NEXT: s_endpgm 1499; GFX10-WAVE32-NEXT: .LBB15_7: 1500; GFX10-WAVE32-NEXT: v_mov_b32_e32 v2, -1 1501; GFX10-WAVE32-NEXT: exp mrt0 v2, v2, v0, v0 done vm 1502; GFX10-WAVE32-NEXT: s_endpgm 1503; GFX10-WAVE32-NEXT: .LBB15_8: 1504; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1505; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm 1506; GFX10-WAVE32-NEXT: s_endpgm 1507.entry: 1508 %flaga = icmp sgt i32 %cmpa, 0 1509 br i1 %flaga, label %.lr.ph, label %._crit_edge 1510 1511.lr.ph: 1512 br label %hdr 1513 1514hdr: 1515 %ctr = phi i32 [ 0, %.lr.ph ], [ %ctr.next, %latch ] 1516 %flagb = icmp ugt i32 %ctr, %cmpb 1517 br i1 %flagb, label %kill, label %latch 1518 1519kill: 1520 call void @llvm.amdgcn.kill(i1 false) 1521 br label %latch 1522 1523latch: 1524 %ctr.next = add nuw nsw i32 %ctr, 1 1525 %flagc = icmp slt i32 %ctr.next, %cmpc 1526 br i1 %flagc, label %hdr, label %._crit_edge 1527 1528._crit_edge: 1529 %tmp = phi i32 [ -1, %.entry ], [ %ctr.next, %latch ] 1530 %out = bitcast i32 %tmp to float 1531 call void @llvm.amdgcn.exp.f32(i32 
immarg 0, i32 immarg 15, float %out, float %out, float undef, float undef, i1 immarg true, i1 immarg true) 1532 ret void 1533} 1534 1535define void @skip_mode_switch(i32 %arg) { 1536; SI-LABEL: skip_mode_switch: 1537; SI: ; %bb.0: ; %entry 1538; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1539; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1540; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc 1541; SI-NEXT: s_cbranch_execz .LBB16_2 1542; SI-NEXT: ; %bb.1: ; %bb.0 1543; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3 1544; SI-NEXT: .LBB16_2: ; %bb.1 1545; SI-NEXT: s_or_b64 exec, exec, s[4:5] 1546; SI-NEXT: s_setpc_b64 s[30:31] 1547; 1548; GFX10-WAVE64-LABEL: skip_mode_switch: 1549; GFX10-WAVE64: ; %bb.0: ; %entry 1550; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1551; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0 1552; GFX10-WAVE64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1553; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[4:5], vcc 1554; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB16_2 1555; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb.0 1556; GFX10-WAVE64-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3 1557; GFX10-WAVE64-NEXT: .LBB16_2: ; %bb.1 1558; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[4:5] 1559; GFX10-WAVE64-NEXT: s_setpc_b64 s[30:31] 1560; 1561; GFX10-WAVE32-LABEL: skip_mode_switch: 1562; GFX10-WAVE32: ; %bb.0: ; %entry 1563; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1564; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0 1565; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1566; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s4, vcc_lo 1567; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB16_2 1568; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb.0 1569; GFX10-WAVE32-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3 1570; GFX10-WAVE32-NEXT: .LBB16_2: ; %bb.1 1571; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s4 1572; GFX10-WAVE32-NEXT: s_setpc_b64 s[30:31] 1573entry: 1574 %cmp = icmp eq i32 %arg, 0 1575 br i1 %cmp, label %bb.0, label %bb.1 1576 1577bb.0: 1578 call void 
@llvm.amdgcn.s.setreg(i32 2049, i32 3) 1579 br label %bb.1 1580 1581bb.1: 1582 ret void 1583} 1584 1585declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #3 1586declare float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 1587declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 1588declare void @llvm.amdgcn.kill(i1) #0 1589 1590declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) 1591 1592attributes #0 = { nounwind } 1593attributes #1 = { nounwind readonly } 1594attributes #2 = { nounwind readnone speculatable } 1595attributes #3 = { inaccessiblememonly nounwind writeonly } 1596