; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -mcpu=fiji -verify-machineinstrs | FileCheck -check-prefix=VI %s
; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GFX9 %s
; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefix=GFX10 %s

; ===================================================================================
; V_OR3_B32
; ===================================================================================

; Baseline: (a | b) | c with all three operands in VGPRs. The fiji checks expect
; two v_or_b32_e32; the gfx900 and gfx1010 checks expect a single v_or3_b32.
define amdgpu_ps float @or3(i32 %a, i32 %b, i32 %c) {
; VI-LABEL: or3:
; VI:       ; %bb.0:
; VI-NEXT:    v_or_b32_e32 v0, v0, v1
; VI-NEXT:    v_or_b32_e32 v0, v0, v2
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: or3:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_or3_b32 v0, v0, v1, v2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: or3:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_or3_b32 v0, v0, v1, v2
; GFX10-NEXT:    ; return to shader part epilog
  %x = or i32 %a, %b
  %result = or i32 %x, %c
  %bc = bitcast i32 %result to float
  ret float %bc
}

; One VGPR operand (%a) and two SGPR operands (%b, %c are inreg and show up as
; s2/s3 in the checks).
; On gfx900 the ThreeOp instruction variant is not used due to constant bus
; limitations, so two v_or_b32_e32 are expected there (as on fiji). The gfx1010
; checks expect both SGPRs to be folded into a single v_or3_b32.
; TODO: with reassociation it is possible to replace a v_or_b32_e32 with an s_or_b32
define amdgpu_ps float @or3_vgpr_a(i32 %a, i32 inreg %b, i32 inreg %c) {
; VI-LABEL: or3_vgpr_a:
; VI:       ; %bb.0:
; VI-NEXT:    v_or_b32_e32 v0, s2, v0
; VI-NEXT:    v_or_b32_e32 v0, s3, v0
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: or3_vgpr_a:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_or_b32_e32 v0, s2, v0
; GFX9-NEXT:    v_or_b32_e32 v0, s3, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: or3_vgpr_a:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_or3_b32 v0, v0, s2, s3
; GFX10-NEXT:    ; return to shader part epilog
  %x = or i32 %a, %b
  %result = or i32 %x, %c
  %bc = bitcast i32 %result to float
  ret float %bc
}

; All operands in VGPRs, but the inner or is the right-hand operand of the outer
; one: a | (b | c). The three-operand form is still expected on gfx900/gfx1010.
define amdgpu_ps float @or3_vgpr_all2(i32 %a, i32 %b, i32 %c) {
; VI-LABEL: or3_vgpr_all2:
; VI:       ; %bb.0:
; VI-NEXT:    v_or_b32_e32 v1, v1, v2
; VI-NEXT:    v_or_b32_e32 v0, v0, v1
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: or3_vgpr_all2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_or3_b32 v0, v1, v2, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: or3_vgpr_all2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_or3_b32 v0, v1, v2, v0
; GFX10-NEXT:    ; return to shader part epilog
  %x = or i32 %b, %c
  %result = or i32 %a, %x
  %bc = bitcast i32 %result to float
  ret float %bc
}

; A single SGPR operand (%a is inreg, s2 in the checks) with two VGPR operands:
; a single v_or3_b32 is expected on both gfx900 and gfx1010.
define amdgpu_ps float @or3_vgpr_bc(i32 inreg %a, i32 %b, i32 %c) {
; VI-LABEL: or3_vgpr_bc:
; VI:       ; %bb.0:
; VI-NEXT:    v_or_b32_e32 v0, s2, v0
; VI-NEXT:    v_or_b32_e32 v0, v0, v1
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: or3_vgpr_bc:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_or3_b32 v0, s2, v0, v1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: or3_vgpr_bc:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_or3_b32 v0, s2, v0, v1
; GFX10-NEXT:    ; return to shader part epilog
  %x = or i32 %a, %b
  %result = or i32 %x, %c
  %bc = bitcast i32 %result to float
  ret float %bc
}

; Two VGPR operands plus the immediate 64: the checks expect the constant to end
; up as the last source of a single v_or3_b32 on gfx900 and gfx1010.
define amdgpu_ps float @or3_vgpr_const(i32 %a, i32 %b) {
; VI-LABEL: or3_vgpr_const:
; VI:       ; %bb.0:
; VI-NEXT:    v_or_b32_e32 v0, v1, v0
; VI-NEXT:    v_or_b32_e32 v0, 64, v0
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: or3_vgpr_const:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_or3_b32 v0, v1, v0, 64
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: or3_vgpr_const:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_or3_b32 v0, v1, v0, 64
; GFX10-NEXT:    ; return to shader part epilog
  %x = or i32 64, %b
  %result = or i32 %x, %a
  %bc = bitcast i32 %result to float
  ret float %bc
}