; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-fix-function-bitcasts < %s | FileCheck -check-prefix=OPT %s

; Every kernel below calls a function through a constant-expression bitcast
; of its address, so the call-site signature differs from the callee's. The
; llc checks look at the emitted call sequence; the opt checks look at the IR
; after -amdgpu-fix-function-bitcasts.

; Return type mismatch (callee returns i32, call site expects float) with a
; noinline callee: a real call sequence is expected from llc, and a direct
; call followed by a bitcast of the result is expected from opt.
; GCN-LABEL: {{^}}test_bitcast_return_type_noinline:
; GCN: s_getpc_b64
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ret_i32_noinline@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ret_i32_noinline@rel32@hi+12
; GCN: s_swappc_b64
; OPT-LABEL: @test_bitcast_return_type_noinline(
; OPT: %val = call i32 @ret_i32_noinline()
; OPT: bitcast i32 %val to float
define amdgpu_kernel void @test_bitcast_return_type_noinline() #0 {
  %val = call float bitcast (i32()* @ret_i32_noinline to float()*)()
  %op = fadd float %val, 1.0
  store volatile float %op, float addrspace(1)* undef
  ret void
}

; Same return type mismatch with an alwaysinline callee: llc must not emit
; any call sequence to it, while opt still shows the rewritten direct call.
; GCN-LABEL: {{^}}test_bitcast_return_type_alwaysinline:
; GCN-NOT: s_getpc_b64
; GCN-NOT: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ret_i32_alwaysinline@rel32@lo+4
; GCN-NOT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ret_i32_alwaysinline@rel32@hi+12
; GCN-NOT: s_swappc_b64
; OPT-LABEL: @test_bitcast_return_type_alwaysinline(
; OPT: %val = call i32 @ret_i32_alwaysinline()
; OPT: bitcast i32 %val to float
define amdgpu_kernel void @test_bitcast_return_type_alwaysinline() #0 {
  %val = call float bitcast (i32()* @ret_i32_alwaysinline to float()*)()
  %op = fadd float %val, 1.0
  store volatile float %op, float addrspace(1)* undef
  ret void
}

; Argument type mismatch only (float passed where the callee takes i32): the
; argument is bitcast before the call and no bitcast of the result may appear.
; GCN-LABEL: {{^}}test_bitcast_argument_type:
; GCN: s_getpc_b64
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@hi+12
; GCN: s_swappc_b64
; OPT-LABEL: @test_bitcast_argument_type(
; OPT: %1 = bitcast float 2.000000e+00 to i32
; OPT: %val = call i32 @ident_i32(i32 %1)
; OPT-NOT: bitcast i32 %val to float
define amdgpu_kernel void @test_bitcast_argument_type() #0 {
  %val = call i32 bitcast (i32(i32)* @ident_i32 to i32(float)*)(float 2.0)
  %op = add i32 %val, 1
  store volatile i32 %op, i32 addrspace(1)* undef
  ret void
}

; Both the argument and the return type differ: a bitcast is expected on the
; way in and another one on the way out.
; GCN-LABEL: {{^}}test_bitcast_argument_and_return_types:
; GCN: s_getpc_b64
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@hi+12
; GCN: s_swappc_b64
; OPT-LABEL: @test_bitcast_argument_and_return_types(
; OPT: %1 = bitcast float 2.000000e+00 to i32
; OPT: %val = call i32 @ident_i32(i32 %1)
; OPT: bitcast i32 %val to float
define amdgpu_kernel void @test_bitcast_argument_and_return_types() #0 {
  %val = call float bitcast (i32(i32)* @ident_i32 to float(float)*)(float 2.0)
  %op = fadd float %val, 1.0
  store volatile float %op, float addrspace(1)* undef
  ret void
}

; Callee that adds the workitem id x to its argument. The llc checks expect
; the id to be extracted from v31 with a 0x3ff mask.
; GCN-LABEL: {{^}}use_workitem_id_x:
; GCN: s_waitcnt
; GCN-NEXT: v_and_b32_e32 v1, 0x3ff, v31
; GCN-NEXT: v_add_i32_e32 v0, vcc, v1, v0
; GCN-NEXT: s_setpc_b64
define hidden i32 @use_workitem_id_x(i32 %arg0) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %op = add i32 %id, %arg0
  ret i32 %op
}

; Return type mismatch on a call to the workitem-id-using callee above; the
; constant argument 9 is expected in v0 before the call. The opt label is
; anchored on this kernel (not on the callee's definition) so that the checks
; that follow it are scoped to the kernel body.
; GCN-LABEL: {{^}}test_bitcast_use_workitem_id_x:
; GCN: s_getpc_b64
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, use_workitem_id_x@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, use_workitem_id_x@rel32@hi+12
; GCN: v_mov_b32_e32 v0, 9
; GCN: s_swappc_b64
; GCN: v_add_f32_e32
; OPT-LABEL: @test_bitcast_use_workitem_id_x(
; OPT: %val = call i32 @use_workitem_id_x(i32 9)
; OPT: bitcast i32 %val to float
define amdgpu_kernel void @test_bitcast_use_workitem_id_x() #0 {
  %val = call float bitcast (i32(i32)* @use_workitem_id_x to float(i32)*)(i32 9)
  %op = fadd float %val, 1.0
  store volatile float %op, float addrspace(1)* undef
  ret void
}

; Argument and return type mismatch through an invoke instead of a call. The
; opt checks expect the normal destination to become a new continue.split
; block that holds the bitcast of the result.
; GCN-LABEL: {{^}}test_invoke:
; GCN: s_getpc_b64
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@hi+12
; GCN: s_swappc_b64
; OPT-LABEL: @test_invoke(
; OPT: %1 = bitcast float 2.000000e+00 to i32
; OPT: %val = invoke i32 @ident_i32(i32 %1)
; OPT-NEXT: to label %continue.split unwind label %broken
; OPT-LABEL: continue.split:
; OPT: bitcast i32 %val to float
@_ZTIi = external global i8*
declare i32 @__gxx_personality_v0(...)
define amdgpu_kernel void @test_invoke() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
  %val = invoke float bitcast (i32(i32)* @ident_i32 to float(float)*)(float 2.0)
          to label %continue unwind label %broken

broken:
  landingpad { i8*, i32 } catch i8** @_ZTIi
  ret void

continue:
  %op = fadd float %val, 1.0
  store volatile float %op, float addrspace(1)* undef
  ret void
}

; Callees appear last in the source file to test that we still lower their
; arguments before we lower any calls to them.

define hidden i32 @ret_i32_noinline() #0 {
  ret i32 4
}

define hidden i32 @ret_i32_alwaysinline() #1 {
  ret i32 4
}

define hidden i32 @ident_i32(i32 %i) #0 {
  ret i32 %i
}

declare i32 @llvm.amdgcn.workitem.id.x() #2

attributes #0 = { nounwind noinline }
attributes #1 = { alwaysinline nounwind }
attributes #2 = { nounwind readnone speculatable }