1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=nvptx-unknown-unknown | FileCheck %s
3
; Aggregate used by the byval arguments below: a struct wrapping an array of
; four i64 elements.
4%struct.St8x4 = type { [4 x i64] }
5
; caller_St8x4: forwards its byval %struct.St8x4 argument (%in, align 8) to the
; internal fastcc function @callee_St8x4, then writes the four i64 elements of
; the returned [4 x i64] to %ret at byte offsets 0, 8, 16 and 24.
;
; What the autogenerated assertions below pin down in the emitted PTX:
;   * a 32-byte, 8-aligned local depot (__local_depot0) into which the byval
;     argument is copied with four ld.param.u64 / st.local.u64 pairs;
;   * the copy is reloaded through %SP and passed as param0, which is declared
;     with .align 16 and filled with two st.param.v2.b64 stores;
;   * retval0 is likewise declared with .align 16 and read back with two
;     ld.param.v2.b64 loads;
;   * the result is written through %r1 (loaded from param_1) with four
;     scalar st.u64 stores.
; NOTE(review): the IR only requests align 8 while the expected PTX uses
; .align 16 for param0/retval0 -- presumably this test exists to cover that
; alignment increase for calls to internal functions; confirm against the
; commit that introduced it.
6define dso_local void @caller_St8x4(ptr nocapture noundef readonly byval(%struct.St8x4) align 8 %in, ptr nocapture noundef writeonly %ret) {
7; CHECK-LABEL: caller_St8x4(
8; CHECK:       {
9; CHECK-NEXT:    .local .align 8 .b8 __local_depot0[32];
10; CHECK-NEXT:    .reg .b32 %SP;
11; CHECK-NEXT:    .reg .b32 %SPL;
12; CHECK-NEXT:    .reg .b32 %r<4>;
13; CHECK-NEXT:    .reg .b64 %rd<17>;
14; CHECK-EMPTY:
15; CHECK-NEXT:  // %bb.0:
16; CHECK-NEXT:    mov.u32 %SPL, __local_depot0;
17; CHECK-NEXT:    cvta.local.u32 %SP, %SPL;
18; CHECK-NEXT:    ld.param.u32 %r1, [caller_St8x4_param_1];
19; CHECK-NEXT:    add.u32 %r3, %SPL, 0;
20; CHECK-NEXT:    ld.param.u64 %rd1, [caller_St8x4_param_0+24];
21; CHECK-NEXT:    ld.param.u64 %rd2, [caller_St8x4_param_0+16];
22; CHECK-NEXT:    ld.param.u64 %rd3, [caller_St8x4_param_0+8];
23; CHECK-NEXT:    ld.param.u64 %rd4, [caller_St8x4_param_0];
24; CHECK-NEXT:    st.local.u64 [%r3], %rd4;
25; CHECK-NEXT:    st.local.u64 [%r3+8], %rd3;
26; CHECK-NEXT:    st.local.u64 [%r3+16], %rd2;
27; CHECK-NEXT:    st.local.u64 [%r3+24], %rd1;
28; CHECK-NEXT:    ld.u64 %rd5, [%SP+8];
29; CHECK-NEXT:    ld.u64 %rd6, [%SP+0];
30; CHECK-NEXT:    ld.u64 %rd7, [%SP+24];
31; CHECK-NEXT:    ld.u64 %rd8, [%SP+16];
32; CHECK-NEXT:    { // callseq 0, 0
33; CHECK-NEXT:    .reg .b32 temp_param_reg;
34; CHECK-NEXT:    .param .align 16 .b8 param0[32];
35; CHECK-NEXT:    st.param.v2.b64 [param0+0], {%rd6, %rd5};
36; CHECK-NEXT:    st.param.v2.b64 [param0+16], {%rd8, %rd7};
37; CHECK-NEXT:    .param .align 16 .b8 retval0[32];
38; CHECK-NEXT:    call.uni (retval0),
39; CHECK-NEXT:    callee_St8x4,
40; CHECK-NEXT:    (
41; CHECK-NEXT:    param0
42; CHECK-NEXT:    );
43; CHECK-NEXT:    ld.param.v2.b64 {%rd9, %rd10}, [retval0+0];
44; CHECK-NEXT:    ld.param.v2.b64 {%rd11, %rd12}, [retval0+16];
45; CHECK-NEXT:    } // callseq 0
46; CHECK-NEXT:    st.u64 [%r1], %rd9;
47; CHECK-NEXT:    st.u64 [%r1+8], %rd10;
48; CHECK-NEXT:    st.u64 [%r1+16], %rd11;
49; CHECK-NEXT:    st.u64 [%r1+24], %rd12;
50; CHECK-NEXT:    ret;
; Pass %in by value to the callee and capture the returned aggregate.
; NOTE(review): attribute group #2 is not defined in the portion of the file
; visible here -- verify it is defined elsewhere or is intentionally empty.
51  %call = tail call fastcc [4 x i64] @callee_St8x4(ptr noundef nonnull byval(%struct.St8x4) align 8 %in) #2
; Split the returned [4 x i64] into its four scalar elements.
52  %.fca.0.extract = extractvalue [4 x i64] %call, 0
53  %.fca.1.extract = extractvalue [4 x i64] %call, 1
54  %.fca.2.extract = extractvalue [4 x i64] %call, 2
55  %.fca.3.extract = extractvalue [4 x i64] %call, 3
; Store element i to %ret + 8*i, each store 8-byte aligned.
56  store i64 %.fca.0.extract, ptr %ret, align 8
57  %ref.tmp.sroa.4.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 8
58  store i64 %.fca.1.extract, ptr %ref.tmp.sroa.4.0..sroa_idx, align 8
59  %ref.tmp.sroa.5.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 16
60  store i64 %.fca.2.extract, ptr %ref.tmp.sroa.5.0..sroa_idx, align 8
61  %ref.tmp.sroa.6.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 24
62  store i64 %.fca.3.extract, ptr %ref.tmp.sroa.6.0..sroa_idx, align 8
63  ret void
64}
65
; callee_St8x4: internal fastcc function that reads the four i64 elements of
; its byval %struct.St8x4 argument (%in, align 8) and returns them unchanged,
; in order, as a [4 x i64] value.
;
; The autogenerated assertions below expect the parameter to be read with two
; ld.param.v2.u64 loads (offsets 0 and 16) and the return value to be written
; with two st.param.v2.b64 stores to func_retval0 (offsets 0 and 16), using
; only four 64-bit registers.
66define internal fastcc [4 x i64] @callee_St8x4(ptr nocapture noundef readonly byval(%struct.St8x4) align 8 %in) {
67; CHECK-LABEL: callee_St8x4(
68; CHECK:         // @callee_St8x4
69; CHECK-NEXT:  {
70; CHECK-NEXT:    .reg .b64 %rd<5>;
71; CHECK-EMPTY:
72; CHECK-NEXT:  // %bb.0:
73; CHECK-NEXT:    ld.param.v2.u64 {%rd1, %rd2}, [callee_St8x4_param_0];
74; CHECK-NEXT:    ld.param.v2.u64 {%rd3, %rd4}, [callee_St8x4_param_0+16];
75; CHECK-NEXT:    st.param.v2.b64 [func_retval0+0], {%rd1, %rd2};
76; CHECK-NEXT:    st.param.v2.b64 [func_retval0+16], {%rd3, %rd4};
77; CHECK-NEXT:    ret;
; Load elements 0..3 of the array inside %in; every load is 8-byte aligned.
78  %1 = load i64, ptr %in, align 8
79  %arrayidx.1 = getelementptr inbounds [4 x i64], ptr %in, i64 0, i64 1
80  %2 = load i64, ptr %arrayidx.1, align 8
81  %arrayidx.2 = getelementptr inbounds [4 x i64], ptr %in, i64 0, i64 2
82  %3 = load i64, ptr %arrayidx.2, align 8
83  %arrayidx.3 = getelementptr inbounds [4 x i64], ptr %in, i64 0, i64 3
84  %4 = load i64, ptr %arrayidx.3, align 8
; Rebuild the aggregate element by element, starting from poison, and return it.
85  %5 = insertvalue [4 x i64] poison, i64 %1, 0
86  %6 = insertvalue [4 x i64] %5, i64 %2, 1
87  %7 = insertvalue [4 x i64] %6, i64 %3, 2
88  %oldret = insertvalue [4 x i64] %7, i64 %4, 3
89  ret [4 x i64] %oldret
90}
91
; call_void: takes no arguments and returns immediately.
;
; The autogenerated assertions below expect the opening brace to be followed
; by two empty lines, then the entry-block label and a lone ret, i.e. no
; register or local declarations in the function body.
92define void @call_void() {
93; CHECK-LABEL: call_void(
94; CHECK:       {
95; CHECK-EMPTY:
96; CHECK-EMPTY:
97; CHECK-NEXT:  // %bb.0:
98; CHECK-NEXT:    ret;
99  ret void
100}
101