; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s

; Codegen test for floating-point vector arithmetic with the MSA and FP64
; attributes enabled (see the llc invocation on the first line).
;
; Every function loads its vector operand(s) through pointer arguments,
; applies exactly one IR operation, and stores the result through %c.
; The patterns below expect:
;   * %c, %a and %b to be addressed through $4, $5 and $6 respectively;
;   * <4 x float> to use the ".w" instruction forms and <2 x double> to use
;     the ".d" forms.
;
; Each function opens with a label directive so that FileCheck matches the
; patterns of one function only against that function's own output. The
; closing ".size" pattern checks that the function's end marker follows the
; instructions matched before it.

; fadd on <4 x float>: expects a single fadd.w of the two loaded vectors.
define void @add_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK-LABEL: add_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fadd <4 x float> %1, %2
  ; CHECK-DAG: fadd.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v4f32
}

; fadd on <2 x double>: expects a single fadd.d of the two loaded vectors.
define void @add_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK-LABEL: add_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fadd <2 x double> %1, %2
  ; CHECK-DAG: fadd.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v2f64
}

; fsub on <4 x float>: expects fsub.w with %a's vector as the first source.
define void @sub_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK-LABEL: sub_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fsub <4 x float> %1, %2
  ; CHECK-DAG: fsub.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v4f32
}

; fsub on <2 x double>: expects fsub.d with %a's vector as the first source.
define void @sub_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK-LABEL: sub_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fsub <2 x double> %1, %2
  ; CHECK-DAG: fsub.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v2f64
}

; fmul on <4 x float>: expects a single fmul.w of the two loaded vectors.
define void @mul_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK-LABEL: mul_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fmul <4 x float> %1, %2
  ; CHECK-DAG: fmul.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size mul_v4f32
}

; fmul on <2 x double>: expects a single fmul.d of the two loaded vectors.
define void @mul_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK-LABEL: mul_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fmul <2 x double> %1, %2
  ; CHECK-DAG: fmul.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size mul_v2f64
}

; fdiv on <4 x float>: expects fdiv.w with %a's vector as the first source.
define void @fdiv_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK-LABEL: fdiv_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fdiv <4 x float> %1, %2
  ; CHECK-DAG: fdiv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size fdiv_v4f32
}

; fdiv on <2 x double>: expects fdiv.d with %a's vector as the first source.
define void @fdiv_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK-LABEL: fdiv_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fdiv <2 x double> %1, %2
  ; CHECK-DAG: fdiv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size fdiv_v2f64
}

; llvm.fabs on <4 x float>: the pattern expects fmax_a.w with the loaded
; vector used as both source operands.
define void @fabs_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK-LABEL: fabs_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.fabs.v4f32 (<4 x float> %1)
  ; CHECK-DAG: fmax_a.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size fabs_v4f32
}

; llvm.fabs on <2 x double>: the pattern expects fmax_a.d with the loaded
; vector used as both source operands.
define void @fabs_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK-LABEL: fabs_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.fabs.v2f64 (<2 x double> %1)
  ; CHECK-DAG: fmax_a.d [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size fabs_v2f64
}

; llvm.sqrt on <4 x float>: expects a single-source fsqrt.w.
define void @fsqrt_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK-LABEL: fsqrt_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %1)
  ; CHECK-DAG: fsqrt.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size fsqrt_v4f32
}

; llvm.sqrt on <2 x double>: expects a single-source fsqrt.d.
define void @fsqrt_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK-LABEL: fsqrt_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %1)
  ; CHECK-DAG: fsqrt.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size fsqrt_v2f64
}

; Intrinsics called by the fabs_* and fsqrt_* functions above.
declare <4 x float> @llvm.fabs.v4f32(<4 x float> %Val)
declare <2 x double> @llvm.fabs.v2f64(<2 x double> %Val)
declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %Val)
declare <2 x double> @llvm.sqrt.v2f64(<2 x double> %Val)