; Test stores of element-swapped vector elements.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s

; Every function below reverses the element order of its vector argument with
; a shufflevector and stores the result.  The checks name the store
; instruction and the address form expected for each case.

; Test v16i8 stores.  Reversing all sixteen byte elements is expected to be
; stored with vstbrq.
define void @f1(<16 x i8> %val, <16 x i8> *%ptr) {
; CHECK-LABEL: f1:
; CHECK: vstbrq %v24, 0(%r2)
; CHECK: br %r14
  %swap = shufflevector <16 x i8> %val, <16 x i8> undef,
                        <16 x i32> <i32 15, i32 14, i32 13, i32 12,
                                    i32 11, i32 10, i32 9, i32 8,
                                    i32 7, i32 6, i32 5, i32 4,
                                    i32 3, i32 2, i32 1, i32 0>
  store <16 x i8> %swap, <16 x i8> *%ptr
  ret void
}

; Test v8i16 stores.  The eight halfword elements are reversed; vsterh is
; expected.
define void @f2(<8 x i16> %val, <8 x i16> *%ptr) {
; CHECK-LABEL: f2:
; CHECK: vsterh %v24, 0(%r2)
; CHECK: br %r14
  %swap = shufflevector <8 x i16> %val, <8 x i16> undef,
                        <8 x i32> <i32 7, i32 6, i32 5, i32 4,
                                   i32 3, i32 2, i32 1, i32 0>
  store <8 x i16> %swap, <8 x i16> *%ptr
  ret void
}

; Test v4i32 stores.  The four word elements are reversed; vsterf is expected.
define void @f3(<4 x i32> %val, <4 x i32> *%ptr) {
; CHECK-LABEL: f3:
; CHECK: vsterf %v24, 0(%r2)
; CHECK: br %r14
  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x i32> %swap, <4 x i32> *%ptr
  ret void
}

; Test v2i64 stores.  The two doubleword elements are swapped; vsterg is
; expected.
define void @f4(<2 x i64> %val, <2 x i64> *%ptr) {
; CHECK-LABEL: f4:
; CHECK: vsterg %v24, 0(%r2)
; CHECK: br %r14
  %swap = shufflevector <2 x i64> %val, <2 x i64> undef,
                        <2 x i32> <i32 1, i32 0>
  store <2 x i64> %swap, <2 x i64> *%ptr
  ret void
}

; Test v4f32 stores.  Same expected instruction as the v4i32 case (vsterf).
define void @f5(<4 x float> %val, <4 x float> *%ptr) {
; CHECK-LABEL: f5:
; CHECK: vsterf %v24, 0(%r2)
; CHECK: br %r14
  %swap = shufflevector <4 x float> %val, <4 x float> undef,
                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x float> %swap, <4 x float> *%ptr
  ret void
}

; Test v2f64 stores.
define void @f6(<2 x double> %val, <2 x double> *%ptr) {
; CHECK-LABEL: f6:
; CHECK: vsterg %v24, 0(%r2)
; CHECK: br %r14
  %swap = shufflevector <2 x double> %val, <2 x double> undef,
                        <2 x i32> <i32 1, i32 0>
  store <2 x double> %swap, <2 x double> *%ptr
  ret void
}

; Test the highest aligned in-range offset.  Element 255 of a <4 x i32> array
; is 255 * 16 = 4080 bytes past the base, which is expected to appear directly
; as the displacement.
define void @f7(<4 x i32> %val, <4 x i32> *%base) {
; CHECK-LABEL: f7:
; CHECK: vsterf %v24, 4080(%r2)
; CHECK: br %r14
  %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 255
  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x i32> %swap, <4 x i32> *%ptr
  ret void
}

; Test the highest unaligned in-range offset.  The address is formed with a
; byte-granular getelementptr and the store is marked align 1.
define void @f8(<4 x i32> %val, i8 *%base) {
; CHECK-LABEL: f8:
; CHECK: vsterf %v24, 4095(%r2)
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 4095
  %ptr = bitcast i8 *%addr to <4 x i32> *
  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x i32> %swap, <4 x i32> *%ptr, align 1
  ret void
}

; Test the next offset up, which requires separate address logic.
; Element 256 is 256 * 16 = 4096 bytes past the base; the expected code adds
; 4096 to the base register and then stores with a zero displacement.
define void @f9(<4 x i32> %val, <4 x i32> *%base) {
; CHECK-LABEL: f9:
; CHECK: aghi %r2, 4096
; CHECK: vsterf %v24, 0(%r2)
; CHECK: br %r14
  %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 256
  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x i32> %swap, <4 x i32> *%ptr
  ret void
}

; Test negative offsets, which also require separate address logic.
; Element -1 is 16 bytes before the base; the expected code subtracts 16 from
; the base register and then stores with a zero displacement.
define void @f10(<4 x i32> %val, <4 x i32> *%base) {
; CHECK-LABEL: f10:
; CHECK: aghi %r2, -16
; CHECK: vsterf %v24, 0(%r2)
; CHECK: br %r14
  %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 -1
  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x i32> %swap, <4 x i32> *%ptr
  ret void
}

; Check that indexes are allowed.  The variable byte offset %index is expected
; to be used as the index register of the store address.
define void @f11(<4 x i32> %val, i8 *%base, i64 %index) {
; CHECK-LABEL: f11:
; CHECK: vsterf %v24, 0(%r3,%r2)
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 %index
  %ptr = bitcast i8 *%addr to <4 x i32> *
  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x i32> %swap, <4 x i32> *%ptr, align 1
  ret void
}
