; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mattr=+simd128 | FileCheck %s

;; Test that SIMD extending operations can be successfully selected

target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"

;; Each function below takes half of the lanes of a 128-bit vector with a
;; shufflevector and widens them with sext or zext. When the shuffle mask
;; selects exactly the low or the high half, the assertions expect a single
;; extend_{low,high}_*_{s,u} instruction.

;; i8x16 -> i16x8, lanes 0-7, sign-extended.
define <8 x i16> @extend_low_i8x16_s(<16 x i8> %v) {
; CHECK-LABEL: extend_low_i8x16_s:
; CHECK:         .functype extend_low_i8x16_s (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.extend_low_i8x16_s
; CHECK-NEXT:    # fallthrough-return
  %low = shufflevector <16 x i8> %v, <16 x i8> undef,
           <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %extended = sext <8 x i8> %low to <8 x i16>
  ret <8 x i16> %extended
}

;; i8x16 -> i16x8, lanes 0-7, zero-extended.
define <8 x i16> @extend_low_i8x16_u(<16 x i8> %v) {
; CHECK-LABEL: extend_low_i8x16_u:
; CHECK:         .functype extend_low_i8x16_u (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.extend_low_i8x16_u
; CHECK-NEXT:    # fallthrough-return
  %low = shufflevector <16 x i8> %v, <16 x i8> undef,
           <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %extended = zext <8 x i8> %low to <8 x i16>
  ret <8 x i16> %extended
}

;; i8x16 -> i16x8, lanes 8-15, sign-extended.
define <8 x i16> @extend_high_i8x16_s(<16 x i8> %v) {
; CHECK-LABEL: extend_high_i8x16_s:
; CHECK:         .functype extend_high_i8x16_s (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.extend_high_i8x16_s
; CHECK-NEXT:    # fallthrough-return
  %high = shufflevector <16 x i8> %v, <16 x i8> undef,
           <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %extended = sext <8 x i8> %high to <8 x i16>
  ret <8 x i16> %extended
}

;; i8x16 -> i16x8, lanes 8-15, zero-extended.
define <8 x i16> @extend_high_i8x16_u(<16 x i8> %v) {
; CHECK-LABEL: extend_high_i8x16_u:
; CHECK:         .functype extend_high_i8x16_u (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.extend_high_i8x16_u
; CHECK-NEXT:    # fallthrough-return
  %high = shufflevector <16 x i8> %v, <16 x i8> undef,
           <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %extended = zext <8 x i8> %high to <8 x i16>
  ret <8 x i16> %extended
}

;; i16x8 -> i32x4, lanes 0-3, sign-extended.
define <4 x i32> @extend_low_i16x8_s(<8 x i16> %v) {
; CHECK-LABEL: extend_low_i16x8_s:
; CHECK:         .functype extend_low_i16x8_s (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.extend_low_i16x8_s
; CHECK-NEXT:    # fallthrough-return
  %low = shufflevector <8 x i16> %v, <8 x i16> undef,
           <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %extended = sext <4 x i16> %low to <4 x i32>
  ret <4 x i32> %extended
}

;; i16x8 -> i32x4, lanes 0-3, zero-extended.
define <4 x i32> @extend_low_i16x8_u(<8 x i16> %v) {
; CHECK-LABEL: extend_low_i16x8_u:
; CHECK:         .functype extend_low_i16x8_u (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.extend_low_i16x8_u
; CHECK-NEXT:    # fallthrough-return
  %low = shufflevector <8 x i16> %v, <8 x i16> undef,
           <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %extended = zext <4 x i16> %low to <4 x i32>
  ret <4 x i32> %extended
}

;; i16x8 -> i32x4, lanes 4-7, sign-extended.
define <4 x i32> @extend_high_i16x8_s(<8 x i16> %v) {
; CHECK-LABEL: extend_high_i16x8_s:
; CHECK:         .functype extend_high_i16x8_s (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.extend_high_i16x8_s
; CHECK-NEXT:    # fallthrough-return
  %high = shufflevector <8 x i16> %v, <8 x i16> undef,
           <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %extended = sext <4 x i16> %high to <4 x i32>
  ret <4 x i32> %extended
}

;; i16x8 -> i32x4, lanes 4-7, zero-extended.
define <4 x i32> @extend_high_i16x8_u(<8 x i16> %v) {
; CHECK-LABEL: extend_high_i16x8_u:
; CHECK:         .functype extend_high_i16x8_u (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.extend_high_i16x8_u
; CHECK-NEXT:    # fallthrough-return
  %high = shufflevector <8 x i16> %v, <8 x i16> undef,
           <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %extended = zext <4 x i16> %high to <4 x i32>
  ret <4 x i32> %extended
}

;; i32x4 -> i64x2, lanes 0-1, sign-extended.
define <2 x i64> @extend_low_i32x4_s(<4 x i32> %v) {
; CHECK-LABEL: extend_low_i32x4_s:
; CHECK:         .functype extend_low_i32x4_s (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.extend_low_i32x4_s
; CHECK-NEXT:    # fallthrough-return
  %low = shufflevector <4 x i32> %v, <4 x i32> undef,
           <2 x i32> <i32 0, i32 1>
  %extended = sext <2 x i32> %low to <2 x i64>
  ret <2 x i64> %extended
}

;; i32x4 -> i64x2, lanes 0-1, zero-extended.
define <2 x i64> @extend_low_i32x4_u(<4 x i32> %v) {
; CHECK-LABEL: extend_low_i32x4_u:
; CHECK:         .functype extend_low_i32x4_u (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.extend_low_i32x4_u
; CHECK-NEXT:    # fallthrough-return
  %low = shufflevector <4 x i32> %v, <4 x i32> undef,
           <2 x i32> <i32 0, i32 1>
  %extended = zext <2 x i32> %low to <2 x i64>
  ret <2 x i64> %extended
}

;; i32x4 -> i64x2, lanes 2-3, sign-extended.
define <2 x i64> @extend_high_i32x4_s(<4 x i32> %v) {
; CHECK-LABEL: extend_high_i32x4_s:
; CHECK:         .functype extend_high_i32x4_s (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.extend_high_i32x4_s
; CHECK-NEXT:    # fallthrough-return
  %high = shufflevector <4 x i32> %v, <4 x i32> undef,
           <2 x i32> <i32 2, i32 3>
  %extended = sext <2 x i32> %high to <2 x i64>
  ret <2 x i64> %extended
}

;; i32x4 -> i64x2, lanes 2-3, zero-extended.
define <2 x i64> @extend_high_i32x4_u(<4 x i32> %v) {
; CHECK-LABEL: extend_high_i32x4_u:
; CHECK:         .functype extend_high_i32x4_u (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.extend_high_i32x4_u
; CHECK-NEXT:    # fallthrough-return
  %high = shufflevector <4 x i32> %v, <4 x i32> undef,
           <2 x i32> <i32 2, i32 3>
  %extended = zext <2 x i32> %high to <2 x i64>
  ret <2 x i64> %extended
}

;; Also test that similar patterns with offsets not corresponding to
;; the low or high half are correctly expanded.

;; Mask starts at lane 1, so it matches neither half. The expected output
;; rebuilds the vector lane by lane (extract_lane_u / replace_lane) and then
;; sign-extends each 16-bit lane in place with a shl/shr_s pair by 8.
define <8 x i16> @extend_lowish_i8x16_s(<16 x i8> %v) {
; CHECK-LABEL: extend_lowish_i8x16_s:
; CHECK:         .functype extend_lowish_i8x16_s (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 1
; CHECK-NEXT:    i16x8.splat
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 2
; CHECK-NEXT:    i16x8.replace_lane 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 3
; CHECK-NEXT:    i16x8.replace_lane 2
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 4
; CHECK-NEXT:    i16x8.replace_lane 3
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 5
; CHECK-NEXT:    i16x8.replace_lane 4
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 6
; CHECK-NEXT:    i16x8.replace_lane 5
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 7
; CHECK-NEXT:    i16x8.replace_lane 6
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 8
; CHECK-NEXT:    i16x8.replace_lane 7
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i16x8.shl
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i16x8.shr_s
; CHECK-NEXT:    # fallthrough-return
  %lowish = shufflevector <16 x i8> %v, <16 x i8> undef,
           <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
  %extended = sext <8 x i8> %lowish to <8 x i16>
  ret <8 x i16> %extended
}

;; Same off-by-one mask for i16x8 -> i32x4: lanes 1-4 are extracted one at a
;; time and the result is sign-extended with a shl/shr_s pair by 16.
define <4 x i32> @extend_lowish_i16x8_s(<8 x i16> %v) {
; CHECK-LABEL: extend_lowish_i16x8_s:
; CHECK:         .functype extend_lowish_i16x8_s (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.extract_lane_u 1
; CHECK-NEXT:    i32x4.splat
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.extract_lane_u 2
; CHECK-NEXT:    i32x4.replace_lane 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.extract_lane_u 3
; CHECK-NEXT:    i32x4.replace_lane 2
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.extract_lane_u 4
; CHECK-NEXT:    i32x4.replace_lane 3
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32x4.shl
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32x4.shr_s
; CHECK-NEXT:    # fallthrough-return
  %lowish = shufflevector <8 x i16> %v, <8 x i16> undef,
           <4 x i32> <i32 1, i32 2, i32 3, i32 4>
  %extended = sext <4 x i16> %lowish to <4 x i32>
  ret <4 x i32> %extended
}