; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s

; Test SIMD loads and stores
;
; Each vector type is exercised with a plain pointer operand and with a set
; of address patterns that probe the backend's offset folding:
;   *_with_folded_offset      - `add nuw` of 16: folds into the memarg offset
;   *_with_folded_gep_offset  - `getelementptr inbounds`: folds likewise
;   *_with_unfolded_offset    - `add nsw` (no nuw): emitted as explicit i32.add
;   *_with_unfolded_gep_*     - plain or negative-index GEPs: explicit i32.add
;   *_from/to_numeric_address - constant address becomes the memarg offset
;   *_from/to_global_address  - global symbol becomes the memarg offset
; The load_splat_* tests cover the v8x16/v16x8 load_splat instructions, and
; the load_sext_/load_zext_/load_ext_ tests cover the widening
; i16x8.load8x8_s/_u loads of <8 x i8> (ext loads a <8 x i8> result directly).

target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"

; ==============================================================================
; 16 x i8
; ==============================================================================
define <16 x i8> @load_v16i8(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8:
; CHECK:         .functype load_v16i8 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <16 x i8>, <16 x i8>* %p
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8(i8* %p) {
; CHECK-LABEL: load_splat_v16i8:
; CHECK:         .functype load_splat_v16i8 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v8x16.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i8, i8* %p
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_with_folded_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_folded_offset:
; CHECK:         .functype load_v16i8_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_folded_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_folded_offset:
; CHECK:         .functype load_splat_v16i8_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v8x16.load_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i8* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to i8*
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_with_folded_gep_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_folded_gep_offset:
; CHECK:         .functype load_v16i8_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_folded_gep_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_folded_gep_offset:
; CHECK:         .functype load_splat_v16i8_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v8x16.load_splat 1
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, i8* %p, i32 1
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_with_unfolded_gep_negative_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_unfolded_gep_negative_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -1
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v8x16.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, i8* %p, i32 -1
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_with_unfolded_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_offset:
; CHECK:         .functype load_v16i8_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_unfolded_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v8x16.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i8* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to i8*
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_with_unfolded_gep_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype load_v16i8_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_unfolded_gep_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v8x16.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i8, i8* %p, i32 1
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_from_numeric_address() {
; CHECK-LABEL: load_v16i8_from_numeric_address:
; CHECK:         .functype load_v16i8_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <16 x i8>*
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_from_numeric_address() {
; CHECK-LABEL: load_splat_v16i8_from_numeric_address:
; CHECK:         .functype load_splat_v16i8_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v8x16.load_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to i8*
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

@gv_v16i8 = global <16 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
define <16 x i8> @load_v16i8_from_global_address() {
; CHECK-LABEL: load_v16i8_from_global_address:
; CHECK:         .functype load_v16i8_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v16i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <16 x i8>, <16 x i8>* @gv_v16i8
  ret <16 x i8> %v
}

@gv_i8 = global i8 42
define <16 x i8> @load_splat_v16i8_from_global_address() {
; CHECK-LABEL: load_splat_v16i8_from_global_address:
; CHECK:         .functype load_splat_v16i8_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v8x16.load_splat gv_i8
; CHECK-NEXT:    # fallthrough-return
  %e = load i8, i8* @gv_i8
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define void @store_v16i8(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8:
; CHECK:         .functype store_v16i8 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <16 x i8> %v , <16 x i8>* %p
  ret void
}

define void @store_v16i8_with_folded_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_folded_offset:
; CHECK:         .functype store_v16i8_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_with_folded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_folded_gep_offset:
; CHECK:         .functype store_v16i8_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_with_unfolded_gep_negative_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v16i8_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_with_unfolded_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_offset:
; CHECK:         .functype store_v16i8_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_with_unfolded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype store_v16i8_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_to_numeric_address(<16 x i8> %v) {
; CHECK-LABEL: store_v16i8_to_numeric_address:
; CHECK:         .functype store_v16i8_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <16 x i8>*
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_to_global_address(<16 x i8> %v) {
; CHECK-LABEL: store_v16i8_to_global_address:
; CHECK:         .functype store_v16i8_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v16i8
; CHECK-NEXT:    # fallthrough-return
  store <16 x i8> %v , <16 x i8>* @gv_v16i8
  ret void
}

; ==============================================================================
; 8 x i16
; ==============================================================================
define <8 x i16> @load_v8i16(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16:
; CHECK:         .functype load_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i16>, <8 x i16>* %p
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16(i16* %p) {
; CHECK-LABEL: load_splat_v8i16:
; CHECK:         .functype load_splat_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v16x8.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i16, i16* %p
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16:
; CHECK:         .functype load_sext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* %p
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16:
; CHECK:         .functype load_zext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* %p
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16:
; CHECK:         .functype load_ext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* %p
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_folded_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_folded_offset:
; CHECK:         .functype load_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_folded_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_folded_offset:
; CHECK:         .functype load_splat_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v16x8.load_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i16* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to i16*
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_folded_offset:
; CHECK:         .functype load_sext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_folded_offset:
; CHECK:         .functype load_zext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_folded_offset:
; CHECK:         .functype load_ext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_folded_gep_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_folded_gep_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_splat_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v16x8.load_splat 2
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, i16* %p, i32 1
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_sext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_zext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_ext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_unfolded_gep_negative_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_unfolded_gep_negative_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -2
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v16x8.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, i16* %p, i32 -1
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_unfolded_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_offset:
; CHECK:         .functype load_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_unfolded_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v16x8.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i16* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to i16*
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_unfolded_gep_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_unfolded_gep_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 2
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v16x8.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i16, i16* %p, i32 1
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_from_numeric_address() {
; CHECK-LABEL: load_v8i16_from_numeric_address:
; CHECK:         .functype load_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_from_numeric_address() {
; CHECK-LABEL: load_splat_v8i16_from_numeric_address:
; CHECK:         .functype load_splat_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v16x8.load_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to i16*
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_sext_v8i16_from_numeric_address:
; CHECK:         .functype load_sext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_s 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_zext_v8i16_from_numeric_address:
; CHECK:         .functype load_zext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_ext_v8i16_from_numeric_address:
; CHECK:         .functype load_ext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

@gv_v8i16 = global <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
define <8 x i16> @load_v8i16_from_global_address() {
; CHECK-LABEL: load_v8i16_from_global_address:
; CHECK:         .functype load_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v8i16
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i16>, <8 x i16>* @gv_v8i16
  ret <8 x i16> %v
}

@gv_i16 = global i16 42
define <8 x i16> @load_splat_v8i16_from_global_address() {
; CHECK-LABEL: load_splat_v8i16_from_global_address:
; CHECK:         .functype load_splat_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v16x8.load_splat gv_i16
; CHECK-NEXT:    # fallthrough-return
  %e = load i16, i16* @gv_i16
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

@gv_v8i8 = global <8 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
define <8 x i16> @load_sext_v8i16_from_global_address() {
; CHECK-LABEL: load_sext_v8i16_from_global_address:
; CHECK:         .functype load_sext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_s gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_from_global_address() {
; CHECK-LABEL: load_zext_v8i16_from_global_address:
; CHECK:         .functype load_zext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_from_global_address() {
; CHECK-LABEL: load_ext_v8i16_from_global_address:
; CHECK:         .functype load_ext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  ret <8 x i8> %v
}


define void @store_v8i16(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16:
; CHECK:         .functype store_v8i16 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <8 x i16> %v , <8 x i16>* %p
  ret void
}

; Narrowing store of <8 x i8>: the checks show the backend masking each lane
; to 8 bits (0x00ff00ff splat + and), narrowing to i8x16, and storing the low
; 64 bits via i64x2.extract_lane + i64.store.
define void @store_narrowing_v8i16(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16:
; CHECK:         .functype store_narrowing_v8i16 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16711935
; CHECK-NEXT:    i32x4.splat
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.narrow_i16x8_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store 0
; CHECK-NEXT:    # fallthrough-return
  store <8 x i8> %v, <8 x i8>* %p
  ret void
}

define void @store_v8i16_with_folded_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_folded_offset:
; CHECK:         .functype store_v8i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

define void @store_narrowing_v8i16_with_folded_offset(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_folded_offset:
; CHECK:         .functype store_narrowing_v8i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16711935
; CHECK-NEXT:    i32x4.splat
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.narrow_i16x8_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  store <8 x i8> %v , <8 x i8>* %s
  ret void
}

define void @store_v8i16_with_folded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_folded_gep_offset:
; CHECK:         .functype store_v8i16_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
982; CHECK-NEXT: # fallthrough-return 983 %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1 984 store <8 x i16> %v , <8 x i16>* %s 985 ret void 986} 987 988define void @store_narrowing_v8i16_with_folded_gep_offset(<8 x i8> %v, <8 x i8>* %p) { 989; CHECK-LABEL: store_narrowing_v8i16_with_folded_gep_offset: 990; CHECK: .functype store_narrowing_v8i16_with_folded_gep_offset (v128, i32) -> () 991; CHECK-NEXT: # %bb.0: 992; CHECK-NEXT: local.get 1 993; CHECK-NEXT: i32.const 16711935 994; CHECK-NEXT: i32x4.splat 995; CHECK-NEXT: local.get 0 996; CHECK-NEXT: v128.and 997; CHECK-NEXT: local.get 0 998; CHECK-NEXT: i8x16.narrow_i16x8_u 999; CHECK-NEXT: i64x2.extract_lane 0 1000; CHECK-NEXT: i64.store 8 1001; CHECK-NEXT: # fallthrough-return 1002 %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1 1003 store <8 x i8> %v , <8 x i8>* %s 1004 ret void 1005} 1006 1007define void @store_v8i16_with_unfolded_gep_negative_offset(<8 x i16> %v, <8 x i16>* %p) { 1008; CHECK-LABEL: store_v8i16_with_unfolded_gep_negative_offset: 1009; CHECK: .functype store_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> () 1010; CHECK-NEXT: # %bb.0: 1011; CHECK-NEXT: local.get 1 1012; CHECK-NEXT: i32.const -16 1013; CHECK-NEXT: i32.add 1014; CHECK-NEXT: local.get 0 1015; CHECK-NEXT: v128.store 0 1016; CHECK-NEXT: # fallthrough-return 1017 %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1 1018 store <8 x i16> %v , <8 x i16>* %s 1019 ret void 1020} 1021 1022define void @store_narrowing_v8i16_with_unfolded_gep_negative_offset(<8 x i8> %v, <8 x i8>* %p) { 1023; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_negative_offset: 1024; CHECK: .functype store_narrowing_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> () 1025; CHECK-NEXT: # %bb.0: 1026; CHECK-NEXT: local.get 1 1027; CHECK-NEXT: i32.const -8 1028; CHECK-NEXT: i32.add 1029; CHECK-NEXT: i32.const 16711935 1030; CHECK-NEXT: i32x4.splat 1031; CHECK-NEXT: local.get 0 1032; CHECK-NEXT: v128.and 1033; 
CHECK-NEXT: local.get 0 1034; CHECK-NEXT: i8x16.narrow_i16x8_u 1035; CHECK-NEXT: i64x2.extract_lane 0 1036; CHECK-NEXT: i64.store 0 1037; CHECK-NEXT: # fallthrough-return 1038 %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1 1039 store <8 x i8> %v , <8 x i8>* %s 1040 ret void 1041} 1042 1043define void @store_v8i16_with_unfolded_offset(<8 x i16> %v, <8 x i16>* %p) { 1044; CHECK-LABEL: store_v8i16_with_unfolded_offset: 1045; CHECK: .functype store_v8i16_with_unfolded_offset (v128, i32) -> () 1046; CHECK-NEXT: # %bb.0: 1047; CHECK-NEXT: local.get 1 1048; CHECK-NEXT: i32.const 16 1049; CHECK-NEXT: i32.add 1050; CHECK-NEXT: local.get 0 1051; CHECK-NEXT: v128.store 0 1052; CHECK-NEXT: # fallthrough-return 1053 %q = ptrtoint <8 x i16>* %p to i32 1054 %r = add nsw i32 %q, 16 1055 %s = inttoptr i32 %r to <8 x i16>* 1056 store <8 x i16> %v , <8 x i16>* %s 1057 ret void 1058} 1059 1060define void @store_narrowing_v8i16_with_unfolded_offset(<8 x i8> %v, <8 x i8>* %p) { 1061; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_offset: 1062; CHECK: .functype store_narrowing_v8i16_with_unfolded_offset (v128, i32) -> () 1063; CHECK-NEXT: # %bb.0: 1064; CHECK-NEXT: local.get 1 1065; CHECK-NEXT: i32.const 16 1066; CHECK-NEXT: i32.add 1067; CHECK-NEXT: i32.const 16711935 1068; CHECK-NEXT: i32x4.splat 1069; CHECK-NEXT: local.get 0 1070; CHECK-NEXT: v128.and 1071; CHECK-NEXT: local.get 0 1072; CHECK-NEXT: i8x16.narrow_i16x8_u 1073; CHECK-NEXT: i64x2.extract_lane 0 1074; CHECK-NEXT: i64.store 0 1075; CHECK-NEXT: # fallthrough-return 1076 %q = ptrtoint <8 x i8>* %p to i32 1077 %r = add nsw i32 %q, 16 1078 %s = inttoptr i32 %r to <8 x i8>* 1079 store <8 x i8> %v , <8 x i8>* %s 1080 ret void 1081} 1082 1083define void @store_v8i16_with_unfolded_gep_offset(<8 x i16> %v, <8 x i16>* %p) { 1084; CHECK-LABEL: store_v8i16_with_unfolded_gep_offset: 1085; CHECK: .functype store_v8i16_with_unfolded_gep_offset (v128, i32) -> () 1086; CHECK-NEXT: # %bb.0: 1087; CHECK-NEXT: local.get 1 1088; 
CHECK-NEXT: i32.const 16 1089; CHECK-NEXT: i32.add 1090; CHECK-NEXT: local.get 0 1091; CHECK-NEXT: v128.store 0 1092; CHECK-NEXT: # fallthrough-return 1093 %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1 1094 store <8 x i16> %v , <8 x i16>* %s 1095 ret void 1096} 1097 1098define void @store_narrowing_v8i16_with_unfolded_gep_offset(<8 x i8> %v, <8 x i8>* %p) { 1099; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_offset: 1100; CHECK: .functype store_narrowing_v8i16_with_unfolded_gep_offset (v128, i32) -> () 1101; CHECK-NEXT: # %bb.0: 1102; CHECK-NEXT: local.get 1 1103; CHECK-NEXT: i32.const 8 1104; CHECK-NEXT: i32.add 1105; CHECK-NEXT: i32.const 16711935 1106; CHECK-NEXT: i32x4.splat 1107; CHECK-NEXT: local.get 0 1108; CHECK-NEXT: v128.and 1109; CHECK-NEXT: local.get 0 1110; CHECK-NEXT: i8x16.narrow_i16x8_u 1111; CHECK-NEXT: i64x2.extract_lane 0 1112; CHECK-NEXT: i64.store 0 1113; CHECK-NEXT: # fallthrough-return 1114 %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1 1115 store <8 x i8> %v , <8 x i8>* %s 1116 ret void 1117} 1118 1119define void @store_v8i16_to_numeric_address(<8 x i16> %v) { 1120; CHECK-LABEL: store_v8i16_to_numeric_address: 1121; CHECK: .functype store_v8i16_to_numeric_address (v128) -> () 1122; CHECK-NEXT: # %bb.0: 1123; CHECK-NEXT: i32.const 0 1124; CHECK-NEXT: local.get 0 1125; CHECK-NEXT: v128.store 32 1126; CHECK-NEXT: # fallthrough-return 1127 %s = inttoptr i32 32 to <8 x i16>* 1128 store <8 x i16> %v , <8 x i16>* %s 1129 ret void 1130} 1131 1132define void @store_narrowing_v8i16_to_numeric_address(<8 x i8> %v, <8 x i8>* %p) { 1133; CHECK-LABEL: store_narrowing_v8i16_to_numeric_address: 1134; CHECK: .functype store_narrowing_v8i16_to_numeric_address (v128, i32) -> () 1135; CHECK-NEXT: # %bb.0: 1136; CHECK-NEXT: i32.const 0 1137; CHECK-NEXT: i32.const 16711935 1138; CHECK-NEXT: i32x4.splat 1139; CHECK-NEXT: local.get 0 1140; CHECK-NEXT: v128.and 1141; CHECK-NEXT: local.get 0 1142; CHECK-NEXT: i8x16.narrow_i16x8_u 1143; CHECK-NEXT: 
i64x2.extract_lane 0 1144; CHECK-NEXT: i64.store 32 1145; CHECK-NEXT: # fallthrough-return 1146 %s = inttoptr i32 32 to <8 x i8>* 1147 store <8 x i8> %v , <8 x i8>* %s 1148 ret void 1149} 1150 1151define void @store_v8i16_to_global_address(<8 x i16> %v) { 1152; CHECK-LABEL: store_v8i16_to_global_address: 1153; CHECK: .functype store_v8i16_to_global_address (v128) -> () 1154; CHECK-NEXT: # %bb.0: 1155; CHECK-NEXT: i32.const 0 1156; CHECK-NEXT: local.get 0 1157; CHECK-NEXT: v128.store gv_v8i16 1158; CHECK-NEXT: # fallthrough-return 1159 store <8 x i16> %v , <8 x i16>* @gv_v8i16 1160 ret void 1161} 1162 1163define void @store_narrowing_v8i16_to_global_address(<8 x i8> %v) { 1164; CHECK-LABEL: store_narrowing_v8i16_to_global_address: 1165; CHECK: .functype store_narrowing_v8i16_to_global_address (v128) -> () 1166; CHECK-NEXT: # %bb.0: 1167; CHECK-NEXT: i32.const 0 1168; CHECK-NEXT: i32.const 16711935 1169; CHECK-NEXT: i32x4.splat 1170; CHECK-NEXT: local.get 0 1171; CHECK-NEXT: v128.and 1172; CHECK-NEXT: local.get 0 1173; CHECK-NEXT: i8x16.narrow_i16x8_u 1174; CHECK-NEXT: i64x2.extract_lane 0 1175; CHECK-NEXT: i64.store gv_v8i8 1176; CHECK-NEXT: # fallthrough-return 1177 store <8 x i8> %v , <8 x i8>* @gv_v8i8 1178 ret void 1179} 1180 1181; ============================================================================== 1182; 4 x i32 1183; ============================================================================== 1184define <4 x i32> @load_v4i32(<4 x i32>* %p) { 1185; CHECK-LABEL: load_v4i32: 1186; CHECK: .functype load_v4i32 (i32) -> (v128) 1187; CHECK-NEXT: # %bb.0: 1188; CHECK-NEXT: local.get 0 1189; CHECK-NEXT: v128.load 0 1190; CHECK-NEXT: # fallthrough-return 1191 %v = load <4 x i32>, <4 x i32>* %p 1192 ret <4 x i32> %v 1193} 1194 1195define <4 x i32> @load_splat_v4i32(i32* %addr) { 1196; CHECK-LABEL: load_splat_v4i32: 1197; CHECK: .functype load_splat_v4i32 (i32) -> (v128) 1198; CHECK-NEXT: # %bb.0: 1199; CHECK-NEXT: local.get 0 1200; CHECK-NEXT: 
v32x4.load_splat 0 1201; CHECK-NEXT: # fallthrough-return 1202 %e = load i32, i32* %addr, align 4 1203 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1204 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1205 ret <4 x i32> %v2 1206} 1207 1208define <4 x i32> @load_sext_v4i32(<4 x i16>* %p) { 1209; CHECK-LABEL: load_sext_v4i32: 1210; CHECK: .functype load_sext_v4i32 (i32) -> (v128) 1211; CHECK-NEXT: # %bb.0: 1212; CHECK-NEXT: local.get 0 1213; CHECK-NEXT: i32x4.load16x4_s 0 1214; CHECK-NEXT: # fallthrough-return 1215 %v = load <4 x i16>, <4 x i16>* %p 1216 %v2 = sext <4 x i16> %v to <4 x i32> 1217 ret <4 x i32> %v2 1218} 1219 1220define <4 x i32> @load_zext_v4i32(<4 x i16>* %p) { 1221; CHECK-LABEL: load_zext_v4i32: 1222; CHECK: .functype load_zext_v4i32 (i32) -> (v128) 1223; CHECK-NEXT: # %bb.0: 1224; CHECK-NEXT: local.get 0 1225; CHECK-NEXT: i32x4.load16x4_u 0 1226; CHECK-NEXT: # fallthrough-return 1227 %v = load <4 x i16>, <4 x i16>* %p 1228 %v2 = zext <4 x i16> %v to <4 x i32> 1229 ret <4 x i32> %v2 1230} 1231 1232define <4 x i16> @load_ext_v4i32(<4 x i16>* %p) { 1233; CHECK-LABEL: load_ext_v4i32: 1234; CHECK: .functype load_ext_v4i32 (i32) -> (v128) 1235; CHECK-NEXT: # %bb.0: 1236; CHECK-NEXT: local.get 0 1237; CHECK-NEXT: i32x4.load16x4_u 0 1238; CHECK-NEXT: # fallthrough-return 1239 %v = load <4 x i16>, <4 x i16>* %p 1240 ret <4 x i16> %v 1241} 1242 1243define <4 x i32> @load_v4i32_with_folded_offset(<4 x i32>* %p) { 1244; CHECK-LABEL: load_v4i32_with_folded_offset: 1245; CHECK: .functype load_v4i32_with_folded_offset (i32) -> (v128) 1246; CHECK-NEXT: # %bb.0: 1247; CHECK-NEXT: local.get 0 1248; CHECK-NEXT: v128.load 16 1249; CHECK-NEXT: # fallthrough-return 1250 %q = ptrtoint <4 x i32>* %p to i32 1251 %r = add nuw i32 %q, 16 1252 %s = inttoptr i32 %r to <4 x i32>* 1253 %v = load <4 x i32>, <4 x i32>* %s 1254 ret <4 x i32> %v 1255} 1256 1257define <4 x i32> @load_splat_v4i32_with_folded_offset(i32* %p) { 1258; CHECK-LABEL: 
load_splat_v4i32_with_folded_offset: 1259; CHECK: .functype load_splat_v4i32_with_folded_offset (i32) -> (v128) 1260; CHECK-NEXT: # %bb.0: 1261; CHECK-NEXT: local.get 0 1262; CHECK-NEXT: v32x4.load_splat 16 1263; CHECK-NEXT: # fallthrough-return 1264 %q = ptrtoint i32* %p to i32 1265 %r = add nuw i32 %q, 16 1266 %s = inttoptr i32 %r to i32* 1267 %e = load i32, i32* %s 1268 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1269 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1270 ret <4 x i32> %v2 1271} 1272 1273define <4 x i32> @load_sext_v4i32_with_folded_offset(<4 x i16>* %p) { 1274; CHECK-LABEL: load_sext_v4i32_with_folded_offset: 1275; CHECK: .functype load_sext_v4i32_with_folded_offset (i32) -> (v128) 1276; CHECK-NEXT: # %bb.0: 1277; CHECK-NEXT: local.get 0 1278; CHECK-NEXT: i32x4.load16x4_s 16 1279; CHECK-NEXT: # fallthrough-return 1280 %q = ptrtoint <4 x i16>* %p to i32 1281 %r = add nuw i32 %q, 16 1282 %s = inttoptr i32 %r to <4 x i16>* 1283 %v = load <4 x i16>, <4 x i16>* %s 1284 %v2 = sext <4 x i16> %v to <4 x i32> 1285 ret <4 x i32> %v2 1286} 1287 1288define <4 x i32> @load_zext_v4i32_with_folded_offset(<4 x i16>* %p) { 1289; CHECK-LABEL: load_zext_v4i32_with_folded_offset: 1290; CHECK: .functype load_zext_v4i32_with_folded_offset (i32) -> (v128) 1291; CHECK-NEXT: # %bb.0: 1292; CHECK-NEXT: local.get 0 1293; CHECK-NEXT: i32x4.load16x4_u 16 1294; CHECK-NEXT: # fallthrough-return 1295 %q = ptrtoint <4 x i16>* %p to i32 1296 %r = add nuw i32 %q, 16 1297 %s = inttoptr i32 %r to <4 x i16>* 1298 %v = load <4 x i16>, <4 x i16>* %s 1299 %v2 = zext <4 x i16> %v to <4 x i32> 1300 ret <4 x i32> %v2 1301} 1302 1303define <4 x i16> @load_ext_v4i32_with_folded_offset(<4 x i16>* %p) { 1304; CHECK-LABEL: load_ext_v4i32_with_folded_offset: 1305; CHECK: .functype load_ext_v4i32_with_folded_offset (i32) -> (v128) 1306; CHECK-NEXT: # %bb.0: 1307; CHECK-NEXT: local.get 0 1308; CHECK-NEXT: i32x4.load16x4_u 16 1309; CHECK-NEXT: # 
fallthrough-return 1310 %q = ptrtoint <4 x i16>* %p to i32 1311 %r = add nuw i32 %q, 16 1312 %s = inttoptr i32 %r to <4 x i16>* 1313 %v = load <4 x i16>, <4 x i16>* %s 1314 ret <4 x i16> %v 1315} 1316 1317define <4 x i32> @load_v4i32_with_folded_gep_offset(<4 x i32>* %p) { 1318; CHECK-LABEL: load_v4i32_with_folded_gep_offset: 1319; CHECK: .functype load_v4i32_with_folded_gep_offset (i32) -> (v128) 1320; CHECK-NEXT: # %bb.0: 1321; CHECK-NEXT: local.get 0 1322; CHECK-NEXT: v128.load 16 1323; CHECK-NEXT: # fallthrough-return 1324 %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1 1325 %v = load <4 x i32>, <4 x i32>* %s 1326 ret <4 x i32> %v 1327} 1328 1329define <4 x i32> @load_splat_v4i32_with_folded_gep_offset(i32* %p) { 1330; CHECK-LABEL: load_splat_v4i32_with_folded_gep_offset: 1331; CHECK: .functype load_splat_v4i32_with_folded_gep_offset (i32) -> (v128) 1332; CHECK-NEXT: # %bb.0: 1333; CHECK-NEXT: local.get 0 1334; CHECK-NEXT: v32x4.load_splat 4 1335; CHECK-NEXT: # fallthrough-return 1336 %s = getelementptr inbounds i32, i32* %p, i32 1 1337 %e = load i32, i32* %s 1338 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1339 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1340 ret <4 x i32> %v2 1341} 1342 1343define <4 x i32> @load_sext_v4i32_with_folded_gep_offset(<4 x i16>* %p) { 1344; CHECK-LABEL: load_sext_v4i32_with_folded_gep_offset: 1345; CHECK: .functype load_sext_v4i32_with_folded_gep_offset (i32) -> (v128) 1346; CHECK-NEXT: # %bb.0: 1347; CHECK-NEXT: local.get 0 1348; CHECK-NEXT: i32x4.load16x4_s 8 1349; CHECK-NEXT: # fallthrough-return 1350 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1 1351 %v = load <4 x i16>, <4 x i16>* %s 1352 %v2 = sext <4 x i16> %v to <4 x i32> 1353 ret <4 x i32> %v2 1354} 1355 1356define <4 x i32> @load_zext_v4i32_with_folded_gep_offset(<4 x i16>* %p) { 1357; CHECK-LABEL: load_zext_v4i32_with_folded_gep_offset: 1358; CHECK: .functype load_zext_v4i32_with_folded_gep_offset (i32) 
-> (v128) 1359; CHECK-NEXT: # %bb.0: 1360; CHECK-NEXT: local.get 0 1361; CHECK-NEXT: i32x4.load16x4_u 8 1362; CHECK-NEXT: # fallthrough-return 1363 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1 1364 %v = load <4 x i16>, <4 x i16>* %s 1365 %v2 = zext <4 x i16> %v to <4 x i32> 1366 ret <4 x i32> %v2 1367} 1368 1369define <4 x i16> @load_ext_v4i32_with_folded_gep_offset(<4 x i16>* %p) { 1370; CHECK-LABEL: load_ext_v4i32_with_folded_gep_offset: 1371; CHECK: .functype load_ext_v4i32_with_folded_gep_offset (i32) -> (v128) 1372; CHECK-NEXT: # %bb.0: 1373; CHECK-NEXT: local.get 0 1374; CHECK-NEXT: i32x4.load16x4_u 8 1375; CHECK-NEXT: # fallthrough-return 1376 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1 1377 %v = load <4 x i16>, <4 x i16>* %s 1378 ret <4 x i16> %v 1379} 1380 1381define <4 x i32> @load_v4i32_with_unfolded_gep_negative_offset(<4 x i32>* %p) { 1382; CHECK-LABEL: load_v4i32_with_unfolded_gep_negative_offset: 1383; CHECK: .functype load_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1384; CHECK-NEXT: # %bb.0: 1385; CHECK-NEXT: local.get 0 1386; CHECK-NEXT: i32.const -16 1387; CHECK-NEXT: i32.add 1388; CHECK-NEXT: v128.load 0 1389; CHECK-NEXT: # fallthrough-return 1390 %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1 1391 %v = load <4 x i32>, <4 x i32>* %s 1392 ret <4 x i32> %v 1393} 1394 1395define <4 x i32> @load_splat_v4i32_with_unfolded_gep_negative_offset(i32* %p) { 1396; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_negative_offset: 1397; CHECK: .functype load_splat_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1398; CHECK-NEXT: # %bb.0: 1399; CHECK-NEXT: local.get 0 1400; CHECK-NEXT: i32.const -4 1401; CHECK-NEXT: i32.add 1402; CHECK-NEXT: v32x4.load_splat 0 1403; CHECK-NEXT: # fallthrough-return 1404 %s = getelementptr inbounds i32, i32* %p, i32 -1 1405 %e = load i32, i32* %s 1406 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1407 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x 
i32> zeroinitializer 1408 ret <4 x i32> %v2 1409} 1410 1411define <4 x i32> @load_sext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) { 1412; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_negative_offset: 1413; CHECK: .functype load_sext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1414; CHECK-NEXT: # %bb.0: 1415; CHECK-NEXT: local.get 0 1416; CHECK-NEXT: i32.const -8 1417; CHECK-NEXT: i32.add 1418; CHECK-NEXT: i32x4.load16x4_s 0 1419; CHECK-NEXT: # fallthrough-return 1420 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1 1421 %v = load <4 x i16>, <4 x i16>* %s 1422 %v2 = sext <4 x i16> %v to <4 x i32> 1423 ret <4 x i32> %v2 1424} 1425 1426define <4 x i32> @load_zext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) { 1427; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_negative_offset: 1428; CHECK: .functype load_zext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1429; CHECK-NEXT: # %bb.0: 1430; CHECK-NEXT: local.get 0 1431; CHECK-NEXT: i32.const -8 1432; CHECK-NEXT: i32.add 1433; CHECK-NEXT: i32x4.load16x4_u 0 1434; CHECK-NEXT: # fallthrough-return 1435 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1 1436 %v = load <4 x i16>, <4 x i16>* %s 1437 %v2 = zext <4 x i16> %v to <4 x i32> 1438 ret <4 x i32> %v2 1439} 1440 1441define <4 x i16> @load_ext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) { 1442; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_negative_offset: 1443; CHECK: .functype load_ext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1444; CHECK-NEXT: # %bb.0: 1445; CHECK-NEXT: local.get 0 1446; CHECK-NEXT: i32.const -8 1447; CHECK-NEXT: i32.add 1448; CHECK-NEXT: i32x4.load16x4_u 0 1449; CHECK-NEXT: # fallthrough-return 1450 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1 1451 %v = load <4 x i16>, <4 x i16>* %s 1452 ret <4 x i16> %v 1453} 1454 1455define <4 x i32> @load_v4i32_with_unfolded_offset(<4 x i32>* %p) { 1456; CHECK-LABEL: load_v4i32_with_unfolded_offset: 1457; 
CHECK: .functype load_v4i32_with_unfolded_offset (i32) -> (v128) 1458; CHECK-NEXT: # %bb.0: 1459; CHECK-NEXT: local.get 0 1460; CHECK-NEXT: i32.const 16 1461; CHECK-NEXT: i32.add 1462; CHECK-NEXT: v128.load 0 1463; CHECK-NEXT: # fallthrough-return 1464 %q = ptrtoint <4 x i32>* %p to i32 1465 %r = add nsw i32 %q, 16 1466 %s = inttoptr i32 %r to <4 x i32>* 1467 %v = load <4 x i32>, <4 x i32>* %s 1468 ret <4 x i32> %v 1469} 1470 1471define <4 x i32> @load_splat_v4i32_with_unfolded_offset(i32* %p) { 1472; CHECK-LABEL: load_splat_v4i32_with_unfolded_offset: 1473; CHECK: .functype load_splat_v4i32_with_unfolded_offset (i32) -> (v128) 1474; CHECK-NEXT: # %bb.0: 1475; CHECK-NEXT: local.get 0 1476; CHECK-NEXT: i32.const 16 1477; CHECK-NEXT: i32.add 1478; CHECK-NEXT: v32x4.load_splat 0 1479; CHECK-NEXT: # fallthrough-return 1480 %q = ptrtoint i32* %p to i32 1481 %r = add nsw i32 %q, 16 1482 %s = inttoptr i32 %r to i32* 1483 %e = load i32, i32* %s 1484 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1485 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1486 ret <4 x i32> %v2 1487} 1488 1489define <4 x i32> @load_sext_v4i32_with_unfolded_offset(<4 x i16>* %p) { 1490; CHECK-LABEL: load_sext_v4i32_with_unfolded_offset: 1491; CHECK: .functype load_sext_v4i32_with_unfolded_offset (i32) -> (v128) 1492; CHECK-NEXT: # %bb.0: 1493; CHECK-NEXT: local.get 0 1494; CHECK-NEXT: i32.const 16 1495; CHECK-NEXT: i32.add 1496; CHECK-NEXT: i32x4.load16x4_s 0 1497; CHECK-NEXT: # fallthrough-return 1498 %q = ptrtoint <4 x i16>* %p to i32 1499 %r = add nsw i32 %q, 16 1500 %s = inttoptr i32 %r to <4 x i16>* 1501 %v = load <4 x i16>, <4 x i16>* %s 1502 %v2 = sext <4 x i16> %v to <4 x i32> 1503 ret <4 x i32> %v2 1504} 1505 1506define <4 x i32> @load_zext_v4i32_with_unfolded_offset(<4 x i16>* %p) { 1507; CHECK-LABEL: load_zext_v4i32_with_unfolded_offset: 1508; CHECK: .functype load_zext_v4i32_with_unfolded_offset (i32) -> (v128) 1509; CHECK-NEXT: # %bb.0: 1510; 
CHECK-NEXT: local.get 0 1511; CHECK-NEXT: i32.const 16 1512; CHECK-NEXT: i32.add 1513; CHECK-NEXT: i32x4.load16x4_u 0 1514; CHECK-NEXT: # fallthrough-return 1515 %q = ptrtoint <4 x i16>* %p to i32 1516 %r = add nsw i32 %q, 16 1517 %s = inttoptr i32 %r to <4 x i16>* 1518 %v = load <4 x i16>, <4 x i16>* %s 1519 %v2 = zext <4 x i16> %v to <4 x i32> 1520 ret <4 x i32> %v2 1521} 1522 1523define <4 x i16> @load_ext_v4i32_with_unfolded_offset(<4 x i16>* %p) { 1524; CHECK-LABEL: load_ext_v4i32_with_unfolded_offset: 1525; CHECK: .functype load_ext_v4i32_with_unfolded_offset (i32) -> (v128) 1526; CHECK-NEXT: # %bb.0: 1527; CHECK-NEXT: local.get 0 1528; CHECK-NEXT: i32.const 16 1529; CHECK-NEXT: i32.add 1530; CHECK-NEXT: i32x4.load16x4_u 0 1531; CHECK-NEXT: # fallthrough-return 1532 %q = ptrtoint <4 x i16>* %p to i32 1533 %r = add nsw i32 %q, 16 1534 %s = inttoptr i32 %r to <4 x i16>* 1535 %v = load <4 x i16>, <4 x i16>* %s 1536 ret <4 x i16> %v 1537} 1538 1539define <4 x i32> @load_v4i32_with_unfolded_gep_offset(<4 x i32>* %p) { 1540; CHECK-LABEL: load_v4i32_with_unfolded_gep_offset: 1541; CHECK: .functype load_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1542; CHECK-NEXT: # %bb.0: 1543; CHECK-NEXT: local.get 0 1544; CHECK-NEXT: i32.const 16 1545; CHECK-NEXT: i32.add 1546; CHECK-NEXT: v128.load 0 1547; CHECK-NEXT: # fallthrough-return 1548 %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1 1549 %v = load <4 x i32>, <4 x i32>* %s 1550 ret <4 x i32> %v 1551} 1552 1553define <4 x i32> @load_splat_v4i32_with_unfolded_gep_offset(i32* %p) { 1554; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_offset: 1555; CHECK: .functype load_splat_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1556; CHECK-NEXT: # %bb.0: 1557; CHECK-NEXT: local.get 0 1558; CHECK-NEXT: i32.const 4 1559; CHECK-NEXT: i32.add 1560; CHECK-NEXT: v32x4.load_splat 0 1561; CHECK-NEXT: # fallthrough-return 1562 %s = getelementptr i32, i32* %p, i32 1 1563 %e = load i32, i32* %s 1564 %v1 = insertelement <4 x i32> undef, 
i32 %e, i32 0 1565 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1566 ret <4 x i32> %v2 1567} 1568 1569define <4 x i32> @load_sext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) { 1570; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_offset: 1571; CHECK: .functype load_sext_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1572; CHECK-NEXT: # %bb.0: 1573; CHECK-NEXT: local.get 0 1574; CHECK-NEXT: i32.const 8 1575; CHECK-NEXT: i32.add 1576; CHECK-NEXT: i32x4.load16x4_s 0 1577; CHECK-NEXT: # fallthrough-return 1578 %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1 1579 %v = load <4 x i16>, <4 x i16>* %s 1580 %v2 = sext <4 x i16> %v to <4 x i32> 1581 ret <4 x i32> %v2 1582} 1583 1584define <4 x i32> @load_zext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) { 1585; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_offset: 1586; CHECK: .functype load_zext_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1587; CHECK-NEXT: # %bb.0: 1588; CHECK-NEXT: local.get 0 1589; CHECK-NEXT: i32.const 8 1590; CHECK-NEXT: i32.add 1591; CHECK-NEXT: i32x4.load16x4_u 0 1592; CHECK-NEXT: # fallthrough-return 1593 %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1 1594 %v = load <4 x i16>, <4 x i16>* %s 1595 %v2 = zext <4 x i16> %v to <4 x i32> 1596 ret <4 x i32> %v2 1597} 1598 1599define <4 x i16> @load_ext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) { 1600; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_offset: 1601; CHECK: .functype load_ext_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1602; CHECK-NEXT: # %bb.0: 1603; CHECK-NEXT: local.get 0 1604; CHECK-NEXT: i32.const 8 1605; CHECK-NEXT: i32.add 1606; CHECK-NEXT: i32x4.load16x4_u 0 1607; CHECK-NEXT: # fallthrough-return 1608 %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1 1609 %v = load <4 x i16>, <4 x i16>* %s 1610 ret <4 x i16> %v 1611} 1612 1613define <4 x i32> @load_v4i32_from_numeric_address() { 1614; CHECK-LABEL: load_v4i32_from_numeric_address: 1615; CHECK: .functype load_v4i32_from_numeric_address () 
-> (v128) 1616; CHECK-NEXT: # %bb.0: 1617; CHECK-NEXT: i32.const 0 1618; CHECK-NEXT: v128.load 32 1619; CHECK-NEXT: # fallthrough-return 1620 %s = inttoptr i32 32 to <4 x i32>* 1621 %v = load <4 x i32>, <4 x i32>* %s 1622 ret <4 x i32> %v 1623} 1624 1625define <4 x i32> @load_splat_v4i32_from_numeric_address() { 1626; CHECK-LABEL: load_splat_v4i32_from_numeric_address: 1627; CHECK: .functype load_splat_v4i32_from_numeric_address () -> (v128) 1628; CHECK-NEXT: # %bb.0: 1629; CHECK-NEXT: i32.const 0 1630; CHECK-NEXT: v32x4.load_splat 32 1631; CHECK-NEXT: # fallthrough-return 1632 %s = inttoptr i32 32 to i32* 1633 %e = load i32, i32* %s 1634 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1635 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1636 ret <4 x i32> %v2 1637} 1638 1639define <4 x i32> @load_sext_v4i32_from_numeric_address() { 1640; CHECK-LABEL: load_sext_v4i32_from_numeric_address: 1641; CHECK: .functype load_sext_v4i32_from_numeric_address () -> (v128) 1642; CHECK-NEXT: # %bb.0: 1643; CHECK-NEXT: i32.const 0 1644; CHECK-NEXT: i32x4.load16x4_s 32 1645; CHECK-NEXT: # fallthrough-return 1646 %s = inttoptr i32 32 to <4 x i16>* 1647 %v = load <4 x i16>, <4 x i16>* %s 1648 %v2 = sext <4 x i16> %v to <4 x i32> 1649 ret <4 x i32> %v2 1650} 1651 1652define <4 x i32> @load_zext_v4i32_from_numeric_address() { 1653; CHECK-LABEL: load_zext_v4i32_from_numeric_address: 1654; CHECK: .functype load_zext_v4i32_from_numeric_address () -> (v128) 1655; CHECK-NEXT: # %bb.0: 1656; CHECK-NEXT: i32.const 0 1657; CHECK-NEXT: i32x4.load16x4_u 32 1658; CHECK-NEXT: # fallthrough-return 1659 %s = inttoptr i32 32 to <4 x i16>* 1660 %v = load <4 x i16>, <4 x i16>* %s 1661 %v2 = zext <4 x i16> %v to <4 x i32> 1662 ret <4 x i32> %v2 1663} 1664 1665define <4 x i16> @load_ext_v4i32_from_numeric_address() { 1666; CHECK-LABEL: load_ext_v4i32_from_numeric_address: 1667; CHECK: .functype load_ext_v4i32_from_numeric_address () -> (v128) 1668; CHECK-NEXT: # %bb.0: 
1669; CHECK-NEXT: i32.const 0 1670; CHECK-NEXT: i32x4.load16x4_u 32 1671; CHECK-NEXT: # fallthrough-return 1672 %s = inttoptr i32 32 to <4 x i16>* 1673 %v = load <4 x i16>, <4 x i16>* %s 1674 ret <4 x i16> %v 1675} 1676 1677@gv_v4i32 = global <4 x i32> <i32 42, i32 42, i32 42, i32 42> 1678define <4 x i32> @load_v4i32_from_global_address() { 1679; CHECK-LABEL: load_v4i32_from_global_address: 1680; CHECK: .functype load_v4i32_from_global_address () -> (v128) 1681; CHECK-NEXT: # %bb.0: 1682; CHECK-NEXT: i32.const 0 1683; CHECK-NEXT: v128.load gv_v4i32 1684; CHECK-NEXT: # fallthrough-return 1685 %v = load <4 x i32>, <4 x i32>* @gv_v4i32 1686 ret <4 x i32> %v 1687} 1688 1689@gv_i32 = global i32 42 1690define <4 x i32> @load_splat_v4i32_from_global_address() { 1691; CHECK-LABEL: load_splat_v4i32_from_global_address: 1692; CHECK: .functype load_splat_v4i32_from_global_address () -> (v128) 1693; CHECK-NEXT: # %bb.0: 1694; CHECK-NEXT: i32.const 0 1695; CHECK-NEXT: v32x4.load_splat gv_i32 1696; CHECK-NEXT: # fallthrough-return 1697 %e = load i32, i32* @gv_i32 1698 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1699 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1700 ret <4 x i32> %v2 1701} 1702 1703@gv_v4i16 = global <4 x i16> <i16 42, i16 42, i16 42, i16 42> 1704define <4 x i32> @load_sext_v4i32_from_global_address() { 1705; CHECK-LABEL: load_sext_v4i32_from_global_address: 1706; CHECK: .functype load_sext_v4i32_from_global_address () -> (v128) 1707; CHECK-NEXT: # %bb.0: 1708; CHECK-NEXT: i32.const 0 1709; CHECK-NEXT: i32x4.load16x4_s gv_v4i16 1710; CHECK-NEXT: # fallthrough-return 1711 %v = load <4 x i16>, <4 x i16>* @gv_v4i16 1712 %v2 = sext <4 x i16> %v to <4 x i32> 1713 ret <4 x i32> %v2 1714} 1715 1716define <4 x i32> @load_zext_v4i32_from_global_address() { 1717; CHECK-LABEL: load_zext_v4i32_from_global_address: 1718; CHECK: .functype load_zext_v4i32_from_global_address () -> (v128) 1719; CHECK-NEXT: # %bb.0: 1720; CHECK-NEXT: 
i32.const 0 1721; CHECK-NEXT: i32x4.load16x4_u gv_v4i16 1722; CHECK-NEXT: # fallthrough-return 1723 %v = load <4 x i16>, <4 x i16>* @gv_v4i16 1724 %v2 = zext <4 x i16> %v to <4 x i32> 1725 ret <4 x i32> %v2 1726} 1727 1728define <4 x i16> @load_ext_v4i32_from_global_address() { 1729; CHECK-LABEL: load_ext_v4i32_from_global_address: 1730; CHECK: .functype load_ext_v4i32_from_global_address () -> (v128) 1731; CHECK-NEXT: # %bb.0: 1732; CHECK-NEXT: i32.const 0 1733; CHECK-NEXT: i32x4.load16x4_u gv_v4i16 1734; CHECK-NEXT: # fallthrough-return 1735 %v = load <4 x i16>, <4 x i16>* @gv_v4i16 1736 ret <4 x i16> %v 1737} 1738 1739define void @store_v4i32(<4 x i32> %v, <4 x i32>* %p) { 1740; CHECK-LABEL: store_v4i32: 1741; CHECK: .functype store_v4i32 (v128, i32) -> () 1742; CHECK-NEXT: # %bb.0: 1743; CHECK-NEXT: local.get 1 1744; CHECK-NEXT: local.get 0 1745; CHECK-NEXT: v128.store 0 1746; CHECK-NEXT: # fallthrough-return 1747 store <4 x i32> %v , <4 x i32>* %p 1748 ret void 1749} 1750 1751define void @store_narrowing_v4i32(<4 x i16> %v, <4 x i16>* %p) { 1752; CHECK-LABEL: store_narrowing_v4i32: 1753; CHECK: .functype store_narrowing_v4i32 (v128, i32) -> () 1754; CHECK-NEXT: # %bb.0: 1755; CHECK-NEXT: local.get 1 1756; CHECK-NEXT: i32.const 65535 1757; CHECK-NEXT: i32x4.splat 1758; CHECK-NEXT: local.get 0 1759; CHECK-NEXT: v128.and 1760; CHECK-NEXT: local.get 0 1761; CHECK-NEXT: i16x8.narrow_i32x4_u 1762; CHECK-NEXT: i64x2.extract_lane 0 1763; CHECK-NEXT: i64.store 0 1764; CHECK-NEXT: # fallthrough-return 1765 store <4 x i16> %v , <4 x i16>* %p 1766 ret void 1767} 1768 1769define void @store_v4i32_with_folded_offset(<4 x i32> %v, <4 x i32>* %p) { 1770; CHECK-LABEL: store_v4i32_with_folded_offset: 1771; CHECK: .functype store_v4i32_with_folded_offset (v128, i32) -> () 1772; CHECK-NEXT: # %bb.0: 1773; CHECK-NEXT: local.get 1 1774; CHECK-NEXT: local.get 0 1775; CHECK-NEXT: v128.store 16 1776; CHECK-NEXT: # fallthrough-return 1777 %q = ptrtoint <4 x i32>* %p to i32 1778 %r = 
add nuw i32 %q, 16 1779 %s = inttoptr i32 %r to <4 x i32>* 1780 store <4 x i32> %v , <4 x i32>* %s 1781 ret void 1782} 1783 1784define void @store_narrowing_v4i32_with_folded_offset(<4 x i16> %v, <4 x i16>* %p) { 1785; CHECK-LABEL: store_narrowing_v4i32_with_folded_offset: 1786; CHECK: .functype store_narrowing_v4i32_with_folded_offset (v128, i32) -> () 1787; CHECK-NEXT: # %bb.0: 1788; CHECK-NEXT: local.get 1 1789; CHECK-NEXT: i32.const 65535 1790; CHECK-NEXT: i32x4.splat 1791; CHECK-NEXT: local.get 0 1792; CHECK-NEXT: v128.and 1793; CHECK-NEXT: local.get 0 1794; CHECK-NEXT: i16x8.narrow_i32x4_u 1795; CHECK-NEXT: i64x2.extract_lane 0 1796; CHECK-NEXT: i64.store 16 1797; CHECK-NEXT: # fallthrough-return 1798 %q = ptrtoint <4 x i16>* %p to i32 1799 %r = add nuw i32 %q, 16 1800 %s = inttoptr i32 %r to <4 x i16>* 1801 store <4 x i16> %v , <4 x i16>* %s 1802 ret void 1803} 1804 1805define void @store_v4i32_with_folded_gep_offset(<4 x i32> %v, <4 x i32>* %p) { 1806; CHECK-LABEL: store_v4i32_with_folded_gep_offset: 1807; CHECK: .functype store_v4i32_with_folded_gep_offset (v128, i32) -> () 1808; CHECK-NEXT: # %bb.0: 1809; CHECK-NEXT: local.get 1 1810; CHECK-NEXT: local.get 0 1811; CHECK-NEXT: v128.store 16 1812; CHECK-NEXT: # fallthrough-return 1813 %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1 1814 store <4 x i32> %v , <4 x i32>* %s 1815 ret void 1816} 1817 1818define void @store_narrowing_v4i32_with_folded_gep_offset(<4 x i16> %v, <4 x i16>* %p) { 1819; CHECK-LABEL: store_narrowing_v4i32_with_folded_gep_offset: 1820; CHECK: .functype store_narrowing_v4i32_with_folded_gep_offset (v128, i32) -> () 1821; CHECK-NEXT: # %bb.0: 1822; CHECK-NEXT: local.get 1 1823; CHECK-NEXT: i32.const 65535 1824; CHECK-NEXT: i32x4.splat 1825; CHECK-NEXT: local.get 0 1826; CHECK-NEXT: v128.and 1827; CHECK-NEXT: local.get 0 1828; CHECK-NEXT: i16x8.narrow_i32x4_u 1829; CHECK-NEXT: i64x2.extract_lane 0 1830; CHECK-NEXT: i64.store 8 1831; CHECK-NEXT: # fallthrough-return 1832 %s = 
getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1 1833 store <4 x i16> %v , <4 x i16>* %s 1834 ret void 1835} 1836 1837define void @store_v4i32_with_unfolded_gep_negative_offset(<4 x i32> %v, <4 x i32>* %p) { 1838; CHECK-LABEL: store_v4i32_with_unfolded_gep_negative_offset: 1839; CHECK: .functype store_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> () 1840; CHECK-NEXT: # %bb.0: 1841; CHECK-NEXT: local.get 1 1842; CHECK-NEXT: i32.const -16 1843; CHECK-NEXT: i32.add 1844; CHECK-NEXT: local.get 0 1845; CHECK-NEXT: v128.store 0 1846; CHECK-NEXT: # fallthrough-return 1847 %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1 1848 store <4 x i32> %v , <4 x i32>* %s 1849 ret void 1850} 1851 1852define void @store_narrowing_v4i32_with_unfolded_gep_negative_offset(<4 x i16> %v, <4 x i16>* %p) { 1853; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_negative_offset: 1854; CHECK: .functype store_narrowing_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> () 1855; CHECK-NEXT: # %bb.0: 1856; CHECK-NEXT: local.get 1 1857; CHECK-NEXT: i32.const -8 1858; CHECK-NEXT: i32.add 1859; CHECK-NEXT: i32.const 65535 1860; CHECK-NEXT: i32x4.splat 1861; CHECK-NEXT: local.get 0 1862; CHECK-NEXT: v128.and 1863; CHECK-NEXT: local.get 0 1864; CHECK-NEXT: i16x8.narrow_i32x4_u 1865; CHECK-NEXT: i64x2.extract_lane 0 1866; CHECK-NEXT: i64.store 0 1867; CHECK-NEXT: # fallthrough-return 1868 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1 1869 store <4 x i16> %v , <4 x i16>* %s 1870 ret void 1871} 1872 1873define void @store_v4i32_with_unfolded_offset(<4 x i32> %v, <4 x i32>* %p) { 1874; CHECK-LABEL: store_v4i32_with_unfolded_offset: 1875; CHECK: .functype store_v4i32_with_unfolded_offset (v128, i32) -> () 1876; CHECK-NEXT: # %bb.0: 1877; CHECK-NEXT: local.get 1 1878; CHECK-NEXT: i32.const 16 1879; CHECK-NEXT: i32.add 1880; CHECK-NEXT: local.get 0 1881; CHECK-NEXT: v128.store 0 1882; CHECK-NEXT: # fallthrough-return 1883 %q = ptrtoint <4 x i32>* %p to i32 1884 
%r = add nsw i32 %q, 16 1885 %s = inttoptr i32 %r to <4 x i32>* 1886 store <4 x i32> %v , <4 x i32>* %s 1887 ret void 1888} 1889 1890define void @store_narrowing_v4i32_with_unfolded_offset(<4 x i16> %v, <4 x i16>* %p) { 1891; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_offset: 1892; CHECK: .functype store_narrowing_v4i32_with_unfolded_offset (v128, i32) -> () 1893; CHECK-NEXT: # %bb.0: 1894; CHECK-NEXT: local.get 1 1895; CHECK-NEXT: i32.const 16 1896; CHECK-NEXT: i32.add 1897; CHECK-NEXT: i32.const 65535 1898; CHECK-NEXT: i32x4.splat 1899; CHECK-NEXT: local.get 0 1900; CHECK-NEXT: v128.and 1901; CHECK-NEXT: local.get 0 1902; CHECK-NEXT: i16x8.narrow_i32x4_u 1903; CHECK-NEXT: i64x2.extract_lane 0 1904; CHECK-NEXT: i64.store 0 1905; CHECK-NEXT: # fallthrough-return 1906 %q = ptrtoint <4 x i16>* %p to i32 1907 %r = add nsw i32 %q, 16 1908 %s = inttoptr i32 %r to <4 x i16>* 1909 store <4 x i16> %v , <4 x i16>* %s 1910 ret void 1911} 1912 1913define void @store_v4i32_with_unfolded_gep_offset(<4 x i32> %v, <4 x i32>* %p) { 1914; CHECK-LABEL: store_v4i32_with_unfolded_gep_offset: 1915; CHECK: .functype store_v4i32_with_unfolded_gep_offset (v128, i32) -> () 1916; CHECK-NEXT: # %bb.0: 1917; CHECK-NEXT: local.get 1 1918; CHECK-NEXT: i32.const 16 1919; CHECK-NEXT: i32.add 1920; CHECK-NEXT: local.get 0 1921; CHECK-NEXT: v128.store 0 1922; CHECK-NEXT: # fallthrough-return 1923 %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1 1924 store <4 x i32> %v , <4 x i32>* %s 1925 ret void 1926} 1927 1928define void @store_narrowing_v4i32_with_unfolded_gep_offset(<4 x i16> %v, <4 x i16>* %p) { 1929; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_offset: 1930; CHECK: .functype store_narrowing_v4i32_with_unfolded_gep_offset (v128, i32) -> () 1931; CHECK-NEXT: # %bb.0: 1932; CHECK-NEXT: local.get 1 1933; CHECK-NEXT: i32.const 8 1934; CHECK-NEXT: i32.add 1935; CHECK-NEXT: i32.const 65535 1936; CHECK-NEXT: i32x4.splat 1937; CHECK-NEXT: local.get 0 1938; CHECK-NEXT: v128.and 1939; 
CHECK-NEXT: local.get 0 1940; CHECK-NEXT: i16x8.narrow_i32x4_u 1941; CHECK-NEXT: i64x2.extract_lane 0 1942; CHECK-NEXT: i64.store 0 1943; CHECK-NEXT: # fallthrough-return 1944 %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1 1945 store <4 x i16> %v , <4 x i16>* %s 1946 ret void 1947} 1948 1949define void @store_v4i32_to_numeric_address(<4 x i32> %v) { 1950; CHECK-LABEL: store_v4i32_to_numeric_address: 1951; CHECK: .functype store_v4i32_to_numeric_address (v128) -> () 1952; CHECK-NEXT: # %bb.0: 1953; CHECK-NEXT: i32.const 0 1954; CHECK-NEXT: local.get 0 1955; CHECK-NEXT: v128.store 32 1956; CHECK-NEXT: # fallthrough-return 1957 %s = inttoptr i32 32 to <4 x i32>* 1958 store <4 x i32> %v , <4 x i32>* %s 1959 ret void 1960} 1961 1962define void @store_narrowing_v4i32_to_numeric_address(<4 x i16> %v) { 1963; CHECK-LABEL: store_narrowing_v4i32_to_numeric_address: 1964; CHECK: .functype store_narrowing_v4i32_to_numeric_address (v128) -> () 1965; CHECK-NEXT: # %bb.0: 1966; CHECK-NEXT: i32.const 0 1967; CHECK-NEXT: i32.const 65535 1968; CHECK-NEXT: i32x4.splat 1969; CHECK-NEXT: local.get 0 1970; CHECK-NEXT: v128.and 1971; CHECK-NEXT: local.get 0 1972; CHECK-NEXT: i16x8.narrow_i32x4_u 1973; CHECK-NEXT: i64x2.extract_lane 0 1974; CHECK-NEXT: i64.store 32 1975; CHECK-NEXT: # fallthrough-return 1976 %s = inttoptr i32 32 to <4 x i16>* 1977 store <4 x i16> %v , <4 x i16>* %s 1978 ret void 1979} 1980 1981define void @store_v4i32_to_global_address(<4 x i32> %v) { 1982; CHECK-LABEL: store_v4i32_to_global_address: 1983; CHECK: .functype store_v4i32_to_global_address (v128) -> () 1984; CHECK-NEXT: # %bb.0: 1985; CHECK-NEXT: i32.const 0 1986; CHECK-NEXT: local.get 0 1987; CHECK-NEXT: v128.store gv_v4i32 1988; CHECK-NEXT: # fallthrough-return 1989 store <4 x i32> %v , <4 x i32>* @gv_v4i32 1990 ret void 1991} 1992 1993define void @store_narrowing_v4i32_to_global_address(<4 x i16> %v) { 1994; CHECK-LABEL: store_narrowing_v4i32_to_global_address: 1995; CHECK: .functype 
store_narrowing_v4i32_to_global_address (v128) -> () 1996; CHECK-NEXT: # %bb.0: 1997; CHECK-NEXT: i32.const 0 1998; CHECK-NEXT: i32.const 65535 1999; CHECK-NEXT: i32x4.splat 2000; CHECK-NEXT: local.get 0 2001; CHECK-NEXT: v128.and 2002; CHECK-NEXT: local.get 0 2003; CHECK-NEXT: i16x8.narrow_i32x4_u 2004; CHECK-NEXT: i64x2.extract_lane 0 2005; CHECK-NEXT: i64.store gv_v4i16 2006; CHECK-NEXT: # fallthrough-return 2007 store <4 x i16> %v , <4 x i16>* @gv_v4i16 2008 ret void 2009} 2010 2011; ============================================================================== 2012; 2 x i64 2013; ============================================================================== 2014define <2 x i64> @load_v2i64(<2 x i64>* %p) { 2015; CHECK-LABEL: load_v2i64: 2016; CHECK: .functype load_v2i64 (i32) -> (v128) 2017; CHECK-NEXT: # %bb.0: 2018; CHECK-NEXT: local.get 0 2019; CHECK-NEXT: v128.load 0 2020; CHECK-NEXT: # fallthrough-return 2021 %v = load <2 x i64>, <2 x i64>* %p 2022 ret <2 x i64> %v 2023} 2024 2025define <2 x i64> @load_splat_v2i64(i64* %p) { 2026; CHECK-LABEL: load_splat_v2i64: 2027; CHECK: .functype load_splat_v2i64 (i32) -> (v128) 2028; CHECK-NEXT: # %bb.0: 2029; CHECK-NEXT: local.get 0 2030; CHECK-NEXT: v64x2.load_splat 0 2031; CHECK-NEXT: # fallthrough-return 2032 %e = load i64, i64* %p 2033 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2034 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2035 ret <2 x i64> %v2 2036} 2037 2038define <2 x i64> @load_sext_v2i64(<2 x i32>* %p) { 2039; CHECK-LABEL: load_sext_v2i64: 2040; CHECK: .functype load_sext_v2i64 (i32) -> (v128) 2041; CHECK-NEXT: # %bb.0: 2042; CHECK-NEXT: local.get 0 2043; CHECK-NEXT: i64x2.load32x2_s 0 2044; CHECK-NEXT: # fallthrough-return 2045 %v = load <2 x i32>, <2 x i32>* %p 2046 %v2 = sext <2 x i32> %v to <2 x i64> 2047 ret <2 x i64> %v2 2048} 2049 2050define <2 x i64> @load_zext_v2i64(<2 x i32>* %p) { 2051; CHECK-LABEL: load_zext_v2i64: 2052; CHECK: .functype 
load_zext_v2i64 (i32) -> (v128) 2053; CHECK-NEXT: # %bb.0: 2054; CHECK-NEXT: local.get 0 2055; CHECK-NEXT: i64x2.load32x2_u 0 2056; CHECK-NEXT: # fallthrough-return 2057 %v = load <2 x i32>, <2 x i32>* %p 2058 %v2 = zext <2 x i32> %v to <2 x i64> 2059 ret <2 x i64> %v2 2060} 2061 2062define <2 x i32> @load_ext_v2i64(<2 x i32>* %p) { 2063; CHECK-LABEL: load_ext_v2i64: 2064; CHECK: .functype load_ext_v2i64 (i32) -> (v128) 2065; CHECK-NEXT: # %bb.0: 2066; CHECK-NEXT: local.get 0 2067; CHECK-NEXT: i64x2.load32x2_u 0 2068; CHECK-NEXT: # fallthrough-return 2069 %v = load <2 x i32>, <2 x i32>* %p 2070 ret <2 x i32> %v 2071} 2072 2073define <2 x i64> @load_v2i64_with_folded_offset(<2 x i64>* %p) { 2074; CHECK-LABEL: load_v2i64_with_folded_offset: 2075; CHECK: .functype load_v2i64_with_folded_offset (i32) -> (v128) 2076; CHECK-NEXT: # %bb.0: 2077; CHECK-NEXT: local.get 0 2078; CHECK-NEXT: v128.load 16 2079; CHECK-NEXT: # fallthrough-return 2080 %q = ptrtoint <2 x i64>* %p to i32 2081 %r = add nuw i32 %q, 16 2082 %s = inttoptr i32 %r to <2 x i64>* 2083 %v = load <2 x i64>, <2 x i64>* %s 2084 ret <2 x i64> %v 2085} 2086 2087define <2 x i64> @load_splat_v2i64_with_folded_offset(i64* %p) { 2088; CHECK-LABEL: load_splat_v2i64_with_folded_offset: 2089; CHECK: .functype load_splat_v2i64_with_folded_offset (i32) -> (v128) 2090; CHECK-NEXT: # %bb.0: 2091; CHECK-NEXT: local.get 0 2092; CHECK-NEXT: v64x2.load_splat 16 2093; CHECK-NEXT: # fallthrough-return 2094 %q = ptrtoint i64* %p to i32 2095 %r = add nuw i32 %q, 16 2096 %s = inttoptr i32 %r to i64* 2097 %e = load i64, i64* %s 2098 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2099 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2100 ret <2 x i64> %v2 2101} 2102 2103define <2 x i64> @load_sext_v2i64_with_folded_offset(<2 x i32>* %p) { 2104; CHECK-LABEL: load_sext_v2i64_with_folded_offset: 2105; CHECK: .functype load_sext_v2i64_with_folded_offset (i32) -> (v128) 2106; CHECK-NEXT: # %bb.0: 2107; 
CHECK-NEXT: local.get 0 2108; CHECK-NEXT: i64x2.load32x2_s 16 2109; CHECK-NEXT: # fallthrough-return 2110 %q = ptrtoint <2 x i32>* %p to i32 2111 %r = add nuw i32 %q, 16 2112 %s = inttoptr i32 %r to <2 x i32>* 2113 %v = load <2 x i32>, <2 x i32>* %s 2114 %v2 = sext <2 x i32> %v to <2 x i64> 2115 ret <2 x i64> %v2 2116} 2117 2118define <2 x i64> @load_zext_v2i64_with_folded_offset(<2 x i32>* %p) { 2119; CHECK-LABEL: load_zext_v2i64_with_folded_offset: 2120; CHECK: .functype load_zext_v2i64_with_folded_offset (i32) -> (v128) 2121; CHECK-NEXT: # %bb.0: 2122; CHECK-NEXT: local.get 0 2123; CHECK-NEXT: i64x2.load32x2_u 16 2124; CHECK-NEXT: # fallthrough-return 2125 %q = ptrtoint <2 x i32>* %p to i32 2126 %r = add nuw i32 %q, 16 2127 %s = inttoptr i32 %r to <2 x i32>* 2128 %v = load <2 x i32>, <2 x i32>* %s 2129 %v2 = zext <2 x i32> %v to <2 x i64> 2130 ret <2 x i64> %v2 2131} 2132 2133define <2 x i32> @load_ext_v2i64_with_folded_offset(<2 x i32>* %p) { 2134; CHECK-LABEL: load_ext_v2i64_with_folded_offset: 2135; CHECK: .functype load_ext_v2i64_with_folded_offset (i32) -> (v128) 2136; CHECK-NEXT: # %bb.0: 2137; CHECK-NEXT: local.get 0 2138; CHECK-NEXT: i64x2.load32x2_u 16 2139; CHECK-NEXT: # fallthrough-return 2140 %q = ptrtoint <2 x i32>* %p to i32 2141 %r = add nuw i32 %q, 16 2142 %s = inttoptr i32 %r to <2 x i32>* 2143 %v = load <2 x i32>, <2 x i32>* %s 2144 ret <2 x i32> %v 2145} 2146 2147define <2 x i64> @load_v2i64_with_folded_gep_offset(<2 x i64>* %p) { 2148; CHECK-LABEL: load_v2i64_with_folded_gep_offset: 2149; CHECK: .functype load_v2i64_with_folded_gep_offset (i32) -> (v128) 2150; CHECK-NEXT: # %bb.0: 2151; CHECK-NEXT: local.get 0 2152; CHECK-NEXT: v128.load 16 2153; CHECK-NEXT: # fallthrough-return 2154 %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1 2155 %v = load <2 x i64>, <2 x i64>* %s 2156 ret <2 x i64> %v 2157} 2158 2159define <2 x i64> @load_splat_v2i64_with_folded_gep_offset(i64* %p) { 2160; CHECK-LABEL: 
load_splat_v2i64_with_folded_gep_offset: 2161; CHECK: .functype load_splat_v2i64_with_folded_gep_offset (i32) -> (v128) 2162; CHECK-NEXT: # %bb.0: 2163; CHECK-NEXT: local.get 0 2164; CHECK-NEXT: v64x2.load_splat 8 2165; CHECK-NEXT: # fallthrough-return 2166 %s = getelementptr inbounds i64, i64* %p, i32 1 2167 %e = load i64, i64* %s 2168 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2169 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2170 ret <2 x i64> %v2 2171} 2172 2173define <2 x i64> @load_sext_v2i64_with_folded_gep_offset(<2 x i32>* %p) { 2174; CHECK-LABEL: load_sext_v2i64_with_folded_gep_offset: 2175; CHECK: .functype load_sext_v2i64_with_folded_gep_offset (i32) -> (v128) 2176; CHECK-NEXT: # %bb.0: 2177; CHECK-NEXT: local.get 0 2178; CHECK-NEXT: i64x2.load32x2_s 8 2179; CHECK-NEXT: # fallthrough-return 2180 %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1 2181 %v = load <2 x i32>, <2 x i32>* %s 2182 %v2 = sext <2 x i32> %v to <2 x i64> 2183 ret <2 x i64> %v2 2184} 2185 2186define <2 x i64> @load_zext_v2i64_with_folded_gep_offset(<2 x i32>* %p) { 2187; CHECK-LABEL: load_zext_v2i64_with_folded_gep_offset: 2188; CHECK: .functype load_zext_v2i64_with_folded_gep_offset (i32) -> (v128) 2189; CHECK-NEXT: # %bb.0: 2190; CHECK-NEXT: local.get 0 2191; CHECK-NEXT: i64x2.load32x2_u 8 2192; CHECK-NEXT: # fallthrough-return 2193 %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1 2194 %v = load <2 x i32>, <2 x i32>* %s 2195 %v2 = zext <2 x i32> %v to <2 x i64> 2196 ret <2 x i64> %v2 2197} 2198 2199define <2 x i32> @load_ext_v2i64_with_folded_gep_offset(<2 x i32>* %p) { 2200; CHECK-LABEL: load_ext_v2i64_with_folded_gep_offset: 2201; CHECK: .functype load_ext_v2i64_with_folded_gep_offset (i32) -> (v128) 2202; CHECK-NEXT: # %bb.0: 2203; CHECK-NEXT: local.get 0 2204; CHECK-NEXT: i64x2.load32x2_u 8 2205; CHECK-NEXT: # fallthrough-return 2206 %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1 2207 %v = load <2 x 
i32>, <2 x i32>* %s 2208 ret <2 x i32> %v 2209} 2210 2211define <2 x i64> @load_v2i64_with_unfolded_gep_negative_offset(<2 x i64>* %p) { 2212; CHECK-LABEL: load_v2i64_with_unfolded_gep_negative_offset: 2213; CHECK: .functype load_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) 2214; CHECK-NEXT: # %bb.0: 2215; CHECK-NEXT: local.get 0 2216; CHECK-NEXT: i32.const -16 2217; CHECK-NEXT: i32.add 2218; CHECK-NEXT: v128.load 0 2219; CHECK-NEXT: # fallthrough-return 2220 %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1 2221 %v = load <2 x i64>, <2 x i64>* %s 2222 ret <2 x i64> %v 2223} 2224 2225define <2 x i64> @load_splat_v2i64_with_unfolded_gep_negative_offset(i64* %p) { 2226; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_negative_offset: 2227; CHECK: .functype load_splat_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) 2228; CHECK-NEXT: # %bb.0: 2229; CHECK-NEXT: local.get 0 2230; CHECK-NEXT: i32.const -8 2231; CHECK-NEXT: i32.add 2232; CHECK-NEXT: v64x2.load_splat 0 2233; CHECK-NEXT: # fallthrough-return 2234 %s = getelementptr inbounds i64, i64* %p, i32 -1 2235 %e = load i64, i64* %s 2236 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2237 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2238 ret <2 x i64> %v2 2239} 2240 2241define <2 x i64> @load_sext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) { 2242; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_negative_offset: 2243; CHECK: .functype load_sext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) 2244; CHECK-NEXT: # %bb.0: 2245; CHECK-NEXT: local.get 0 2246; CHECK-NEXT: i32.const -8 2247; CHECK-NEXT: i32.add 2248; CHECK-NEXT: i64x2.load32x2_s 0 2249; CHECK-NEXT: # fallthrough-return 2250 %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1 2251 %v = load <2 x i32>, <2 x i32>* %s 2252 %v2 = sext <2 x i32> %v to <2 x i64> 2253 ret <2 x i64> %v2 2254} 2255 2256define <2 x i64> @load_zext_v2i64_with_unfolded_gep_negative_offset(<2 x 
i32>* %p) { 2257; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_negative_offset: 2258; CHECK: .functype load_zext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) 2259; CHECK-NEXT: # %bb.0: 2260; CHECK-NEXT: local.get 0 2261; CHECK-NEXT: i32.const -8 2262; CHECK-NEXT: i32.add 2263; CHECK-NEXT: i64x2.load32x2_u 0 2264; CHECK-NEXT: # fallthrough-return 2265 %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1 2266 %v = load <2 x i32>, <2 x i32>* %s 2267 %v2 = zext <2 x i32> %v to <2 x i64> 2268 ret <2 x i64> %v2 2269} 2270 2271define <2 x i32> @load_ext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) { 2272; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_negative_offset: 2273; CHECK: .functype load_ext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) 2274; CHECK-NEXT: # %bb.0: 2275; CHECK-NEXT: local.get 0 2276; CHECK-NEXT: i32.const -8 2277; CHECK-NEXT: i32.add 2278; CHECK-NEXT: i64x2.load32x2_u 0 2279; CHECK-NEXT: # fallthrough-return 2280 %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1 2281 %v = load <2 x i32>, <2 x i32>* %s 2282 ret <2 x i32> %v 2283} 2284 2285define <2 x i64> @load_v2i64_with_unfolded_offset(<2 x i64>* %p) { 2286; CHECK-LABEL: load_v2i64_with_unfolded_offset: 2287; CHECK: .functype load_v2i64_with_unfolded_offset (i32) -> (v128) 2288; CHECK-NEXT: # %bb.0: 2289; CHECK-NEXT: local.get 0 2290; CHECK-NEXT: i32.const 16 2291; CHECK-NEXT: i32.add 2292; CHECK-NEXT: v128.load 0 2293; CHECK-NEXT: # fallthrough-return 2294 %q = ptrtoint <2 x i64>* %p to i32 2295 %r = add nsw i32 %q, 16 2296 %s = inttoptr i32 %r to <2 x i64>* 2297 %v = load <2 x i64>, <2 x i64>* %s 2298 ret <2 x i64> %v 2299} 2300 2301define <2 x i64> @load_splat_v2i64_with_unfolded_offset(i64* %p) { 2302; CHECK-LABEL: load_splat_v2i64_with_unfolded_offset: 2303; CHECK: .functype load_splat_v2i64_with_unfolded_offset (i32) -> (v128) 2304; CHECK-NEXT: # %bb.0: 2305; CHECK-NEXT: local.get 0 2306; CHECK-NEXT: i32.const 16 2307; CHECK-NEXT: i32.add 
2308; CHECK-NEXT: v64x2.load_splat 0 2309; CHECK-NEXT: # fallthrough-return 2310 %q = ptrtoint i64* %p to i32 2311 %r = add nsw i32 %q, 16 2312 %s = inttoptr i32 %r to i64* 2313 %e = load i64, i64* %s 2314 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2315 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2316 ret <2 x i64> %v2 2317} 2318 2319define <2 x i64> @load_sext_v2i64_with_unfolded_offset(<2 x i32>* %p) { 2320; CHECK-LABEL: load_sext_v2i64_with_unfolded_offset: 2321; CHECK: .functype load_sext_v2i64_with_unfolded_offset (i32) -> (v128) 2322; CHECK-NEXT: # %bb.0: 2323; CHECK-NEXT: local.get 0 2324; CHECK-NEXT: i32.const 16 2325; CHECK-NEXT: i32.add 2326; CHECK-NEXT: i64x2.load32x2_s 0 2327; CHECK-NEXT: # fallthrough-return 2328 %q = ptrtoint <2 x i32>* %p to i32 2329 %r = add nsw i32 %q, 16 2330 %s = inttoptr i32 %r to <2 x i32>* 2331 %v = load <2 x i32>, <2 x i32>* %s 2332 %v2 = sext <2 x i32> %v to <2 x i64> 2333 ret <2 x i64> %v2 2334} 2335 2336define <2 x i64> @load_zext_v2i64_with_unfolded_offset(<2 x i32>* %p) { 2337; CHECK-LABEL: load_zext_v2i64_with_unfolded_offset: 2338; CHECK: .functype load_zext_v2i64_with_unfolded_offset (i32) -> (v128) 2339; CHECK-NEXT: # %bb.0: 2340; CHECK-NEXT: local.get 0 2341; CHECK-NEXT: i32.const 16 2342; CHECK-NEXT: i32.add 2343; CHECK-NEXT: i64x2.load32x2_u 0 2344; CHECK-NEXT: # fallthrough-return 2345 %q = ptrtoint <2 x i32>* %p to i32 2346 %r = add nsw i32 %q, 16 2347 %s = inttoptr i32 %r to <2 x i32>* 2348 %v = load <2 x i32>, <2 x i32>* %s 2349 %v2 = zext <2 x i32> %v to <2 x i64> 2350 ret <2 x i64> %v2 2351} 2352 2353define <2 x i32> @load_ext_v2i64_with_unfolded_offset(<2 x i32>* %p) { 2354; CHECK-LABEL: load_ext_v2i64_with_unfolded_offset: 2355; CHECK: .functype load_ext_v2i64_with_unfolded_offset (i32) -> (v128) 2356; CHECK-NEXT: # %bb.0: 2357; CHECK-NEXT: local.get 0 2358; CHECK-NEXT: i32.const 16 2359; CHECK-NEXT: i32.add 2360; CHECK-NEXT: i64x2.load32x2_u 0 2361; CHECK-NEXT: # 
fallthrough-return 2362 %q = ptrtoint <2 x i32>* %p to i32 2363 %r = add nsw i32 %q, 16 2364 %s = inttoptr i32 %r to <2 x i32>* 2365 %v = load <2 x i32>, <2 x i32>* %s 2366 ret <2 x i32> %v 2367} 2368 2369define <2 x i64> @load_v2i64_with_unfolded_gep_offset(<2 x i64>* %p) { 2370; CHECK-LABEL: load_v2i64_with_unfolded_gep_offset: 2371; CHECK: .functype load_v2i64_with_unfolded_gep_offset (i32) -> (v128) 2372; CHECK-NEXT: # %bb.0: 2373; CHECK-NEXT: local.get 0 2374; CHECK-NEXT: i32.const 16 2375; CHECK-NEXT: i32.add 2376; CHECK-NEXT: v128.load 0 2377; CHECK-NEXT: # fallthrough-return 2378 %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1 2379 %v = load <2 x i64>, <2 x i64>* %s 2380 ret <2 x i64> %v 2381} 2382 2383define <2 x i64> @load_splat_v2i64_with_unfolded_gep_offset(i64* %p) { 2384; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_offset: 2385; CHECK: .functype load_splat_v2i64_with_unfolded_gep_offset (i32) -> (v128) 2386; CHECK-NEXT: # %bb.0: 2387; CHECK-NEXT: local.get 0 2388; CHECK-NEXT: i32.const 8 2389; CHECK-NEXT: i32.add 2390; CHECK-NEXT: v64x2.load_splat 0 2391; CHECK-NEXT: # fallthrough-return 2392 %s = getelementptr i64, i64* %p, i32 1 2393 %e = load i64, i64* %s 2394 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2395 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2396 ret <2 x i64> %v2 2397} 2398 2399define <2 x i64> @load_sext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) { 2400; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_offset: 2401; CHECK: .functype load_sext_v2i64_with_unfolded_gep_offset (i32) -> (v128) 2402; CHECK-NEXT: # %bb.0: 2403; CHECK-NEXT: local.get 0 2404; CHECK-NEXT: i32.const 8 2405; CHECK-NEXT: i32.add 2406; CHECK-NEXT: i64x2.load32x2_s 0 2407; CHECK-NEXT: # fallthrough-return 2408 %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1 2409 %v = load <2 x i32>, <2 x i32>* %s 2410 %v2 = sext <2 x i32> %v to <2 x i64> 2411 ret <2 x i64> %v2 2412} 2413 2414define <2 x i64> 
@load_zext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) { 2415; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_offset: 2416; CHECK: .functype load_zext_v2i64_with_unfolded_gep_offset (i32) -> (v128) 2417; CHECK-NEXT: # %bb.0: 2418; CHECK-NEXT: local.get 0 2419; CHECK-NEXT: i32.const 8 2420; CHECK-NEXT: i32.add 2421; CHECK-NEXT: i64x2.load32x2_u 0 2422; CHECK-NEXT: # fallthrough-return 2423 %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1 2424 %v = load <2 x i32>, <2 x i32>* %s 2425 %v2 = zext <2 x i32> %v to <2 x i64> 2426 ret <2 x i64> %v2 2427} 2428 2429define <2 x i32> @load_ext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) { 2430; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_offset: 2431; CHECK: .functype load_ext_v2i64_with_unfolded_gep_offset (i32) -> (v128) 2432; CHECK-NEXT: # %bb.0: 2433; CHECK-NEXT: local.get 0 2434; CHECK-NEXT: i32.const 8 2435; CHECK-NEXT: i32.add 2436; CHECK-NEXT: i64x2.load32x2_u 0 2437; CHECK-NEXT: # fallthrough-return 2438 %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1 2439 %v = load <2 x i32>, <2 x i32>* %s 2440 ret <2 x i32> %v 2441} 2442 2443define <2 x i64> @load_v2i64_from_numeric_address() { 2444; CHECK-LABEL: load_v2i64_from_numeric_address: 2445; CHECK: .functype load_v2i64_from_numeric_address () -> (v128) 2446; CHECK-NEXT: # %bb.0: 2447; CHECK-NEXT: i32.const 0 2448; CHECK-NEXT: v128.load 32 2449; CHECK-NEXT: # fallthrough-return 2450 %s = inttoptr i32 32 to <2 x i64>* 2451 %v = load <2 x i64>, <2 x i64>* %s 2452 ret <2 x i64> %v 2453} 2454 2455define <2 x i64> @load_splat_v2i64_from_numeric_address() { 2456; CHECK-LABEL: load_splat_v2i64_from_numeric_address: 2457; CHECK: .functype load_splat_v2i64_from_numeric_address () -> (v128) 2458; CHECK-NEXT: # %bb.0: 2459; CHECK-NEXT: i32.const 0 2460; CHECK-NEXT: v64x2.load_splat 32 2461; CHECK-NEXT: # fallthrough-return 2462 %s = inttoptr i32 32 to i64* 2463 %e = load i64, i64* %s 2464 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2465 %v2 = shufflevector <2 x i64> 
%v1, <2 x i64> undef, <2 x i32> zeroinitializer 2466 ret <2 x i64> %v2 2467} 2468 2469define <2 x i64> @load_sext_v2i64_from_numeric_address() { 2470; CHECK-LABEL: load_sext_v2i64_from_numeric_address: 2471; CHECK: .functype load_sext_v2i64_from_numeric_address () -> (v128) 2472; CHECK-NEXT: # %bb.0: 2473; CHECK-NEXT: i32.const 0 2474; CHECK-NEXT: i64x2.load32x2_s 32 2475; CHECK-NEXT: # fallthrough-return 2476 %s = inttoptr i32 32 to <2 x i32>* 2477 %v = load <2 x i32>, <2 x i32>* %s 2478 %v2 = sext <2 x i32> %v to <2 x i64> 2479 ret <2 x i64> %v2 2480} 2481 2482define <2 x i64> @load_zext_v2i64_from_numeric_address() { 2483; CHECK-LABEL: load_zext_v2i64_from_numeric_address: 2484; CHECK: .functype load_zext_v2i64_from_numeric_address () -> (v128) 2485; CHECK-NEXT: # %bb.0: 2486; CHECK-NEXT: i32.const 0 2487; CHECK-NEXT: i64x2.load32x2_u 32 2488; CHECK-NEXT: # fallthrough-return 2489 %s = inttoptr i32 32 to <2 x i32>* 2490 %v = load <2 x i32>, <2 x i32>* %s 2491 %v2 = zext <2 x i32> %v to <2 x i64> 2492 ret <2 x i64> %v2 2493} 2494 2495define <2 x i32> @load_ext_v2i64_from_numeric_address() { 2496; CHECK-LABEL: load_ext_v2i64_from_numeric_address: 2497; CHECK: .functype load_ext_v2i64_from_numeric_address () -> (v128) 2498; CHECK-NEXT: # %bb.0: 2499; CHECK-NEXT: i32.const 0 2500; CHECK-NEXT: i64x2.load32x2_u 32 2501; CHECK-NEXT: # fallthrough-return 2502 %s = inttoptr i32 32 to <2 x i32>* 2503 %v = load <2 x i32>, <2 x i32>* %s 2504 ret <2 x i32> %v 2505} 2506 2507@gv_v2i64 = global <2 x i64> <i64 42, i64 42> 2508define <2 x i64> @load_v2i64_from_global_address() { 2509; CHECK-LABEL: load_v2i64_from_global_address: 2510; CHECK: .functype load_v2i64_from_global_address () -> (v128) 2511; CHECK-NEXT: # %bb.0: 2512; CHECK-NEXT: i32.const 0 2513; CHECK-NEXT: v128.load gv_v2i64 2514; CHECK-NEXT: # fallthrough-return 2515 %v = load <2 x i64>, <2 x i64>* @gv_v2i64 2516 ret <2 x i64> %v 2517} 2518 2519@gv_i64 = global i64 42 2520define <2 x i64> 
@load_splat_v2i64_from_global_address() { 2521; CHECK-LABEL: load_splat_v2i64_from_global_address: 2522; CHECK: .functype load_splat_v2i64_from_global_address () -> (v128) 2523; CHECK-NEXT: # %bb.0: 2524; CHECK-NEXT: i32.const 0 2525; CHECK-NEXT: v64x2.load_splat gv_i64 2526; CHECK-NEXT: # fallthrough-return 2527 %e = load i64, i64* @gv_i64 2528 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2529 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2530 ret <2 x i64> %v2 2531} 2532 2533@gv_v2i32 = global <2 x i32> <i32 42, i32 42> 2534define <2 x i64> @load_sext_v2i64_from_global_address() { 2535; CHECK-LABEL: load_sext_v2i64_from_global_address: 2536; CHECK: .functype load_sext_v2i64_from_global_address () -> (v128) 2537; CHECK-NEXT: # %bb.0: 2538; CHECK-NEXT: i32.const 0 2539; CHECK-NEXT: i64x2.load32x2_s gv_v2i32 2540; CHECK-NEXT: # fallthrough-return 2541 %v = load <2 x i32>, <2 x i32>* @gv_v2i32 2542 %v2 = sext <2 x i32> %v to <2 x i64> 2543 ret <2 x i64> %v2 2544} 2545 2546define <2 x i64> @load_zext_v2i64_from_global_address() { 2547; CHECK-LABEL: load_zext_v2i64_from_global_address: 2548; CHECK: .functype load_zext_v2i64_from_global_address () -> (v128) 2549; CHECK-NEXT: # %bb.0: 2550; CHECK-NEXT: i32.const 0 2551; CHECK-NEXT: i64x2.load32x2_u gv_v2i32 2552; CHECK-NEXT: # fallthrough-return 2553 %v = load <2 x i32>, <2 x i32>* @gv_v2i32 2554 %v2 = zext <2 x i32> %v to <2 x i64> 2555 ret <2 x i64> %v2 2556} 2557 2558define <2 x i32> @load_ext_v2i64_from_global_address() { 2559; CHECK-LABEL: load_ext_v2i64_from_global_address: 2560; CHECK: .functype load_ext_v2i64_from_global_address () -> (v128) 2561; CHECK-NEXT: # %bb.0: 2562; CHECK-NEXT: i32.const 0 2563; CHECK-NEXT: i64x2.load32x2_u gv_v2i32 2564; CHECK-NEXT: # fallthrough-return 2565 %v = load <2 x i32>, <2 x i32>* @gv_v2i32 2566 ret <2 x i32> %v 2567} 2568 2569define void @store_v2i64(<2 x i64> %v, <2 x i64>* %p) { 2570; CHECK-LABEL: store_v2i64: 2571; CHECK: .functype 
store_v2i64 (v128, i32) -> () 2572; CHECK-NEXT: # %bb.0: 2573; CHECK-NEXT: local.get 1 2574; CHECK-NEXT: local.get 0 2575; CHECK-NEXT: v128.store 0 2576; CHECK-NEXT: # fallthrough-return 2577 store <2 x i64> %v , <2 x i64>* %p 2578 ret void 2579} 2580 2581define void @store_v2i64_with_folded_offset(<2 x i64> %v, <2 x i64>* %p) { 2582; CHECK-LABEL: store_v2i64_with_folded_offset: 2583; CHECK: .functype store_v2i64_with_folded_offset (v128, i32) -> () 2584; CHECK-NEXT: # %bb.0: 2585; CHECK-NEXT: local.get 1 2586; CHECK-NEXT: local.get 0 2587; CHECK-NEXT: v128.store 16 2588; CHECK-NEXT: # fallthrough-return 2589 %q = ptrtoint <2 x i64>* %p to i32 2590 %r = add nuw i32 %q, 16 2591 %s = inttoptr i32 %r to <2 x i64>* 2592 store <2 x i64> %v , <2 x i64>* %s 2593 ret void 2594} 2595 2596define void @store_v2i64_with_folded_gep_offset(<2 x i64> %v, <2 x i64>* %p) { 2597; CHECK-LABEL: store_v2i64_with_folded_gep_offset: 2598; CHECK: .functype store_v2i64_with_folded_gep_offset (v128, i32) -> () 2599; CHECK-NEXT: # %bb.0: 2600; CHECK-NEXT: local.get 1 2601; CHECK-NEXT: local.get 0 2602; CHECK-NEXT: v128.store 16 2603; CHECK-NEXT: # fallthrough-return 2604 %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1 2605 store <2 x i64> %v , <2 x i64>* %s 2606 ret void 2607} 2608 2609define void @store_v2i64_with_unfolded_gep_negative_offset(<2 x i64> %v, <2 x i64>* %p) { 2610; CHECK-LABEL: store_v2i64_with_unfolded_gep_negative_offset: 2611; CHECK: .functype store_v2i64_with_unfolded_gep_negative_offset (v128, i32) -> () 2612; CHECK-NEXT: # %bb.0: 2613; CHECK-NEXT: local.get 1 2614; CHECK-NEXT: i32.const -16 2615; CHECK-NEXT: i32.add 2616; CHECK-NEXT: local.get 0 2617; CHECK-NEXT: v128.store 0 2618; CHECK-NEXT: # fallthrough-return 2619 %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1 2620 store <2 x i64> %v , <2 x i64>* %s 2621 ret void 2622} 2623 2624define void @store_v2i64_with_unfolded_offset(<2 x i64> %v, <2 x i64>* %p) { 2625; CHECK-LABEL: 
store_v2i64_with_unfolded_offset: 2626; CHECK: .functype store_v2i64_with_unfolded_offset (v128, i32) -> () 2627; CHECK-NEXT: # %bb.0: 2628; CHECK-NEXT: local.get 1 2629; CHECK-NEXT: i32.const 16 2630; CHECK-NEXT: i32.add 2631; CHECK-NEXT: local.get 0 2632; CHECK-NEXT: v128.store 0 2633; CHECK-NEXT: # fallthrough-return 2634 %q = ptrtoint <2 x i64>* %p to i32 2635 %r = add nsw i32 %q, 16 2636 %s = inttoptr i32 %r to <2 x i64>* 2637 store <2 x i64> %v , <2 x i64>* %s 2638 ret void 2639} 2640 2641define void @store_v2i64_with_unfolded_gep_offset(<2 x i64> %v, <2 x i64>* %p) { 2642; CHECK-LABEL: store_v2i64_with_unfolded_gep_offset: 2643; CHECK: .functype store_v2i64_with_unfolded_gep_offset (v128, i32) -> () 2644; CHECK-NEXT: # %bb.0: 2645; CHECK-NEXT: local.get 1 2646; CHECK-NEXT: i32.const 16 2647; CHECK-NEXT: i32.add 2648; CHECK-NEXT: local.get 0 2649; CHECK-NEXT: v128.store 0 2650; CHECK-NEXT: # fallthrough-return 2651 %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1 2652 store <2 x i64> %v , <2 x i64>* %s 2653 ret void 2654} 2655 2656define void @store_v2i64_to_numeric_address(<2 x i64> %v) { 2657; CHECK-LABEL: store_v2i64_to_numeric_address: 2658; CHECK: .functype store_v2i64_to_numeric_address (v128) -> () 2659; CHECK-NEXT: # %bb.0: 2660; CHECK-NEXT: i32.const 0 2661; CHECK-NEXT: local.get 0 2662; CHECK-NEXT: v128.store 32 2663; CHECK-NEXT: # fallthrough-return 2664 %s = inttoptr i32 32 to <2 x i64>* 2665 store <2 x i64> %v , <2 x i64>* %s 2666 ret void 2667} 2668 2669define void @store_v2i64_to_global_address(<2 x i64> %v) { 2670; CHECK-LABEL: store_v2i64_to_global_address: 2671; CHECK: .functype store_v2i64_to_global_address (v128) -> () 2672; CHECK-NEXT: # %bb.0: 2673; CHECK-NEXT: i32.const 0 2674; CHECK-NEXT: local.get 0 2675; CHECK-NEXT: v128.store gv_v2i64 2676; CHECK-NEXT: # fallthrough-return 2677 store <2 x i64> %v , <2 x i64>* @gv_v2i64 2678 ret void 2679} 2680 2681; ============================================================================== 
; 4 x float
; ==============================================================================
define <4 x float> @load_v4f32(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32:
; CHECK: .functype load_v4f32 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %v = load <4 x float>, <4 x float>* %p
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32(float* %p) {
; CHECK-LABEL: load_splat_v4f32:
; CHECK: .functype load_splat_v4f32 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v32x4.load_splat 0
; CHECK-NEXT: # fallthrough-return
  %e = load float, float* %p
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define <4 x float> @load_v4f32_with_folded_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_folded_offset:
; CHECK: .functype load_v4f32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_folded_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_folded_offset:
; CHECK: .functype load_splat_v4f32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v32x4.load_splat 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint float* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to float*
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define <4 x float> @load_v4f32_with_folded_gep_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_folded_gep_offset:
; CHECK: .functype load_v4f32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_folded_gep_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_folded_gep_offset:
; CHECK: .functype load_splat_v4f32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v32x4.load_splat 4
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds float, float* %p, i32 1
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define <4 x float> @load_v4f32_with_unfolded_gep_negative_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_gep_negative_offset:
; CHECK: .functype load_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_unfolded_gep_negative_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_negative_offset:
; CHECK: .functype load_splat_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -4
; CHECK-NEXT: i32.add
; CHECK-NEXT: v32x4.load_splat 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds float, float* %p, i32 -1
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define <4 x float> @load_v4f32_with_unfolded_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_offset:
; CHECK: .functype load_v4f32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_unfolded_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_offset:
; CHECK: .functype load_splat_v4f32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v32x4.load_splat 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint float* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to float*
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define <4 x float> @load_v4f32_with_unfolded_gep_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_gep_offset:
; CHECK: .functype load_v4f32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_unfolded_gep_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_offset:
; CHECK: .functype load_splat_v4f32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 4
; CHECK-NEXT: i32.add
; CHECK-NEXT: v32x4.load_splat 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr float, float* %p, i32 1
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define <4 x float> @load_v4f32_from_numeric_address() {
; CHECK-LABEL: load_v4f32_from_numeric_address:
; CHECK: .functype load_v4f32_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load 32
; CHECK-NEXT: # fallthrough-return
  %s = inttoptr i32 32 to <4 x float>*
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_from_numeric_address() {
; CHECK-LABEL: load_splat_v4f32_from_numeric_address:
; CHECK: .functype load_splat_v4f32_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v32x4.load_splat 32
; CHECK-NEXT: # fallthrough-return
  %s = inttoptr i32 32 to float*
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

@gv_v4f32 = global <4 x float> <float 42., float 42., float 42., float 42.>
define <4 x float> @load_v4f32_from_global_address() {
; CHECK-LABEL: load_v4f32_from_global_address:
; CHECK: .functype load_v4f32_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load gv_v4f32
; CHECK-NEXT: # fallthrough-return
  %v = load <4 x float>, <4 x float>* @gv_v4f32
  ret <4 x float> %v
}

@gv_f32 = global float 42.
define <4 x float> @load_splat_v4f32_from_global_address() {
; CHECK-LABEL: load_splat_v4f32_from_global_address:
; CHECK: .functype load_splat_v4f32_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v32x4.load_splat gv_f32
; CHECK-NEXT: # fallthrough-return
  %e = load float, float* @gv_f32
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define void @store_v4f32(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32:
; CHECK: .functype store_v4f32 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
  store <4 x float> %v , <4 x float>* %p
  ret void
}

define void @store_v4f32_with_folded_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_folded_offset:
; CHECK: .functype store_v4f32_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_with_folded_gep_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_folded_gep_offset:
; CHECK: .functype store_v4f32_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_with_unfolded_gep_negative_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_gep_negative_offset:
; CHECK: .functype store_v4f32_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_with_unfolded_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_offset:
; CHECK: .functype store_v4f32_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_with_unfolded_gep_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_gep_offset:
; CHECK: .functype store_v4f32_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_to_numeric_address(<4 x float> %v) {
; CHECK-LABEL: store_v4f32_to_numeric_address:
; CHECK: .functype store_v4f32_to_numeric_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 32
; CHECK-NEXT: # fallthrough-return
  %s = inttoptr i32 32 to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_to_global_address(<4 x float> %v) {
; CHECK-LABEL: store_v4f32_to_global_address:
; CHECK: .functype store_v4f32_to_global_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store gv_v4f32
; CHECK-NEXT: # fallthrough-return
  store <4 x float> %v , <4 x float>* @gv_v4f32
  ret void
}

; ==============================================================================
; 2 x double
; ==============================================================================
define <2 x double> @load_v2f64(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64:
; CHECK: .functype load_v2f64 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %v = load <2 x double>, <2 x double>* %p
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64(double* %p) {
; CHECK-LABEL: load_splat_v2f64:
; CHECK: .functype load_splat_v2f64 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v64x2.load_splat 0
; CHECK-NEXT: # fallthrough-return
  %e = load double, double* %p
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_v2f64_with_folded_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_folded_offset:
; CHECK: .functype load_v2f64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_folded_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_folded_offset:
; CHECK: .functype load_splat_v2f64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v64x2.load_splat 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint double* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to double*
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_v2f64_with_folded_gep_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_folded_gep_offset:
; CHECK: .functype load_v2f64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_folded_gep_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_folded_gep_offset:
; CHECK: .functype load_splat_v2f64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v64x2.load_splat 8
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds double, double* %p, i32 1
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_v2f64_with_unfolded_gep_negative_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_gep_negative_offset:
; CHECK: .functype load_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_unfolded_gep_negative_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_negative_offset:
; CHECK: .functype load_splat_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: v64x2.load_splat 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds double, double* %p, i32 -1
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_v2f64_with_unfolded_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_offset:
; CHECK: .functype load_v2f64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_unfolded_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_offset:
; CHECK: .functype load_splat_v2f64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v64x2.load_splat 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint double* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to double*
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_v2f64_with_unfolded_gep_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_gep_offset:
; CHECK: .functype load_v2f64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr <2 x double>, <2 x double>* %p, i32 1
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_unfolded_gep_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_offset:
; CHECK: .functype load_splat_v2f64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: v64x2.load_splat 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr double, double* %p, i32 1
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_v2f64_from_numeric_address() {
; CHECK-LABEL: load_v2f64_from_numeric_address:
; CHECK: .functype load_v2f64_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load 32
; CHECK-NEXT: # fallthrough-return
  %s = inttoptr i32 32 to <2 x double>*
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_from_numeric_address() {
; CHECK-LABEL: load_splat_v2f64_from_numeric_address:
; CHECK: .functype load_splat_v2f64_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v64x2.load_splat 32
; CHECK-NEXT: # fallthrough-return
  %s = inttoptr i32 32 to double*
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

@gv_v2f64 = global <2 x double> <double 42., double 42.>
define <2 x double> @load_v2f64_from_global_address() {
; CHECK-LABEL: load_v2f64_from_global_address:
; CHECK: .functype load_v2f64_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load gv_v2f64
; CHECK-NEXT: # fallthrough-return
  %v = load <2 x double>, <2 x double>* @gv_v2f64
  ret <2 x double> %v
}

@gv_f64 = global double 42.
define <2 x double> @load_splat_v2f64_from_global_address() {
; CHECK-LABEL: load_splat_v2f64_from_global_address:
; CHECK: .functype load_splat_v2f64_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v64x2.load_splat gv_f64
; CHECK-NEXT: # fallthrough-return
  %e = load double, double* @gv_f64
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define void @store_v2f64(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64:
; CHECK: .functype store_v2f64 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
  store <2 x double> %v , <2 x double>* %p
  ret void
}

define void @store_v2f64_with_folded_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_folded_offset:
; CHECK: .functype store_v2f64_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  store <2 x double> %v , <2 x double>* %s
  ret void
}

define void @store_v2f64_with_folded_gep_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_folded_gep_offset:
; CHECK: .functype store_v2f64_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
  store <2 x double> %v , <2 x double>* %s
  ret void
}

define void @store_v2f64_with_unfolded_gep_negative_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_gep_negative_offset:
; CHECK: .functype store_v2f64_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
  store <2 x double> %v , <2 x double>* %s
  ret void
}

define void @store_v2f64_with_unfolded_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_offset:
; CHECK: .functype store_v2f64_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  store <2 x double> %v , <2 x double>* %s
  ret void
}

define void @store_v2f64_with_unfolded_gep_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_gep_offset:
; CHECK: .functype store_v2f64_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr <2 x double>, <2 x double>* %p, i32 1
  store <2 x double> %v , <2 x double>* %s
  ret void
}

define void @store_v2f64_to_numeric_address(<2 x double> %v) {
; CHECK-LABEL: store_v2f64_to_numeric_address:
; CHECK: .functype store_v2f64_to_numeric_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 32
; CHECK-NEXT: # fallthrough-return
  %s = inttoptr i32 32 to <2 x double>*
  store <2 x double> %v , <2 x double>* %s
  ret void
}

define void @store_v2f64_to_global_address(<2 x double> %v) {
; CHECK-LABEL: store_v2f64_to_global_address:
; CHECK: .functype store_v2f64_to_global_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store gv_v2f64
; CHECK-NEXT: # fallthrough-return
  store <2 x double> %v , <2 x double>* @gv_v2f64
  ret void
}