1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s 3 4; Test SIMD loads and stores 5 6target triple = "wasm32-unknown-unknown" 7 8; ============================================================================== 9; 16 x i8 10; ============================================================================== 11define <16 x i8> @load_v16i8(<16 x i8>* %p) { 12; CHECK-LABEL: load_v16i8: 13; CHECK: .functype load_v16i8 (i32) -> (v128) 14; CHECK-NEXT: # %bb.0: 15; CHECK-NEXT: local.get 0 16; CHECK-NEXT: v128.load 0 17; CHECK-NEXT: # fallthrough-return 18 %v = load <16 x i8>, <16 x i8>* %p 19 ret <16 x i8> %v 20} 21 22define <16 x i8> @load_splat_v16i8(i8* %p) { 23; CHECK-LABEL: load_splat_v16i8: 24; CHECK: .functype load_splat_v16i8 (i32) -> (v128) 25; CHECK-NEXT: # %bb.0: 26; CHECK-NEXT: local.get 0 27; CHECK-NEXT: v128.load8_splat 0 28; CHECK-NEXT: # fallthrough-return 29 %e = load i8, i8* %p 30 %v1 = insertelement <16 x i8> undef, i8 %e, i32 0 31 %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer 32 ret <16 x i8> %v2 33} 34 35define <16 x i8> @load_v16i8_with_folded_offset(<16 x i8>* %p) { 36; CHECK-LABEL: load_v16i8_with_folded_offset: 37; CHECK: .functype load_v16i8_with_folded_offset (i32) -> (v128) 38; CHECK-NEXT: # %bb.0: 39; CHECK-NEXT: local.get 0 40; CHECK-NEXT: v128.load 16 41; CHECK-NEXT: # fallthrough-return 42 %q = ptrtoint <16 x i8>* %p to i32 43 %r = add nuw i32 %q, 16 44 %s = inttoptr i32 %r to <16 x i8>* 45 %v = load <16 x i8>, <16 x i8>* %s 46 ret <16 x i8> %v 47} 48 49define <16 x i8> @load_splat_v16i8_with_folded_offset(i8* %p) { 50; CHECK-LABEL: load_splat_v16i8_with_folded_offset: 51; CHECK: .functype load_splat_v16i8_with_folded_offset (i32) -> (v128) 52; CHECK-NEXT: # %bb.0: 53; CHECK-NEXT: local.get 0 54; CHECK-NEXT: v128.load8_splat 16 55; CHECK-NEXT: # fallthrough-return 56 %q = ptrtoint i8* %p to i32 57 %r = add nuw i32 
%q, 16 58 %s = inttoptr i32 %r to i8* 59 %e = load i8, i8* %s 60 %v1 = insertelement <16 x i8> undef, i8 %e, i32 0 61 %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer 62 ret <16 x i8> %v2 63} 64 65define <16 x i8> @load_v16i8_with_folded_gep_offset(<16 x i8>* %p) { 66; CHECK-LABEL: load_v16i8_with_folded_gep_offset: 67; CHECK: .functype load_v16i8_with_folded_gep_offset (i32) -> (v128) 68; CHECK-NEXT: # %bb.0: 69; CHECK-NEXT: local.get 0 70; CHECK-NEXT: v128.load 16 71; CHECK-NEXT: # fallthrough-return 72 %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1 73 %v = load <16 x i8>, <16 x i8>* %s 74 ret <16 x i8> %v 75} 76 77define <16 x i8> @load_splat_v16i8_with_folded_gep_offset(i8* %p) { 78; CHECK-LABEL: load_splat_v16i8_with_folded_gep_offset: 79; CHECK: .functype load_splat_v16i8_with_folded_gep_offset (i32) -> (v128) 80; CHECK-NEXT: # %bb.0: 81; CHECK-NEXT: local.get 0 82; CHECK-NEXT: v128.load8_splat 1 83; CHECK-NEXT: # fallthrough-return 84 %s = getelementptr inbounds i8, i8* %p, i32 1 85 %e = load i8, i8* %s 86 %v1 = insertelement <16 x i8> undef, i8 %e, i32 0 87 %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer 88 ret <16 x i8> %v2 89} 90 91define <16 x i8> @load_v16i8_with_unfolded_gep_negative_offset(<16 x i8>* %p) { 92; CHECK-LABEL: load_v16i8_with_unfolded_gep_negative_offset: 93; CHECK: .functype load_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128) 94; CHECK-NEXT: # %bb.0: 95; CHECK-NEXT: local.get 0 96; CHECK-NEXT: i32.const -16 97; CHECK-NEXT: i32.add 98; CHECK-NEXT: v128.load 0 99; CHECK-NEXT: # fallthrough-return 100 %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1 101 %v = load <16 x i8>, <16 x i8>* %s 102 ret <16 x i8> %v 103} 104 105define <16 x i8> @load_splat_v16i8_with_unfolded_gep_negative_offset(i8* %p) { 106; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_negative_offset: 107; CHECK: .functype load_splat_v16i8_with_unfolded_gep_negative_offset (i32) -> 
(v128) 108; CHECK-NEXT: # %bb.0: 109; CHECK-NEXT: local.get 0 110; CHECK-NEXT: i32.const -1 111; CHECK-NEXT: i32.add 112; CHECK-NEXT: v128.load8_splat 0 113; CHECK-NEXT: # fallthrough-return 114 %s = getelementptr inbounds i8, i8* %p, i32 -1 115 %e = load i8, i8* %s 116 %v1 = insertelement <16 x i8> undef, i8 %e, i32 0 117 %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer 118 ret <16 x i8> %v2 119} 120 121define <16 x i8> @load_v16i8_with_unfolded_offset(<16 x i8>* %p) { 122; CHECK-LABEL: load_v16i8_with_unfolded_offset: 123; CHECK: .functype load_v16i8_with_unfolded_offset (i32) -> (v128) 124; CHECK-NEXT: # %bb.0: 125; CHECK-NEXT: local.get 0 126; CHECK-NEXT: i32.const 16 127; CHECK-NEXT: i32.add 128; CHECK-NEXT: v128.load 0 129; CHECK-NEXT: # fallthrough-return 130 %q = ptrtoint <16 x i8>* %p to i32 131 %r = add nsw i32 %q, 16 132 %s = inttoptr i32 %r to <16 x i8>* 133 %v = load <16 x i8>, <16 x i8>* %s 134 ret <16 x i8> %v 135} 136 137define <16 x i8> @load_splat_v16i8_with_unfolded_offset(i8* %p) { 138; CHECK-LABEL: load_splat_v16i8_with_unfolded_offset: 139; CHECK: .functype load_splat_v16i8_with_unfolded_offset (i32) -> (v128) 140; CHECK-NEXT: # %bb.0: 141; CHECK-NEXT: local.get 0 142; CHECK-NEXT: i32.const 16 143; CHECK-NEXT: i32.add 144; CHECK-NEXT: v128.load8_splat 0 145; CHECK-NEXT: # fallthrough-return 146 %q = ptrtoint i8* %p to i32 147 %r = add nsw i32 %q, 16 148 %s = inttoptr i32 %r to i8* 149 %e = load i8, i8* %s 150 %v1 = insertelement <16 x i8> undef, i8 %e, i32 0 151 %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer 152 ret <16 x i8> %v2 153} 154 155define <16 x i8> @load_v16i8_with_unfolded_gep_offset(<16 x i8>* %p) { 156; CHECK-LABEL: load_v16i8_with_unfolded_gep_offset: 157; CHECK: .functype load_v16i8_with_unfolded_gep_offset (i32) -> (v128) 158; CHECK-NEXT: # %bb.0: 159; CHECK-NEXT: local.get 0 160; CHECK-NEXT: i32.const 16 161; CHECK-NEXT: i32.add 162; CHECK-NEXT: v128.load 0 163; 
CHECK-NEXT: # fallthrough-return 164 %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1 165 %v = load <16 x i8>, <16 x i8>* %s 166 ret <16 x i8> %v 167} 168 169define <16 x i8> @load_splat_v16i8_with_unfolded_gep_offset(i8* %p) { 170; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_offset: 171; CHECK: .functype load_splat_v16i8_with_unfolded_gep_offset (i32) -> (v128) 172; CHECK-NEXT: # %bb.0: 173; CHECK-NEXT: local.get 0 174; CHECK-NEXT: i32.const 1 175; CHECK-NEXT: i32.add 176; CHECK-NEXT: v128.load8_splat 0 177; CHECK-NEXT: # fallthrough-return 178 %s = getelementptr i8, i8* %p, i32 1 179 %e = load i8, i8* %s 180 %v1 = insertelement <16 x i8> undef, i8 %e, i32 0 181 %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer 182 ret <16 x i8> %v2 183} 184 185define <16 x i8> @load_v16i8_from_numeric_address() { 186; CHECK-LABEL: load_v16i8_from_numeric_address: 187; CHECK: .functype load_v16i8_from_numeric_address () -> (v128) 188; CHECK-NEXT: # %bb.0: 189; CHECK-NEXT: i32.const 0 190; CHECK-NEXT: v128.load 32 191; CHECK-NEXT: # fallthrough-return 192 %s = inttoptr i32 32 to <16 x i8>* 193 %v = load <16 x i8>, <16 x i8>* %s 194 ret <16 x i8> %v 195} 196 197define <16 x i8> @load_splat_v16i8_from_numeric_address() { 198; CHECK-LABEL: load_splat_v16i8_from_numeric_address: 199; CHECK: .functype load_splat_v16i8_from_numeric_address () -> (v128) 200; CHECK-NEXT: # %bb.0: 201; CHECK-NEXT: i32.const 0 202; CHECK-NEXT: v128.load8_splat 32 203; CHECK-NEXT: # fallthrough-return 204 %s = inttoptr i32 32 to i8* 205 %e = load i8, i8* %s 206 %v1 = insertelement <16 x i8> undef, i8 %e, i32 0 207 %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer 208 ret <16 x i8> %v2 209} 210 211@gv_v16i8 = global <16 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42> 212define <16 x i8> @load_v16i8_from_global_address() { 213; CHECK-LABEL: load_v16i8_from_global_address: 214; 
CHECK: .functype load_v16i8_from_global_address () -> (v128) 215; CHECK-NEXT: # %bb.0: 216; CHECK-NEXT: i32.const 0 217; CHECK-NEXT: v128.load gv_v16i8 218; CHECK-NEXT: # fallthrough-return 219 %v = load <16 x i8>, <16 x i8>* @gv_v16i8 220 ret <16 x i8> %v 221} 222 223@gv_i8 = global i8 42 224define <16 x i8> @load_splat_v16i8_from_global_address() { 225; CHECK-LABEL: load_splat_v16i8_from_global_address: 226; CHECK: .functype load_splat_v16i8_from_global_address () -> (v128) 227; CHECK-NEXT: # %bb.0: 228; CHECK-NEXT: i32.const 0 229; CHECK-NEXT: v128.load8_splat gv_i8 230; CHECK-NEXT: # fallthrough-return 231 %e = load i8, i8* @gv_i8 232 %v1 = insertelement <16 x i8> undef, i8 %e, i32 0 233 %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer 234 ret <16 x i8> %v2 235} 236 237define void @store_v16i8(<16 x i8> %v, <16 x i8>* %p) { 238; CHECK-LABEL: store_v16i8: 239; CHECK: .functype store_v16i8 (v128, i32) -> () 240; CHECK-NEXT: # %bb.0: 241; CHECK-NEXT: local.get 1 242; CHECK-NEXT: local.get 0 243; CHECK-NEXT: v128.store 0 244; CHECK-NEXT: # fallthrough-return 245 store <16 x i8> %v , <16 x i8>* %p 246 ret void 247} 248 249define void @store_v16i8_with_folded_offset(<16 x i8> %v, <16 x i8>* %p) { 250; CHECK-LABEL: store_v16i8_with_folded_offset: 251; CHECK: .functype store_v16i8_with_folded_offset (v128, i32) -> () 252; CHECK-NEXT: # %bb.0: 253; CHECK-NEXT: local.get 1 254; CHECK-NEXT: local.get 0 255; CHECK-NEXT: v128.store 16 256; CHECK-NEXT: # fallthrough-return 257 %q = ptrtoint <16 x i8>* %p to i32 258 %r = add nuw i32 %q, 16 259 %s = inttoptr i32 %r to <16 x i8>* 260 store <16 x i8> %v , <16 x i8>* %s 261 ret void 262} 263 264define void @store_v16i8_with_folded_gep_offset(<16 x i8> %v, <16 x i8>* %p) { 265; CHECK-LABEL: store_v16i8_with_folded_gep_offset: 266; CHECK: .functype store_v16i8_with_folded_gep_offset (v128, i32) -> () 267; CHECK-NEXT: # %bb.0: 268; CHECK-NEXT: local.get 1 269; CHECK-NEXT: local.get 0 270; CHECK-NEXT: 
v128.store 16 271; CHECK-NEXT: # fallthrough-return 272 %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1 273 store <16 x i8> %v , <16 x i8>* %s 274 ret void 275} 276 277define void @store_v16i8_with_unfolded_gep_negative_offset(<16 x i8> %v, <16 x i8>* %p) { 278; CHECK-LABEL: store_v16i8_with_unfolded_gep_negative_offset: 279; CHECK: .functype store_v16i8_with_unfolded_gep_negative_offset (v128, i32) -> () 280; CHECK-NEXT: # %bb.0: 281; CHECK-NEXT: local.get 1 282; CHECK-NEXT: i32.const -16 283; CHECK-NEXT: i32.add 284; CHECK-NEXT: local.get 0 285; CHECK-NEXT: v128.store 0 286; CHECK-NEXT: # fallthrough-return 287 %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1 288 store <16 x i8> %v , <16 x i8>* %s 289 ret void 290} 291 292define void @store_v16i8_with_unfolded_offset(<16 x i8> %v, <16 x i8>* %p) { 293; CHECK-LABEL: store_v16i8_with_unfolded_offset: 294; CHECK: .functype store_v16i8_with_unfolded_offset (v128, i32) -> () 295; CHECK-NEXT: # %bb.0: 296; CHECK-NEXT: local.get 1 297; CHECK-NEXT: i32.const 16 298; CHECK-NEXT: i32.add 299; CHECK-NEXT: local.get 0 300; CHECK-NEXT: v128.store 0 301; CHECK-NEXT: # fallthrough-return 302 %q = ptrtoint <16 x i8>* %p to i32 303 %r = add nsw i32 %q, 16 304 %s = inttoptr i32 %r to <16 x i8>* 305 store <16 x i8> %v , <16 x i8>* %s 306 ret void 307} 308 309define void @store_v16i8_with_unfolded_gep_offset(<16 x i8> %v, <16 x i8>* %p) { 310; CHECK-LABEL: store_v16i8_with_unfolded_gep_offset: 311; CHECK: .functype store_v16i8_with_unfolded_gep_offset (v128, i32) -> () 312; CHECK-NEXT: # %bb.0: 313; CHECK-NEXT: local.get 1 314; CHECK-NEXT: i32.const 16 315; CHECK-NEXT: i32.add 316; CHECK-NEXT: local.get 0 317; CHECK-NEXT: v128.store 0 318; CHECK-NEXT: # fallthrough-return 319 %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1 320 store <16 x i8> %v , <16 x i8>* %s 321 ret void 322} 323 324define void @store_v16i8_to_numeric_address(<16 x i8> %v) { 325; CHECK-LABEL: store_v16i8_to_numeric_address: 326; CHECK: 
.functype store_v16i8_to_numeric_address (v128) -> () 327; CHECK-NEXT: # %bb.0: 328; CHECK-NEXT: i32.const 0 329; CHECK-NEXT: local.get 0 330; CHECK-NEXT: v128.store 32 331; CHECK-NEXT: # fallthrough-return 332 %s = inttoptr i32 32 to <16 x i8>* 333 store <16 x i8> %v , <16 x i8>* %s 334 ret void 335} 336 337define void @store_v16i8_to_global_address(<16 x i8> %v) { 338; CHECK-LABEL: store_v16i8_to_global_address: 339; CHECK: .functype store_v16i8_to_global_address (v128) -> () 340; CHECK-NEXT: # %bb.0: 341; CHECK-NEXT: i32.const 0 342; CHECK-NEXT: local.get 0 343; CHECK-NEXT: v128.store gv_v16i8 344; CHECK-NEXT: # fallthrough-return 345 store <16 x i8> %v , <16 x i8>* @gv_v16i8 346 ret void 347} 348 349; ============================================================================== 350; 8 x i16 351; ============================================================================== 352define <8 x i16> @load_v8i16(<8 x i16>* %p) { 353; CHECK-LABEL: load_v8i16: 354; CHECK: .functype load_v8i16 (i32) -> (v128) 355; CHECK-NEXT: # %bb.0: 356; CHECK-NEXT: local.get 0 357; CHECK-NEXT: v128.load 0 358; CHECK-NEXT: # fallthrough-return 359 %v = load <8 x i16>, <8 x i16>* %p 360 ret <8 x i16> %v 361} 362 363define <8 x i16> @load_splat_v8i16(i16* %p) { 364; CHECK-LABEL: load_splat_v8i16: 365; CHECK: .functype load_splat_v8i16 (i32) -> (v128) 366; CHECK-NEXT: # %bb.0: 367; CHECK-NEXT: local.get 0 368; CHECK-NEXT: v128.load16_splat 0 369; CHECK-NEXT: # fallthrough-return 370 %e = load i16, i16* %p 371 %v1 = insertelement <8 x i16> undef, i16 %e, i32 0 372 %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer 373 ret <8 x i16> %v2 374} 375 376define <8 x i16> @load_sext_v8i16(<8 x i8>* %p) { 377; CHECK-LABEL: load_sext_v8i16: 378; CHECK: .functype load_sext_v8i16 (i32) -> (v128) 379; CHECK-NEXT: # %bb.0: 380; CHECK-NEXT: local.get 0 381; CHECK-NEXT: i16x8.load8x8_s 0 382; CHECK-NEXT: # fallthrough-return 383 %v = load <8 x i8>, <8 x i8>* %p 384 %v2 = sext <8 
x i8> %v to <8 x i16> 385 ret <8 x i16> %v2 386} 387 388define <8 x i16> @load_zext_v8i16(<8 x i8>* %p) { 389; CHECK-LABEL: load_zext_v8i16: 390; CHECK: .functype load_zext_v8i16 (i32) -> (v128) 391; CHECK-NEXT: # %bb.0: 392; CHECK-NEXT: local.get 0 393; CHECK-NEXT: i16x8.load8x8_u 0 394; CHECK-NEXT: # fallthrough-return 395 %v = load <8 x i8>, <8 x i8>* %p 396 %v2 = zext <8 x i8> %v to <8 x i16> 397 ret <8 x i16> %v2 398} 399 400define <8 x i8> @load_ext_v8i16(<8 x i8>* %p) { 401; CHECK-LABEL: load_ext_v8i16: 402; CHECK: .functype load_ext_v8i16 (i32) -> (v128) 403; CHECK-NEXT: # %bb.0: 404; CHECK-NEXT: local.get 0 405; CHECK-NEXT: v128.load64_zero 0 406; CHECK-NEXT: # fallthrough-return 407 %v = load <8 x i8>, <8 x i8>* %p 408 ret <8 x i8> %v 409} 410 411define <8 x i16> @load_v8i16_with_folded_offset(<8 x i16>* %p) { 412; CHECK-LABEL: load_v8i16_with_folded_offset: 413; CHECK: .functype load_v8i16_with_folded_offset (i32) -> (v128) 414; CHECK-NEXT: # %bb.0: 415; CHECK-NEXT: local.get 0 416; CHECK-NEXT: v128.load 16 417; CHECK-NEXT: # fallthrough-return 418 %q = ptrtoint <8 x i16>* %p to i32 419 %r = add nuw i32 %q, 16 420 %s = inttoptr i32 %r to <8 x i16>* 421 %v = load <8 x i16>, <8 x i16>* %s 422 ret <8 x i16> %v 423} 424 425define <8 x i16> @load_splat_v8i16_with_folded_offset(i16* %p) { 426; CHECK-LABEL: load_splat_v8i16_with_folded_offset: 427; CHECK: .functype load_splat_v8i16_with_folded_offset (i32) -> (v128) 428; CHECK-NEXT: # %bb.0: 429; CHECK-NEXT: local.get 0 430; CHECK-NEXT: v128.load16_splat 16 431; CHECK-NEXT: # fallthrough-return 432 %q = ptrtoint i16* %p to i32 433 %r = add nuw i32 %q, 16 434 %s = inttoptr i32 %r to i16* 435 %e = load i16, i16* %s 436 %v1 = insertelement <8 x i16> undef, i16 %e, i32 0 437 %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer 438 ret <8 x i16> %v2 439} 440 441define <8 x i16> @load_sext_v8i16_with_folded_offset(<8 x i8>* %p) { 442; CHECK-LABEL: load_sext_v8i16_with_folded_offset: 443; 
CHECK: .functype load_sext_v8i16_with_folded_offset (i32) -> (v128) 444; CHECK-NEXT: # %bb.0: 445; CHECK-NEXT: local.get 0 446; CHECK-NEXT: i16x8.load8x8_s 16 447; CHECK-NEXT: # fallthrough-return 448 %q = ptrtoint <8 x i8>* %p to i32 449 %r = add nuw i32 %q, 16 450 %s = inttoptr i32 %r to <8 x i8>* 451 %v = load <8 x i8>, <8 x i8>* %s 452 %v2 = sext <8 x i8> %v to <8 x i16> 453 ret <8 x i16> %v2 454} 455 456define <8 x i16> @load_zext_v8i16_with_folded_offset(<8 x i8>* %p) { 457; CHECK-LABEL: load_zext_v8i16_with_folded_offset: 458; CHECK: .functype load_zext_v8i16_with_folded_offset (i32) -> (v128) 459; CHECK-NEXT: # %bb.0: 460; CHECK-NEXT: local.get 0 461; CHECK-NEXT: i16x8.load8x8_u 16 462; CHECK-NEXT: # fallthrough-return 463 %q = ptrtoint <8 x i8>* %p to i32 464 %r = add nuw i32 %q, 16 465 %s = inttoptr i32 %r to <8 x i8>* 466 %v = load <8 x i8>, <8 x i8>* %s 467 %v2 = zext <8 x i8> %v to <8 x i16> 468 ret <8 x i16> %v2 469} 470 471define <8 x i8> @load_ext_v8i16_with_folded_offset(<8 x i8>* %p) { 472; CHECK-LABEL: load_ext_v8i16_with_folded_offset: 473; CHECK: .functype load_ext_v8i16_with_folded_offset (i32) -> (v128) 474; CHECK-NEXT: # %bb.0: 475; CHECK-NEXT: local.get 0 476; CHECK-NEXT: v128.load64_zero 16 477; CHECK-NEXT: # fallthrough-return 478 %q = ptrtoint <8 x i8>* %p to i32 479 %r = add nuw i32 %q, 16 480 %s = inttoptr i32 %r to <8 x i8>* 481 %v = load <8 x i8>, <8 x i8>* %s 482 ret <8 x i8> %v 483} 484 485define <8 x i16> @load_v8i16_with_folded_gep_offset(<8 x i16>* %p) { 486; CHECK-LABEL: load_v8i16_with_folded_gep_offset: 487; CHECK: .functype load_v8i16_with_folded_gep_offset (i32) -> (v128) 488; CHECK-NEXT: # %bb.0: 489; CHECK-NEXT: local.get 0 490; CHECK-NEXT: v128.load 16 491; CHECK-NEXT: # fallthrough-return 492 %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1 493 %v = load <8 x i16>, <8 x i16>* %s 494 ret <8 x i16> %v 495} 496 497define <8 x i16> @load_splat_v8i16_with_folded_gep_offset(i16* %p) { 498; CHECK-LABEL: 
load_splat_v8i16_with_folded_gep_offset: 499; CHECK: .functype load_splat_v8i16_with_folded_gep_offset (i32) -> (v128) 500; CHECK-NEXT: # %bb.0: 501; CHECK-NEXT: local.get 0 502; CHECK-NEXT: v128.load16_splat 2 503; CHECK-NEXT: # fallthrough-return 504 %s = getelementptr inbounds i16, i16* %p, i32 1 505 %e = load i16, i16* %s 506 %v1 = insertelement <8 x i16> undef, i16 %e, i32 0 507 %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer 508 ret <8 x i16> %v2 509} 510 511define <8 x i16> @load_sext_v8i16_with_folded_gep_offset(<8 x i8>* %p) { 512; CHECK-LABEL: load_sext_v8i16_with_folded_gep_offset: 513; CHECK: .functype load_sext_v8i16_with_folded_gep_offset (i32) -> (v128) 514; CHECK-NEXT: # %bb.0: 515; CHECK-NEXT: local.get 0 516; CHECK-NEXT: i16x8.load8x8_s 8 517; CHECK-NEXT: # fallthrough-return 518 %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1 519 %v = load <8 x i8>, <8 x i8>* %s 520 %v2 = sext <8 x i8> %v to <8 x i16> 521 ret <8 x i16> %v2 522} 523 524define <8 x i16> @load_zext_v8i16_with_folded_gep_offset(<8 x i8>* %p) { 525; CHECK-LABEL: load_zext_v8i16_with_folded_gep_offset: 526; CHECK: .functype load_zext_v8i16_with_folded_gep_offset (i32) -> (v128) 527; CHECK-NEXT: # %bb.0: 528; CHECK-NEXT: local.get 0 529; CHECK-NEXT: i16x8.load8x8_u 8 530; CHECK-NEXT: # fallthrough-return 531 %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1 532 %v = load <8 x i8>, <8 x i8>* %s 533 %v2 = zext <8 x i8> %v to <8 x i16> 534 ret <8 x i16> %v2 535} 536 537define <8 x i8> @load_ext_v8i16_with_folded_gep_offset(<8 x i8>* %p) { 538; CHECK-LABEL: load_ext_v8i16_with_folded_gep_offset: 539; CHECK: .functype load_ext_v8i16_with_folded_gep_offset (i32) -> (v128) 540; CHECK-NEXT: # %bb.0: 541; CHECK-NEXT: local.get 0 542; CHECK-NEXT: v128.load64_zero 8 543; CHECK-NEXT: # fallthrough-return 544 %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1 545 %v = load <8 x i8>, <8 x i8>* %s 546 ret <8 x i8> %v 547} 548 549define <8 x i16> 
@load_v8i16_with_unfolded_gep_negative_offset(<8 x i16>* %p) { 550; CHECK-LABEL: load_v8i16_with_unfolded_gep_negative_offset: 551; CHECK: .functype load_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128) 552; CHECK-NEXT: # %bb.0: 553; CHECK-NEXT: local.get 0 554; CHECK-NEXT: i32.const -16 555; CHECK-NEXT: i32.add 556; CHECK-NEXT: v128.load 0 557; CHECK-NEXT: # fallthrough-return 558 %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1 559 %v = load <8 x i16>, <8 x i16>* %s 560 ret <8 x i16> %v 561} 562 563define <8 x i16> @load_splat_v8i16_with_unfolded_gep_negative_offset(i16* %p) { 564; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_negative_offset: 565; CHECK: .functype load_splat_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128) 566; CHECK-NEXT: # %bb.0: 567; CHECK-NEXT: local.get 0 568; CHECK-NEXT: i32.const -2 569; CHECK-NEXT: i32.add 570; CHECK-NEXT: v128.load16_splat 0 571; CHECK-NEXT: # fallthrough-return 572 %s = getelementptr inbounds i16, i16* %p, i32 -1 573 %e = load i16, i16* %s 574 %v1 = insertelement <8 x i16> undef, i16 %e, i32 0 575 %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer 576 ret <8 x i16> %v2 577} 578 579define <8 x i16> @load_sext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) { 580; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_negative_offset: 581; CHECK: .functype load_sext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128) 582; CHECK-NEXT: # %bb.0: 583; CHECK-NEXT: local.get 0 584; CHECK-NEXT: i32.const -8 585; CHECK-NEXT: i32.add 586; CHECK-NEXT: i16x8.load8x8_s 0 587; CHECK-NEXT: # fallthrough-return 588 %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1 589 %v = load <8 x i8>, <8 x i8>* %s 590 %v2 = sext <8 x i8> %v to <8 x i16> 591 ret <8 x i16> %v2 592} 593 594define <8 x i16> @load_zext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) { 595; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_negative_offset: 596; CHECK: .functype 
load_zext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128) 597; CHECK-NEXT: # %bb.0: 598; CHECK-NEXT: local.get 0 599; CHECK-NEXT: i32.const -8 600; CHECK-NEXT: i32.add 601; CHECK-NEXT: i16x8.load8x8_u 0 602; CHECK-NEXT: # fallthrough-return 603 %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1 604 %v = load <8 x i8>, <8 x i8>* %s 605 %v2 = zext <8 x i8> %v to <8 x i16> 606 ret <8 x i16> %v2 607} 608 609define <8 x i8> @load_ext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) { 610; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_negative_offset: 611; CHECK: .functype load_ext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128) 612; CHECK-NEXT: # %bb.0: 613; CHECK-NEXT: local.get 0 614; CHECK-NEXT: i32.const -8 615; CHECK-NEXT: i32.add 616; CHECK-NEXT: v128.load64_zero 0 617; CHECK-NEXT: # fallthrough-return 618 %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1 619 %v = load <8 x i8>, <8 x i8>* %s 620 ret <8 x i8> %v 621} 622 623define <8 x i16> @load_v8i16_with_unfolded_offset(<8 x i16>* %p) { 624; CHECK-LABEL: load_v8i16_with_unfolded_offset: 625; CHECK: .functype load_v8i16_with_unfolded_offset (i32) -> (v128) 626; CHECK-NEXT: # %bb.0: 627; CHECK-NEXT: local.get 0 628; CHECK-NEXT: i32.const 16 629; CHECK-NEXT: i32.add 630; CHECK-NEXT: v128.load 0 631; CHECK-NEXT: # fallthrough-return 632 %q = ptrtoint <8 x i16>* %p to i32 633 %r = add nsw i32 %q, 16 634 %s = inttoptr i32 %r to <8 x i16>* 635 %v = load <8 x i16>, <8 x i16>* %s 636 ret <8 x i16> %v 637} 638 639define <8 x i16> @load_splat_v8i16_with_unfolded_offset(i16* %p) { 640; CHECK-LABEL: load_splat_v8i16_with_unfolded_offset: 641; CHECK: .functype load_splat_v8i16_with_unfolded_offset (i32) -> (v128) 642; CHECK-NEXT: # %bb.0: 643; CHECK-NEXT: local.get 0 644; CHECK-NEXT: i32.const 16 645; CHECK-NEXT: i32.add 646; CHECK-NEXT: v128.load16_splat 0 647; CHECK-NEXT: # fallthrough-return 648 %q = ptrtoint i16* %p to i32 649 %r = add nsw i32 %q, 16 650 %s = inttoptr i32 %r to i16* 
651 %e = load i16, i16* %s 652 %v1 = insertelement <8 x i16> undef, i16 %e, i32 0 653 %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer 654 ret <8 x i16> %v2 655} 656 657define <8 x i16> @load_sext_v8i16_with_unfolded_offset(<8 x i8>* %p) { 658; CHECK-LABEL: load_sext_v8i16_with_unfolded_offset: 659; CHECK: .functype load_sext_v8i16_with_unfolded_offset (i32) -> (v128) 660; CHECK-NEXT: # %bb.0: 661; CHECK-NEXT: local.get 0 662; CHECK-NEXT: i32.const 16 663; CHECK-NEXT: i32.add 664; CHECK-NEXT: i16x8.load8x8_s 0 665; CHECK-NEXT: # fallthrough-return 666 %q = ptrtoint <8 x i8>* %p to i32 667 %r = add nsw i32 %q, 16 668 %s = inttoptr i32 %r to <8 x i8>* 669 %v = load <8 x i8>, <8 x i8>* %s 670 %v2 = sext <8 x i8> %v to <8 x i16> 671 ret <8 x i16> %v2 672} 673 674define <8 x i16> @load_zext_v8i16_with_unfolded_offset(<8 x i8>* %p) { 675; CHECK-LABEL: load_zext_v8i16_with_unfolded_offset: 676; CHECK: .functype load_zext_v8i16_with_unfolded_offset (i32) -> (v128) 677; CHECK-NEXT: # %bb.0: 678; CHECK-NEXT: local.get 0 679; CHECK-NEXT: i32.const 16 680; CHECK-NEXT: i32.add 681; CHECK-NEXT: i16x8.load8x8_u 0 682; CHECK-NEXT: # fallthrough-return 683 %q = ptrtoint <8 x i8>* %p to i32 684 %r = add nsw i32 %q, 16 685 %s = inttoptr i32 %r to <8 x i8>* 686 %v = load <8 x i8>, <8 x i8>* %s 687 %v2 = zext <8 x i8> %v to <8 x i16> 688 ret <8 x i16> %v2 689} 690 691define <8 x i8> @load_ext_v8i16_with_unfolded_offset(<8 x i8>* %p) { 692; CHECK-LABEL: load_ext_v8i16_with_unfolded_offset: 693; CHECK: .functype load_ext_v8i16_with_unfolded_offset (i32) -> (v128) 694; CHECK-NEXT: # %bb.0: 695; CHECK-NEXT: local.get 0 696; CHECK-NEXT: i32.const 16 697; CHECK-NEXT: i32.add 698; CHECK-NEXT: v128.load64_zero 0 699; CHECK-NEXT: # fallthrough-return 700 %q = ptrtoint <8 x i8>* %p to i32 701 %r = add nsw i32 %q, 16 702 %s = inttoptr i32 %r to <8 x i8>* 703 %v = load <8 x i8>, <8 x i8>* %s 704 ret <8 x i8> %v 705} 706 707define <8 x i16> 
@load_v8i16_with_unfolded_gep_offset(<8 x i16>* %p) { 708; CHECK-LABEL: load_v8i16_with_unfolded_gep_offset: 709; CHECK: .functype load_v8i16_with_unfolded_gep_offset (i32) -> (v128) 710; CHECK-NEXT: # %bb.0: 711; CHECK-NEXT: local.get 0 712; CHECK-NEXT: i32.const 16 713; CHECK-NEXT: i32.add 714; CHECK-NEXT: v128.load 0 715; CHECK-NEXT: # fallthrough-return 716 %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1 717 %v = load <8 x i16>, <8 x i16>* %s 718 ret <8 x i16> %v 719} 720 721define <8 x i16> @load_splat_v8i16_with_unfolded_gep_offset(i16* %p) { 722; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_offset: 723; CHECK: .functype load_splat_v8i16_with_unfolded_gep_offset (i32) -> (v128) 724; CHECK-NEXT: # %bb.0: 725; CHECK-NEXT: local.get 0 726; CHECK-NEXT: i32.const 2 727; CHECK-NEXT: i32.add 728; CHECK-NEXT: v128.load16_splat 0 729; CHECK-NEXT: # fallthrough-return 730 %s = getelementptr i16, i16* %p, i32 1 731 %e = load i16, i16* %s 732 %v1 = insertelement <8 x i16> undef, i16 %e, i32 0 733 %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer 734 ret <8 x i16> %v2 735} 736 737define <8 x i16> @load_sext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) { 738; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_offset: 739; CHECK: .functype load_sext_v8i16_with_unfolded_gep_offset (i32) -> (v128) 740; CHECK-NEXT: # %bb.0: 741; CHECK-NEXT: local.get 0 742; CHECK-NEXT: i32.const 8 743; CHECK-NEXT: i32.add 744; CHECK-NEXT: i16x8.load8x8_s 0 745; CHECK-NEXT: # fallthrough-return 746 %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1 747 %v = load <8 x i8>, <8 x i8>* %s 748 %v2 = sext <8 x i8> %v to <8 x i16> 749 ret <8 x i16> %v2 750} 751 752define <8 x i16> @load_zext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) { 753; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_offset: 754; CHECK: .functype load_zext_v8i16_with_unfolded_gep_offset (i32) -> (v128) 755; CHECK-NEXT: # %bb.0: 756; CHECK-NEXT: local.get 0 757; CHECK-NEXT: i32.const 8 758; CHECK-NEXT: 
i32.add 759; CHECK-NEXT: i16x8.load8x8_u 0 760; CHECK-NEXT: # fallthrough-return 761 %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1 762 %v = load <8 x i8>, <8 x i8>* %s 763 %v2 = zext <8 x i8> %v to <8 x i16> 764 ret <8 x i16> %v2 765} 766 767define <8 x i8> @load_ext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) { 768; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_offset: 769; CHECK: .functype load_ext_v8i16_with_unfolded_gep_offset (i32) -> (v128) 770; CHECK-NEXT: # %bb.0: 771; CHECK-NEXT: local.get 0 772; CHECK-NEXT: i32.const 8 773; CHECK-NEXT: i32.add 774; CHECK-NEXT: v128.load64_zero 0 775; CHECK-NEXT: # fallthrough-return 776 %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1 777 %v = load <8 x i8>, <8 x i8>* %s 778 ret <8 x i8> %v 779} 780 781define <8 x i16> @load_v8i16_from_numeric_address() { 782; CHECK-LABEL: load_v8i16_from_numeric_address: 783; CHECK: .functype load_v8i16_from_numeric_address () -> (v128) 784; CHECK-NEXT: # %bb.0: 785; CHECK-NEXT: i32.const 0 786; CHECK-NEXT: v128.load 32 787; CHECK-NEXT: # fallthrough-return 788 %s = inttoptr i32 32 to <8 x i16>* 789 %v = load <8 x i16>, <8 x i16>* %s 790 ret <8 x i16> %v 791} 792 793define <8 x i16> @load_splat_v8i16_from_numeric_address() { 794; CHECK-LABEL: load_splat_v8i16_from_numeric_address: 795; CHECK: .functype load_splat_v8i16_from_numeric_address () -> (v128) 796; CHECK-NEXT: # %bb.0: 797; CHECK-NEXT: i32.const 0 798; CHECK-NEXT: v128.load16_splat 32 799; CHECK-NEXT: # fallthrough-return 800 %s = inttoptr i32 32 to i16* 801 %e = load i16, i16* %s 802 %v1 = insertelement <8 x i16> undef, i16 %e, i32 0 803 %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer 804 ret <8 x i16> %v2 805} 806 807define <8 x i16> @load_sext_v8i16_from_numeric_address() { 808; CHECK-LABEL: load_sext_v8i16_from_numeric_address: 809; CHECK: .functype load_sext_v8i16_from_numeric_address () -> (v128) 810; CHECK-NEXT: # %bb.0: 811; CHECK-NEXT: i32.const 0 812; CHECK-NEXT: i16x8.load8x8_s 32 
813; CHECK-NEXT: # fallthrough-return 814 %s = inttoptr i32 32 to <8 x i8>* 815 %v = load <8 x i8>, <8 x i8>* %s 816 %v2 = sext <8 x i8> %v to <8 x i16> 817 ret <8 x i16> %v2 818} 819 820define <8 x i16> @load_zext_v8i16_from_numeric_address() { 821; CHECK-LABEL: load_zext_v8i16_from_numeric_address: 822; CHECK: .functype load_zext_v8i16_from_numeric_address () -> (v128) 823; CHECK-NEXT: # %bb.0: 824; CHECK-NEXT: i32.const 0 825; CHECK-NEXT: i16x8.load8x8_u 32 826; CHECK-NEXT: # fallthrough-return 827 %s = inttoptr i32 32 to <8 x i8>* 828 %v = load <8 x i8>, <8 x i8>* %s 829 %v2 = zext <8 x i8> %v to <8 x i16> 830 ret <8 x i16> %v2 831} 832 833define <8 x i8> @load_ext_v8i16_from_numeric_address() { 834; CHECK-LABEL: load_ext_v8i16_from_numeric_address: 835; CHECK: .functype load_ext_v8i16_from_numeric_address () -> (v128) 836; CHECK-NEXT: # %bb.0: 837; CHECK-NEXT: i32.const 0 838; CHECK-NEXT: v128.load64_zero 32 839; CHECK-NEXT: # fallthrough-return 840 %s = inttoptr i32 32 to <8 x i8>* 841 %v = load <8 x i8>, <8 x i8>* %s 842 ret <8 x i8> %v 843} 844 845@gv_v8i16 = global <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42> 846define <8 x i16> @load_v8i16_from_global_address() { 847; CHECK-LABEL: load_v8i16_from_global_address: 848; CHECK: .functype load_v8i16_from_global_address () -> (v128) 849; CHECK-NEXT: # %bb.0: 850; CHECK-NEXT: i32.const 0 851; CHECK-NEXT: v128.load gv_v8i16 852; CHECK-NEXT: # fallthrough-return 853 %v = load <8 x i16>, <8 x i16>* @gv_v8i16 854 ret <8 x i16> %v 855} 856 857@gv_i16 = global i16 42 858define <8 x i16> @load_splat_v8i16_from_global_address() { 859; CHECK-LABEL: load_splat_v8i16_from_global_address: 860; CHECK: .functype load_splat_v8i16_from_global_address () -> (v128) 861; CHECK-NEXT: # %bb.0: 862; CHECK-NEXT: i32.const 0 863; CHECK-NEXT: v128.load16_splat gv_i16 864; CHECK-NEXT: # fallthrough-return 865 %e = load i16, i16* @gv_i16 866 %v1 = insertelement <8 x i16> undef, i16 %e, i32 0 867 %v2 = 
shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer 868 ret <8 x i16> %v2 869} 870 871@gv_v8i8 = global <8 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42> 872define <8 x i16> @load_sext_v8i16_from_global_address() { 873; CHECK-LABEL: load_sext_v8i16_from_global_address: 874; CHECK: .functype load_sext_v8i16_from_global_address () -> (v128) 875; CHECK-NEXT: # %bb.0: 876; CHECK-NEXT: i32.const 0 877; CHECK-NEXT: i16x8.load8x8_s gv_v8i8 878; CHECK-NEXT: # fallthrough-return 879 %v = load <8 x i8>, <8 x i8>* @gv_v8i8 880 %v2 = sext <8 x i8> %v to <8 x i16> 881 ret <8 x i16> %v2 882} 883 884define <8 x i16> @load_zext_v8i16_from_global_address() { 885; CHECK-LABEL: load_zext_v8i16_from_global_address: 886; CHECK: .functype load_zext_v8i16_from_global_address () -> (v128) 887; CHECK-NEXT: # %bb.0: 888; CHECK-NEXT: i32.const 0 889; CHECK-NEXT: i16x8.load8x8_u gv_v8i8 890; CHECK-NEXT: # fallthrough-return 891 %v = load <8 x i8>, <8 x i8>* @gv_v8i8 892 %v2 = zext <8 x i8> %v to <8 x i16> 893 ret <8 x i16> %v2 894} 895 896define <8 x i8> @load_ext_v8i16_from_global_address() { 897; CHECK-LABEL: load_ext_v8i16_from_global_address: 898; CHECK: .functype load_ext_v8i16_from_global_address () -> (v128) 899; CHECK-NEXT: # %bb.0: 900; CHECK-NEXT: i32.const 0 901; CHECK-NEXT: v128.load64_zero gv_v8i8 902; CHECK-NEXT: # fallthrough-return 903 %v = load <8 x i8>, <8 x i8>* @gv_v8i8 904 ret <8 x i8> %v 905} 906 907 908define void @store_v8i16(<8 x i16> %v, <8 x i16>* %p) { 909; CHECK-LABEL: store_v8i16: 910; CHECK: .functype store_v8i16 (v128, i32) -> () 911; CHECK-NEXT: # %bb.0: 912; CHECK-NEXT: local.get 1 913; CHECK-NEXT: local.get 0 914; CHECK-NEXT: v128.store 0 915; CHECK-NEXT: # fallthrough-return 916 store <8 x i16> %v , <8 x i16>* %p 917 ret void 918} 919 920define void @store_narrowing_v8i16(<8 x i8> %v, <8 x i8>* %p) { 921; CHECK-LABEL: store_narrowing_v8i16: 922; CHECK: .functype store_narrowing_v8i16 (v128, i32) -> () 923; CHECK-NEXT: # 
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
  store <8 x i8> %v, <8 x i8>* %p
  ret void
}

define void @store_v8i16_with_folded_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_folded_offset:
; CHECK: .functype store_v8i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

define void @store_narrowing_v8i16_with_folded_offset(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_folded_offset:
; CHECK: .functype store_narrowing_v8i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  store <8 x i8> %v , <8 x i8>* %s
  ret void
}

define void @store_v8i16_with_folded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_folded_gep_offset:
; CHECK: .functype store_v8i16_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

define void @store_narrowing_v8i16_with_folded_gep_offset(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_folded_gep_offset:
; CHECK: .functype store_narrowing_v8i16_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  store <8 x i8> %v , <8 x i8>* %s
  ret void
}

define void @store_v8i16_with_unfolded_gep_negative_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_gep_negative_offset:
; CHECK: .functype store_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

define void @store_narrowing_v8i16_with_unfolded_gep_negative_offset(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_negative_offset:
; CHECK: .functype store_narrowing_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  store <8 x i8> %v , <8 x i8>* %s
  ret void
}

define void @store_v8i16_with_unfolded_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_offset:
; CHECK: .functype store_v8i16_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
%q = ptrtoint <8 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

define void @store_narrowing_v8i16_with_unfolded_offset(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_offset:
; CHECK: .functype store_narrowing_v8i16_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  store <8 x i8> %v , <8 x i8>* %s
  ret void
}

define void @store_v8i16_with_unfolded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_gep_offset:
; CHECK: .functype store_v8i16_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

define void @store_narrowing_v8i16_with_unfolded_gep_offset(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_offset:
; CHECK: .functype store_narrowing_v8i16_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  store <8 x i8> %v , <8 x i8>* %s
  ret void
}

define void @store_v8i16_to_numeric_address(<8 x i16> %v) {
; CHECK-LABEL: store_v8i16_to_numeric_address:
; CHECK: .functype store_v8i16_to_numeric_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 32
; CHECK-NEXT: # fallthrough-return
  %s = inttoptr i32 32 to <8 x i16>*
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

define void @store_narrowing_v8i16_to_numeric_address(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_to_numeric_address:
; CHECK: .functype store_narrowing_v8i16_to_numeric_address (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 32
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  store <8 x i8> %v , <8 x i8>* %s
  ret void
}

define void @store_v8i16_to_global_address(<8 x i16> %v) {
; CHECK-LABEL: store_v8i16_to_global_address:
; CHECK: .functype store_v8i16_to_global_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store gv_v8i16
; CHECK-NEXT: # fallthrough-return
  store <8 x i16> %v , <8 x i16>* @gv_v8i16
  ret void
}

define void @store_narrowing_v8i16_to_global_address(<8 x i8> %v) {
; CHECK-LABEL: store_narrowing_v8i16_to_global_address:
; CHECK: .functype store_narrowing_v8i16_to_global_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const gv_v8i8
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
  store <8 x i8> %v , <8 x i8>* @gv_v8i8
  ret void
}

; ==============================================================================
; 4 x i32
; ==============================================================================
define <4 x i32> @load_v4i32(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32:
; CHECK: .functype load_v4i32 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %v = load <4 x i32>, <4 x i32>* %p
  ret <4 x i32> %v
}

define <4 x i32> @load_splat_v4i32(i32* %addr) {
; CHECK-LABEL: load_splat_v4i32:
; CHECK: .functype load_splat_v4i32 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load32_splat 0
; CHECK-NEXT: # fallthrough-return
  %e = load i32, i32* %addr, align 4
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i32(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32:
; CHECK: .functype load_sext_v4i32 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.load16x4_s 0
; CHECK-NEXT: # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* %p
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i32(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32:
; CHECK: .functype load_zext_v4i32 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.load16x4_u 0
; CHECK-NEXT: # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* %p
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i16> @load_ext_v4i32(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32:
; CHECK: .functype load_ext_v4i32 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_zero 0
; CHECK-NEXT: # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* %p
  ret <4 x i16> %v
}

define <4 x i32> @load_v4i32_with_folded_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_folded_offset:
; CHECK: .functype load_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <4 x i32>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i32>*
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}

define <4 x i32> @load_splat_v4i32_with_folded_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_folded_offset:
; CHECK: .functype load_splat_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load32_splat 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to i32*
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i32_with_folded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_folded_offset:
; CHECK: .functype load_sext_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.load16x4_s 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i32_with_folded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_folded_offset:
; CHECK: .functype load_zext_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.load16x4_u 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
%r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i16> @load_ext_v4i32_with_folded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_folded_offset:
; CHECK: .functype load_ext_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_zero 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}

define <4 x i32> @load_v4i32_with_folded_gep_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_folded_gep_offset:
; CHECK: .functype load_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}

define <4 x i32> @load_splat_v4i32_with_folded_gep_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_folded_gep_offset:
; CHECK: .functype load_splat_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load32_splat 4
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds i32, i32* %p, i32 1
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_folded_gep_offset:
; CHECK: .functype load_sext_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.load16x4_s 8
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_folded_gep_offset:
; CHECK: .functype load_zext_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.load16x4_u 8
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i16> @load_ext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_folded_gep_offset:
; CHECK: .functype load_ext_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_zero 8
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}

define <4 x i32> @load_v4i32_with_unfolded_gep_negative_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_unfolded_gep_negative_offset:
; CHECK: .functype load_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}

define <4 x i32> @load_splat_v4i32_with_unfolded_gep_negative_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_negative_offset:
; CHECK: .functype load_splat_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -4
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load32_splat 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds i32, i32* %p, i32 -1
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_negative_offset:
; CHECK: .functype load_sext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32x4.load16x4_s 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_negative_offset:
; CHECK: .functype load_zext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32x4.load16x4_u 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i16> @load_ext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_negative_offset:
; CHECK: .functype load_ext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load64_zero 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}

define <4 x i32> @load_v4i32_with_unfolded_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_unfolded_offset:
; CHECK: .functype load_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <4 x i32>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i32>*
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}

define <4 x i32> @load_splat_v4i32_with_unfolded_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_unfolded_offset:
; CHECK: .functype load_splat_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load32_splat 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint i32* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to i32*
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_unfolded_offset:
; CHECK: .functype load_sext_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32x4.load16x4_s 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_unfolded_offset:
; CHECK: .functype load_zext_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32x4.load16x4_u 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i16> @load_ext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_unfolded_offset:
; CHECK: .functype load_ext_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load64_zero 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}

define <4 x i32> @load_v4i32_with_unfolded_gep_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_unfolded_gep_offset:
; CHECK: .functype load_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}

define <4 x i32> @load_splat_v4i32_with_unfolded_gep_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_offset:
; CHECK: .functype load_splat_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 4
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load32_splat 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr i32, i32* %p, i32 1
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_offset:
; CHECK: .functype load_sext_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32x4.load16x4_s 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_offset:
; CHECK: .functype load_zext_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32x4.load16x4_u 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i16> @load_ext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_offset:
; CHECK: .functype load_ext_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load64_zero 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}

define <4 x i32> @load_v4i32_from_numeric_address() {
; CHECK-LABEL: load_v4i32_from_numeric_address:
; CHECK: .functype load_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load 32
; CHECK-NEXT: # fallthrough-return
  %s = inttoptr i32 32 to <4 x i32>*
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}

define <4 x i32> @load_splat_v4i32_from_numeric_address() {
; CHECK-LABEL: load_splat_v4i32_from_numeric_address:
; CHECK: .functype load_splat_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load32_splat 32
; CHECK-NEXT: # fallthrough-return
  %s = inttoptr i32 32 to i32*
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i32_from_numeric_address() {
; CHECK-LABEL: load_sext_v4i32_from_numeric_address:
; CHECK: .functype load_sext_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i32x4.load16x4_s 32
; CHECK-NEXT: # fallthrough-return
  %s = inttoptr i32 32 to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i32_from_numeric_address() {
; CHECK-LABEL: load_zext_v4i32_from_numeric_address:
; CHECK: .functype load_zext_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i32x4.load16x4_u 32
; CHECK-NEXT: # fallthrough-return
  %s = inttoptr i32 32 to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i16> @load_ext_v4i32_from_numeric_address() {
; CHECK-LABEL: load_ext_v4i32_from_numeric_address:
; CHECK: .functype load_ext_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load64_zero 32
; CHECK-NEXT: # fallthrough-return
  %s = inttoptr i32 32 to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}

@gv_v4i32 = global <4 x i32> <i32 42, i32 42, i32 42, i32 42>
define <4 x i32> @load_v4i32_from_global_address() {
; CHECK-LABEL: load_v4i32_from_global_address:
; CHECK: .functype load_v4i32_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load gv_v4i32
; CHECK-NEXT: # fallthrough-return
  %v = load <4 x i32>, <4 x i32>* @gv_v4i32
  ret <4 x i32> %v
}

@gv_i32 = global i32 42
define <4 x i32> @load_splat_v4i32_from_global_address() {
; CHECK-LABEL: load_splat_v4i32_from_global_address:
; CHECK: .functype load_splat_v4i32_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load32_splat gv_i32
; CHECK-NEXT: # fallthrough-return
  %e = load i32, i32* @gv_i32
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

@gv_v4i16 = global <4 x i16> <i16 42, i16 42, i16 42, i16 42>
define <4 x i32> @load_sext_v4i32_from_global_address() {
; CHECK-LABEL: load_sext_v4i32_from_global_address:
; CHECK: .functype load_sext_v4i32_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i32x4.load16x4_s gv_v4i16
; CHECK-NEXT: # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* @gv_v4i16
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i32_from_global_address() {
; CHECK-LABEL: load_zext_v4i32_from_global_address:
; CHECK: .functype load_zext_v4i32_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i32x4.load16x4_u gv_v4i16
; CHECK-NEXT: # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* @gv_v4i16
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i16> @load_ext_v4i32_from_global_address() {
; CHECK-LABEL: load_ext_v4i32_from_global_address:
; CHECK: .functype load_ext_v4i32_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load64_zero gv_v4i16
; CHECK-NEXT: # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* @gv_v4i16
  ret <4 x i16> %v
}

define void @store_v4i32(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32:
; CHECK: .functype store_v4i32 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
  store <4 x i32> %v , <4 x i32>* %p
  ret void
}

define void @store_narrowing_v4i32(<4 x i16> %v, <4 x i16>* %p) {
; CHECK-LABEL: store_narrowing_v4i32:
; CHECK: .functype store_narrowing_v4i32 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
  store <4 x i16> %v , <4 x i16>* %p
  ret void
}

define void @store_v4i32_with_folded_offset(<4 x i32> %v, <4 x i32>* %p) {
@store_v4i32_with_folded_offset(<4 x i32> %v, <4 x i32>* %p) { 1719; CHECK-LABEL: store_v4i32_with_folded_offset: 1720; CHECK: .functype store_v4i32_with_folded_offset (v128, i32) -> () 1721; CHECK-NEXT: # %bb.0: 1722; CHECK-NEXT: local.get 1 1723; CHECK-NEXT: local.get 0 1724; CHECK-NEXT: v128.store 16 1725; CHECK-NEXT: # fallthrough-return 1726 %q = ptrtoint <4 x i32>* %p to i32 1727 %r = add nuw i32 %q, 16 1728 %s = inttoptr i32 %r to <4 x i32>* 1729 store <4 x i32> %v , <4 x i32>* %s 1730 ret void 1731} 1732 1733define void @store_narrowing_v4i32_with_folded_offset(<4 x i16> %v, <4 x i16>* %p) { 1734; CHECK-LABEL: store_narrowing_v4i32_with_folded_offset: 1735; CHECK: .functype store_narrowing_v4i32_with_folded_offset (v128, i32) -> () 1736; CHECK-NEXT: # %bb.0: 1737; CHECK-NEXT: local.get 1 1738; CHECK-NEXT: i32.const 16 1739; CHECK-NEXT: i32.add 1740; CHECK-NEXT: local.get 0 1741; CHECK-NEXT: v128.store64_lane 0, 0 1742; CHECK-NEXT: # fallthrough-return 1743 %q = ptrtoint <4 x i16>* %p to i32 1744 %r = add nuw i32 %q, 16 1745 %s = inttoptr i32 %r to <4 x i16>* 1746 store <4 x i16> %v , <4 x i16>* %s 1747 ret void 1748} 1749 1750define void @store_v4i32_with_folded_gep_offset(<4 x i32> %v, <4 x i32>* %p) { 1751; CHECK-LABEL: store_v4i32_with_folded_gep_offset: 1752; CHECK: .functype store_v4i32_with_folded_gep_offset (v128, i32) -> () 1753; CHECK-NEXT: # %bb.0: 1754; CHECK-NEXT: local.get 1 1755; CHECK-NEXT: local.get 0 1756; CHECK-NEXT: v128.store 16 1757; CHECK-NEXT: # fallthrough-return 1758 %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1 1759 store <4 x i32> %v , <4 x i32>* %s 1760 ret void 1761} 1762 1763define void @store_narrowing_v4i32_with_folded_gep_offset(<4 x i16> %v, <4 x i16>* %p) { 1764; CHECK-LABEL: store_narrowing_v4i32_with_folded_gep_offset: 1765; CHECK: .functype store_narrowing_v4i32_with_folded_gep_offset (v128, i32) -> () 1766; CHECK-NEXT: # %bb.0: 1767; CHECK-NEXT: local.get 1 1768; CHECK-NEXT: i32.const 8 1769; CHECK-NEXT: 
i32.add 1770; CHECK-NEXT: local.get 0 1771; CHECK-NEXT: v128.store64_lane 0, 0 1772; CHECK-NEXT: # fallthrough-return 1773 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1 1774 store <4 x i16> %v , <4 x i16>* %s 1775 ret void 1776} 1777 1778define void @store_v4i32_with_unfolded_gep_negative_offset(<4 x i32> %v, <4 x i32>* %p) { 1779; CHECK-LABEL: store_v4i32_with_unfolded_gep_negative_offset: 1780; CHECK: .functype store_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> () 1781; CHECK-NEXT: # %bb.0: 1782; CHECK-NEXT: local.get 1 1783; CHECK-NEXT: i32.const -16 1784; CHECK-NEXT: i32.add 1785; CHECK-NEXT: local.get 0 1786; CHECK-NEXT: v128.store 0 1787; CHECK-NEXT: # fallthrough-return 1788 %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1 1789 store <4 x i32> %v , <4 x i32>* %s 1790 ret void 1791} 1792 1793define void @store_narrowing_v4i32_with_unfolded_gep_negative_offset(<4 x i16> %v, <4 x i16>* %p) { 1794; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_negative_offset: 1795; CHECK: .functype store_narrowing_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> () 1796; CHECK-NEXT: # %bb.0: 1797; CHECK-NEXT: local.get 1 1798; CHECK-NEXT: i32.const -8 1799; CHECK-NEXT: i32.add 1800; CHECK-NEXT: local.get 0 1801; CHECK-NEXT: v128.store64_lane 0, 0 1802; CHECK-NEXT: # fallthrough-return 1803 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1 1804 store <4 x i16> %v , <4 x i16>* %s 1805 ret void 1806} 1807 1808define void @store_v4i32_with_unfolded_offset(<4 x i32> %v, <4 x i32>* %p) { 1809; CHECK-LABEL: store_v4i32_with_unfolded_offset: 1810; CHECK: .functype store_v4i32_with_unfolded_offset (v128, i32) -> () 1811; CHECK-NEXT: # %bb.0: 1812; CHECK-NEXT: local.get 1 1813; CHECK-NEXT: i32.const 16 1814; CHECK-NEXT: i32.add 1815; CHECK-NEXT: local.get 0 1816; CHECK-NEXT: v128.store 0 1817; CHECK-NEXT: # fallthrough-return 1818 %q = ptrtoint <4 x i32>* %p to i32 1819 %r = add nsw i32 %q, 16 1820 %s = inttoptr i32 %r to <4 x 
i32>* 1821 store <4 x i32> %v , <4 x i32>* %s 1822 ret void 1823} 1824 1825define void @store_narrowing_v4i32_with_unfolded_offset(<4 x i16> %v, <4 x i16>* %p) { 1826; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_offset: 1827; CHECK: .functype store_narrowing_v4i32_with_unfolded_offset (v128, i32) -> () 1828; CHECK-NEXT: # %bb.0: 1829; CHECK-NEXT: local.get 1 1830; CHECK-NEXT: i32.const 16 1831; CHECK-NEXT: i32.add 1832; CHECK-NEXT: local.get 0 1833; CHECK-NEXT: v128.store64_lane 0, 0 1834; CHECK-NEXT: # fallthrough-return 1835 %q = ptrtoint <4 x i16>* %p to i32 1836 %r = add nsw i32 %q, 16 1837 %s = inttoptr i32 %r to <4 x i16>* 1838 store <4 x i16> %v , <4 x i16>* %s 1839 ret void 1840} 1841 1842define void @store_v4i32_with_unfolded_gep_offset(<4 x i32> %v, <4 x i32>* %p) { 1843; CHECK-LABEL: store_v4i32_with_unfolded_gep_offset: 1844; CHECK: .functype store_v4i32_with_unfolded_gep_offset (v128, i32) -> () 1845; CHECK-NEXT: # %bb.0: 1846; CHECK-NEXT: local.get 1 1847; CHECK-NEXT: i32.const 16 1848; CHECK-NEXT: i32.add 1849; CHECK-NEXT: local.get 0 1850; CHECK-NEXT: v128.store 0 1851; CHECK-NEXT: # fallthrough-return 1852 %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1 1853 store <4 x i32> %v , <4 x i32>* %s 1854 ret void 1855} 1856 1857define void @store_narrowing_v4i32_with_unfolded_gep_offset(<4 x i16> %v, <4 x i16>* %p) { 1858; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_offset: 1859; CHECK: .functype store_narrowing_v4i32_with_unfolded_gep_offset (v128, i32) -> () 1860; CHECK-NEXT: # %bb.0: 1861; CHECK-NEXT: local.get 1 1862; CHECK-NEXT: i32.const 8 1863; CHECK-NEXT: i32.add 1864; CHECK-NEXT: local.get 0 1865; CHECK-NEXT: v128.store64_lane 0, 0 1866; CHECK-NEXT: # fallthrough-return 1867 %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1 1868 store <4 x i16> %v , <4 x i16>* %s 1869 ret void 1870} 1871 1872define void @store_v4i32_to_numeric_address(<4 x i32> %v) { 1873; CHECK-LABEL: store_v4i32_to_numeric_address: 1874; CHECK: .functype 
store_v4i32_to_numeric_address (v128) -> () 1875; CHECK-NEXT: # %bb.0: 1876; CHECK-NEXT: i32.const 0 1877; CHECK-NEXT: local.get 0 1878; CHECK-NEXT: v128.store 32 1879; CHECK-NEXT: # fallthrough-return 1880 %s = inttoptr i32 32 to <4 x i32>* 1881 store <4 x i32> %v , <4 x i32>* %s 1882 ret void 1883} 1884 1885define void @store_narrowing_v4i32_to_numeric_address(<4 x i16> %v) { 1886; CHECK-LABEL: store_narrowing_v4i32_to_numeric_address: 1887; CHECK: .functype store_narrowing_v4i32_to_numeric_address (v128) -> () 1888; CHECK-NEXT: # %bb.0: 1889; CHECK-NEXT: i32.const 32 1890; CHECK-NEXT: local.get 0 1891; CHECK-NEXT: v128.store64_lane 0, 0 1892; CHECK-NEXT: # fallthrough-return 1893 %s = inttoptr i32 32 to <4 x i16>* 1894 store <4 x i16> %v , <4 x i16>* %s 1895 ret void 1896} 1897 1898define void @store_v4i32_to_global_address(<4 x i32> %v) { 1899; CHECK-LABEL: store_v4i32_to_global_address: 1900; CHECK: .functype store_v4i32_to_global_address (v128) -> () 1901; CHECK-NEXT: # %bb.0: 1902; CHECK-NEXT: i32.const 0 1903; CHECK-NEXT: local.get 0 1904; CHECK-NEXT: v128.store gv_v4i32 1905; CHECK-NEXT: # fallthrough-return 1906 store <4 x i32> %v , <4 x i32>* @gv_v4i32 1907 ret void 1908} 1909 1910define void @store_narrowing_v4i32_to_global_address(<4 x i16> %v) { 1911; CHECK-LABEL: store_narrowing_v4i32_to_global_address: 1912; CHECK: .functype store_narrowing_v4i32_to_global_address (v128) -> () 1913; CHECK-NEXT: # %bb.0: 1914; CHECK-NEXT: i32.const gv_v4i16 1915; CHECK-NEXT: local.get 0 1916; CHECK-NEXT: v128.store64_lane 0, 0 1917; CHECK-NEXT: # fallthrough-return 1918 store <4 x i16> %v , <4 x i16>* @gv_v4i16 1919 ret void 1920} 1921 1922; ============================================================================== 1923; 2 x i64 1924; ============================================================================== 1925define <2 x i64> @load_v2i64(<2 x i64>* %p) { 1926; CHECK-LABEL: load_v2i64: 1927; CHECK: .functype load_v2i64 (i32) -> (v128) 1928; CHECK-NEXT: # 
%bb.0: 1929; CHECK-NEXT: local.get 0 1930; CHECK-NEXT: v128.load 0 1931; CHECK-NEXT: # fallthrough-return 1932 %v = load <2 x i64>, <2 x i64>* %p 1933 ret <2 x i64> %v 1934} 1935 1936define <2 x i64> @load_splat_v2i64(i64* %p) { 1937; CHECK-LABEL: load_splat_v2i64: 1938; CHECK: .functype load_splat_v2i64 (i32) -> (v128) 1939; CHECK-NEXT: # %bb.0: 1940; CHECK-NEXT: local.get 0 1941; CHECK-NEXT: v128.load64_splat 0 1942; CHECK-NEXT: # fallthrough-return 1943 %e = load i64, i64* %p 1944 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 1945 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 1946 ret <2 x i64> %v2 1947} 1948 1949define <2 x i64> @load_sext_v2i64(<2 x i32>* %p) { 1950; CHECK-LABEL: load_sext_v2i64: 1951; CHECK: .functype load_sext_v2i64 (i32) -> (v128) 1952; CHECK-NEXT: # %bb.0: 1953; CHECK-NEXT: local.get 0 1954; CHECK-NEXT: i64x2.load32x2_s 0 1955; CHECK-NEXT: # fallthrough-return 1956 %v = load <2 x i32>, <2 x i32>* %p 1957 %v2 = sext <2 x i32> %v to <2 x i64> 1958 ret <2 x i64> %v2 1959} 1960 1961define <2 x i64> @load_zext_v2i64(<2 x i32>* %p) { 1962; CHECK-LABEL: load_zext_v2i64: 1963; CHECK: .functype load_zext_v2i64 (i32) -> (v128) 1964; CHECK-NEXT: # %bb.0: 1965; CHECK-NEXT: local.get 0 1966; CHECK-NEXT: i64x2.load32x2_u 0 1967; CHECK-NEXT: # fallthrough-return 1968 %v = load <2 x i32>, <2 x i32>* %p 1969 %v2 = zext <2 x i32> %v to <2 x i64> 1970 ret <2 x i64> %v2 1971} 1972 1973define <2 x i32> @load_ext_v2i64(<2 x i32>* %p) { 1974; CHECK-LABEL: load_ext_v2i64: 1975; CHECK: .functype load_ext_v2i64 (i32) -> (v128) 1976; CHECK-NEXT: # %bb.0: 1977; CHECK-NEXT: local.get 0 1978; CHECK-NEXT: v128.load64_zero 0 1979; CHECK-NEXT: # fallthrough-return 1980 %v = load <2 x i32>, <2 x i32>* %p 1981 ret <2 x i32> %v 1982} 1983 1984define <2 x i64> @load_v2i64_with_folded_offset(<2 x i64>* %p) { 1985; CHECK-LABEL: load_v2i64_with_folded_offset: 1986; CHECK: .functype load_v2i64_with_folded_offset (i32) -> (v128) 1987; 
CHECK-NEXT: # %bb.0: 1988; CHECK-NEXT: local.get 0 1989; CHECK-NEXT: v128.load 16 1990; CHECK-NEXT: # fallthrough-return 1991 %q = ptrtoint <2 x i64>* %p to i32 1992 %r = add nuw i32 %q, 16 1993 %s = inttoptr i32 %r to <2 x i64>* 1994 %v = load <2 x i64>, <2 x i64>* %s 1995 ret <2 x i64> %v 1996} 1997 1998define <2 x i64> @load_splat_v2i64_with_folded_offset(i64* %p) { 1999; CHECK-LABEL: load_splat_v2i64_with_folded_offset: 2000; CHECK: .functype load_splat_v2i64_with_folded_offset (i32) -> (v128) 2001; CHECK-NEXT: # %bb.0: 2002; CHECK-NEXT: local.get 0 2003; CHECK-NEXT: v128.load64_splat 16 2004; CHECK-NEXT: # fallthrough-return 2005 %q = ptrtoint i64* %p to i32 2006 %r = add nuw i32 %q, 16 2007 %s = inttoptr i32 %r to i64* 2008 %e = load i64, i64* %s 2009 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2010 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2011 ret <2 x i64> %v2 2012} 2013 2014define <2 x i64> @load_sext_v2i64_with_folded_offset(<2 x i32>* %p) { 2015; CHECK-LABEL: load_sext_v2i64_with_folded_offset: 2016; CHECK: .functype load_sext_v2i64_with_folded_offset (i32) -> (v128) 2017; CHECK-NEXT: # %bb.0: 2018; CHECK-NEXT: local.get 0 2019; CHECK-NEXT: i64x2.load32x2_s 16 2020; CHECK-NEXT: # fallthrough-return 2021 %q = ptrtoint <2 x i32>* %p to i32 2022 %r = add nuw i32 %q, 16 2023 %s = inttoptr i32 %r to <2 x i32>* 2024 %v = load <2 x i32>, <2 x i32>* %s 2025 %v2 = sext <2 x i32> %v to <2 x i64> 2026 ret <2 x i64> %v2 2027} 2028 2029define <2 x i64> @load_zext_v2i64_with_folded_offset(<2 x i32>* %p) { 2030; CHECK-LABEL: load_zext_v2i64_with_folded_offset: 2031; CHECK: .functype load_zext_v2i64_with_folded_offset (i32) -> (v128) 2032; CHECK-NEXT: # %bb.0: 2033; CHECK-NEXT: local.get 0 2034; CHECK-NEXT: i64x2.load32x2_u 16 2035; CHECK-NEXT: # fallthrough-return 2036 %q = ptrtoint <2 x i32>* %p to i32 2037 %r = add nuw i32 %q, 16 2038 %s = inttoptr i32 %r to <2 x i32>* 2039 %v = load <2 x i32>, <2 x i32>* %s 2040 %v2 = zext 
<2 x i32> %v to <2 x i64> 2041 ret <2 x i64> %v2 2042} 2043 2044define <2 x i32> @load_ext_v2i64_with_folded_offset(<2 x i32>* %p) { 2045; CHECK-LABEL: load_ext_v2i64_with_folded_offset: 2046; CHECK: .functype load_ext_v2i64_with_folded_offset (i32) -> (v128) 2047; CHECK-NEXT: # %bb.0: 2048; CHECK-NEXT: local.get 0 2049; CHECK-NEXT: v128.load64_zero 16 2050; CHECK-NEXT: # fallthrough-return 2051 %q = ptrtoint <2 x i32>* %p to i32 2052 %r = add nuw i32 %q, 16 2053 %s = inttoptr i32 %r to <2 x i32>* 2054 %v = load <2 x i32>, <2 x i32>* %s 2055 ret <2 x i32> %v 2056} 2057 2058define <2 x i64> @load_v2i64_with_folded_gep_offset(<2 x i64>* %p) { 2059; CHECK-LABEL: load_v2i64_with_folded_gep_offset: 2060; CHECK: .functype load_v2i64_with_folded_gep_offset (i32) -> (v128) 2061; CHECK-NEXT: # %bb.0: 2062; CHECK-NEXT: local.get 0 2063; CHECK-NEXT: v128.load 16 2064; CHECK-NEXT: # fallthrough-return 2065 %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1 2066 %v = load <2 x i64>, <2 x i64>* %s 2067 ret <2 x i64> %v 2068} 2069 2070define <2 x i64> @load_splat_v2i64_with_folded_gep_offset(i64* %p) { 2071; CHECK-LABEL: load_splat_v2i64_with_folded_gep_offset: 2072; CHECK: .functype load_splat_v2i64_with_folded_gep_offset (i32) -> (v128) 2073; CHECK-NEXT: # %bb.0: 2074; CHECK-NEXT: local.get 0 2075; CHECK-NEXT: v128.load64_splat 8 2076; CHECK-NEXT: # fallthrough-return 2077 %s = getelementptr inbounds i64, i64* %p, i32 1 2078 %e = load i64, i64* %s 2079 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2080 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2081 ret <2 x i64> %v2 2082} 2083 2084define <2 x i64> @load_sext_v2i64_with_folded_gep_offset(<2 x i32>* %p) { 2085; CHECK-LABEL: load_sext_v2i64_with_folded_gep_offset: 2086; CHECK: .functype load_sext_v2i64_with_folded_gep_offset (i32) -> (v128) 2087; CHECK-NEXT: # %bb.0: 2088; CHECK-NEXT: local.get 0 2089; CHECK-NEXT: i64x2.load32x2_s 8 2090; CHECK-NEXT: # fallthrough-return 2091 %s = 
getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1 2092 %v = load <2 x i32>, <2 x i32>* %s 2093 %v2 = sext <2 x i32> %v to <2 x i64> 2094 ret <2 x i64> %v2 2095} 2096 2097define <2 x i64> @load_zext_v2i64_with_folded_gep_offset(<2 x i32>* %p) { 2098; CHECK-LABEL: load_zext_v2i64_with_folded_gep_offset: 2099; CHECK: .functype load_zext_v2i64_with_folded_gep_offset (i32) -> (v128) 2100; CHECK-NEXT: # %bb.0: 2101; CHECK-NEXT: local.get 0 2102; CHECK-NEXT: i64x2.load32x2_u 8 2103; CHECK-NEXT: # fallthrough-return 2104 %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1 2105 %v = load <2 x i32>, <2 x i32>* %s 2106 %v2 = zext <2 x i32> %v to <2 x i64> 2107 ret <2 x i64> %v2 2108} 2109 2110define <2 x i32> @load_ext_v2i64_with_folded_gep_offset(<2 x i32>* %p) { 2111; CHECK-LABEL: load_ext_v2i64_with_folded_gep_offset: 2112; CHECK: .functype load_ext_v2i64_with_folded_gep_offset (i32) -> (v128) 2113; CHECK-NEXT: # %bb.0: 2114; CHECK-NEXT: local.get 0 2115; CHECK-NEXT: v128.load64_zero 8 2116; CHECK-NEXT: # fallthrough-return 2117 %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1 2118 %v = load <2 x i32>, <2 x i32>* %s 2119 ret <2 x i32> %v 2120} 2121 2122define <2 x i64> @load_v2i64_with_unfolded_gep_negative_offset(<2 x i64>* %p) { 2123; CHECK-LABEL: load_v2i64_with_unfolded_gep_negative_offset: 2124; CHECK: .functype load_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) 2125; CHECK-NEXT: # %bb.0: 2126; CHECK-NEXT: local.get 0 2127; CHECK-NEXT: i32.const -16 2128; CHECK-NEXT: i32.add 2129; CHECK-NEXT: v128.load 0 2130; CHECK-NEXT: # fallthrough-return 2131 %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1 2132 %v = load <2 x i64>, <2 x i64>* %s 2133 ret <2 x i64> %v 2134} 2135 2136define <2 x i64> @load_splat_v2i64_with_unfolded_gep_negative_offset(i64* %p) { 2137; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_negative_offset: 2138; CHECK: .functype load_splat_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) 2139; 
CHECK-NEXT: # %bb.0: 2140; CHECK-NEXT: local.get 0 2141; CHECK-NEXT: i32.const -8 2142; CHECK-NEXT: i32.add 2143; CHECK-NEXT: v128.load64_splat 0 2144; CHECK-NEXT: # fallthrough-return 2145 %s = getelementptr inbounds i64, i64* %p, i32 -1 2146 %e = load i64, i64* %s 2147 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2148 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2149 ret <2 x i64> %v2 2150} 2151 2152define <2 x i64> @load_sext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) { 2153; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_negative_offset: 2154; CHECK: .functype load_sext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) 2155; CHECK-NEXT: # %bb.0: 2156; CHECK-NEXT: local.get 0 2157; CHECK-NEXT: i32.const -8 2158; CHECK-NEXT: i32.add 2159; CHECK-NEXT: i64x2.load32x2_s 0 2160; CHECK-NEXT: # fallthrough-return 2161 %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1 2162 %v = load <2 x i32>, <2 x i32>* %s 2163 %v2 = sext <2 x i32> %v to <2 x i64> 2164 ret <2 x i64> %v2 2165} 2166 2167define <2 x i64> @load_zext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) { 2168; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_negative_offset: 2169; CHECK: .functype load_zext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) 2170; CHECK-NEXT: # %bb.0: 2171; CHECK-NEXT: local.get 0 2172; CHECK-NEXT: i32.const -8 2173; CHECK-NEXT: i32.add 2174; CHECK-NEXT: i64x2.load32x2_u 0 2175; CHECK-NEXT: # fallthrough-return 2176 %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1 2177 %v = load <2 x i32>, <2 x i32>* %s 2178 %v2 = zext <2 x i32> %v to <2 x i64> 2179 ret <2 x i64> %v2 2180} 2181 2182define <2 x i32> @load_ext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) { 2183; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_negative_offset: 2184; CHECK: .functype load_ext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) 2185; CHECK-NEXT: # %bb.0: 2186; CHECK-NEXT: local.get 0 2187; CHECK-NEXT: 
i32.const -8 2188; CHECK-NEXT: i32.add 2189; CHECK-NEXT: v128.load64_zero 0 2190; CHECK-NEXT: # fallthrough-return 2191 %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1 2192 %v = load <2 x i32>, <2 x i32>* %s 2193 ret <2 x i32> %v 2194} 2195 2196define <2 x i64> @load_v2i64_with_unfolded_offset(<2 x i64>* %p) { 2197; CHECK-LABEL: load_v2i64_with_unfolded_offset: 2198; CHECK: .functype load_v2i64_with_unfolded_offset (i32) -> (v128) 2199; CHECK-NEXT: # %bb.0: 2200; CHECK-NEXT: local.get 0 2201; CHECK-NEXT: i32.const 16 2202; CHECK-NEXT: i32.add 2203; CHECK-NEXT: v128.load 0 2204; CHECK-NEXT: # fallthrough-return 2205 %q = ptrtoint <2 x i64>* %p to i32 2206 %r = add nsw i32 %q, 16 2207 %s = inttoptr i32 %r to <2 x i64>* 2208 %v = load <2 x i64>, <2 x i64>* %s 2209 ret <2 x i64> %v 2210} 2211 2212define <2 x i64> @load_splat_v2i64_with_unfolded_offset(i64* %p) { 2213; CHECK-LABEL: load_splat_v2i64_with_unfolded_offset: 2214; CHECK: .functype load_splat_v2i64_with_unfolded_offset (i32) -> (v128) 2215; CHECK-NEXT: # %bb.0: 2216; CHECK-NEXT: local.get 0 2217; CHECK-NEXT: i32.const 16 2218; CHECK-NEXT: i32.add 2219; CHECK-NEXT: v128.load64_splat 0 2220; CHECK-NEXT: # fallthrough-return 2221 %q = ptrtoint i64* %p to i32 2222 %r = add nsw i32 %q, 16 2223 %s = inttoptr i32 %r to i64* 2224 %e = load i64, i64* %s 2225 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2226 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2227 ret <2 x i64> %v2 2228} 2229 2230define <2 x i64> @load_sext_v2i64_with_unfolded_offset(<2 x i32>* %p) { 2231; CHECK-LABEL: load_sext_v2i64_with_unfolded_offset: 2232; CHECK: .functype load_sext_v2i64_with_unfolded_offset (i32) -> (v128) 2233; CHECK-NEXT: # %bb.0: 2234; CHECK-NEXT: local.get 0 2235; CHECK-NEXT: i32.const 16 2236; CHECK-NEXT: i32.add 2237; CHECK-NEXT: i64x2.load32x2_s 0 2238; CHECK-NEXT: # fallthrough-return 2239 %q = ptrtoint <2 x i32>* %p to i32 2240 %r = add nsw i32 %q, 16 2241 %s = inttoptr i32 
%r to <2 x i32>* 2242 %v = load <2 x i32>, <2 x i32>* %s 2243 %v2 = sext <2 x i32> %v to <2 x i64> 2244 ret <2 x i64> %v2 2245} 2246 2247define <2 x i64> @load_zext_v2i64_with_unfolded_offset(<2 x i32>* %p) { 2248; CHECK-LABEL: load_zext_v2i64_with_unfolded_offset: 2249; CHECK: .functype load_zext_v2i64_with_unfolded_offset (i32) -> (v128) 2250; CHECK-NEXT: # %bb.0: 2251; CHECK-NEXT: local.get 0 2252; CHECK-NEXT: i32.const 16 2253; CHECK-NEXT: i32.add 2254; CHECK-NEXT: i64x2.load32x2_u 0 2255; CHECK-NEXT: # fallthrough-return 2256 %q = ptrtoint <2 x i32>* %p to i32 2257 %r = add nsw i32 %q, 16 2258 %s = inttoptr i32 %r to <2 x i32>* 2259 %v = load <2 x i32>, <2 x i32>* %s 2260 %v2 = zext <2 x i32> %v to <2 x i64> 2261 ret <2 x i64> %v2 2262} 2263 2264define <2 x i32> @load_ext_v2i64_with_unfolded_offset(<2 x i32>* %p) { 2265; CHECK-LABEL: load_ext_v2i64_with_unfolded_offset: 2266; CHECK: .functype load_ext_v2i64_with_unfolded_offset (i32) -> (v128) 2267; CHECK-NEXT: # %bb.0: 2268; CHECK-NEXT: local.get 0 2269; CHECK-NEXT: i32.const 16 2270; CHECK-NEXT: i32.add 2271; CHECK-NEXT: v128.load64_zero 0 2272; CHECK-NEXT: # fallthrough-return 2273 %q = ptrtoint <2 x i32>* %p to i32 2274 %r = add nsw i32 %q, 16 2275 %s = inttoptr i32 %r to <2 x i32>* 2276 %v = load <2 x i32>, <2 x i32>* %s 2277 ret <2 x i32> %v 2278} 2279 2280define <2 x i64> @load_v2i64_with_unfolded_gep_offset(<2 x i64>* %p) { 2281; CHECK-LABEL: load_v2i64_with_unfolded_gep_offset: 2282; CHECK: .functype load_v2i64_with_unfolded_gep_offset (i32) -> (v128) 2283; CHECK-NEXT: # %bb.0: 2284; CHECK-NEXT: local.get 0 2285; CHECK-NEXT: i32.const 16 2286; CHECK-NEXT: i32.add 2287; CHECK-NEXT: v128.load 0 2288; CHECK-NEXT: # fallthrough-return 2289 %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1 2290 %v = load <2 x i64>, <2 x i64>* %s 2291 ret <2 x i64> %v 2292} 2293 2294define <2 x i64> @load_splat_v2i64_with_unfolded_gep_offset(i64* %p) { 2295; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_offset: 2296; 
CHECK: .functype load_splat_v2i64_with_unfolded_gep_offset (i32) -> (v128) 2297; CHECK-NEXT: # %bb.0: 2298; CHECK-NEXT: local.get 0 2299; CHECK-NEXT: i32.const 8 2300; CHECK-NEXT: i32.add 2301; CHECK-NEXT: v128.load64_splat 0 2302; CHECK-NEXT: # fallthrough-return 2303 %s = getelementptr i64, i64* %p, i32 1 2304 %e = load i64, i64* %s 2305 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2306 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2307 ret <2 x i64> %v2 2308} 2309 2310define <2 x i64> @load_sext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) { 2311; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_offset: 2312; CHECK: .functype load_sext_v2i64_with_unfolded_gep_offset (i32) -> (v128) 2313; CHECK-NEXT: # %bb.0: 2314; CHECK-NEXT: local.get 0 2315; CHECK-NEXT: i32.const 8 2316; CHECK-NEXT: i32.add 2317; CHECK-NEXT: i64x2.load32x2_s 0 2318; CHECK-NEXT: # fallthrough-return 2319 %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1 2320 %v = load <2 x i32>, <2 x i32>* %s 2321 %v2 = sext <2 x i32> %v to <2 x i64> 2322 ret <2 x i64> %v2 2323} 2324 2325define <2 x i64> @load_zext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) { 2326; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_offset: 2327; CHECK: .functype load_zext_v2i64_with_unfolded_gep_offset (i32) -> (v128) 2328; CHECK-NEXT: # %bb.0: 2329; CHECK-NEXT: local.get 0 2330; CHECK-NEXT: i32.const 8 2331; CHECK-NEXT: i32.add 2332; CHECK-NEXT: i64x2.load32x2_u 0 2333; CHECK-NEXT: # fallthrough-return 2334 %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1 2335 %v = load <2 x i32>, <2 x i32>* %s 2336 %v2 = zext <2 x i32> %v to <2 x i64> 2337 ret <2 x i64> %v2 2338} 2339 2340define <2 x i32> @load_ext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) { 2341; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_offset: 2342; CHECK: .functype load_ext_v2i64_with_unfolded_gep_offset (i32) -> (v128) 2343; CHECK-NEXT: # %bb.0: 2344; CHECK-NEXT: local.get 0 2345; CHECK-NEXT: i32.const 8 2346; CHECK-NEXT: 
i32.add 2347; CHECK-NEXT: v128.load64_zero 0 2348; CHECK-NEXT: # fallthrough-return 2349 %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1 2350 %v = load <2 x i32>, <2 x i32>* %s 2351 ret <2 x i32> %v 2352} 2353 2354define <2 x i64> @load_v2i64_from_numeric_address() { 2355; CHECK-LABEL: load_v2i64_from_numeric_address: 2356; CHECK: .functype load_v2i64_from_numeric_address () -> (v128) 2357; CHECK-NEXT: # %bb.0: 2358; CHECK-NEXT: i32.const 0 2359; CHECK-NEXT: v128.load 32 2360; CHECK-NEXT: # fallthrough-return 2361 %s = inttoptr i32 32 to <2 x i64>* 2362 %v = load <2 x i64>, <2 x i64>* %s 2363 ret <2 x i64> %v 2364} 2365 2366define <2 x i64> @load_splat_v2i64_from_numeric_address() { 2367; CHECK-LABEL: load_splat_v2i64_from_numeric_address: 2368; CHECK: .functype load_splat_v2i64_from_numeric_address () -> (v128) 2369; CHECK-NEXT: # %bb.0: 2370; CHECK-NEXT: i32.const 0 2371; CHECK-NEXT: v128.load64_splat 32 2372; CHECK-NEXT: # fallthrough-return 2373 %s = inttoptr i32 32 to i64* 2374 %e = load i64, i64* %s 2375 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2376 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2377 ret <2 x i64> %v2 2378} 2379 2380define <2 x i64> @load_sext_v2i64_from_numeric_address() { 2381; CHECK-LABEL: load_sext_v2i64_from_numeric_address: 2382; CHECK: .functype load_sext_v2i64_from_numeric_address () -> (v128) 2383; CHECK-NEXT: # %bb.0: 2384; CHECK-NEXT: i32.const 0 2385; CHECK-NEXT: i64x2.load32x2_s 32 2386; CHECK-NEXT: # fallthrough-return 2387 %s = inttoptr i32 32 to <2 x i32>* 2388 %v = load <2 x i32>, <2 x i32>* %s 2389 %v2 = sext <2 x i32> %v to <2 x i64> 2390 ret <2 x i64> %v2 2391} 2392 2393define <2 x i64> @load_zext_v2i64_from_numeric_address() { 2394; CHECK-LABEL: load_zext_v2i64_from_numeric_address: 2395; CHECK: .functype load_zext_v2i64_from_numeric_address () -> (v128) 2396; CHECK-NEXT: # %bb.0: 2397; CHECK-NEXT: i32.const 0 2398; CHECK-NEXT: i64x2.load32x2_u 32 2399; CHECK-NEXT: # 
fallthrough-return 2400 %s = inttoptr i32 32 to <2 x i32>* 2401 %v = load <2 x i32>, <2 x i32>* %s 2402 %v2 = zext <2 x i32> %v to <2 x i64> 2403 ret <2 x i64> %v2 2404} 2405 2406define <2 x i32> @load_ext_v2i64_from_numeric_address() { 2407; CHECK-LABEL: load_ext_v2i64_from_numeric_address: 2408; CHECK: .functype load_ext_v2i64_from_numeric_address () -> (v128) 2409; CHECK-NEXT: # %bb.0: 2410; CHECK-NEXT: i32.const 0 2411; CHECK-NEXT: v128.load64_zero 32 2412; CHECK-NEXT: # fallthrough-return 2413 %s = inttoptr i32 32 to <2 x i32>* 2414 %v = load <2 x i32>, <2 x i32>* %s 2415 ret <2 x i32> %v 2416} 2417 2418@gv_v2i64 = global <2 x i64> <i64 42, i64 42> 2419define <2 x i64> @load_v2i64_from_global_address() { 2420; CHECK-LABEL: load_v2i64_from_global_address: 2421; CHECK: .functype load_v2i64_from_global_address () -> (v128) 2422; CHECK-NEXT: # %bb.0: 2423; CHECK-NEXT: i32.const 0 2424; CHECK-NEXT: v128.load gv_v2i64 2425; CHECK-NEXT: # fallthrough-return 2426 %v = load <2 x i64>, <2 x i64>* @gv_v2i64 2427 ret <2 x i64> %v 2428} 2429 2430@gv_i64 = global i64 42 2431define <2 x i64> @load_splat_v2i64_from_global_address() { 2432; CHECK-LABEL: load_splat_v2i64_from_global_address: 2433; CHECK: .functype load_splat_v2i64_from_global_address () -> (v128) 2434; CHECK-NEXT: # %bb.0: 2435; CHECK-NEXT: i32.const 0 2436; CHECK-NEXT: v128.load64_splat gv_i64 2437; CHECK-NEXT: # fallthrough-return 2438 %e = load i64, i64* @gv_i64 2439 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2440 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2441 ret <2 x i64> %v2 2442} 2443 2444@gv_v2i32 = global <2 x i32> <i32 42, i32 42> 2445define <2 x i64> @load_sext_v2i64_from_global_address() { 2446; CHECK-LABEL: load_sext_v2i64_from_global_address: 2447; CHECK: .functype load_sext_v2i64_from_global_address () -> (v128) 2448; CHECK-NEXT: # %bb.0: 2449; CHECK-NEXT: i32.const 0 2450; CHECK-NEXT: i64x2.load32x2_s gv_v2i32 2451; CHECK-NEXT: # fallthrough-return 
2452 %v = load <2 x i32>, <2 x i32>* @gv_v2i32 2453 %v2 = sext <2 x i32> %v to <2 x i64> 2454 ret <2 x i64> %v2 2455} 2456 2457define <2 x i64> @load_zext_v2i64_from_global_address() { 2458; CHECK-LABEL: load_zext_v2i64_from_global_address: 2459; CHECK: .functype load_zext_v2i64_from_global_address () -> (v128) 2460; CHECK-NEXT: # %bb.0: 2461; CHECK-NEXT: i32.const 0 2462; CHECK-NEXT: i64x2.load32x2_u gv_v2i32 2463; CHECK-NEXT: # fallthrough-return 2464 %v = load <2 x i32>, <2 x i32>* @gv_v2i32 2465 %v2 = zext <2 x i32> %v to <2 x i64> 2466 ret <2 x i64> %v2 2467} 2468 2469define <2 x i32> @load_ext_v2i64_from_global_address() { 2470; CHECK-LABEL: load_ext_v2i64_from_global_address: 2471; CHECK: .functype load_ext_v2i64_from_global_address () -> (v128) 2472; CHECK-NEXT: # %bb.0: 2473; CHECK-NEXT: i32.const 0 2474; CHECK-NEXT: v128.load64_zero gv_v2i32 2475; CHECK-NEXT: # fallthrough-return 2476 %v = load <2 x i32>, <2 x i32>* @gv_v2i32 2477 ret <2 x i32> %v 2478} 2479 2480define void @store_v2i64(<2 x i64> %v, <2 x i64>* %p) { 2481; CHECK-LABEL: store_v2i64: 2482; CHECK: .functype store_v2i64 (v128, i32) -> () 2483; CHECK-NEXT: # %bb.0: 2484; CHECK-NEXT: local.get 1 2485; CHECK-NEXT: local.get 0 2486; CHECK-NEXT: v128.store 0 2487; CHECK-NEXT: # fallthrough-return 2488 store <2 x i64> %v , <2 x i64>* %p 2489 ret void 2490} 2491 2492define void @store_v2i64_with_folded_offset(<2 x i64> %v, <2 x i64>* %p) { 2493; CHECK-LABEL: store_v2i64_with_folded_offset: 2494; CHECK: .functype store_v2i64_with_folded_offset (v128, i32) -> () 2495; CHECK-NEXT: # %bb.0: 2496; CHECK-NEXT: local.get 1 2497; CHECK-NEXT: local.get 0 2498; CHECK-NEXT: v128.store 16 2499; CHECK-NEXT: # fallthrough-return 2500 %q = ptrtoint <2 x i64>* %p to i32 2501 %r = add nuw i32 %q, 16 2502 %s = inttoptr i32 %r to <2 x i64>* 2503 store <2 x i64> %v , <2 x i64>* %s 2504 ret void 2505} 2506 2507define void @store_v2i64_with_folded_gep_offset(<2 x i64> %v, <2 x i64>* %p) { 2508; CHECK-LABEL: 
store_v2i64_with_folded_gep_offset: 2509; CHECK: .functype store_v2i64_with_folded_gep_offset (v128, i32) -> () 2510; CHECK-NEXT: # %bb.0: 2511; CHECK-NEXT: local.get 1 2512; CHECK-NEXT: local.get 0 2513; CHECK-NEXT: v128.store 16 2514; CHECK-NEXT: # fallthrough-return 2515 %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1 2516 store <2 x i64> %v , <2 x i64>* %s 2517 ret void 2518} 2519 2520define void @store_v2i64_with_unfolded_gep_negative_offset(<2 x i64> %v, <2 x i64>* %p) { 2521; CHECK-LABEL: store_v2i64_with_unfolded_gep_negative_offset: 2522; CHECK: .functype store_v2i64_with_unfolded_gep_negative_offset (v128, i32) -> () 2523; CHECK-NEXT: # %bb.0: 2524; CHECK-NEXT: local.get 1 2525; CHECK-NEXT: i32.const -16 2526; CHECK-NEXT: i32.add 2527; CHECK-NEXT: local.get 0 2528; CHECK-NEXT: v128.store 0 2529; CHECK-NEXT: # fallthrough-return 2530 %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1 2531 store <2 x i64> %v , <2 x i64>* %s 2532 ret void 2533} 2534 2535define void @store_v2i64_with_unfolded_offset(<2 x i64> %v, <2 x i64>* %p) { 2536; CHECK-LABEL: store_v2i64_with_unfolded_offset: 2537; CHECK: .functype store_v2i64_with_unfolded_offset (v128, i32) -> () 2538; CHECK-NEXT: # %bb.0: 2539; CHECK-NEXT: local.get 1 2540; CHECK-NEXT: i32.const 16 2541; CHECK-NEXT: i32.add 2542; CHECK-NEXT: local.get 0 2543; CHECK-NEXT: v128.store 0 2544; CHECK-NEXT: # fallthrough-return 2545 %q = ptrtoint <2 x i64>* %p to i32 2546 %r = add nsw i32 %q, 16 2547 %s = inttoptr i32 %r to <2 x i64>* 2548 store <2 x i64> %v , <2 x i64>* %s 2549 ret void 2550} 2551 2552define void @store_v2i64_with_unfolded_gep_offset(<2 x i64> %v, <2 x i64>* %p) { 2553; CHECK-LABEL: store_v2i64_with_unfolded_gep_offset: 2554; CHECK: .functype store_v2i64_with_unfolded_gep_offset (v128, i32) -> () 2555; CHECK-NEXT: # %bb.0: 2556; CHECK-NEXT: local.get 1 2557; CHECK-NEXT: i32.const 16 2558; CHECK-NEXT: i32.add 2559; CHECK-NEXT: local.get 0 2560; CHECK-NEXT: v128.store 0 2561; 
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}

define void @store_v2i64_to_numeric_address(<2 x i64> %v) {
; CHECK-LABEL: store_v2i64_to_numeric_address:
; CHECK:         .functype store_v2i64_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x i64>*
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}

define void @store_v2i64_to_global_address(<2 x i64> %v) {
; CHECK-LABEL: store_v2i64_to_global_address:
; CHECK:         .functype store_v2i64_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v2i64
; CHECK-NEXT:    # fallthrough-return
  store <2 x i64> %v , <2 x i64>* @gv_v2i64
  ret void
}

; ==============================================================================
; 4 x float
; ==============================================================================
define <4 x float> @load_v4f32(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32:
; CHECK:         .functype load_v4f32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x float>, <4 x float>* %p
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32(float* %p) {
; CHECK-LABEL: load_splat_v4f32:
; CHECK:         .functype load_splat_v4f32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load float, float* %p
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define <4 x float> @load_v4f32_with_folded_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_folded_offset:
; CHECK:         .functype load_v4f32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_folded_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_folded_offset:
; CHECK:         .functype load_splat_v4f32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint float* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to float*
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define <4 x float> @load_v4f32_with_folded_gep_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_folded_gep_offset:
; CHECK:         .functype load_v4f32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_folded_gep_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_folded_gep_offset:
; CHECK:         .functype load_splat_v4f32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_splat 4
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds float, float* %p, i32 1
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define <4 x float> @load_v4f32_with_unfolded_gep_negative_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_unfolded_gep_negative_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -4
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds float, float* %p, i32 -1
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define <4 x float> @load_v4f32_with_unfolded_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_offset:
; CHECK:         .functype load_v4f32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_unfolded_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_offset:
; CHECK:         .functype load_splat_v4f32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint float* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to float*
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define <4 x float> @load_v4f32_with_unfolded_gep_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_gep_offset:
; CHECK:         .functype load_v4f32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_unfolded_gep_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v4f32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 4
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr float, float* %p, i32 1
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define <4 x float> @load_v4f32_from_numeric_address() {
; CHECK-LABEL: load_v4f32_from_numeric_address:
; CHECK:         .functype load_v4f32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x float>*
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_from_numeric_address() {
; CHECK-LABEL: load_splat_v4f32_from_numeric_address:
; CHECK:         .functype load_splat_v4f32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load32_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to float*
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

@gv_v4f32 = global <4 x float> <float 42., float 42., float 42., float 42.>
define <4 x float> @load_v4f32_from_global_address() {
; CHECK-LABEL: load_v4f32_from_global_address:
; CHECK:         .functype load_v4f32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v4f32
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x float>, <4 x float>* @gv_v4f32
  ret <4 x float> %v
}

@gv_f32 = global float 42.
define <4 x float> @load_splat_v4f32_from_global_address() {
; CHECK-LABEL: load_splat_v4f32_from_global_address:
; CHECK:         .functype load_splat_v4f32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load32_splat gv_f32
; CHECK-NEXT:    # fallthrough-return
  %e = load float, float* @gv_f32
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define void @store_v4f32(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32:
; CHECK:         .functype store_v4f32 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <4 x float> %v , <4 x float>* %p
  ret void
}

define void @store_v4f32_with_folded_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_folded_offset:
; CHECK:         .functype store_v4f32_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_with_folded_gep_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_folded_gep_offset:
; CHECK:         .functype store_v4f32_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_with_unfolded_gep_negative_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v4f32_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_with_unfolded_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_offset:
; CHECK:         .functype store_v4f32_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_with_unfolded_gep_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_gep_offset:
; CHECK:         .functype store_v4f32_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_to_numeric_address(<4 x float> %v) {
; CHECK-LABEL: store_v4f32_to_numeric_address:
; CHECK:         .functype store_v4f32_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_to_global_address(<4 x float> %v) {
; CHECK-LABEL: store_v4f32_to_global_address:
; CHECK:         .functype store_v4f32_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v4f32
; CHECK-NEXT:    # fallthrough-return
  store <4 x float> %v , <4 x float>* @gv_v4f32
  ret void
}

; ==============================================================================
; 2 x double
; ==============================================================================
define <2 x double> @load_v2f64(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64:
; CHECK:         .functype load_v2f64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x double>, <2 x double>* %p
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64(double* %p) {
; CHECK-LABEL: load_splat_v2f64:
; CHECK:         .functype load_splat_v2f64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load double, double* %p
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_promote_v2f64(<2 x float>* %p) {
; CHECK-LABEL: load_promote_v2f64:
; CHECK:         .functype load_promote_v2f64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    # fallthrough-return
  %e = load <2 x float>, <2 x float>* %p
  %v = fpext <2 x float> %e to <2 x double>
  ret <2 x double> %v
}

define <2 x double> @load_v2f64_with_folded_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_folded_offset:
; CHECK:         .functype load_v2f64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_folded_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_folded_offset:
; CHECK:         .functype load_splat_v2f64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint double* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to double*
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_promote_v2f64_with_folded_offset(<2 x float>* %p) {
; CHECK-LABEL: load_promote_v2f64_with_folded_offset:
; CHECK:         .functype load_promote_v2f64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x float>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x float>*
  %e = load <2 x float>, <2 x float>* %s
  %v = fpext <2 x float> %e to <2 x double>
  ret <2 x double> %v
}

define <2 x double> @load_v2f64_with_folded_gep_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_folded_gep_offset:
; CHECK:         .functype load_v2f64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_folded_gep_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_folded_gep_offset:
; CHECK:         .functype load_splat_v2f64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_splat 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds double, double* %p, i32 1
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_promote_v2f64_with_folded_gep_offset(<2 x float>* %p) {
; CHECK-LABEL: load_promote_v2f64_with_folded_gep_offset:
; CHECK:         .functype load_promote_v2f64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x float>, <2 x float>* %p, i32 1
  %e = load <2 x float>, <2 x float>* %s
  %v = fpext <2 x float> %e to <2 x double>
  ret <2 x double> %v
}

define <2 x double> @load_v2f64_with_unfolded_gep_negative_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_unfolded_gep_negative_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds double, double* %p, i32 -1
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_promote_v2f64_with_unfolded_gep_negative_offset(<2 x float>* %p) {
; CHECK-LABEL: load_promote_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_promote_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x float>, <2 x float>* %p, i32 -1
  %e = load <2 x float>, <2 x float>* %s
  %v = fpext <2 x float> %e to <2 x double>
  ret <2 x double> %v
}

define <2 x double> @load_v2f64_with_unfolded_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_offset:
; CHECK:         .functype load_v2f64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_unfolded_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_offset:
; CHECK:         .functype load_splat_v2f64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint double* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to double*
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_promote_v2f64_with_unfolded_offset(<2 x float>* %p) {
; CHECK-LABEL: load_promote_v2f64_with_unfolded_offset:
; CHECK:         .functype load_promote_v2f64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x float>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x float>*
  %e = load <2 x float>, <2 x float>* %s
  %v = fpext <2 x float> %e to <2 x double>
  ret <2 x double> %v
}

define <2 x double> @load_v2f64_with_unfolded_gep_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype load_v2f64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x double>, <2 x double>* %p, i32 1
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_unfolded_gep_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v2f64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr double, double* %p, i32 1
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_promote_v2f64_with_unfolded_gep_offset(<2 x float>* %p) {
; CHECK-LABEL: load_promote_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype load_promote_v2f64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x float>, <2 x float>* %p, i32 1
  %e = load <2 x float>, <2 x float>* %s
  %v = fpext <2 x float> %e to <2 x double>
  ret <2 x double> %v
}

define <2 x double> @load_v2f64_from_numeric_address() {
; CHECK-LABEL: load_v2f64_from_numeric_address:
; CHECK:         .functype load_v2f64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x double>*
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_from_numeric_address() {
; CHECK-LABEL: load_splat_v2f64_from_numeric_address:
; CHECK:         .functype load_splat_v2f64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load64_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to double*
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_promote_v2f64_from_numeric_address() {
; CHECK-LABEL: load_promote_v2f64_from_numeric_address:
; CHECK:         .functype load_promote_v2f64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 32
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x float>*
  %e = load <2 x float>, <2 x float>* %s
  %v = fpext <2 x float> %e to <2 x double>
  ret <2 x double> %v
}

@gv_v2f64 = global <2 x double> <double 42., double 42.>
define <2 x double> @load_v2f64_from_global_address() {
; CHECK-LABEL: load_v2f64_from_global_address:
; CHECK:         .functype load_v2f64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v2f64
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x double>, <2 x double>* @gv_v2f64
  ret <2 x double> %v
}

@gv_f64 = global double 42.
define <2 x double> @load_splat_v2f64_from_global_address() {
; CHECK-LABEL: load_splat_v2f64_from_global_address:
; CHECK:         .functype load_splat_v2f64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load64_splat gv_f64
; CHECK-NEXT:    # fallthrough-return
  %e = load double, double* @gv_f64
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

@gv_v2f32 = global <2 x float> <float 42., float 42.>
define <2 x double> @load_promote_v2f64_from_global_address() {
; CHECK-LABEL: load_promote_v2f64_from_global_address:
; CHECK:         .functype load_promote_v2f64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const gv_v2f32
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    # fallthrough-return
  %e = load <2 x float>, <2 x float>* @gv_v2f32
  %v = fpext <2 x float> %e to <2 x double>
  ret <2 x double> %v
}

define void @store_v2f64(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64:
; CHECK:         .functype store_v2f64 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <2 x double> %v , <2 x double>* %p
  ret void
}

define void @store_v2f64_with_folded_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_folded_offset:
; CHECK:         .functype store_v2f64_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  store <2 x double> %v , <2 x double>* %s
  ret void
}

define void @store_v2f64_with_folded_gep_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_folded_gep_offset:
; CHECK:         .functype store_v2f64_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
  store <2 x double> %v , <2 x double>* %s
  ret void
}

define void @store_v2f64_with_unfolded_gep_negative_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v2f64_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
  store <2 x double> %v , <2 x double>* %s
  ret void
}

define void @store_v2f64_with_unfolded_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_offset:
; CHECK:         .functype store_v2f64_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  store <2 x double> %v , <2 x double>* %s
  ret void
}

define void @store_v2f64_with_unfolded_gep_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype store_v2f64_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x double>, <2 x double>* %p, i32 1
  store <2 x double> %v , <2 x double>* %s
  ret void
}

define void @store_v2f64_to_numeric_address(<2 x double> %v) {
; CHECK-LABEL: store_v2f64_to_numeric_address:
; CHECK:         .functype store_v2f64_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x double>*
  store <2 x double> %v , <2 x double>* %s
  ret void
}

define void @store_v2f64_to_global_address(<2 x double> %v) {
; CHECK-LABEL: store_v2f64_to_global_address:
; CHECK:         .functype store_v2f64_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v2f64
; CHECK-NEXT:    # fallthrough-return
  store <2 x double> %v , <2 x double>* @gv_v2f64
  ret void
}