; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s

; Test SIMD loads and stores
;
; NOTE(review): the CHECK lines below are consumed by FileCheck and were
; machine-generated; regenerate with update_llc_test_checks.py rather than
; editing them by hand.

target triple = "wasm32-unknown-unknown"

; ==============================================================================
; 16 x i8
; ==============================================================================
define <16 x i8> @load_v16i8(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8:
; CHECK:         .functype load_v16i8 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <16 x i8>, <16 x i8>* %p
  ret <16 x i8> %v
}

; A scalar load splatted to every lane selects v128.load8_splat.
define <16 x i8> @load_splat_v16i8(i8* %p) {
; CHECK-LABEL: load_splat_v16i8:
; CHECK:         .functype load_splat_v16i8 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load8_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i8, i8* %p
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

; An 'add nuw' constant address offset folds into the load's offset immediate.
define <16 x i8> @load_v16i8_with_folded_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_folded_offset:
; CHECK:         .functype load_v16i8_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_folded_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_folded_offset:
; CHECK:         .functype load_splat_v16i8_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load8_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i8* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to i8*
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_with_folded_gep_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_folded_gep_offset:
; CHECK:         .functype load_v16i8_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_folded_gep_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_folded_gep_offset:
; CHECK:         .functype load_splat_v16i8_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load8_splat 1
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, i8* %p, i32 1
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

; Negative offsets cannot be folded (the wasm offset immediate is unsigned),
; so an explicit i32.add remains in the output.
define <16 x i8> @load_v16i8_with_unfolded_gep_negative_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_unfolded_gep_negative_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -1
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load8_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, i8* %p, i32 -1
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

; 'add nsw' (without nuw) may wrap the unsigned address space, so the offset
; is not folded into the immediate.
define <16 x i8> @load_v16i8_with_unfolded_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_offset:
; CHECK:         .functype load_v16i8_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_unfolded_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load8_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i8* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to i8*
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

; A gep without 'inbounds' likewise keeps the add unfolded.
define <16 x i8> @load_v16i8_with_unfolded_gep_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype load_v16i8_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_unfolded_gep_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load8_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i8, i8* %p, i32 1
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

; A constant address becomes base 0 plus the address as the offset immediate.
define <16 x i8> @load_v16i8_from_numeric_address() {
; CHECK-LABEL: load_v16i8_from_numeric_address:
; CHECK:         .functype load_v16i8_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <16 x i8>*
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_from_numeric_address() {
; CHECK-LABEL: load_splat_v16i8_from_numeric_address:
; CHECK:         .functype load_splat_v16i8_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load8_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to i8*
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

@gv_v16i8 = global <16 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
define <16 x i8> @load_v16i8_from_global_address() {
; CHECK-LABEL: load_v16i8_from_global_address:
; CHECK:         .functype load_v16i8_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v16i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <16 x i8>, <16 x i8>* @gv_v16i8
  ret <16 x i8> %v
}

@gv_i8 = global i8 42
define <16 x i8> @load_splat_v16i8_from_global_address() {
; CHECK-LABEL: load_splat_v16i8_from_global_address:
; CHECK:         .functype load_splat_v16i8_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load8_splat gv_i8
; CHECK-NEXT:    # fallthrough-return
  %e = load i8, i8* @gv_i8
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define void @store_v16i8(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8:
; CHECK:         .functype store_v16i8 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <16 x i8> %v , <16 x i8>* %p
  ret void
}

define void @store_v16i8_with_folded_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_folded_offset:
; CHECK:         .functype store_v16i8_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_with_folded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_folded_gep_offset:
; CHECK:         .functype store_v16i8_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_with_unfolded_gep_negative_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v16i8_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_with_unfolded_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_offset:
; CHECK:         .functype store_v16i8_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_with_unfolded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype store_v16i8_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_to_numeric_address(<16 x i8> %v) {
; CHECK-LABEL: store_v16i8_to_numeric_address:
; CHECK:         .functype store_v16i8_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <16 x i8>*
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_to_global_address(<16 x i8> %v) {
; CHECK-LABEL: store_v16i8_to_global_address:
; CHECK:         .functype store_v16i8_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v16i8
; CHECK-NEXT:    # fallthrough-return
  store <16 x i8> %v , <16 x i8>* @gv_v16i8
  ret void
}

; ==============================================================================
; 8 x i16
; ==============================================================================
define <8 x i16> @load_v8i16(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16:
; CHECK:         .functype load_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i16>, <8 x i16>* %p
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16(i16* %p) {
; CHECK-LABEL: load_splat_v8i16:
; CHECK:         .functype load_splat_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load16_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i16, i16* %p
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

; Sign-/zero-extending a <8 x i8> load selects i16x8.load8x8_s/_u.
define <8 x i16> @load_sext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16:
; CHECK:         .functype load_sext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* %p
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16:
; CHECK:         .functype load_zext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* %p
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16:
; CHECK:         .functype load_ext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* %p
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_folded_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_folded_offset:
; CHECK:         .functype load_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_folded_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_folded_offset:
; CHECK:         .functype load_splat_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load16_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i16* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to i16*
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_folded_offset:
; CHECK:         .functype load_sext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_folded_offset:
; CHECK:         .functype load_zext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_folded_offset:
; CHECK:         .functype load_ext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_folded_gep_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_folded_gep_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_splat_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load16_splat 2
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, i16* %p, i32 1
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_sext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_zext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_ext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_unfolded_gep_negative_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_unfolded_gep_negative_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -2
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load16_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, i16* %p, i32 -1
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_unfolded_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_offset:
; CHECK:         .functype load_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_unfolded_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load16_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i16* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to i16*
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_unfolded_gep_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_unfolded_gep_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 2
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load16_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i16, i16* %p, i32 1
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_from_numeric_address() {
; CHECK-LABEL: load_v8i16_from_numeric_address:
; CHECK:         .functype load_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_from_numeric_address() {
; CHECK-LABEL: load_splat_v8i16_from_numeric_address:
; CHECK:         .functype load_splat_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load16_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to i16*
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_sext_v8i16_from_numeric_address:
; CHECK:         .functype load_sext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_s 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_zext_v8i16_from_numeric_address:
; CHECK:         .functype load_zext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_ext_v8i16_from_numeric_address:
; CHECK:         .functype load_ext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

@gv_v8i16 = global <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
define <8 x i16> @load_v8i16_from_global_address() {
; CHECK-LABEL: load_v8i16_from_global_address:
; CHECK:         .functype load_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v8i16
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i16>, <8 x i16>* @gv_v8i16
  ret <8 x i16> %v
}

@gv_i16 = global i16 42
define <8 x i16> @load_splat_v8i16_from_global_address() {
; CHECK-LABEL: load_splat_v8i16_from_global_address:
; CHECK:         .functype load_splat_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load16_splat gv_i16
; CHECK-NEXT:    # fallthrough-return
  %e = load i16, i16* @gv_i16
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

@gv_v8i8 = global <8 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
define <8 x i16> @load_sext_v8i16_from_global_address() {
; CHECK-LABEL: load_sext_v8i16_from_global_address:
; CHECK:         .functype load_sext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_s gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_from_global_address() {
; CHECK-LABEL: load_zext_v8i16_from_global_address:
; CHECK:         .functype load_zext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_from_global_address() {
; CHECK-LABEL: load_ext_v8i16_from_global_address:
; CHECK:         .functype load_ext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  ret <8 x i8> %v
}


define void @store_v8i16(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16:
; CHECK:         .functype store_v8i16 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <8 x i16> %v , <8 x i16>* %p
  ret void
}

; There is no narrowing v128 store, so a <8 x i8> store masks, narrows, and
; stores the low 64 bits with a scalar i64.store.
define void @store_narrowing_v8i16(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16:
; CHECK:         .functype store_narrowing_v8i16 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.narrow_i16x8_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store 0
; CHECK-NEXT:    # fallthrough-return
  store <8 x i8> %v, <8 x i8>* %p
  ret void
}

define void @store_v8i16_with_folded_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_folded_offset:
; CHECK:         .functype store_v8i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

define void @store_narrowing_v8i16_with_folded_offset(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_folded_offset:
; CHECK:         .functype store_narrowing_v8i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.narrow_i16x8_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  store <8 x i8> %v , <8 x i8>* %s
  ret void
}

define void @store_v8i16_with_folded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_folded_gep_offset:
; CHECK:         .functype store_v8i16_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
980 %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1 981 store <8 x i16> %v , <8 x i16>* %s 982 ret void 983} 984 985define void @store_narrowing_v8i16_with_folded_gep_offset(<8 x i8> %v, <8 x i8>* %p) { 986; CHECK-LABEL: store_narrowing_v8i16_with_folded_gep_offset: 987; CHECK: .functype store_narrowing_v8i16_with_folded_gep_offset (v128, i32) -> () 988; CHECK-NEXT: # %bb.0: 989; CHECK-NEXT: local.get 1 990; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255 991; CHECK-NEXT: local.get 0 992; CHECK-NEXT: v128.and 993; CHECK-NEXT: local.get 0 994; CHECK-NEXT: i8x16.narrow_i16x8_u 995; CHECK-NEXT: i64x2.extract_lane 0 996; CHECK-NEXT: i64.store 8 997; CHECK-NEXT: # fallthrough-return 998 %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1 999 store <8 x i8> %v , <8 x i8>* %s 1000 ret void 1001} 1002 1003define void @store_v8i16_with_unfolded_gep_negative_offset(<8 x i16> %v, <8 x i16>* %p) { 1004; CHECK-LABEL: store_v8i16_with_unfolded_gep_negative_offset: 1005; CHECK: .functype store_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> () 1006; CHECK-NEXT: # %bb.0: 1007; CHECK-NEXT: local.get 1 1008; CHECK-NEXT: i32.const -16 1009; CHECK-NEXT: i32.add 1010; CHECK-NEXT: local.get 0 1011; CHECK-NEXT: v128.store 0 1012; CHECK-NEXT: # fallthrough-return 1013 %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1 1014 store <8 x i16> %v , <8 x i16>* %s 1015 ret void 1016} 1017 1018define void @store_narrowing_v8i16_with_unfolded_gep_negative_offset(<8 x i8> %v, <8 x i8>* %p) { 1019; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_negative_offset: 1020; CHECK: .functype store_narrowing_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> () 1021; CHECK-NEXT: # %bb.0: 1022; CHECK-NEXT: local.get 1 1023; CHECK-NEXT: i32.const -8 1024; CHECK-NEXT: i32.add 1025; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255 1026; CHECK-NEXT: local.get 0 1027; CHECK-NEXT: v128.and 1028; CHECK-NEXT: local.get 0 1029; CHECK-NEXT: 
i8x16.narrow_i16x8_u 1030; CHECK-NEXT: i64x2.extract_lane 0 1031; CHECK-NEXT: i64.store 0 1032; CHECK-NEXT: # fallthrough-return 1033 %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1 1034 store <8 x i8> %v , <8 x i8>* %s 1035 ret void 1036} 1037 1038define void @store_v8i16_with_unfolded_offset(<8 x i16> %v, <8 x i16>* %p) { 1039; CHECK-LABEL: store_v8i16_with_unfolded_offset: 1040; CHECK: .functype store_v8i16_with_unfolded_offset (v128, i32) -> () 1041; CHECK-NEXT: # %bb.0: 1042; CHECK-NEXT: local.get 1 1043; CHECK-NEXT: i32.const 16 1044; CHECK-NEXT: i32.add 1045; CHECK-NEXT: local.get 0 1046; CHECK-NEXT: v128.store 0 1047; CHECK-NEXT: # fallthrough-return 1048 %q = ptrtoint <8 x i16>* %p to i32 1049 %r = add nsw i32 %q, 16 1050 %s = inttoptr i32 %r to <8 x i16>* 1051 store <8 x i16> %v , <8 x i16>* %s 1052 ret void 1053} 1054 1055define void @store_narrowing_v8i16_with_unfolded_offset(<8 x i8> %v, <8 x i8>* %p) { 1056; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_offset: 1057; CHECK: .functype store_narrowing_v8i16_with_unfolded_offset (v128, i32) -> () 1058; CHECK-NEXT: # %bb.0: 1059; CHECK-NEXT: local.get 1 1060; CHECK-NEXT: i32.const 16 1061; CHECK-NEXT: i32.add 1062; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255 1063; CHECK-NEXT: local.get 0 1064; CHECK-NEXT: v128.and 1065; CHECK-NEXT: local.get 0 1066; CHECK-NEXT: i8x16.narrow_i16x8_u 1067; CHECK-NEXT: i64x2.extract_lane 0 1068; CHECK-NEXT: i64.store 0 1069; CHECK-NEXT: # fallthrough-return 1070 %q = ptrtoint <8 x i8>* %p to i32 1071 %r = add nsw i32 %q, 16 1072 %s = inttoptr i32 %r to <8 x i8>* 1073 store <8 x i8> %v , <8 x i8>* %s 1074 ret void 1075} 1076 1077define void @store_v8i16_with_unfolded_gep_offset(<8 x i16> %v, <8 x i16>* %p) { 1078; CHECK-LABEL: store_v8i16_with_unfolded_gep_offset: 1079; CHECK: .functype store_v8i16_with_unfolded_gep_offset (v128, i32) -> () 1080; CHECK-NEXT: # %bb.0: 1081; CHECK-NEXT: local.get 1 1082; CHECK-NEXT: i32.const 16 1083; CHECK-NEXT: 
i32.add 1084; CHECK-NEXT: local.get 0 1085; CHECK-NEXT: v128.store 0 1086; CHECK-NEXT: # fallthrough-return 1087 %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1 1088 store <8 x i16> %v , <8 x i16>* %s 1089 ret void 1090} 1091 1092define void @store_narrowing_v8i16_with_unfolded_gep_offset(<8 x i8> %v, <8 x i8>* %p) { 1093; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_offset: 1094; CHECK: .functype store_narrowing_v8i16_with_unfolded_gep_offset (v128, i32) -> () 1095; CHECK-NEXT: # %bb.0: 1096; CHECK-NEXT: local.get 1 1097; CHECK-NEXT: i32.const 8 1098; CHECK-NEXT: i32.add 1099; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255 1100; CHECK-NEXT: local.get 0 1101; CHECK-NEXT: v128.and 1102; CHECK-NEXT: local.get 0 1103; CHECK-NEXT: i8x16.narrow_i16x8_u 1104; CHECK-NEXT: i64x2.extract_lane 0 1105; CHECK-NEXT: i64.store 0 1106; CHECK-NEXT: # fallthrough-return 1107 %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1 1108 store <8 x i8> %v , <8 x i8>* %s 1109 ret void 1110} 1111 1112define void @store_v8i16_to_numeric_address(<8 x i16> %v) { 1113; CHECK-LABEL: store_v8i16_to_numeric_address: 1114; CHECK: .functype store_v8i16_to_numeric_address (v128) -> () 1115; CHECK-NEXT: # %bb.0: 1116; CHECK-NEXT: i32.const 0 1117; CHECK-NEXT: local.get 0 1118; CHECK-NEXT: v128.store 32 1119; CHECK-NEXT: # fallthrough-return 1120 %s = inttoptr i32 32 to <8 x i16>* 1121 store <8 x i16> %v , <8 x i16>* %s 1122 ret void 1123} 1124 1125define void @store_narrowing_v8i16_to_numeric_address(<8 x i8> %v, <8 x i8>* %p) { 1126; CHECK-LABEL: store_narrowing_v8i16_to_numeric_address: 1127; CHECK: .functype store_narrowing_v8i16_to_numeric_address (v128, i32) -> () 1128; CHECK-NEXT: # %bb.0: 1129; CHECK-NEXT: i32.const 0 1130; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255 1131; CHECK-NEXT: local.get 0 1132; CHECK-NEXT: v128.and 1133; CHECK-NEXT: local.get 0 1134; CHECK-NEXT: i8x16.narrow_i16x8_u 1135; CHECK-NEXT: i64x2.extract_lane 0 1136; CHECK-NEXT: 
i64.store 32 1137; CHECK-NEXT: # fallthrough-return 1138 %s = inttoptr i32 32 to <8 x i8>* 1139 store <8 x i8> %v , <8 x i8>* %s 1140 ret void 1141} 1142 1143define void @store_v8i16_to_global_address(<8 x i16> %v) { 1144; CHECK-LABEL: store_v8i16_to_global_address: 1145; CHECK: .functype store_v8i16_to_global_address (v128) -> () 1146; CHECK-NEXT: # %bb.0: 1147; CHECK-NEXT: i32.const 0 1148; CHECK-NEXT: local.get 0 1149; CHECK-NEXT: v128.store gv_v8i16 1150; CHECK-NEXT: # fallthrough-return 1151 store <8 x i16> %v , <8 x i16>* @gv_v8i16 1152 ret void 1153} 1154 1155define void @store_narrowing_v8i16_to_global_address(<8 x i8> %v) { 1156; CHECK-LABEL: store_narrowing_v8i16_to_global_address: 1157; CHECK: .functype store_narrowing_v8i16_to_global_address (v128) -> () 1158; CHECK-NEXT: # %bb.0: 1159; CHECK-NEXT: i32.const 0 1160; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255 1161; CHECK-NEXT: local.get 0 1162; CHECK-NEXT: v128.and 1163; CHECK-NEXT: local.get 0 1164; CHECK-NEXT: i8x16.narrow_i16x8_u 1165; CHECK-NEXT: i64x2.extract_lane 0 1166; CHECK-NEXT: i64.store gv_v8i8 1167; CHECK-NEXT: # fallthrough-return 1168 store <8 x i8> %v , <8 x i8>* @gv_v8i8 1169 ret void 1170} 1171 1172; ============================================================================== 1173; 4 x i32 1174; ============================================================================== 1175define <4 x i32> @load_v4i32(<4 x i32>* %p) { 1176; CHECK-LABEL: load_v4i32: 1177; CHECK: .functype load_v4i32 (i32) -> (v128) 1178; CHECK-NEXT: # %bb.0: 1179; CHECK-NEXT: local.get 0 1180; CHECK-NEXT: v128.load 0 1181; CHECK-NEXT: # fallthrough-return 1182 %v = load <4 x i32>, <4 x i32>* %p 1183 ret <4 x i32> %v 1184} 1185 1186define <4 x i32> @load_splat_v4i32(i32* %addr) { 1187; CHECK-LABEL: load_splat_v4i32: 1188; CHECK: .functype load_splat_v4i32 (i32) -> (v128) 1189; CHECK-NEXT: # %bb.0: 1190; CHECK-NEXT: local.get 0 1191; CHECK-NEXT: v128.load32_splat 0 1192; CHECK-NEXT: # 
fallthrough-return 1193 %e = load i32, i32* %addr, align 4 1194 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1195 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1196 ret <4 x i32> %v2 1197} 1198 1199define <4 x i32> @load_sext_v4i32(<4 x i16>* %p) { 1200; CHECK-LABEL: load_sext_v4i32: 1201; CHECK: .functype load_sext_v4i32 (i32) -> (v128) 1202; CHECK-NEXT: # %bb.0: 1203; CHECK-NEXT: local.get 0 1204; CHECK-NEXT: i32x4.load16x4_s 0 1205; CHECK-NEXT: # fallthrough-return 1206 %v = load <4 x i16>, <4 x i16>* %p 1207 %v2 = sext <4 x i16> %v to <4 x i32> 1208 ret <4 x i32> %v2 1209} 1210 1211define <4 x i32> @load_zext_v4i32(<4 x i16>* %p) { 1212; CHECK-LABEL: load_zext_v4i32: 1213; CHECK: .functype load_zext_v4i32 (i32) -> (v128) 1214; CHECK-NEXT: # %bb.0: 1215; CHECK-NEXT: local.get 0 1216; CHECK-NEXT: i32x4.load16x4_u 0 1217; CHECK-NEXT: # fallthrough-return 1218 %v = load <4 x i16>, <4 x i16>* %p 1219 %v2 = zext <4 x i16> %v to <4 x i32> 1220 ret <4 x i32> %v2 1221} 1222 1223define <4 x i16> @load_ext_v4i32(<4 x i16>* %p) { 1224; CHECK-LABEL: load_ext_v4i32: 1225; CHECK: .functype load_ext_v4i32 (i32) -> (v128) 1226; CHECK-NEXT: # %bb.0: 1227; CHECK-NEXT: local.get 0 1228; CHECK-NEXT: i32x4.load16x4_u 0 1229; CHECK-NEXT: # fallthrough-return 1230 %v = load <4 x i16>, <4 x i16>* %p 1231 ret <4 x i16> %v 1232} 1233 1234define <4 x i32> @load_v4i32_with_folded_offset(<4 x i32>* %p) { 1235; CHECK-LABEL: load_v4i32_with_folded_offset: 1236; CHECK: .functype load_v4i32_with_folded_offset (i32) -> (v128) 1237; CHECK-NEXT: # %bb.0: 1238; CHECK-NEXT: local.get 0 1239; CHECK-NEXT: v128.load 16 1240; CHECK-NEXT: # fallthrough-return 1241 %q = ptrtoint <4 x i32>* %p to i32 1242 %r = add nuw i32 %q, 16 1243 %s = inttoptr i32 %r to <4 x i32>* 1244 %v = load <4 x i32>, <4 x i32>* %s 1245 ret <4 x i32> %v 1246} 1247 1248define <4 x i32> @load_splat_v4i32_with_folded_offset(i32* %p) { 1249; CHECK-LABEL: load_splat_v4i32_with_folded_offset: 1250; 
CHECK: .functype load_splat_v4i32_with_folded_offset (i32) -> (v128) 1251; CHECK-NEXT: # %bb.0: 1252; CHECK-NEXT: local.get 0 1253; CHECK-NEXT: v128.load32_splat 16 1254; CHECK-NEXT: # fallthrough-return 1255 %q = ptrtoint i32* %p to i32 1256 %r = add nuw i32 %q, 16 1257 %s = inttoptr i32 %r to i32* 1258 %e = load i32, i32* %s 1259 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1260 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1261 ret <4 x i32> %v2 1262} 1263 1264define <4 x i32> @load_sext_v4i32_with_folded_offset(<4 x i16>* %p) { 1265; CHECK-LABEL: load_sext_v4i32_with_folded_offset: 1266; CHECK: .functype load_sext_v4i32_with_folded_offset (i32) -> (v128) 1267; CHECK-NEXT: # %bb.0: 1268; CHECK-NEXT: local.get 0 1269; CHECK-NEXT: i32x4.load16x4_s 16 1270; CHECK-NEXT: # fallthrough-return 1271 %q = ptrtoint <4 x i16>* %p to i32 1272 %r = add nuw i32 %q, 16 1273 %s = inttoptr i32 %r to <4 x i16>* 1274 %v = load <4 x i16>, <4 x i16>* %s 1275 %v2 = sext <4 x i16> %v to <4 x i32> 1276 ret <4 x i32> %v2 1277} 1278 1279define <4 x i32> @load_zext_v4i32_with_folded_offset(<4 x i16>* %p) { 1280; CHECK-LABEL: load_zext_v4i32_with_folded_offset: 1281; CHECK: .functype load_zext_v4i32_with_folded_offset (i32) -> (v128) 1282; CHECK-NEXT: # %bb.0: 1283; CHECK-NEXT: local.get 0 1284; CHECK-NEXT: i32x4.load16x4_u 16 1285; CHECK-NEXT: # fallthrough-return 1286 %q = ptrtoint <4 x i16>* %p to i32 1287 %r = add nuw i32 %q, 16 1288 %s = inttoptr i32 %r to <4 x i16>* 1289 %v = load <4 x i16>, <4 x i16>* %s 1290 %v2 = zext <4 x i16> %v to <4 x i32> 1291 ret <4 x i32> %v2 1292} 1293 1294define <4 x i16> @load_ext_v4i32_with_folded_offset(<4 x i16>* %p) { 1295; CHECK-LABEL: load_ext_v4i32_with_folded_offset: 1296; CHECK: .functype load_ext_v4i32_with_folded_offset (i32) -> (v128) 1297; CHECK-NEXT: # %bb.0: 1298; CHECK-NEXT: local.get 0 1299; CHECK-NEXT: i32x4.load16x4_u 16 1300; CHECK-NEXT: # fallthrough-return 1301 %q = ptrtoint <4 x i16>* %p to i32 
1302 %r = add nuw i32 %q, 16 1303 %s = inttoptr i32 %r to <4 x i16>* 1304 %v = load <4 x i16>, <4 x i16>* %s 1305 ret <4 x i16> %v 1306} 1307 1308define <4 x i32> @load_v4i32_with_folded_gep_offset(<4 x i32>* %p) { 1309; CHECK-LABEL: load_v4i32_with_folded_gep_offset: 1310; CHECK: .functype load_v4i32_with_folded_gep_offset (i32) -> (v128) 1311; CHECK-NEXT: # %bb.0: 1312; CHECK-NEXT: local.get 0 1313; CHECK-NEXT: v128.load 16 1314; CHECK-NEXT: # fallthrough-return 1315 %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1 1316 %v = load <4 x i32>, <4 x i32>* %s 1317 ret <4 x i32> %v 1318} 1319 1320define <4 x i32> @load_splat_v4i32_with_folded_gep_offset(i32* %p) { 1321; CHECK-LABEL: load_splat_v4i32_with_folded_gep_offset: 1322; CHECK: .functype load_splat_v4i32_with_folded_gep_offset (i32) -> (v128) 1323; CHECK-NEXT: # %bb.0: 1324; CHECK-NEXT: local.get 0 1325; CHECK-NEXT: v128.load32_splat 4 1326; CHECK-NEXT: # fallthrough-return 1327 %s = getelementptr inbounds i32, i32* %p, i32 1 1328 %e = load i32, i32* %s 1329 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1330 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1331 ret <4 x i32> %v2 1332} 1333 1334define <4 x i32> @load_sext_v4i32_with_folded_gep_offset(<4 x i16>* %p) { 1335; CHECK-LABEL: load_sext_v4i32_with_folded_gep_offset: 1336; CHECK: .functype load_sext_v4i32_with_folded_gep_offset (i32) -> (v128) 1337; CHECK-NEXT: # %bb.0: 1338; CHECK-NEXT: local.get 0 1339; CHECK-NEXT: i32x4.load16x4_s 8 1340; CHECK-NEXT: # fallthrough-return 1341 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1 1342 %v = load <4 x i16>, <4 x i16>* %s 1343 %v2 = sext <4 x i16> %v to <4 x i32> 1344 ret <4 x i32> %v2 1345} 1346 1347define <4 x i32> @load_zext_v4i32_with_folded_gep_offset(<4 x i16>* %p) { 1348; CHECK-LABEL: load_zext_v4i32_with_folded_gep_offset: 1349; CHECK: .functype load_zext_v4i32_with_folded_gep_offset (i32) -> (v128) 1350; CHECK-NEXT: # %bb.0: 1351; CHECK-NEXT: 
local.get 0 1352; CHECK-NEXT: i32x4.load16x4_u 8 1353; CHECK-NEXT: # fallthrough-return 1354 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1 1355 %v = load <4 x i16>, <4 x i16>* %s 1356 %v2 = zext <4 x i16> %v to <4 x i32> 1357 ret <4 x i32> %v2 1358} 1359 1360define <4 x i16> @load_ext_v4i32_with_folded_gep_offset(<4 x i16>* %p) { 1361; CHECK-LABEL: load_ext_v4i32_with_folded_gep_offset: 1362; CHECK: .functype load_ext_v4i32_with_folded_gep_offset (i32) -> (v128) 1363; CHECK-NEXT: # %bb.0: 1364; CHECK-NEXT: local.get 0 1365; CHECK-NEXT: i32x4.load16x4_u 8 1366; CHECK-NEXT: # fallthrough-return 1367 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1 1368 %v = load <4 x i16>, <4 x i16>* %s 1369 ret <4 x i16> %v 1370} 1371 1372define <4 x i32> @load_v4i32_with_unfolded_gep_negative_offset(<4 x i32>* %p) { 1373; CHECK-LABEL: load_v4i32_with_unfolded_gep_negative_offset: 1374; CHECK: .functype load_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1375; CHECK-NEXT: # %bb.0: 1376; CHECK-NEXT: local.get 0 1377; CHECK-NEXT: i32.const -16 1378; CHECK-NEXT: i32.add 1379; CHECK-NEXT: v128.load 0 1380; CHECK-NEXT: # fallthrough-return 1381 %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1 1382 %v = load <4 x i32>, <4 x i32>* %s 1383 ret <4 x i32> %v 1384} 1385 1386define <4 x i32> @load_splat_v4i32_with_unfolded_gep_negative_offset(i32* %p) { 1387; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_negative_offset: 1388; CHECK: .functype load_splat_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1389; CHECK-NEXT: # %bb.0: 1390; CHECK-NEXT: local.get 0 1391; CHECK-NEXT: i32.const -4 1392; CHECK-NEXT: i32.add 1393; CHECK-NEXT: v128.load32_splat 0 1394; CHECK-NEXT: # fallthrough-return 1395 %s = getelementptr inbounds i32, i32* %p, i32 -1 1396 %e = load i32, i32* %s 1397 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1398 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1399 ret <4 x i32> %v2 1400} 1401 
1402define <4 x i32> @load_sext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) { 1403; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_negative_offset: 1404; CHECK: .functype load_sext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1405; CHECK-NEXT: # %bb.0: 1406; CHECK-NEXT: local.get 0 1407; CHECK-NEXT: i32.const -8 1408; CHECK-NEXT: i32.add 1409; CHECK-NEXT: i32x4.load16x4_s 0 1410; CHECK-NEXT: # fallthrough-return 1411 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1 1412 %v = load <4 x i16>, <4 x i16>* %s 1413 %v2 = sext <4 x i16> %v to <4 x i32> 1414 ret <4 x i32> %v2 1415} 1416 1417define <4 x i32> @load_zext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) { 1418; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_negative_offset: 1419; CHECK: .functype load_zext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1420; CHECK-NEXT: # %bb.0: 1421; CHECK-NEXT: local.get 0 1422; CHECK-NEXT: i32.const -8 1423; CHECK-NEXT: i32.add 1424; CHECK-NEXT: i32x4.load16x4_u 0 1425; CHECK-NEXT: # fallthrough-return 1426 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1 1427 %v = load <4 x i16>, <4 x i16>* %s 1428 %v2 = zext <4 x i16> %v to <4 x i32> 1429 ret <4 x i32> %v2 1430} 1431 1432define <4 x i16> @load_ext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) { 1433; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_negative_offset: 1434; CHECK: .functype load_ext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1435; CHECK-NEXT: # %bb.0: 1436; CHECK-NEXT: local.get 0 1437; CHECK-NEXT: i32.const -8 1438; CHECK-NEXT: i32.add 1439; CHECK-NEXT: i32x4.load16x4_u 0 1440; CHECK-NEXT: # fallthrough-return 1441 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1 1442 %v = load <4 x i16>, <4 x i16>* %s 1443 ret <4 x i16> %v 1444} 1445 1446define <4 x i32> @load_v4i32_with_unfolded_offset(<4 x i32>* %p) { 1447; CHECK-LABEL: load_v4i32_with_unfolded_offset: 1448; CHECK: .functype load_v4i32_with_unfolded_offset (i32) 
-> (v128) 1449; CHECK-NEXT: # %bb.0: 1450; CHECK-NEXT: local.get 0 1451; CHECK-NEXT: i32.const 16 1452; CHECK-NEXT: i32.add 1453; CHECK-NEXT: v128.load 0 1454; CHECK-NEXT: # fallthrough-return 1455 %q = ptrtoint <4 x i32>* %p to i32 1456 %r = add nsw i32 %q, 16 1457 %s = inttoptr i32 %r to <4 x i32>* 1458 %v = load <4 x i32>, <4 x i32>* %s 1459 ret <4 x i32> %v 1460} 1461 1462define <4 x i32> @load_splat_v4i32_with_unfolded_offset(i32* %p) { 1463; CHECK-LABEL: load_splat_v4i32_with_unfolded_offset: 1464; CHECK: .functype load_splat_v4i32_with_unfolded_offset (i32) -> (v128) 1465; CHECK-NEXT: # %bb.0: 1466; CHECK-NEXT: local.get 0 1467; CHECK-NEXT: i32.const 16 1468; CHECK-NEXT: i32.add 1469; CHECK-NEXT: v128.load32_splat 0 1470; CHECK-NEXT: # fallthrough-return 1471 %q = ptrtoint i32* %p to i32 1472 %r = add nsw i32 %q, 16 1473 %s = inttoptr i32 %r to i32* 1474 %e = load i32, i32* %s 1475 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1476 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1477 ret <4 x i32> %v2 1478} 1479 1480define <4 x i32> @load_sext_v4i32_with_unfolded_offset(<4 x i16>* %p) { 1481; CHECK-LABEL: load_sext_v4i32_with_unfolded_offset: 1482; CHECK: .functype load_sext_v4i32_with_unfolded_offset (i32) -> (v128) 1483; CHECK-NEXT: # %bb.0: 1484; CHECK-NEXT: local.get 0 1485; CHECK-NEXT: i32.const 16 1486; CHECK-NEXT: i32.add 1487; CHECK-NEXT: i32x4.load16x4_s 0 1488; CHECK-NEXT: # fallthrough-return 1489 %q = ptrtoint <4 x i16>* %p to i32 1490 %r = add nsw i32 %q, 16 1491 %s = inttoptr i32 %r to <4 x i16>* 1492 %v = load <4 x i16>, <4 x i16>* %s 1493 %v2 = sext <4 x i16> %v to <4 x i32> 1494 ret <4 x i32> %v2 1495} 1496 1497define <4 x i32> @load_zext_v4i32_with_unfolded_offset(<4 x i16>* %p) { 1498; CHECK-LABEL: load_zext_v4i32_with_unfolded_offset: 1499; CHECK: .functype load_zext_v4i32_with_unfolded_offset (i32) -> (v128) 1500; CHECK-NEXT: # %bb.0: 1501; CHECK-NEXT: local.get 0 1502; CHECK-NEXT: i32.const 16 1503; 
CHECK-NEXT: i32.add 1504; CHECK-NEXT: i32x4.load16x4_u 0 1505; CHECK-NEXT: # fallthrough-return 1506 %q = ptrtoint <4 x i16>* %p to i32 1507 %r = add nsw i32 %q, 16 1508 %s = inttoptr i32 %r to <4 x i16>* 1509 %v = load <4 x i16>, <4 x i16>* %s 1510 %v2 = zext <4 x i16> %v to <4 x i32> 1511 ret <4 x i32> %v2 1512} 1513 1514define <4 x i16> @load_ext_v4i32_with_unfolded_offset(<4 x i16>* %p) { 1515; CHECK-LABEL: load_ext_v4i32_with_unfolded_offset: 1516; CHECK: .functype load_ext_v4i32_with_unfolded_offset (i32) -> (v128) 1517; CHECK-NEXT: # %bb.0: 1518; CHECK-NEXT: local.get 0 1519; CHECK-NEXT: i32.const 16 1520; CHECK-NEXT: i32.add 1521; CHECK-NEXT: i32x4.load16x4_u 0 1522; CHECK-NEXT: # fallthrough-return 1523 %q = ptrtoint <4 x i16>* %p to i32 1524 %r = add nsw i32 %q, 16 1525 %s = inttoptr i32 %r to <4 x i16>* 1526 %v = load <4 x i16>, <4 x i16>* %s 1527 ret <4 x i16> %v 1528} 1529 1530define <4 x i32> @load_v4i32_with_unfolded_gep_offset(<4 x i32>* %p) { 1531; CHECK-LABEL: load_v4i32_with_unfolded_gep_offset: 1532; CHECK: .functype load_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1533; CHECK-NEXT: # %bb.0: 1534; CHECK-NEXT: local.get 0 1535; CHECK-NEXT: i32.const 16 1536; CHECK-NEXT: i32.add 1537; CHECK-NEXT: v128.load 0 1538; CHECK-NEXT: # fallthrough-return 1539 %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1 1540 %v = load <4 x i32>, <4 x i32>* %s 1541 ret <4 x i32> %v 1542} 1543 1544define <4 x i32> @load_splat_v4i32_with_unfolded_gep_offset(i32* %p) { 1545; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_offset: 1546; CHECK: .functype load_splat_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1547; CHECK-NEXT: # %bb.0: 1548; CHECK-NEXT: local.get 0 1549; CHECK-NEXT: i32.const 4 1550; CHECK-NEXT: i32.add 1551; CHECK-NEXT: v128.load32_splat 0 1552; CHECK-NEXT: # fallthrough-return 1553 %s = getelementptr i32, i32* %p, i32 1 1554 %e = load i32, i32* %s 1555 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1556 %v2 = shufflevector <4 x i32> %v1, <4 x 
i32> undef, <4 x i32> zeroinitializer 1557 ret <4 x i32> %v2 1558} 1559 1560define <4 x i32> @load_sext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) { 1561; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_offset: 1562; CHECK: .functype load_sext_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1563; CHECK-NEXT: # %bb.0: 1564; CHECK-NEXT: local.get 0 1565; CHECK-NEXT: i32.const 8 1566; CHECK-NEXT: i32.add 1567; CHECK-NEXT: i32x4.load16x4_s 0 1568; CHECK-NEXT: # fallthrough-return 1569 %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1 1570 %v = load <4 x i16>, <4 x i16>* %s 1571 %v2 = sext <4 x i16> %v to <4 x i32> 1572 ret <4 x i32> %v2 1573} 1574 1575define <4 x i32> @load_zext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) { 1576; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_offset: 1577; CHECK: .functype load_zext_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1578; CHECK-NEXT: # %bb.0: 1579; CHECK-NEXT: local.get 0 1580; CHECK-NEXT: i32.const 8 1581; CHECK-NEXT: i32.add 1582; CHECK-NEXT: i32x4.load16x4_u 0 1583; CHECK-NEXT: # fallthrough-return 1584 %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1 1585 %v = load <4 x i16>, <4 x i16>* %s 1586 %v2 = zext <4 x i16> %v to <4 x i32> 1587 ret <4 x i32> %v2 1588} 1589 1590define <4 x i16> @load_ext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) { 1591; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_offset: 1592; CHECK: .functype load_ext_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1593; CHECK-NEXT: # %bb.0: 1594; CHECK-NEXT: local.get 0 1595; CHECK-NEXT: i32.const 8 1596; CHECK-NEXT: i32.add 1597; CHECK-NEXT: i32x4.load16x4_u 0 1598; CHECK-NEXT: # fallthrough-return 1599 %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1 1600 %v = load <4 x i16>, <4 x i16>* %s 1601 ret <4 x i16> %v 1602} 1603 1604define <4 x i32> @load_v4i32_from_numeric_address() { 1605; CHECK-LABEL: load_v4i32_from_numeric_address: 1606; CHECK: .functype load_v4i32_from_numeric_address () -> (v128) 1607; CHECK-NEXT: # %bb.0: 1608; CHECK-NEXT: 
i32.const 0 1609; CHECK-NEXT: v128.load 32 1610; CHECK-NEXT: # fallthrough-return 1611 %s = inttoptr i32 32 to <4 x i32>* 1612 %v = load <4 x i32>, <4 x i32>* %s 1613 ret <4 x i32> %v 1614} 1615 1616define <4 x i32> @load_splat_v4i32_from_numeric_address() { 1617; CHECK-LABEL: load_splat_v4i32_from_numeric_address: 1618; CHECK: .functype load_splat_v4i32_from_numeric_address () -> (v128) 1619; CHECK-NEXT: # %bb.0: 1620; CHECK-NEXT: i32.const 0 1621; CHECK-NEXT: v128.load32_splat 32 1622; CHECK-NEXT: # fallthrough-return 1623 %s = inttoptr i32 32 to i32* 1624 %e = load i32, i32* %s 1625 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1626 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1627 ret <4 x i32> %v2 1628} 1629 1630define <4 x i32> @load_sext_v4i32_from_numeric_address() { 1631; CHECK-LABEL: load_sext_v4i32_from_numeric_address: 1632; CHECK: .functype load_sext_v4i32_from_numeric_address () -> (v128) 1633; CHECK-NEXT: # %bb.0: 1634; CHECK-NEXT: i32.const 0 1635; CHECK-NEXT: i32x4.load16x4_s 32 1636; CHECK-NEXT: # fallthrough-return 1637 %s = inttoptr i32 32 to <4 x i16>* 1638 %v = load <4 x i16>, <4 x i16>* %s 1639 %v2 = sext <4 x i16> %v to <4 x i32> 1640 ret <4 x i32> %v2 1641} 1642 1643define <4 x i32> @load_zext_v4i32_from_numeric_address() { 1644; CHECK-LABEL: load_zext_v4i32_from_numeric_address: 1645; CHECK: .functype load_zext_v4i32_from_numeric_address () -> (v128) 1646; CHECK-NEXT: # %bb.0: 1647; CHECK-NEXT: i32.const 0 1648; CHECK-NEXT: i32x4.load16x4_u 32 1649; CHECK-NEXT: # fallthrough-return 1650 %s = inttoptr i32 32 to <4 x i16>* 1651 %v = load <4 x i16>, <4 x i16>* %s 1652 %v2 = zext <4 x i16> %v to <4 x i32> 1653 ret <4 x i32> %v2 1654} 1655 1656define <4 x i16> @load_ext_v4i32_from_numeric_address() { 1657; CHECK-LABEL: load_ext_v4i32_from_numeric_address: 1658; CHECK: .functype load_ext_v4i32_from_numeric_address () -> (v128) 1659; CHECK-NEXT: # %bb.0: 1660; CHECK-NEXT: i32.const 0 1661; CHECK-NEXT: 
i32x4.load16x4_u 32 1662; CHECK-NEXT: # fallthrough-return 1663 %s = inttoptr i32 32 to <4 x i16>* 1664 %v = load <4 x i16>, <4 x i16>* %s 1665 ret <4 x i16> %v 1666} 1667 1668@gv_v4i32 = global <4 x i32> <i32 42, i32 42, i32 42, i32 42> 1669define <4 x i32> @load_v4i32_from_global_address() { 1670; CHECK-LABEL: load_v4i32_from_global_address: 1671; CHECK: .functype load_v4i32_from_global_address () -> (v128) 1672; CHECK-NEXT: # %bb.0: 1673; CHECK-NEXT: i32.const 0 1674; CHECK-NEXT: v128.load gv_v4i32 1675; CHECK-NEXT: # fallthrough-return 1676 %v = load <4 x i32>, <4 x i32>* @gv_v4i32 1677 ret <4 x i32> %v 1678} 1679 1680@gv_i32 = global i32 42 1681define <4 x i32> @load_splat_v4i32_from_global_address() { 1682; CHECK-LABEL: load_splat_v4i32_from_global_address: 1683; CHECK: .functype load_splat_v4i32_from_global_address () -> (v128) 1684; CHECK-NEXT: # %bb.0: 1685; CHECK-NEXT: i32.const 0 1686; CHECK-NEXT: v128.load32_splat gv_i32 1687; CHECK-NEXT: # fallthrough-return 1688 %e = load i32, i32* @gv_i32 1689 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1690 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1691 ret <4 x i32> %v2 1692} 1693 1694@gv_v4i16 = global <4 x i16> <i16 42, i16 42, i16 42, i16 42> 1695define <4 x i32> @load_sext_v4i32_from_global_address() { 1696; CHECK-LABEL: load_sext_v4i32_from_global_address: 1697; CHECK: .functype load_sext_v4i32_from_global_address () -> (v128) 1698; CHECK-NEXT: # %bb.0: 1699; CHECK-NEXT: i32.const 0 1700; CHECK-NEXT: i32x4.load16x4_s gv_v4i16 1701; CHECK-NEXT: # fallthrough-return 1702 %v = load <4 x i16>, <4 x i16>* @gv_v4i16 1703 %v2 = sext <4 x i16> %v to <4 x i32> 1704 ret <4 x i32> %v2 1705} 1706 1707define <4 x i32> @load_zext_v4i32_from_global_address() { 1708; CHECK-LABEL: load_zext_v4i32_from_global_address: 1709; CHECK: .functype load_zext_v4i32_from_global_address () -> (v128) 1710; CHECK-NEXT: # %bb.0: 1711; CHECK-NEXT: i32.const 0 1712; CHECK-NEXT: i32x4.load16x4_u 
gv_v4i16 1713; CHECK-NEXT: # fallthrough-return 1714 %v = load <4 x i16>, <4 x i16>* @gv_v4i16 1715 %v2 = zext <4 x i16> %v to <4 x i32> 1716 ret <4 x i32> %v2 1717} 1718 1719define <4 x i16> @load_ext_v4i32_from_global_address() { 1720; CHECK-LABEL: load_ext_v4i32_from_global_address: 1721; CHECK: .functype load_ext_v4i32_from_global_address () -> (v128) 1722; CHECK-NEXT: # %bb.0: 1723; CHECK-NEXT: i32.const 0 1724; CHECK-NEXT: i32x4.load16x4_u gv_v4i16 1725; CHECK-NEXT: # fallthrough-return 1726 %v = load <4 x i16>, <4 x i16>* @gv_v4i16 1727 ret <4 x i16> %v 1728} 1729 1730define void @store_v4i32(<4 x i32> %v, <4 x i32>* %p) { 1731; CHECK-LABEL: store_v4i32: 1732; CHECK: .functype store_v4i32 (v128, i32) -> () 1733; CHECK-NEXT: # %bb.0: 1734; CHECK-NEXT: local.get 1 1735; CHECK-NEXT: local.get 0 1736; CHECK-NEXT: v128.store 0 1737; CHECK-NEXT: # fallthrough-return 1738 store <4 x i32> %v , <4 x i32>* %p 1739 ret void 1740} 1741 1742define void @store_narrowing_v4i32(<4 x i16> %v, <4 x i16>* %p) { 1743; CHECK-LABEL: store_narrowing_v4i32: 1744; CHECK: .functype store_narrowing_v4i32 (v128, i32) -> () 1745; CHECK-NEXT: # %bb.0: 1746; CHECK-NEXT: local.get 1 1747; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 1748; CHECK-NEXT: local.get 0 1749; CHECK-NEXT: v128.and 1750; CHECK-NEXT: local.get 0 1751; CHECK-NEXT: i16x8.narrow_i32x4_u 1752; CHECK-NEXT: i64x2.extract_lane 0 1753; CHECK-NEXT: i64.store 0 1754; CHECK-NEXT: # fallthrough-return 1755 store <4 x i16> %v , <4 x i16>* %p 1756 ret void 1757} 1758 1759define void @store_v4i32_with_folded_offset(<4 x i32> %v, <4 x i32>* %p) { 1760; CHECK-LABEL: store_v4i32_with_folded_offset: 1761; CHECK: .functype store_v4i32_with_folded_offset (v128, i32) -> () 1762; CHECK-NEXT: # %bb.0: 1763; CHECK-NEXT: local.get 1 1764; CHECK-NEXT: local.get 0 1765; CHECK-NEXT: v128.store 16 1766; CHECK-NEXT: # fallthrough-return 1767 %q = ptrtoint <4 x i32>* %p to i32 1768 %r = add nuw i32 %q, 16 1769 %s = inttoptr i32 %r to <4 x 
i32>* 1770 store <4 x i32> %v , <4 x i32>* %s 1771 ret void 1772} 1773 1774define void @store_narrowing_v4i32_with_folded_offset(<4 x i16> %v, <4 x i16>* %p) { 1775; CHECK-LABEL: store_narrowing_v4i32_with_folded_offset: 1776; CHECK: .functype store_narrowing_v4i32_with_folded_offset (v128, i32) -> () 1777; CHECK-NEXT: # %bb.0: 1778; CHECK-NEXT: local.get 1 1779; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 1780; CHECK-NEXT: local.get 0 1781; CHECK-NEXT: v128.and 1782; CHECK-NEXT: local.get 0 1783; CHECK-NEXT: i16x8.narrow_i32x4_u 1784; CHECK-NEXT: i64x2.extract_lane 0 1785; CHECK-NEXT: i64.store 16 1786; CHECK-NEXT: # fallthrough-return 1787 %q = ptrtoint <4 x i16>* %p to i32 1788 %r = add nuw i32 %q, 16 1789 %s = inttoptr i32 %r to <4 x i16>* 1790 store <4 x i16> %v , <4 x i16>* %s 1791 ret void 1792} 1793 1794define void @store_v4i32_with_folded_gep_offset(<4 x i32> %v, <4 x i32>* %p) { 1795; CHECK-LABEL: store_v4i32_with_folded_gep_offset: 1796; CHECK: .functype store_v4i32_with_folded_gep_offset (v128, i32) -> () 1797; CHECK-NEXT: # %bb.0: 1798; CHECK-NEXT: local.get 1 1799; CHECK-NEXT: local.get 0 1800; CHECK-NEXT: v128.store 16 1801; CHECK-NEXT: # fallthrough-return 1802 %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1 1803 store <4 x i32> %v , <4 x i32>* %s 1804 ret void 1805} 1806 1807define void @store_narrowing_v4i32_with_folded_gep_offset(<4 x i16> %v, <4 x i16>* %p) { 1808; CHECK-LABEL: store_narrowing_v4i32_with_folded_gep_offset: 1809; CHECK: .functype store_narrowing_v4i32_with_folded_gep_offset (v128, i32) -> () 1810; CHECK-NEXT: # %bb.0: 1811; CHECK-NEXT: local.get 1 1812; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 1813; CHECK-NEXT: local.get 0 1814; CHECK-NEXT: v128.and 1815; CHECK-NEXT: local.get 0 1816; CHECK-NEXT: i16x8.narrow_i32x4_u 1817; CHECK-NEXT: i64x2.extract_lane 0 1818; CHECK-NEXT: i64.store 8 1819; CHECK-NEXT: # fallthrough-return 1820 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1 1821 store <4 x 
i16> %v , <4 x i16>* %s 1822 ret void 1823} 1824 1825define void @store_v4i32_with_unfolded_gep_negative_offset(<4 x i32> %v, <4 x i32>* %p) { 1826; CHECK-LABEL: store_v4i32_with_unfolded_gep_negative_offset: 1827; CHECK: .functype store_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> () 1828; CHECK-NEXT: # %bb.0: 1829; CHECK-NEXT: local.get 1 1830; CHECK-NEXT: i32.const -16 1831; CHECK-NEXT: i32.add 1832; CHECK-NEXT: local.get 0 1833; CHECK-NEXT: v128.store 0 1834; CHECK-NEXT: # fallthrough-return 1835 %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1 1836 store <4 x i32> %v , <4 x i32>* %s 1837 ret void 1838} 1839 1840define void @store_narrowing_v4i32_with_unfolded_gep_negative_offset(<4 x i16> %v, <4 x i16>* %p) { 1841; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_negative_offset: 1842; CHECK: .functype store_narrowing_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> () 1843; CHECK-NEXT: # %bb.0: 1844; CHECK-NEXT: local.get 1 1845; CHECK-NEXT: i32.const -8 1846; CHECK-NEXT: i32.add 1847; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 1848; CHECK-NEXT: local.get 0 1849; CHECK-NEXT: v128.and 1850; CHECK-NEXT: local.get 0 1851; CHECK-NEXT: i16x8.narrow_i32x4_u 1852; CHECK-NEXT: i64x2.extract_lane 0 1853; CHECK-NEXT: i64.store 0 1854; CHECK-NEXT: # fallthrough-return 1855 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1 1856 store <4 x i16> %v , <4 x i16>* %s 1857 ret void 1858} 1859 1860define void @store_v4i32_with_unfolded_offset(<4 x i32> %v, <4 x i32>* %p) { 1861; CHECK-LABEL: store_v4i32_with_unfolded_offset: 1862; CHECK: .functype store_v4i32_with_unfolded_offset (v128, i32) -> () 1863; CHECK-NEXT: # %bb.0: 1864; CHECK-NEXT: local.get 1 1865; CHECK-NEXT: i32.const 16 1866; CHECK-NEXT: i32.add 1867; CHECK-NEXT: local.get 0 1868; CHECK-NEXT: v128.store 0 1869; CHECK-NEXT: # fallthrough-return 1870 %q = ptrtoint <4 x i32>* %p to i32 1871 %r = add nsw i32 %q, 16 1872 %s = inttoptr i32 %r to <4 x i32>* 1873 store <4 
x i32> %v , <4 x i32>* %s 1874 ret void 1875} 1876 1877define void @store_narrowing_v4i32_with_unfolded_offset(<4 x i16> %v, <4 x i16>* %p) { 1878; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_offset: 1879; CHECK: .functype store_narrowing_v4i32_with_unfolded_offset (v128, i32) -> () 1880; CHECK-NEXT: # %bb.0: 1881; CHECK-NEXT: local.get 1 1882; CHECK-NEXT: i32.const 16 1883; CHECK-NEXT: i32.add 1884; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 1885; CHECK-NEXT: local.get 0 1886; CHECK-NEXT: v128.and 1887; CHECK-NEXT: local.get 0 1888; CHECK-NEXT: i16x8.narrow_i32x4_u 1889; CHECK-NEXT: i64x2.extract_lane 0 1890; CHECK-NEXT: i64.store 0 1891; CHECK-NEXT: # fallthrough-return 1892 %q = ptrtoint <4 x i16>* %p to i32 1893 %r = add nsw i32 %q, 16 1894 %s = inttoptr i32 %r to <4 x i16>* 1895 store <4 x i16> %v , <4 x i16>* %s 1896 ret void 1897} 1898 1899define void @store_v4i32_with_unfolded_gep_offset(<4 x i32> %v, <4 x i32>* %p) { 1900; CHECK-LABEL: store_v4i32_with_unfolded_gep_offset: 1901; CHECK: .functype store_v4i32_with_unfolded_gep_offset (v128, i32) -> () 1902; CHECK-NEXT: # %bb.0: 1903; CHECK-NEXT: local.get 1 1904; CHECK-NEXT: i32.const 16 1905; CHECK-NEXT: i32.add 1906; CHECK-NEXT: local.get 0 1907; CHECK-NEXT: v128.store 0 1908; CHECK-NEXT: # fallthrough-return 1909 %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1 1910 store <4 x i32> %v , <4 x i32>* %s 1911 ret void 1912} 1913 1914define void @store_narrowing_v4i32_with_unfolded_gep_offset(<4 x i16> %v, <4 x i16>* %p) { 1915; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_offset: 1916; CHECK: .functype store_narrowing_v4i32_with_unfolded_gep_offset (v128, i32) -> () 1917; CHECK-NEXT: # %bb.0: 1918; CHECK-NEXT: local.get 1 1919; CHECK-NEXT: i32.const 8 1920; CHECK-NEXT: i32.add 1921; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 1922; CHECK-NEXT: local.get 0 1923; CHECK-NEXT: v128.and 1924; CHECK-NEXT: local.get 0 1925; CHECK-NEXT: i16x8.narrow_i32x4_u 1926; CHECK-NEXT: 
i64x2.extract_lane 0 1927; CHECK-NEXT: i64.store 0 1928; CHECK-NEXT: # fallthrough-return 1929 %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1 1930 store <4 x i16> %v , <4 x i16>* %s 1931 ret void 1932} 1933 1934define void @store_v4i32_to_numeric_address(<4 x i32> %v) { 1935; CHECK-LABEL: store_v4i32_to_numeric_address: 1936; CHECK: .functype store_v4i32_to_numeric_address (v128) -> () 1937; CHECK-NEXT: # %bb.0: 1938; CHECK-NEXT: i32.const 0 1939; CHECK-NEXT: local.get 0 1940; CHECK-NEXT: v128.store 32 1941; CHECK-NEXT: # fallthrough-return 1942 %s = inttoptr i32 32 to <4 x i32>* 1943 store <4 x i32> %v , <4 x i32>* %s 1944 ret void 1945} 1946 1947define void @store_narrowing_v4i32_to_numeric_address(<4 x i16> %v) { 1948; CHECK-LABEL: store_narrowing_v4i32_to_numeric_address: 1949; CHECK: .functype store_narrowing_v4i32_to_numeric_address (v128) -> () 1950; CHECK-NEXT: # %bb.0: 1951; CHECK-NEXT: i32.const 0 1952; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 1953; CHECK-NEXT: local.get 0 1954; CHECK-NEXT: v128.and 1955; CHECK-NEXT: local.get 0 1956; CHECK-NEXT: i16x8.narrow_i32x4_u 1957; CHECK-NEXT: i64x2.extract_lane 0 1958; CHECK-NEXT: i64.store 32 1959; CHECK-NEXT: # fallthrough-return 1960 %s = inttoptr i32 32 to <4 x i16>* 1961 store <4 x i16> %v , <4 x i16>* %s 1962 ret void 1963} 1964 1965define void @store_v4i32_to_global_address(<4 x i32> %v) { 1966; CHECK-LABEL: store_v4i32_to_global_address: 1967; CHECK: .functype store_v4i32_to_global_address (v128) -> () 1968; CHECK-NEXT: # %bb.0: 1969; CHECK-NEXT: i32.const 0 1970; CHECK-NEXT: local.get 0 1971; CHECK-NEXT: v128.store gv_v4i32 1972; CHECK-NEXT: # fallthrough-return 1973 store <4 x i32> %v , <4 x i32>* @gv_v4i32 1974 ret void 1975} 1976 1977define void @store_narrowing_v4i32_to_global_address(<4 x i16> %v) { 1978; CHECK-LABEL: store_narrowing_v4i32_to_global_address: 1979; CHECK: .functype store_narrowing_v4i32_to_global_address (v128) -> () 1980; CHECK-NEXT: # %bb.0: 1981; CHECK-NEXT: i32.const 
0 1982; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 1983; CHECK-NEXT: local.get 0 1984; CHECK-NEXT: v128.and 1985; CHECK-NEXT: local.get 0 1986; CHECK-NEXT: i16x8.narrow_i32x4_u 1987; CHECK-NEXT: i64x2.extract_lane 0 1988; CHECK-NEXT: i64.store gv_v4i16 1989; CHECK-NEXT: # fallthrough-return 1990 store <4 x i16> %v , <4 x i16>* @gv_v4i16 1991 ret void 1992} 1993 1994; ============================================================================== 1995; 2 x i64 1996; ============================================================================== 1997define <2 x i64> @load_v2i64(<2 x i64>* %p) { 1998; CHECK-LABEL: load_v2i64: 1999; CHECK: .functype load_v2i64 (i32) -> (v128) 2000; CHECK-NEXT: # %bb.0: 2001; CHECK-NEXT: local.get 0 2002; CHECK-NEXT: v128.load 0 2003; CHECK-NEXT: # fallthrough-return 2004 %v = load <2 x i64>, <2 x i64>* %p 2005 ret <2 x i64> %v 2006} 2007 2008define <2 x i64> @load_splat_v2i64(i64* %p) { 2009; CHECK-LABEL: load_splat_v2i64: 2010; CHECK: .functype load_splat_v2i64 (i32) -> (v128) 2011; CHECK-NEXT: # %bb.0: 2012; CHECK-NEXT: local.get 0 2013; CHECK-NEXT: v128.load64_splat 0 2014; CHECK-NEXT: # fallthrough-return 2015 %e = load i64, i64* %p 2016 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2017 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2018 ret <2 x i64> %v2 2019} 2020 2021define <2 x i64> @load_sext_v2i64(<2 x i32>* %p) { 2022; CHECK-LABEL: load_sext_v2i64: 2023; CHECK: .functype load_sext_v2i64 (i32) -> (v128) 2024; CHECK-NEXT: # %bb.0: 2025; CHECK-NEXT: local.get 0 2026; CHECK-NEXT: i64x2.load32x2_s 0 2027; CHECK-NEXT: # fallthrough-return 2028 %v = load <2 x i32>, <2 x i32>* %p 2029 %v2 = sext <2 x i32> %v to <2 x i64> 2030 ret <2 x i64> %v2 2031} 2032 2033define <2 x i64> @load_zext_v2i64(<2 x i32>* %p) { 2034; CHECK-LABEL: load_zext_v2i64: 2035; CHECK: .functype load_zext_v2i64 (i32) -> (v128) 2036; CHECK-NEXT: # %bb.0: 2037; CHECK-NEXT: local.get 0 2038; CHECK-NEXT: i64x2.load32x2_u 0 
2039; CHECK-NEXT: # fallthrough-return 2040 %v = load <2 x i32>, <2 x i32>* %p 2041 %v2 = zext <2 x i32> %v to <2 x i64> 2042 ret <2 x i64> %v2 2043} 2044 2045define <2 x i32> @load_ext_v2i64(<2 x i32>* %p) { 2046; CHECK-LABEL: load_ext_v2i64: 2047; CHECK: .functype load_ext_v2i64 (i32) -> (v128) 2048; CHECK-NEXT: # %bb.0: 2049; CHECK-NEXT: local.get 0 2050; CHECK-NEXT: i64x2.load32x2_u 0 2051; CHECK-NEXT: # fallthrough-return 2052 %v = load <2 x i32>, <2 x i32>* %p 2053 ret <2 x i32> %v 2054} 2055 2056define <2 x i64> @load_v2i64_with_folded_offset(<2 x i64>* %p) { 2057; CHECK-LABEL: load_v2i64_with_folded_offset: 2058; CHECK: .functype load_v2i64_with_folded_offset (i32) -> (v128) 2059; CHECK-NEXT: # %bb.0: 2060; CHECK-NEXT: local.get 0 2061; CHECK-NEXT: v128.load 16 2062; CHECK-NEXT: # fallthrough-return 2063 %q = ptrtoint <2 x i64>* %p to i32 2064 %r = add nuw i32 %q, 16 2065 %s = inttoptr i32 %r to <2 x i64>* 2066 %v = load <2 x i64>, <2 x i64>* %s 2067 ret <2 x i64> %v 2068} 2069 2070define <2 x i64> @load_splat_v2i64_with_folded_offset(i64* %p) { 2071; CHECK-LABEL: load_splat_v2i64_with_folded_offset: 2072; CHECK: .functype load_splat_v2i64_with_folded_offset (i32) -> (v128) 2073; CHECK-NEXT: # %bb.0: 2074; CHECK-NEXT: local.get 0 2075; CHECK-NEXT: v128.load64_splat 16 2076; CHECK-NEXT: # fallthrough-return 2077 %q = ptrtoint i64* %p to i32 2078 %r = add nuw i32 %q, 16 2079 %s = inttoptr i32 %r to i64* 2080 %e = load i64, i64* %s 2081 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2082 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2083 ret <2 x i64> %v2 2084} 2085 2086define <2 x i64> @load_sext_v2i64_with_folded_offset(<2 x i32>* %p) { 2087; CHECK-LABEL: load_sext_v2i64_with_folded_offset: 2088; CHECK: .functype load_sext_v2i64_with_folded_offset (i32) -> (v128) 2089; CHECK-NEXT: # %bb.0: 2090; CHECK-NEXT: local.get 0 2091; CHECK-NEXT: i64x2.load32x2_s 16 2092; CHECK-NEXT: # fallthrough-return 2093 %q = ptrtoint <2 x 
i32>* %p to i32 2094 %r = add nuw i32 %q, 16 2095 %s = inttoptr i32 %r to <2 x i32>* 2096 %v = load <2 x i32>, <2 x i32>* %s 2097 %v2 = sext <2 x i32> %v to <2 x i64> 2098 ret <2 x i64> %v2 2099} 2100 2101define <2 x i64> @load_zext_v2i64_with_folded_offset(<2 x i32>* %p) { 2102; CHECK-LABEL: load_zext_v2i64_with_folded_offset: 2103; CHECK: .functype load_zext_v2i64_with_folded_offset (i32) -> (v128) 2104; CHECK-NEXT: # %bb.0: 2105; CHECK-NEXT: local.get 0 2106; CHECK-NEXT: i64x2.load32x2_u 16 2107; CHECK-NEXT: # fallthrough-return 2108 %q = ptrtoint <2 x i32>* %p to i32 2109 %r = add nuw i32 %q, 16 2110 %s = inttoptr i32 %r to <2 x i32>* 2111 %v = load <2 x i32>, <2 x i32>* %s 2112 %v2 = zext <2 x i32> %v to <2 x i64> 2113 ret <2 x i64> %v2 2114} 2115 2116define <2 x i32> @load_ext_v2i64_with_folded_offset(<2 x i32>* %p) { 2117; CHECK-LABEL: load_ext_v2i64_with_folded_offset: 2118; CHECK: .functype load_ext_v2i64_with_folded_offset (i32) -> (v128) 2119; CHECK-NEXT: # %bb.0: 2120; CHECK-NEXT: local.get 0 2121; CHECK-NEXT: i64x2.load32x2_u 16 2122; CHECK-NEXT: # fallthrough-return 2123 %q = ptrtoint <2 x i32>* %p to i32 2124 %r = add nuw i32 %q, 16 2125 %s = inttoptr i32 %r to <2 x i32>* 2126 %v = load <2 x i32>, <2 x i32>* %s 2127 ret <2 x i32> %v 2128} 2129 2130define <2 x i64> @load_v2i64_with_folded_gep_offset(<2 x i64>* %p) { 2131; CHECK-LABEL: load_v2i64_with_folded_gep_offset: 2132; CHECK: .functype load_v2i64_with_folded_gep_offset (i32) -> (v128) 2133; CHECK-NEXT: # %bb.0: 2134; CHECK-NEXT: local.get 0 2135; CHECK-NEXT: v128.load 16 2136; CHECK-NEXT: # fallthrough-return 2137 %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1 2138 %v = load <2 x i64>, <2 x i64>* %s 2139 ret <2 x i64> %v 2140} 2141 2142define <2 x i64> @load_splat_v2i64_with_folded_gep_offset(i64* %p) { 2143; CHECK-LABEL: load_splat_v2i64_with_folded_gep_offset: 2144; CHECK: .functype load_splat_v2i64_with_folded_gep_offset (i32) -> (v128) 2145; CHECK-NEXT: # %bb.0: 2146; 
CHECK-NEXT: local.get 0 2147; CHECK-NEXT: v128.load64_splat 8 2148; CHECK-NEXT: # fallthrough-return 2149 %s = getelementptr inbounds i64, i64* %p, i32 1 2150 %e = load i64, i64* %s 2151 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2152 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2153 ret <2 x i64> %v2 2154} 2155 2156define <2 x i64> @load_sext_v2i64_with_folded_gep_offset(<2 x i32>* %p) { 2157; CHECK-LABEL: load_sext_v2i64_with_folded_gep_offset: 2158; CHECK: .functype load_sext_v2i64_with_folded_gep_offset (i32) -> (v128) 2159; CHECK-NEXT: # %bb.0: 2160; CHECK-NEXT: local.get 0 2161; CHECK-NEXT: i64x2.load32x2_s 8 2162; CHECK-NEXT: # fallthrough-return 2163 %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1 2164 %v = load <2 x i32>, <2 x i32>* %s 2165 %v2 = sext <2 x i32> %v to <2 x i64> 2166 ret <2 x i64> %v2 2167} 2168 2169define <2 x i64> @load_zext_v2i64_with_folded_gep_offset(<2 x i32>* %p) { 2170; CHECK-LABEL: load_zext_v2i64_with_folded_gep_offset: 2171; CHECK: .functype load_zext_v2i64_with_folded_gep_offset (i32) -> (v128) 2172; CHECK-NEXT: # %bb.0: 2173; CHECK-NEXT: local.get 0 2174; CHECK-NEXT: i64x2.load32x2_u 8 2175; CHECK-NEXT: # fallthrough-return 2176 %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1 2177 %v = load <2 x i32>, <2 x i32>* %s 2178 %v2 = zext <2 x i32> %v to <2 x i64> 2179 ret <2 x i64> %v2 2180} 2181 2182define <2 x i32> @load_ext_v2i64_with_folded_gep_offset(<2 x i32>* %p) { 2183; CHECK-LABEL: load_ext_v2i64_with_folded_gep_offset: 2184; CHECK: .functype load_ext_v2i64_with_folded_gep_offset (i32) -> (v128) 2185; CHECK-NEXT: # %bb.0: 2186; CHECK-NEXT: local.get 0 2187; CHECK-NEXT: i64x2.load32x2_u 8 2188; CHECK-NEXT: # fallthrough-return 2189 %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1 2190 %v = load <2 x i32>, <2 x i32>* %s 2191 ret <2 x i32> %v 2192} 2193 2194define <2 x i64> @load_v2i64_with_unfolded_gep_negative_offset(<2 x i64>* %p) { 2195; CHECK-LABEL: 
load_v2i64_with_unfolded_gep_negative_offset: 2196; CHECK: .functype load_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) 2197; CHECK-NEXT: # %bb.0: 2198; CHECK-NEXT: local.get 0 2199; CHECK-NEXT: i32.const -16 2200; CHECK-NEXT: i32.add 2201; CHECK-NEXT: v128.load 0 2202; CHECK-NEXT: # fallthrough-return 2203 %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1 2204 %v = load <2 x i64>, <2 x i64>* %s 2205 ret <2 x i64> %v 2206} 2207 2208define <2 x i64> @load_splat_v2i64_with_unfolded_gep_negative_offset(i64* %p) { 2209; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_negative_offset: 2210; CHECK: .functype load_splat_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) 2211; CHECK-NEXT: # %bb.0: 2212; CHECK-NEXT: local.get 0 2213; CHECK-NEXT: i32.const -8 2214; CHECK-NEXT: i32.add 2215; CHECK-NEXT: v128.load64_splat 0 2216; CHECK-NEXT: # fallthrough-return 2217 %s = getelementptr inbounds i64, i64* %p, i32 -1 2218 %e = load i64, i64* %s 2219 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2220 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2221 ret <2 x i64> %v2 2222} 2223 2224define <2 x i64> @load_sext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) { 2225; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_negative_offset: 2226; CHECK: .functype load_sext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) 2227; CHECK-NEXT: # %bb.0: 2228; CHECK-NEXT: local.get 0 2229; CHECK-NEXT: i32.const -8 2230; CHECK-NEXT: i32.add 2231; CHECK-NEXT: i64x2.load32x2_s 0 2232; CHECK-NEXT: # fallthrough-return 2233 %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1 2234 %v = load <2 x i32>, <2 x i32>* %s 2235 %v2 = sext <2 x i32> %v to <2 x i64> 2236 ret <2 x i64> %v2 2237} 2238 2239define <2 x i64> @load_zext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) { 2240; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_negative_offset: 2241; CHECK: .functype load_zext_v2i64_with_unfolded_gep_negative_offset 
(i32) -> (v128) 2242; CHECK-NEXT: # %bb.0: 2243; CHECK-NEXT: local.get 0 2244; CHECK-NEXT: i32.const -8 2245; CHECK-NEXT: i32.add 2246; CHECK-NEXT: i64x2.load32x2_u 0 2247; CHECK-NEXT: # fallthrough-return 2248 %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1 2249 %v = load <2 x i32>, <2 x i32>* %s 2250 %v2 = zext <2 x i32> %v to <2 x i64> 2251 ret <2 x i64> %v2 2252} 2253 2254define <2 x i32> @load_ext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) { 2255; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_negative_offset: 2256; CHECK: .functype load_ext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) 2257; CHECK-NEXT: # %bb.0: 2258; CHECK-NEXT: local.get 0 2259; CHECK-NEXT: i32.const -8 2260; CHECK-NEXT: i32.add 2261; CHECK-NEXT: i64x2.load32x2_u 0 2262; CHECK-NEXT: # fallthrough-return 2263 %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1 2264 %v = load <2 x i32>, <2 x i32>* %s 2265 ret <2 x i32> %v 2266} 2267 2268define <2 x i64> @load_v2i64_with_unfolded_offset(<2 x i64>* %p) { 2269; CHECK-LABEL: load_v2i64_with_unfolded_offset: 2270; CHECK: .functype load_v2i64_with_unfolded_offset (i32) -> (v128) 2271; CHECK-NEXT: # %bb.0: 2272; CHECK-NEXT: local.get 0 2273; CHECK-NEXT: i32.const 16 2274; CHECK-NEXT: i32.add 2275; CHECK-NEXT: v128.load 0 2276; CHECK-NEXT: # fallthrough-return 2277 %q = ptrtoint <2 x i64>* %p to i32 2278 %r = add nsw i32 %q, 16 2279 %s = inttoptr i32 %r to <2 x i64>* 2280 %v = load <2 x i64>, <2 x i64>* %s 2281 ret <2 x i64> %v 2282} 2283 2284define <2 x i64> @load_splat_v2i64_with_unfolded_offset(i64* %p) { 2285; CHECK-LABEL: load_splat_v2i64_with_unfolded_offset: 2286; CHECK: .functype load_splat_v2i64_with_unfolded_offset (i32) -> (v128) 2287; CHECK-NEXT: # %bb.0: 2288; CHECK-NEXT: local.get 0 2289; CHECK-NEXT: i32.const 16 2290; CHECK-NEXT: i32.add 2291; CHECK-NEXT: v128.load64_splat 0 2292; CHECK-NEXT: # fallthrough-return 2293 %q = ptrtoint i64* %p to i32 2294 %r = add nsw i32 %q, 16 2295 %s = inttoptr 
i32 %r to i64* 2296 %e = load i64, i64* %s 2297 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2298 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2299 ret <2 x i64> %v2 2300} 2301 2302define <2 x i64> @load_sext_v2i64_with_unfolded_offset(<2 x i32>* %p) { 2303; CHECK-LABEL: load_sext_v2i64_with_unfolded_offset: 2304; CHECK: .functype load_sext_v2i64_with_unfolded_offset (i32) -> (v128) 2305; CHECK-NEXT: # %bb.0: 2306; CHECK-NEXT: local.get 0 2307; CHECK-NEXT: i32.const 16 2308; CHECK-NEXT: i32.add 2309; CHECK-NEXT: i64x2.load32x2_s 0 2310; CHECK-NEXT: # fallthrough-return 2311 %q = ptrtoint <2 x i32>* %p to i32 2312 %r = add nsw i32 %q, 16 2313 %s = inttoptr i32 %r to <2 x i32>* 2314 %v = load <2 x i32>, <2 x i32>* %s 2315 %v2 = sext <2 x i32> %v to <2 x i64> 2316 ret <2 x i64> %v2 2317} 2318 2319define <2 x i64> @load_zext_v2i64_with_unfolded_offset(<2 x i32>* %p) { 2320; CHECK-LABEL: load_zext_v2i64_with_unfolded_offset: 2321; CHECK: .functype load_zext_v2i64_with_unfolded_offset (i32) -> (v128) 2322; CHECK-NEXT: # %bb.0: 2323; CHECK-NEXT: local.get 0 2324; CHECK-NEXT: i32.const 16 2325; CHECK-NEXT: i32.add 2326; CHECK-NEXT: i64x2.load32x2_u 0 2327; CHECK-NEXT: # fallthrough-return 2328 %q = ptrtoint <2 x i32>* %p to i32 2329 %r = add nsw i32 %q, 16 2330 %s = inttoptr i32 %r to <2 x i32>* 2331 %v = load <2 x i32>, <2 x i32>* %s 2332 %v2 = zext <2 x i32> %v to <2 x i64> 2333 ret <2 x i64> %v2 2334} 2335 2336define <2 x i32> @load_ext_v2i64_with_unfolded_offset(<2 x i32>* %p) { 2337; CHECK-LABEL: load_ext_v2i64_with_unfolded_offset: 2338; CHECK: .functype load_ext_v2i64_with_unfolded_offset (i32) -> (v128) 2339; CHECK-NEXT: # %bb.0: 2340; CHECK-NEXT: local.get 0 2341; CHECK-NEXT: i32.const 16 2342; CHECK-NEXT: i32.add 2343; CHECK-NEXT: i64x2.load32x2_u 0 2344; CHECK-NEXT: # fallthrough-return 2345 %q = ptrtoint <2 x i32>* %p to i32 2346 %r = add nsw i32 %q, 16 2347 %s = inttoptr i32 %r to <2 x i32>* 2348 %v = load <2 x i32>, <2 x 
i32>* %s 2349 ret <2 x i32> %v 2350} 2351 2352define <2 x i64> @load_v2i64_with_unfolded_gep_offset(<2 x i64>* %p) { 2353; CHECK-LABEL: load_v2i64_with_unfolded_gep_offset: 2354; CHECK: .functype load_v2i64_with_unfolded_gep_offset (i32) -> (v128) 2355; CHECK-NEXT: # %bb.0: 2356; CHECK-NEXT: local.get 0 2357; CHECK-NEXT: i32.const 16 2358; CHECK-NEXT: i32.add 2359; CHECK-NEXT: v128.load 0 2360; CHECK-NEXT: # fallthrough-return 2361 %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1 2362 %v = load <2 x i64>, <2 x i64>* %s 2363 ret <2 x i64> %v 2364} 2365 2366define <2 x i64> @load_splat_v2i64_with_unfolded_gep_offset(i64* %p) { 2367; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_offset: 2368; CHECK: .functype load_splat_v2i64_with_unfolded_gep_offset (i32) -> (v128) 2369; CHECK-NEXT: # %bb.0: 2370; CHECK-NEXT: local.get 0 2371; CHECK-NEXT: i32.const 8 2372; CHECK-NEXT: i32.add 2373; CHECK-NEXT: v128.load64_splat 0 2374; CHECK-NEXT: # fallthrough-return 2375 %s = getelementptr i64, i64* %p, i32 1 2376 %e = load i64, i64* %s 2377 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2378 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2379 ret <2 x i64> %v2 2380} 2381 2382define <2 x i64> @load_sext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) { 2383; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_offset: 2384; CHECK: .functype load_sext_v2i64_with_unfolded_gep_offset (i32) -> (v128) 2385; CHECK-NEXT: # %bb.0: 2386; CHECK-NEXT: local.get 0 2387; CHECK-NEXT: i32.const 8 2388; CHECK-NEXT: i32.add 2389; CHECK-NEXT: i64x2.load32x2_s 0 2390; CHECK-NEXT: # fallthrough-return 2391 %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1 2392 %v = load <2 x i32>, <2 x i32>* %s 2393 %v2 = sext <2 x i32> %v to <2 x i64> 2394 ret <2 x i64> %v2 2395} 2396 2397define <2 x i64> @load_zext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) { 2398; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_offset: 2399; CHECK: .functype load_zext_v2i64_with_unfolded_gep_offset 
(i32) -> (v128) 2400; CHECK-NEXT: # %bb.0: 2401; CHECK-NEXT: local.get 0 2402; CHECK-NEXT: i32.const 8 2403; CHECK-NEXT: i32.add 2404; CHECK-NEXT: i64x2.load32x2_u 0 2405; CHECK-NEXT: # fallthrough-return 2406 %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1 2407 %v = load <2 x i32>, <2 x i32>* %s 2408 %v2 = zext <2 x i32> %v to <2 x i64> 2409 ret <2 x i64> %v2 2410} 2411 2412define <2 x i32> @load_ext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) { 2413; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_offset: 2414; CHECK: .functype load_ext_v2i64_with_unfolded_gep_offset (i32) -> (v128) 2415; CHECK-NEXT: # %bb.0: 2416; CHECK-NEXT: local.get 0 2417; CHECK-NEXT: i32.const 8 2418; CHECK-NEXT: i32.add 2419; CHECK-NEXT: i64x2.load32x2_u 0 2420; CHECK-NEXT: # fallthrough-return 2421 %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1 2422 %v = load <2 x i32>, <2 x i32>* %s 2423 ret <2 x i32> %v 2424} 2425 2426define <2 x i64> @load_v2i64_from_numeric_address() { 2427; CHECK-LABEL: load_v2i64_from_numeric_address: 2428; CHECK: .functype load_v2i64_from_numeric_address () -> (v128) 2429; CHECK-NEXT: # %bb.0: 2430; CHECK-NEXT: i32.const 0 2431; CHECK-NEXT: v128.load 32 2432; CHECK-NEXT: # fallthrough-return 2433 %s = inttoptr i32 32 to <2 x i64>* 2434 %v = load <2 x i64>, <2 x i64>* %s 2435 ret <2 x i64> %v 2436} 2437 2438define <2 x i64> @load_splat_v2i64_from_numeric_address() { 2439; CHECK-LABEL: load_splat_v2i64_from_numeric_address: 2440; CHECK: .functype load_splat_v2i64_from_numeric_address () -> (v128) 2441; CHECK-NEXT: # %bb.0: 2442; CHECK-NEXT: i32.const 0 2443; CHECK-NEXT: v128.load64_splat 32 2444; CHECK-NEXT: # fallthrough-return 2445 %s = inttoptr i32 32 to i64* 2446 %e = load i64, i64* %s 2447 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2448 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2449 ret <2 x i64> %v2 2450} 2451 2452define <2 x i64> @load_sext_v2i64_from_numeric_address() { 2453; CHECK-LABEL: 
load_sext_v2i64_from_numeric_address: 2454; CHECK: .functype load_sext_v2i64_from_numeric_address () -> (v128) 2455; CHECK-NEXT: # %bb.0: 2456; CHECK-NEXT: i32.const 0 2457; CHECK-NEXT: i64x2.load32x2_s 32 2458; CHECK-NEXT: # fallthrough-return 2459 %s = inttoptr i32 32 to <2 x i32>* 2460 %v = load <2 x i32>, <2 x i32>* %s 2461 %v2 = sext <2 x i32> %v to <2 x i64> 2462 ret <2 x i64> %v2 2463} 2464 2465define <2 x i64> @load_zext_v2i64_from_numeric_address() { 2466; CHECK-LABEL: load_zext_v2i64_from_numeric_address: 2467; CHECK: .functype load_zext_v2i64_from_numeric_address () -> (v128) 2468; CHECK-NEXT: # %bb.0: 2469; CHECK-NEXT: i32.const 0 2470; CHECK-NEXT: i64x2.load32x2_u 32 2471; CHECK-NEXT: # fallthrough-return 2472 %s = inttoptr i32 32 to <2 x i32>* 2473 %v = load <2 x i32>, <2 x i32>* %s 2474 %v2 = zext <2 x i32> %v to <2 x i64> 2475 ret <2 x i64> %v2 2476} 2477 2478define <2 x i32> @load_ext_v2i64_from_numeric_address() { 2479; CHECK-LABEL: load_ext_v2i64_from_numeric_address: 2480; CHECK: .functype load_ext_v2i64_from_numeric_address () -> (v128) 2481; CHECK-NEXT: # %bb.0: 2482; CHECK-NEXT: i32.const 0 2483; CHECK-NEXT: i64x2.load32x2_u 32 2484; CHECK-NEXT: # fallthrough-return 2485 %s = inttoptr i32 32 to <2 x i32>* 2486 %v = load <2 x i32>, <2 x i32>* %s 2487 ret <2 x i32> %v 2488} 2489 2490@gv_v2i64 = global <2 x i64> <i64 42, i64 42> 2491define <2 x i64> @load_v2i64_from_global_address() { 2492; CHECK-LABEL: load_v2i64_from_global_address: 2493; CHECK: .functype load_v2i64_from_global_address () -> (v128) 2494; CHECK-NEXT: # %bb.0: 2495; CHECK-NEXT: i32.const 0 2496; CHECK-NEXT: v128.load gv_v2i64 2497; CHECK-NEXT: # fallthrough-return 2498 %v = load <2 x i64>, <2 x i64>* @gv_v2i64 2499 ret <2 x i64> %v 2500} 2501 2502@gv_i64 = global i64 42 2503define <2 x i64> @load_splat_v2i64_from_global_address() { 2504; CHECK-LABEL: load_splat_v2i64_from_global_address: 2505; CHECK: .functype load_splat_v2i64_from_global_address () -> (v128) 2506; CHECK-NEXT: # 
%bb.0: 2507; CHECK-NEXT: i32.const 0 2508; CHECK-NEXT: v128.load64_splat gv_i64 2509; CHECK-NEXT: # fallthrough-return 2510 %e = load i64, i64* @gv_i64 2511 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2512 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2513 ret <2 x i64> %v2 2514} 2515 2516@gv_v2i32 = global <2 x i32> <i32 42, i32 42> 2517define <2 x i64> @load_sext_v2i64_from_global_address() { 2518; CHECK-LABEL: load_sext_v2i64_from_global_address: 2519; CHECK: .functype load_sext_v2i64_from_global_address () -> (v128) 2520; CHECK-NEXT: # %bb.0: 2521; CHECK-NEXT: i32.const 0 2522; CHECK-NEXT: i64x2.load32x2_s gv_v2i32 2523; CHECK-NEXT: # fallthrough-return 2524 %v = load <2 x i32>, <2 x i32>* @gv_v2i32 2525 %v2 = sext <2 x i32> %v to <2 x i64> 2526 ret <2 x i64> %v2 2527} 2528 2529define <2 x i64> @load_zext_v2i64_from_global_address() { 2530; CHECK-LABEL: load_zext_v2i64_from_global_address: 2531; CHECK: .functype load_zext_v2i64_from_global_address () -> (v128) 2532; CHECK-NEXT: # %bb.0: 2533; CHECK-NEXT: i32.const 0 2534; CHECK-NEXT: i64x2.load32x2_u gv_v2i32 2535; CHECK-NEXT: # fallthrough-return 2536 %v = load <2 x i32>, <2 x i32>* @gv_v2i32 2537 %v2 = zext <2 x i32> %v to <2 x i64> 2538 ret <2 x i64> %v2 2539} 2540 2541define <2 x i32> @load_ext_v2i64_from_global_address() { 2542; CHECK-LABEL: load_ext_v2i64_from_global_address: 2543; CHECK: .functype load_ext_v2i64_from_global_address () -> (v128) 2544; CHECK-NEXT: # %bb.0: 2545; CHECK-NEXT: i32.const 0 2546; CHECK-NEXT: i64x2.load32x2_u gv_v2i32 2547; CHECK-NEXT: # fallthrough-return 2548 %v = load <2 x i32>, <2 x i32>* @gv_v2i32 2549 ret <2 x i32> %v 2550} 2551 2552define void @store_v2i64(<2 x i64> %v, <2 x i64>* %p) { 2553; CHECK-LABEL: store_v2i64: 2554; CHECK: .functype store_v2i64 (v128, i32) -> () 2555; CHECK-NEXT: # %bb.0: 2556; CHECK-NEXT: local.get 1 2557; CHECK-NEXT: local.get 0 2558; CHECK-NEXT: v128.store 0 2559; CHECK-NEXT: # fallthrough-return 2560 
store <2 x i64> %v , <2 x i64>* %p 2561 ret void 2562} 2563 2564define void @store_v2i64_with_folded_offset(<2 x i64> %v, <2 x i64>* %p) { 2565; CHECK-LABEL: store_v2i64_with_folded_offset: 2566; CHECK: .functype store_v2i64_with_folded_offset (v128, i32) -> () 2567; CHECK-NEXT: # %bb.0: 2568; CHECK-NEXT: local.get 1 2569; CHECK-NEXT: local.get 0 2570; CHECK-NEXT: v128.store 16 2571; CHECK-NEXT: # fallthrough-return 2572 %q = ptrtoint <2 x i64>* %p to i32 2573 %r = add nuw i32 %q, 16 2574 %s = inttoptr i32 %r to <2 x i64>* 2575 store <2 x i64> %v , <2 x i64>* %s 2576 ret void 2577} 2578 2579define void @store_v2i64_with_folded_gep_offset(<2 x i64> %v, <2 x i64>* %p) { 2580; CHECK-LABEL: store_v2i64_with_folded_gep_offset: 2581; CHECK: .functype store_v2i64_with_folded_gep_offset (v128, i32) -> () 2582; CHECK-NEXT: # %bb.0: 2583; CHECK-NEXT: local.get 1 2584; CHECK-NEXT: local.get 0 2585; CHECK-NEXT: v128.store 16 2586; CHECK-NEXT: # fallthrough-return 2587 %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1 2588 store <2 x i64> %v , <2 x i64>* %s 2589 ret void 2590} 2591 2592define void @store_v2i64_with_unfolded_gep_negative_offset(<2 x i64> %v, <2 x i64>* %p) { 2593; CHECK-LABEL: store_v2i64_with_unfolded_gep_negative_offset: 2594; CHECK: .functype store_v2i64_with_unfolded_gep_negative_offset (v128, i32) -> () 2595; CHECK-NEXT: # %bb.0: 2596; CHECK-NEXT: local.get 1 2597; CHECK-NEXT: i32.const -16 2598; CHECK-NEXT: i32.add 2599; CHECK-NEXT: local.get 0 2600; CHECK-NEXT: v128.store 0 2601; CHECK-NEXT: # fallthrough-return 2602 %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1 2603 store <2 x i64> %v , <2 x i64>* %s 2604 ret void 2605} 2606 2607define void @store_v2i64_with_unfolded_offset(<2 x i64> %v, <2 x i64>* %p) { 2608; CHECK-LABEL: store_v2i64_with_unfolded_offset: 2609; CHECK: .functype store_v2i64_with_unfolded_offset (v128, i32) -> () 2610; CHECK-NEXT: # %bb.0: 2611; CHECK-NEXT: local.get 1 2612; CHECK-NEXT: i32.const 16 2613; 
CHECK-NEXT: i32.add 2614; CHECK-NEXT: local.get 0 2615; CHECK-NEXT: v128.store 0 2616; CHECK-NEXT: # fallthrough-return 2617 %q = ptrtoint <2 x i64>* %p to i32 2618 %r = add nsw i32 %q, 16 2619 %s = inttoptr i32 %r to <2 x i64>* 2620 store <2 x i64> %v , <2 x i64>* %s 2621 ret void 2622} 2623 2624define void @store_v2i64_with_unfolded_gep_offset(<2 x i64> %v, <2 x i64>* %p) { 2625; CHECK-LABEL: store_v2i64_with_unfolded_gep_offset: 2626; CHECK: .functype store_v2i64_with_unfolded_gep_offset (v128, i32) -> () 2627; CHECK-NEXT: # %bb.0: 2628; CHECK-NEXT: local.get 1 2629; CHECK-NEXT: i32.const 16 2630; CHECK-NEXT: i32.add 2631; CHECK-NEXT: local.get 0 2632; CHECK-NEXT: v128.store 0 2633; CHECK-NEXT: # fallthrough-return 2634 %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1 2635 store <2 x i64> %v , <2 x i64>* %s 2636 ret void 2637} 2638 2639define void @store_v2i64_to_numeric_address(<2 x i64> %v) { 2640; CHECK-LABEL: store_v2i64_to_numeric_address: 2641; CHECK: .functype store_v2i64_to_numeric_address (v128) -> () 2642; CHECK-NEXT: # %bb.0: 2643; CHECK-NEXT: i32.const 0 2644; CHECK-NEXT: local.get 0 2645; CHECK-NEXT: v128.store 32 2646; CHECK-NEXT: # fallthrough-return 2647 %s = inttoptr i32 32 to <2 x i64>* 2648 store <2 x i64> %v , <2 x i64>* %s 2649 ret void 2650} 2651 2652define void @store_v2i64_to_global_address(<2 x i64> %v) { 2653; CHECK-LABEL: store_v2i64_to_global_address: 2654; CHECK: .functype store_v2i64_to_global_address (v128) -> () 2655; CHECK-NEXT: # %bb.0: 2656; CHECK-NEXT: i32.const 0 2657; CHECK-NEXT: local.get 0 2658; CHECK-NEXT: v128.store gv_v2i64 2659; CHECK-NEXT: # fallthrough-return 2660 store <2 x i64> %v , <2 x i64>* @gv_v2i64 2661 ret void 2662} 2663 2664; ============================================================================== 2665; 4 x float 2666; ============================================================================== 2667define <4 x float> @load_v4f32(<4 x float>* %p) { 2668; CHECK-LABEL: load_v4f32: 2669; CHECK: 
.functype load_v4f32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x float>, <4 x float>* %p
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32(float* %p) {
; CHECK-LABEL: load_splat_v4f32:
; CHECK:         .functype load_splat_v4f32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load float, float* %p
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

; An `add nuw` constant offset cannot wrap, so it is folded into the
; load instruction's immediate offset.
define <4 x float> @load_v4f32_with_folded_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_folded_offset:
; CHECK:         .functype load_v4f32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_folded_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_folded_offset:
; CHECK:         .functype load_splat_v4f32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint float* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to float*
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

; An inbounds GEP with a positive constant index also folds into the
; load immediate (offset = index * element size).
define <4 x float> @load_v4f32_with_folded_gep_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_folded_gep_offset:
; CHECK:         .functype load_v4f32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_folded_gep_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_folded_gep_offset:
; CHECK:         .functype load_splat_v4f32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_splat 4
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds float, float* %p, i32 1
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

; Negative offsets cannot be encoded in the (unsigned) load immediate,
; so they are materialized with an explicit i32.const + i32.add.
define <4 x float> @load_v4f32_with_unfolded_gep_negative_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_unfolded_gep_negative_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -4
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds float, float* %p, i32 -1
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

; With only `nsw` (no `nuw`) the add may wrap, so the offset is NOT
; folded and an explicit add is emitted instead.
define <4 x float> @load_v4f32_with_unfolded_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_offset:
; CHECK:         .functype load_v4f32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_unfolded_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_offset:
; CHECK:         .functype load_splat_v4f32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint float* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to float*
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

; A GEP without `inbounds` likewise keeps the offset unfolded.
define <4 x float> @load_v4f32_with_unfolded_gep_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_gep_offset:
; CHECK:         .functype load_v4f32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_unfolded_gep_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v4f32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 4
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr float, float* %p, i32 1
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

; A constant address becomes base 0 plus the address as the immediate offset.
define <4 x float> @load_v4f32_from_numeric_address() {
; CHECK-LABEL: load_v4f32_from_numeric_address:
; CHECK:         .functype load_v4f32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x float>*
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_from_numeric_address() {
; CHECK-LABEL: load_splat_v4f32_from_numeric_address:
; CHECK:         .functype load_splat_v4f32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load32_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to float*
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

; A global address is folded as a symbolic immediate offset.
@gv_v4f32 = global <4 x float> <float 42., float 42., float 42., float 42.>
define <4 x float> @load_v4f32_from_global_address() {
; CHECK-LABEL: load_v4f32_from_global_address:
; CHECK:         .functype load_v4f32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v4f32
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x float>, <4 x float>* @gv_v4f32
  ret <4 x float> %v
}

@gv_f32 = global float 42.
define <4 x float> @load_splat_v4f32_from_global_address() {
; CHECK-LABEL: load_splat_v4f32_from_global_address:
; CHECK:         .functype load_splat_v4f32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load32_splat gv_f32
; CHECK-NEXT:    # fallthrough-return
  %e = load float, float* @gv_f32
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

; Stores mirror the load tests: same offset-folding rules apply to v128.store.
define void @store_v4f32(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32:
; CHECK:         .functype store_v4f32 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <4 x float> %v , <4 x float>* %p
  ret void
}

define void @store_v4f32_with_folded_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_folded_offset:
; CHECK:         .functype store_v4f32_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_with_folded_gep_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_folded_gep_offset:
; CHECK:         .functype store_v4f32_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_with_unfolded_gep_negative_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v4f32_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_with_unfolded_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_offset:
; CHECK:         .functype store_v4f32_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_with_unfolded_gep_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_gep_offset:
; CHECK:         .functype store_v4f32_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_to_numeric_address(<4 x float> %v) {
; CHECK-LABEL: store_v4f32_to_numeric_address:
; CHECK:         .functype store_v4f32_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_to_global_address(<4 x float> %v) {
; CHECK-LABEL: store_v4f32_to_global_address:
; CHECK:         .functype store_v4f32_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v4f32
; CHECK-NEXT:    # fallthrough-return
  store <4 x float> %v , <4 x float>* @gv_v4f32
  ret void
}

; ==============================================================================
; 2 x double
; ==============================================================================
define <2 x double> @load_v2f64(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64:
; CHECK:         .functype load_v2f64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x double>, <2 x double>* %p
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64(double* %p) {
; CHECK-LABEL: load_splat_v2f64:
; CHECK:         .functype load_splat_v2f64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load double, double* %p
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

; v2f64 offset-folding tests: `add nuw` / `gep inbounds` offsets fold into
; the load immediate; `add nsw` / plain `gep` offsets emit an explicit add.
define <2 x double> @load_v2f64_with_folded_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_folded_offset:
; CHECK:         .functype load_v2f64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_folded_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_folded_offset:
; CHECK:         .functype load_splat_v2f64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint double* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to double*
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_v2f64_with_folded_gep_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_folded_gep_offset:
; CHECK:         .functype load_v2f64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_folded_gep_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_folded_gep_offset:
; CHECK:         .functype load_splat_v2f64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_splat 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds double, double* %p, i32 1
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

; Negative offsets cannot be encoded in the load immediate.
define <2 x double> @load_v2f64_with_unfolded_gep_negative_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_unfolded_gep_negative_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds double, double* %p, i32 -1
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_v2f64_with_unfolded_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_offset:
; CHECK:         .functype load_v2f64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_unfolded_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_offset:
; CHECK:         .functype load_splat_v2f64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint double* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to double*
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_v2f64_with_unfolded_gep_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype load_v2f64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x double>, <2 x double>* %p, i32 1
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_unfolded_gep_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v2f64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr double, double* %p, i32 1
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_v2f64_from_numeric_address() {
; CHECK-LABEL: load_v2f64_from_numeric_address:
; CHECK:         .functype load_v2f64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x double>*
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_from_numeric_address() {
; CHECK-LABEL: load_splat_v2f64_from_numeric_address:
; CHECK:         .functype load_splat_v2f64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load64_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to double*
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

@gv_v2f64 = global <2 x double> <double 42., double 42.>
define <2 x double> @load_v2f64_from_global_address() {
; CHECK-LABEL: load_v2f64_from_global_address:
; CHECK:         .functype load_v2f64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v2f64
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x double>, <2 x double>* @gv_v2f64
  ret <2 x double> %v
}

@gv_f64 = global double 42.
define <2 x double> @load_splat_v2f64_from_global_address() {
; CHECK-LABEL: load_splat_v2f64_from_global_address:
; CHECK:         .functype load_splat_v2f64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load64_splat gv_f64
; CHECK-NEXT:    # fallthrough-return
  %e = load double, double* @gv_f64
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

; v2f64 store tests: the same offset-folding rules checked for loads apply
; to v128.store (nuw/inbounds offsets fold; nsw/plain-gep/negative do not).
define void @store_v2f64(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64:
; CHECK:         .functype store_v2f64 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <2 x double> %v , <2 x double>* %p
  ret void
}

define void @store_v2f64_with_folded_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_folded_offset:
; CHECK:         .functype store_v2f64_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  store <2 x double> %v , <2 x double>* %s
  ret void
}

define void @store_v2f64_with_folded_gep_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_folded_gep_offset:
; CHECK:         .functype store_v2f64_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
  store <2 x double> %v , <2 x double>* %s
  ret void
}

define void @store_v2f64_with_unfolded_gep_negative_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v2f64_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
  store <2 x double> %v , <2 x double>* %s
  ret void
}

define void @store_v2f64_with_unfolded_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_offset:
; CHECK:         .functype store_v2f64_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  store <2 x double> %v , <2 x double>* %s
  ret void
}

define void @store_v2f64_with_unfolded_gep_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype store_v2f64_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x double>, <2 x double>* %p, i32 1
  store <2 x double> %v , <2 x double>* %s
  ret void
}

define void @store_v2f64_to_numeric_address(<2 x double> %v) {
; CHECK-LABEL: store_v2f64_to_numeric_address:
; CHECK:         .functype store_v2f64_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x double>*
  store <2 x double> %v , <2 x double>* %s
  ret void
}

define void @store_v2f64_to_global_address(<2 x double> %v) {
; CHECK-LABEL: store_v2f64_to_global_address:
; CHECK:         .functype store_v2f64_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v2f64
; CHECK-NEXT:    # fallthrough-return
  store <2 x double> %v , <2 x double>* @gv_v2f64
  ret void
}