; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s

; Test SIMD loads and stores

target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"

; ==============================================================================
; 16 x i8
; ==============================================================================
define <16 x i8> @load_v16i8(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8:
; CHECK:         .functype load_v16i8 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <16 x i8>, <16 x i8>* %p
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8(i8* %p) {
; CHECK-LABEL: load_splat_v16i8:
; CHECK:         .functype load_splat_v16i8 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v8x16.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i8, i8* %p
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_with_folded_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_folded_offset:
; CHECK:         .functype load_v16i8_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_folded_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_folded_offset:
; CHECK:         .functype load_splat_v16i8_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v8x16.load_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i8* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to i8*
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_with_folded_gep_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_folded_gep_offset:
; CHECK:         .functype load_v16i8_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_folded_gep_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_folded_gep_offset:
; CHECK:         .functype load_splat_v16i8_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v8x16.load_splat 1
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, i8* %p, i32 1
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_with_unfolded_gep_negative_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_unfolded_gep_negative_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -1
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v8x16.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, i8* %p, i32 -1
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_with_unfolded_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_offset:
; CHECK:         .functype load_v16i8_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_unfolded_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v8x16.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i8* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to i8*
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_with_unfolded_gep_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype load_v16i8_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_unfolded_gep_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v8x16.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i8, i8* %p, i32 1
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_from_numeric_address() {
; CHECK-LABEL: load_v16i8_from_numeric_address:
; CHECK:         .functype load_v16i8_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <16 x i8>*
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_from_numeric_address() {
; CHECK-LABEL: load_splat_v16i8_from_numeric_address:
; CHECK:         .functype load_splat_v16i8_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v8x16.load_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to i8*
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

@gv_v16i8 = global <16 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
define <16 x i8> @load_v16i8_from_global_address() {
; CHECK-LABEL: load_v16i8_from_global_address:
; CHECK:         .functype load_v16i8_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v16i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <16 x i8>, <16 x i8>* @gv_v16i8
  ret <16 x i8> %v
}

@gv_i8 = global i8 42
define <16 x i8> @load_splat_v16i8_from_global_address() {
; CHECK-LABEL: load_splat_v16i8_from_global_address:
; CHECK:         .functype load_splat_v16i8_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v8x16.load_splat gv_i8
; CHECK-NEXT:    # fallthrough-return
  %e = load i8, i8* @gv_i8
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define void @store_v16i8(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8:
; CHECK:         .functype store_v16i8 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <16 x i8> %v , <16 x i8>* %p
  ret void
}

define void @store_v16i8_with_folded_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_folded_offset:
; CHECK:         .functype store_v16i8_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_with_folded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_folded_gep_offset:
; CHECK:         .functype store_v16i8_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_with_unfolded_gep_negative_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v16i8_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_with_unfolded_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_offset:
; CHECK:         .functype store_v16i8_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_with_unfolded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype store_v16i8_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_to_numeric_address(<16 x i8> %v) {
; CHECK-LABEL: store_v16i8_to_numeric_address:
; CHECK:         .functype store_v16i8_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <16 x i8>*
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_to_global_address(<16 x i8> %v) {
; CHECK-LABEL: store_v16i8_to_global_address:
; CHECK:         .functype store_v16i8_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v16i8
; CHECK-NEXT:    # fallthrough-return
  store <16 x i8> %v , <16 x i8>* @gv_v16i8
  ret void
}

; ==============================================================================
; 8 x i16
; ==============================================================================
define <8 x i16> @load_v8i16(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16:
; CHECK:         .functype load_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i16>, <8 x i16>* %p
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16(i16* %p) {
; CHECK-LABEL: load_splat_v8i16:
; CHECK:         .functype load_splat_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v16x8.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i16, i16* %p
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16:
; CHECK:         .functype load_sext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* %p
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16:
; CHECK:         .functype load_zext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* %p
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16:
; CHECK:         .functype load_ext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* %p
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_folded_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_folded_offset:
; CHECK:         .functype load_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_folded_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_folded_offset:
; CHECK:         .functype load_splat_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v16x8.load_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i16* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to i16*
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_folded_offset:
; CHECK:         .functype load_sext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_folded_offset:
; CHECK:         .functype load_zext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_folded_offset:
; CHECK:         .functype load_ext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_folded_gep_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_folded_gep_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_splat_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v16x8.load_splat 2
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, i16* %p, i32 1
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_sext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_zext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_ext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_unfolded_gep_negative_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_unfolded_gep_negative_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -2
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v16x8.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, i16* %p, i32 -1
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_unfolded_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_offset:
; CHECK:         .functype load_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_unfolded_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v16x8.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i16* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to i16*
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_unfolded_gep_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_unfolded_gep_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 2
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v16x8.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i16, i16* %p, i32 1
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_from_numeric_address() {
; CHECK-LABEL: load_v8i16_from_numeric_address:
; CHECK:         .functype load_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_from_numeric_address() {
; CHECK-LABEL: load_splat_v8i16_from_numeric_address:
; CHECK:         .functype load_splat_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v16x8.load_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to i16*
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_sext_v8i16_from_numeric_address:
; CHECK:         .functype load_sext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_s 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_zext_v8i16_from_numeric_address:
; CHECK:         .functype load_zext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_ext_v8i16_from_numeric_address:
; CHECK:         .functype load_ext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

@gv_v8i16 = global <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
define <8 x i16> @load_v8i16_from_global_address() {
; CHECK-LABEL: load_v8i16_from_global_address:
; CHECK:         .functype load_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v8i16
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i16>, <8 x i16>* @gv_v8i16
  ret <8 x i16> %v
}

@gv_i16 = global i16 42
define <8 x i16> @load_splat_v8i16_from_global_address() {
; CHECK-LABEL: load_splat_v8i16_from_global_address:
; CHECK:         .functype load_splat_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v16x8.load_splat gv_i16
; CHECK-NEXT:    # fallthrough-return
  %e = load i16, i16* @gv_i16
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

@gv_v8i8 = global <8 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
define <8 x i16> @load_sext_v8i16_from_global_address() {
; CHECK-LABEL: load_sext_v8i16_from_global_address:
; CHECK:         .functype load_sext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_s gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_from_global_address() {
; CHECK-LABEL: load_zext_v8i16_from_global_address:
; CHECK:         .functype load_zext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_from_global_address() {
; CHECK-LABEL: load_ext_v8i16_from_global_address:
; CHECK:         .functype load_ext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  ret <8 x i8> %v
}


define void @store_v8i16(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16:
; CHECK:         .functype store_v8i16 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <8 x i16> %v , <8 x i16>* %p
  ret void
}

define void @store_v8i16_with_folded_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_folded_offset:
; CHECK:         .functype store_v8i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

define void @store_v8i16_with_folded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_folded_gep_offset:
; CHECK:         .functype store_v8i16_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

define void @store_v8i16_with_unfolded_gep_negative_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

define void @store_v8i16_with_unfolded_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_offset:
; CHECK:         .functype store_v8i16_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

define void @store_v8i16_with_unfolded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype store_v8i16_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

define void @store_v8i16_to_numeric_address(<8 x i16> %v) {
; CHECK-LABEL: store_v8i16_to_numeric_address:
; CHECK:         .functype store_v8i16_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i16>*
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

define void @store_v8i16_to_global_address(<8 x i16> %v) {
; CHECK-LABEL: store_v8i16_to_global_address:
; CHECK:         .functype store_v8i16_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v8i16
; CHECK-NEXT:    # fallthrough-return
  store <8 x i16> %v , <8 x i16>* @gv_v8i16
  ret void
}

; ==============================================================================
; 4 x i32
; ==============================================================================
define <4 x i32> @load_v4i32(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32:
; CHECK:         .functype load_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i32>, <4 x i32>* %p
  ret <4 x i32> %v
}

define <4 x i32> @load_splat_v4i32(i32* %addr) {
; CHECK-LABEL:
load_splat_v4i32: 1037; CHECK: .functype load_splat_v4i32 (i32) -> (v128) 1038; CHECK-NEXT: # %bb.0: 1039; CHECK-NEXT: local.get 0 1040; CHECK-NEXT: v32x4.load_splat 0 1041; CHECK-NEXT: # fallthrough-return 1042 %e = load i32, i32* %addr, align 4 1043 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1044 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1045 ret <4 x i32> %v2 1046} 1047 1048define <4 x i32> @load_sext_v4i32(<4 x i16>* %p) { 1049; CHECK-LABEL: load_sext_v4i32: 1050; CHECK: .functype load_sext_v4i32 (i32) -> (v128) 1051; CHECK-NEXT: # %bb.0: 1052; CHECK-NEXT: local.get 0 1053; CHECK-NEXT: i32x4.load16x4_s 0 1054; CHECK-NEXT: # fallthrough-return 1055 %v = load <4 x i16>, <4 x i16>* %p 1056 %v2 = sext <4 x i16> %v to <4 x i32> 1057 ret <4 x i32> %v2 1058} 1059 1060define <4 x i32> @load_zext_v4i32(<4 x i16>* %p) { 1061; CHECK-LABEL: load_zext_v4i32: 1062; CHECK: .functype load_zext_v4i32 (i32) -> (v128) 1063; CHECK-NEXT: # %bb.0: 1064; CHECK-NEXT: local.get 0 1065; CHECK-NEXT: i32x4.load16x4_u 0 1066; CHECK-NEXT: # fallthrough-return 1067 %v = load <4 x i16>, <4 x i16>* %p 1068 %v2 = zext <4 x i16> %v to <4 x i32> 1069 ret <4 x i32> %v2 1070} 1071 1072define <4 x i16> @load_ext_v4i32(<4 x i16>* %p) { 1073; CHECK-LABEL: load_ext_v4i32: 1074; CHECK: .functype load_ext_v4i32 (i32) -> (v128) 1075; CHECK-NEXT: # %bb.0: 1076; CHECK-NEXT: local.get 0 1077; CHECK-NEXT: i32x4.load16x4_u 0 1078; CHECK-NEXT: # fallthrough-return 1079 %v = load <4 x i16>, <4 x i16>* %p 1080 ret <4 x i16> %v 1081} 1082 1083define <4 x i32> @load_v4i32_with_folded_offset(<4 x i32>* %p) { 1084; CHECK-LABEL: load_v4i32_with_folded_offset: 1085; CHECK: .functype load_v4i32_with_folded_offset (i32) -> (v128) 1086; CHECK-NEXT: # %bb.0: 1087; CHECK-NEXT: local.get 0 1088; CHECK-NEXT: v128.load 16 1089; CHECK-NEXT: # fallthrough-return 1090 %q = ptrtoint <4 x i32>* %p to i32 1091 %r = add nuw i32 %q, 16 1092 %s = inttoptr i32 %r to <4 x i32>* 1093 %v = load 
<4 x i32>, <4 x i32>* %s 1094 ret <4 x i32> %v 1095} 1096 1097define <4 x i32> @load_splat_v4i32_with_folded_offset(i32* %p) { 1098; CHECK-LABEL: load_splat_v4i32_with_folded_offset: 1099; CHECK: .functype load_splat_v4i32_with_folded_offset (i32) -> (v128) 1100; CHECK-NEXT: # %bb.0: 1101; CHECK-NEXT: local.get 0 1102; CHECK-NEXT: v32x4.load_splat 16 1103; CHECK-NEXT: # fallthrough-return 1104 %q = ptrtoint i32* %p to i32 1105 %r = add nuw i32 %q, 16 1106 %s = inttoptr i32 %r to i32* 1107 %e = load i32, i32* %s 1108 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1109 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1110 ret <4 x i32> %v2 1111} 1112 1113define <4 x i32> @load_sext_v4i32_with_folded_offset(<4 x i16>* %p) { 1114; CHECK-LABEL: load_sext_v4i32_with_folded_offset: 1115; CHECK: .functype load_sext_v4i32_with_folded_offset (i32) -> (v128) 1116; CHECK-NEXT: # %bb.0: 1117; CHECK-NEXT: local.get 0 1118; CHECK-NEXT: i32x4.load16x4_s 16 1119; CHECK-NEXT: # fallthrough-return 1120 %q = ptrtoint <4 x i16>* %p to i32 1121 %r = add nuw i32 %q, 16 1122 %s = inttoptr i32 %r to <4 x i16>* 1123 %v = load <4 x i16>, <4 x i16>* %s 1124 %v2 = sext <4 x i16> %v to <4 x i32> 1125 ret <4 x i32> %v2 1126} 1127 1128define <4 x i32> @load_zext_v4i32_with_folded_offset(<4 x i16>* %p) { 1129; CHECK-LABEL: load_zext_v4i32_with_folded_offset: 1130; CHECK: .functype load_zext_v4i32_with_folded_offset (i32) -> (v128) 1131; CHECK-NEXT: # %bb.0: 1132; CHECK-NEXT: local.get 0 1133; CHECK-NEXT: i32x4.load16x4_u 16 1134; CHECK-NEXT: # fallthrough-return 1135 %q = ptrtoint <4 x i16>* %p to i32 1136 %r = add nuw i32 %q, 16 1137 %s = inttoptr i32 %r to <4 x i16>* 1138 %v = load <4 x i16>, <4 x i16>* %s 1139 %v2 = zext <4 x i16> %v to <4 x i32> 1140 ret <4 x i32> %v2 1141} 1142 1143define <4 x i16> @load_ext_v4i32_with_folded_offset(<4 x i16>* %p) { 1144; CHECK-LABEL: load_ext_v4i32_with_folded_offset: 1145; CHECK: .functype load_ext_v4i32_with_folded_offset 
(i32) -> (v128) 1146; CHECK-NEXT: # %bb.0: 1147; CHECK-NEXT: local.get 0 1148; CHECK-NEXT: i32x4.load16x4_u 16 1149; CHECK-NEXT: # fallthrough-return 1150 %q = ptrtoint <4 x i16>* %p to i32 1151 %r = add nuw i32 %q, 16 1152 %s = inttoptr i32 %r to <4 x i16>* 1153 %v = load <4 x i16>, <4 x i16>* %s 1154 ret <4 x i16> %v 1155} 1156 1157define <4 x i32> @load_v4i32_with_folded_gep_offset(<4 x i32>* %p) { 1158; CHECK-LABEL: load_v4i32_with_folded_gep_offset: 1159; CHECK: .functype load_v4i32_with_folded_gep_offset (i32) -> (v128) 1160; CHECK-NEXT: # %bb.0: 1161; CHECK-NEXT: local.get 0 1162; CHECK-NEXT: v128.load 16 1163; CHECK-NEXT: # fallthrough-return 1164 %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1 1165 %v = load <4 x i32>, <4 x i32>* %s 1166 ret <4 x i32> %v 1167} 1168 1169define <4 x i32> @load_splat_v4i32_with_folded_gep_offset(i32* %p) { 1170; CHECK-LABEL: load_splat_v4i32_with_folded_gep_offset: 1171; CHECK: .functype load_splat_v4i32_with_folded_gep_offset (i32) -> (v128) 1172; CHECK-NEXT: # %bb.0: 1173; CHECK-NEXT: local.get 0 1174; CHECK-NEXT: v32x4.load_splat 4 1175; CHECK-NEXT: # fallthrough-return 1176 %s = getelementptr inbounds i32, i32* %p, i32 1 1177 %e = load i32, i32* %s 1178 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1179 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1180 ret <4 x i32> %v2 1181} 1182 1183define <4 x i32> @load_sext_v4i32_with_folded_gep_offset(<4 x i16>* %p) { 1184; CHECK-LABEL: load_sext_v4i32_with_folded_gep_offset: 1185; CHECK: .functype load_sext_v4i32_with_folded_gep_offset (i32) -> (v128) 1186; CHECK-NEXT: # %bb.0: 1187; CHECK-NEXT: local.get 0 1188; CHECK-NEXT: i32x4.load16x4_s 8 1189; CHECK-NEXT: # fallthrough-return 1190 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1 1191 %v = load <4 x i16>, <4 x i16>* %s 1192 %v2 = sext <4 x i16> %v to <4 x i32> 1193 ret <4 x i32> %v2 1194} 1195 1196define <4 x i32> @load_zext_v4i32_with_folded_gep_offset(<4 x i16>* 
%p) { 1197; CHECK-LABEL: load_zext_v4i32_with_folded_gep_offset: 1198; CHECK: .functype load_zext_v4i32_with_folded_gep_offset (i32) -> (v128) 1199; CHECK-NEXT: # %bb.0: 1200; CHECK-NEXT: local.get 0 1201; CHECK-NEXT: i32x4.load16x4_u 8 1202; CHECK-NEXT: # fallthrough-return 1203 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1 1204 %v = load <4 x i16>, <4 x i16>* %s 1205 %v2 = zext <4 x i16> %v to <4 x i32> 1206 ret <4 x i32> %v2 1207} 1208 1209define <4 x i16> @load_ext_v4i32_with_folded_gep_offset(<4 x i16>* %p) { 1210; CHECK-LABEL: load_ext_v4i32_with_folded_gep_offset: 1211; CHECK: .functype load_ext_v4i32_with_folded_gep_offset (i32) -> (v128) 1212; CHECK-NEXT: # %bb.0: 1213; CHECK-NEXT: local.get 0 1214; CHECK-NEXT: i32x4.load16x4_u 8 1215; CHECK-NEXT: # fallthrough-return 1216 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1 1217 %v = load <4 x i16>, <4 x i16>* %s 1218 ret <4 x i16> %v 1219} 1220 1221define <4 x i32> @load_v4i32_with_unfolded_gep_negative_offset(<4 x i32>* %p) { 1222; CHECK-LABEL: load_v4i32_with_unfolded_gep_negative_offset: 1223; CHECK: .functype load_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1224; CHECK-NEXT: # %bb.0: 1225; CHECK-NEXT: local.get 0 1226; CHECK-NEXT: i32.const -16 1227; CHECK-NEXT: i32.add 1228; CHECK-NEXT: v128.load 0 1229; CHECK-NEXT: # fallthrough-return 1230 %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1 1231 %v = load <4 x i32>, <4 x i32>* %s 1232 ret <4 x i32> %v 1233} 1234 1235define <4 x i32> @load_splat_v4i32_with_unfolded_gep_negative_offset(i32* %p) { 1236; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_negative_offset: 1237; CHECK: .functype load_splat_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1238; CHECK-NEXT: # %bb.0: 1239; CHECK-NEXT: local.get 0 1240; CHECK-NEXT: i32.const -4 1241; CHECK-NEXT: i32.add 1242; CHECK-NEXT: v32x4.load_splat 0 1243; CHECK-NEXT: # fallthrough-return 1244 %s = getelementptr inbounds i32, i32* %p, i32 -1 1245 %e = load 
i32, i32* %s 1246 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1247 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1248 ret <4 x i32> %v2 1249} 1250 1251define <4 x i32> @load_sext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) { 1252; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_negative_offset: 1253; CHECK: .functype load_sext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1254; CHECK-NEXT: # %bb.0: 1255; CHECK-NEXT: local.get 0 1256; CHECK-NEXT: i32.const -8 1257; CHECK-NEXT: i32.add 1258; CHECK-NEXT: i32x4.load16x4_s 0 1259; CHECK-NEXT: # fallthrough-return 1260 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1 1261 %v = load <4 x i16>, <4 x i16>* %s 1262 %v2 = sext <4 x i16> %v to <4 x i32> 1263 ret <4 x i32> %v2 1264} 1265 1266define <4 x i32> @load_zext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) { 1267; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_negative_offset: 1268; CHECK: .functype load_zext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1269; CHECK-NEXT: # %bb.0: 1270; CHECK-NEXT: local.get 0 1271; CHECK-NEXT: i32.const -8 1272; CHECK-NEXT: i32.add 1273; CHECK-NEXT: i32x4.load16x4_u 0 1274; CHECK-NEXT: # fallthrough-return 1275 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1 1276 %v = load <4 x i16>, <4 x i16>* %s 1277 %v2 = zext <4 x i16> %v to <4 x i32> 1278 ret <4 x i32> %v2 1279} 1280 1281define <4 x i16> @load_ext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) { 1282; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_negative_offset: 1283; CHECK: .functype load_ext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1284; CHECK-NEXT: # %bb.0: 1285; CHECK-NEXT: local.get 0 1286; CHECK-NEXT: i32.const -8 1287; CHECK-NEXT: i32.add 1288; CHECK-NEXT: i32x4.load16x4_u 0 1289; CHECK-NEXT: # fallthrough-return 1290 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1 1291 %v = load <4 x i16>, <4 x i16>* %s 1292 ret <4 x i16> %v 1293} 1294 
1295define <4 x i32> @load_v4i32_with_unfolded_offset(<4 x i32>* %p) { 1296; CHECK-LABEL: load_v4i32_with_unfolded_offset: 1297; CHECK: .functype load_v4i32_with_unfolded_offset (i32) -> (v128) 1298; CHECK-NEXT: # %bb.0: 1299; CHECK-NEXT: local.get 0 1300; CHECK-NEXT: i32.const 16 1301; CHECK-NEXT: i32.add 1302; CHECK-NEXT: v128.load 0 1303; CHECK-NEXT: # fallthrough-return 1304 %q = ptrtoint <4 x i32>* %p to i32 1305 %r = add nsw i32 %q, 16 1306 %s = inttoptr i32 %r to <4 x i32>* 1307 %v = load <4 x i32>, <4 x i32>* %s 1308 ret <4 x i32> %v 1309} 1310 1311define <4 x i32> @load_splat_v4i32_with_unfolded_offset(i32* %p) { 1312; CHECK-LABEL: load_splat_v4i32_with_unfolded_offset: 1313; CHECK: .functype load_splat_v4i32_with_unfolded_offset (i32) -> (v128) 1314; CHECK-NEXT: # %bb.0: 1315; CHECK-NEXT: local.get 0 1316; CHECK-NEXT: i32.const 16 1317; CHECK-NEXT: i32.add 1318; CHECK-NEXT: v32x4.load_splat 0 1319; CHECK-NEXT: # fallthrough-return 1320 %q = ptrtoint i32* %p to i32 1321 %r = add nsw i32 %q, 16 1322 %s = inttoptr i32 %r to i32* 1323 %e = load i32, i32* %s 1324 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1325 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1326 ret <4 x i32> %v2 1327} 1328 1329define <4 x i32> @load_sext_v4i32_with_unfolded_offset(<4 x i16>* %p) { 1330; CHECK-LABEL: load_sext_v4i32_with_unfolded_offset: 1331; CHECK: .functype load_sext_v4i32_with_unfolded_offset (i32) -> (v128) 1332; CHECK-NEXT: # %bb.0: 1333; CHECK-NEXT: local.get 0 1334; CHECK-NEXT: i32.const 16 1335; CHECK-NEXT: i32.add 1336; CHECK-NEXT: i32x4.load16x4_s 0 1337; CHECK-NEXT: # fallthrough-return 1338 %q = ptrtoint <4 x i16>* %p to i32 1339 %r = add nsw i32 %q, 16 1340 %s = inttoptr i32 %r to <4 x i16>* 1341 %v = load <4 x i16>, <4 x i16>* %s 1342 %v2 = sext <4 x i16> %v to <4 x i32> 1343 ret <4 x i32> %v2 1344} 1345 1346define <4 x i32> @load_zext_v4i32_with_unfolded_offset(<4 x i16>* %p) { 1347; CHECK-LABEL: 
load_zext_v4i32_with_unfolded_offset: 1348; CHECK: .functype load_zext_v4i32_with_unfolded_offset (i32) -> (v128) 1349; CHECK-NEXT: # %bb.0: 1350; CHECK-NEXT: local.get 0 1351; CHECK-NEXT: i32.const 16 1352; CHECK-NEXT: i32.add 1353; CHECK-NEXT: i32x4.load16x4_u 0 1354; CHECK-NEXT: # fallthrough-return 1355 %q = ptrtoint <4 x i16>* %p to i32 1356 %r = add nsw i32 %q, 16 1357 %s = inttoptr i32 %r to <4 x i16>* 1358 %v = load <4 x i16>, <4 x i16>* %s 1359 %v2 = zext <4 x i16> %v to <4 x i32> 1360 ret <4 x i32> %v2 1361} 1362 1363define <4 x i16> @load_ext_v4i32_with_unfolded_offset(<4 x i16>* %p) { 1364; CHECK-LABEL: load_ext_v4i32_with_unfolded_offset: 1365; CHECK: .functype load_ext_v4i32_with_unfolded_offset (i32) -> (v128) 1366; CHECK-NEXT: # %bb.0: 1367; CHECK-NEXT: local.get 0 1368; CHECK-NEXT: i32.const 16 1369; CHECK-NEXT: i32.add 1370; CHECK-NEXT: i32x4.load16x4_u 0 1371; CHECK-NEXT: # fallthrough-return 1372 %q = ptrtoint <4 x i16>* %p to i32 1373 %r = add nsw i32 %q, 16 1374 %s = inttoptr i32 %r to <4 x i16>* 1375 %v = load <4 x i16>, <4 x i16>* %s 1376 ret <4 x i16> %v 1377} 1378 1379define <4 x i32> @load_v4i32_with_unfolded_gep_offset(<4 x i32>* %p) { 1380; CHECK-LABEL: load_v4i32_with_unfolded_gep_offset: 1381; CHECK: .functype load_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1382; CHECK-NEXT: # %bb.0: 1383; CHECK-NEXT: local.get 0 1384; CHECK-NEXT: i32.const 16 1385; CHECK-NEXT: i32.add 1386; CHECK-NEXT: v128.load 0 1387; CHECK-NEXT: # fallthrough-return 1388 %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1 1389 %v = load <4 x i32>, <4 x i32>* %s 1390 ret <4 x i32> %v 1391} 1392 1393define <4 x i32> @load_splat_v4i32_with_unfolded_gep_offset(i32* %p) { 1394; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_offset: 1395; CHECK: .functype load_splat_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1396; CHECK-NEXT: # %bb.0: 1397; CHECK-NEXT: local.get 0 1398; CHECK-NEXT: i32.const 4 1399; CHECK-NEXT: i32.add 1400; CHECK-NEXT: v32x4.load_splat 0 1401; 
CHECK-NEXT: # fallthrough-return 1402 %s = getelementptr i32, i32* %p, i32 1 1403 %e = load i32, i32* %s 1404 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1405 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1406 ret <4 x i32> %v2 1407} 1408 1409define <4 x i32> @load_sext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) { 1410; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_offset: 1411; CHECK: .functype load_sext_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1412; CHECK-NEXT: # %bb.0: 1413; CHECK-NEXT: local.get 0 1414; CHECK-NEXT: i32.const 8 1415; CHECK-NEXT: i32.add 1416; CHECK-NEXT: i32x4.load16x4_s 0 1417; CHECK-NEXT: # fallthrough-return 1418 %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1 1419 %v = load <4 x i16>, <4 x i16>* %s 1420 %v2 = sext <4 x i16> %v to <4 x i32> 1421 ret <4 x i32> %v2 1422} 1423 1424define <4 x i32> @load_zext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) { 1425; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_offset: 1426; CHECK: .functype load_zext_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1427; CHECK-NEXT: # %bb.0: 1428; CHECK-NEXT: local.get 0 1429; CHECK-NEXT: i32.const 8 1430; CHECK-NEXT: i32.add 1431; CHECK-NEXT: i32x4.load16x4_u 0 1432; CHECK-NEXT: # fallthrough-return 1433 %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1 1434 %v = load <4 x i16>, <4 x i16>* %s 1435 %v2 = zext <4 x i16> %v to <4 x i32> 1436 ret <4 x i32> %v2 1437} 1438 1439define <4 x i16> @load_ext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) { 1440; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_offset: 1441; CHECK: .functype load_ext_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1442; CHECK-NEXT: # %bb.0: 1443; CHECK-NEXT: local.get 0 1444; CHECK-NEXT: i32.const 8 1445; CHECK-NEXT: i32.add 1446; CHECK-NEXT: i32x4.load16x4_u 0 1447; CHECK-NEXT: # fallthrough-return 1448 %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1 1449 %v = load <4 x i16>, <4 x i16>* %s 1450 ret <4 x i16> %v 1451} 1452 1453define <4 x i32> 
@load_v4i32_from_numeric_address() { 1454; CHECK-LABEL: load_v4i32_from_numeric_address: 1455; CHECK: .functype load_v4i32_from_numeric_address () -> (v128) 1456; CHECK-NEXT: # %bb.0: 1457; CHECK-NEXT: i32.const 0 1458; CHECK-NEXT: v128.load 32 1459; CHECK-NEXT: # fallthrough-return 1460 %s = inttoptr i32 32 to <4 x i32>* 1461 %v = load <4 x i32>, <4 x i32>* %s 1462 ret <4 x i32> %v 1463} 1464 1465define <4 x i32> @load_splat_v4i32_from_numeric_address() { 1466; CHECK-LABEL: load_splat_v4i32_from_numeric_address: 1467; CHECK: .functype load_splat_v4i32_from_numeric_address () -> (v128) 1468; CHECK-NEXT: # %bb.0: 1469; CHECK-NEXT: i32.const 0 1470; CHECK-NEXT: v32x4.load_splat 32 1471; CHECK-NEXT: # fallthrough-return 1472 %s = inttoptr i32 32 to i32* 1473 %e = load i32, i32* %s 1474 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1475 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1476 ret <4 x i32> %v2 1477} 1478 1479define <4 x i32> @load_sext_v4i32_from_numeric_address() { 1480; CHECK-LABEL: load_sext_v4i32_from_numeric_address: 1481; CHECK: .functype load_sext_v4i32_from_numeric_address () -> (v128) 1482; CHECK-NEXT: # %bb.0: 1483; CHECK-NEXT: i32.const 0 1484; CHECK-NEXT: i32x4.load16x4_s 32 1485; CHECK-NEXT: # fallthrough-return 1486 %s = inttoptr i32 32 to <4 x i16>* 1487 %v = load <4 x i16>, <4 x i16>* %s 1488 %v2 = sext <4 x i16> %v to <4 x i32> 1489 ret <4 x i32> %v2 1490} 1491 1492define <4 x i32> @load_zext_v4i32_from_numeric_address() { 1493; CHECK-LABEL: load_zext_v4i32_from_numeric_address: 1494; CHECK: .functype load_zext_v4i32_from_numeric_address () -> (v128) 1495; CHECK-NEXT: # %bb.0: 1496; CHECK-NEXT: i32.const 0 1497; CHECK-NEXT: i32x4.load16x4_u 32 1498; CHECK-NEXT: # fallthrough-return 1499 %s = inttoptr i32 32 to <4 x i16>* 1500 %v = load <4 x i16>, <4 x i16>* %s 1501 %v2 = zext <4 x i16> %v to <4 x i32> 1502 ret <4 x i32> %v2 1503} 1504 1505define <4 x i16> @load_ext_v4i32_from_numeric_address() { 1506; 
CHECK-LABEL: load_ext_v4i32_from_numeric_address: 1507; CHECK: .functype load_ext_v4i32_from_numeric_address () -> (v128) 1508; CHECK-NEXT: # %bb.0: 1509; CHECK-NEXT: i32.const 0 1510; CHECK-NEXT: i32x4.load16x4_u 32 1511; CHECK-NEXT: # fallthrough-return 1512 %s = inttoptr i32 32 to <4 x i16>* 1513 %v = load <4 x i16>, <4 x i16>* %s 1514 ret <4 x i16> %v 1515} 1516 1517@gv_v4i32 = global <4 x i32> <i32 42, i32 42, i32 42, i32 42> 1518define <4 x i32> @load_v4i32_from_global_address() { 1519; CHECK-LABEL: load_v4i32_from_global_address: 1520; CHECK: .functype load_v4i32_from_global_address () -> (v128) 1521; CHECK-NEXT: # %bb.0: 1522; CHECK-NEXT: i32.const 0 1523; CHECK-NEXT: v128.load gv_v4i32 1524; CHECK-NEXT: # fallthrough-return 1525 %v = load <4 x i32>, <4 x i32>* @gv_v4i32 1526 ret <4 x i32> %v 1527} 1528 1529@gv_i32 = global i32 42 1530define <4 x i32> @load_splat_v4i32_from_global_address() { 1531; CHECK-LABEL: load_splat_v4i32_from_global_address: 1532; CHECK: .functype load_splat_v4i32_from_global_address () -> (v128) 1533; CHECK-NEXT: # %bb.0: 1534; CHECK-NEXT: i32.const 0 1535; CHECK-NEXT: v32x4.load_splat gv_i32 1536; CHECK-NEXT: # fallthrough-return 1537 %e = load i32, i32* @gv_i32 1538 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1539 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1540 ret <4 x i32> %v2 1541} 1542 1543@gv_v4i16 = global <4 x i16> <i16 42, i16 42, i16 42, i16 42> 1544define <4 x i32> @load_sext_v4i32_from_global_address() { 1545; CHECK-LABEL: load_sext_v4i32_from_global_address: 1546; CHECK: .functype load_sext_v4i32_from_global_address () -> (v128) 1547; CHECK-NEXT: # %bb.0: 1548; CHECK-NEXT: i32.const 0 1549; CHECK-NEXT: i32x4.load16x4_s gv_v4i16 1550; CHECK-NEXT: # fallthrough-return 1551 %v = load <4 x i16>, <4 x i16>* @gv_v4i16 1552 %v2 = sext <4 x i16> %v to <4 x i32> 1553 ret <4 x i32> %v2 1554} 1555 1556define <4 x i32> @load_zext_v4i32_from_global_address() { 1557; CHECK-LABEL: 
load_zext_v4i32_from_global_address: 1558; CHECK: .functype load_zext_v4i32_from_global_address () -> (v128) 1559; CHECK-NEXT: # %bb.0: 1560; CHECK-NEXT: i32.const 0 1561; CHECK-NEXT: i32x4.load16x4_u gv_v4i16 1562; CHECK-NEXT: # fallthrough-return 1563 %v = load <4 x i16>, <4 x i16>* @gv_v4i16 1564 %v2 = zext <4 x i16> %v to <4 x i32> 1565 ret <4 x i32> %v2 1566} 1567 1568define <4 x i16> @load_ext_v4i32_from_global_address() { 1569; CHECK-LABEL: load_ext_v4i32_from_global_address: 1570; CHECK: .functype load_ext_v4i32_from_global_address () -> (v128) 1571; CHECK-NEXT: # %bb.0: 1572; CHECK-NEXT: i32.const 0 1573; CHECK-NEXT: i32x4.load16x4_u gv_v4i16 1574; CHECK-NEXT: # fallthrough-return 1575 %v = load <4 x i16>, <4 x i16>* @gv_v4i16 1576 ret <4 x i16> %v 1577} 1578 1579define void @store_v4i32(<4 x i32> %v, <4 x i32>* %p) { 1580; CHECK-LABEL: store_v4i32: 1581; CHECK: .functype store_v4i32 (v128, i32) -> () 1582; CHECK-NEXT: # %bb.0: 1583; CHECK-NEXT: local.get 1 1584; CHECK-NEXT: local.get 0 1585; CHECK-NEXT: v128.store 0 1586; CHECK-NEXT: # fallthrough-return 1587 store <4 x i32> %v , <4 x i32>* %p 1588 ret void 1589} 1590 1591define void @store_v4i32_with_folded_offset(<4 x i32> %v, <4 x i32>* %p) { 1592; CHECK-LABEL: store_v4i32_with_folded_offset: 1593; CHECK: .functype store_v4i32_with_folded_offset (v128, i32) -> () 1594; CHECK-NEXT: # %bb.0: 1595; CHECK-NEXT: local.get 1 1596; CHECK-NEXT: local.get 0 1597; CHECK-NEXT: v128.store 16 1598; CHECK-NEXT: # fallthrough-return 1599 %q = ptrtoint <4 x i32>* %p to i32 1600 %r = add nuw i32 %q, 16 1601 %s = inttoptr i32 %r to <4 x i32>* 1602 store <4 x i32> %v , <4 x i32>* %s 1603 ret void 1604} 1605 1606define void @store_v4i32_with_folded_gep_offset(<4 x i32> %v, <4 x i32>* %p) { 1607; CHECK-LABEL: store_v4i32_with_folded_gep_offset: 1608; CHECK: .functype store_v4i32_with_folded_gep_offset (v128, i32) -> () 1609; CHECK-NEXT: # %bb.0: 1610; CHECK-NEXT: local.get 1 1611; CHECK-NEXT: local.get 0 1612; CHECK-NEXT: 
v128.store 16 1613; CHECK-NEXT: # fallthrough-return 1614 %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1 1615 store <4 x i32> %v , <4 x i32>* %s 1616 ret void 1617} 1618 1619define void @store_v4i32_with_unfolded_gep_negative_offset(<4 x i32> %v, <4 x i32>* %p) { 1620; CHECK-LABEL: store_v4i32_with_unfolded_gep_negative_offset: 1621; CHECK: .functype store_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> () 1622; CHECK-NEXT: # %bb.0: 1623; CHECK-NEXT: local.get 1 1624; CHECK-NEXT: i32.const -16 1625; CHECK-NEXT: i32.add 1626; CHECK-NEXT: local.get 0 1627; CHECK-NEXT: v128.store 0 1628; CHECK-NEXT: # fallthrough-return 1629 %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1 1630 store <4 x i32> %v , <4 x i32>* %s 1631 ret void 1632} 1633 1634define void @store_v4i32_with_unfolded_offset(<4 x i32> %v, <4 x i32>* %p) { 1635; CHECK-LABEL: store_v4i32_with_unfolded_offset: 1636; CHECK: .functype store_v4i32_with_unfolded_offset (v128, i32) -> () 1637; CHECK-NEXT: # %bb.0: 1638; CHECK-NEXT: local.get 1 1639; CHECK-NEXT: i32.const 16 1640; CHECK-NEXT: i32.add 1641; CHECK-NEXT: local.get 0 1642; CHECK-NEXT: v128.store 0 1643; CHECK-NEXT: # fallthrough-return 1644 %q = ptrtoint <4 x i32>* %p to i32 1645 %r = add nsw i32 %q, 16 1646 %s = inttoptr i32 %r to <4 x i32>* 1647 store <4 x i32> %v , <4 x i32>* %s 1648 ret void 1649} 1650 1651define void @store_v4i32_with_unfolded_gep_offset(<4 x i32> %v, <4 x i32>* %p) { 1652; CHECK-LABEL: store_v4i32_with_unfolded_gep_offset: 1653; CHECK: .functype store_v4i32_with_unfolded_gep_offset (v128, i32) -> () 1654; CHECK-NEXT: # %bb.0: 1655; CHECK-NEXT: local.get 1 1656; CHECK-NEXT: i32.const 16 1657; CHECK-NEXT: i32.add 1658; CHECK-NEXT: local.get 0 1659; CHECK-NEXT: v128.store 0 1660; CHECK-NEXT: # fallthrough-return 1661 %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1 1662 store <4 x i32> %v , <4 x i32>* %s 1663 ret void 1664} 1665 1666define void @store_v4i32_to_numeric_address(<4 x i32> %v) { 1667; 
CHECK-LABEL: store_v4i32_to_numeric_address: 1668; CHECK: .functype store_v4i32_to_numeric_address (v128) -> () 1669; CHECK-NEXT: # %bb.0: 1670; CHECK-NEXT: i32.const 0 1671; CHECK-NEXT: local.get 0 1672; CHECK-NEXT: v128.store 32 1673; CHECK-NEXT: # fallthrough-return 1674 %s = inttoptr i32 32 to <4 x i32>* 1675 store <4 x i32> %v , <4 x i32>* %s 1676 ret void 1677} 1678 1679define void @store_v4i32_to_global_address(<4 x i32> %v) { 1680; CHECK-LABEL: store_v4i32_to_global_address: 1681; CHECK: .functype store_v4i32_to_global_address (v128) -> () 1682; CHECK-NEXT: # %bb.0: 1683; CHECK-NEXT: i32.const 0 1684; CHECK-NEXT: local.get 0 1685; CHECK-NEXT: v128.store gv_v4i32 1686; CHECK-NEXT: # fallthrough-return 1687 store <4 x i32> %v , <4 x i32>* @gv_v4i32 1688 ret void 1689} 1690 1691; ============================================================================== 1692; 2 x i64 1693; ============================================================================== 1694define <2 x i64> @load_v2i64(<2 x i64>* %p) { 1695; CHECK-LABEL: load_v2i64: 1696; CHECK: .functype load_v2i64 (i32) -> (v128) 1697; CHECK-NEXT: # %bb.0: 1698; CHECK-NEXT: local.get 0 1699; CHECK-NEXT: v128.load 0 1700; CHECK-NEXT: # fallthrough-return 1701 %v = load <2 x i64>, <2 x i64>* %p 1702 ret <2 x i64> %v 1703} 1704 1705define <2 x i64> @load_splat_v2i64(i64* %p) { 1706; CHECK-LABEL: load_splat_v2i64: 1707; CHECK: .functype load_splat_v2i64 (i32) -> (v128) 1708; CHECK-NEXT: # %bb.0: 1709; CHECK-NEXT: local.get 0 1710; CHECK-NEXT: v64x2.load_splat 0 1711; CHECK-NEXT: # fallthrough-return 1712 %e = load i64, i64* %p 1713 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 1714 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 1715 ret <2 x i64> %v2 1716} 1717 1718define <2 x i64> @load_sext_v2i64(<2 x i32>* %p) { 1719; CHECK-LABEL: load_sext_v2i64: 1720; CHECK: .functype load_sext_v2i64 (i32) -> (v128) 1721; CHECK-NEXT: # %bb.0: 1722; CHECK-NEXT: local.get 0 1723; 
CHECK-NEXT: i64x2.load32x2_s 0 1724; CHECK-NEXT: # fallthrough-return 1725 %v = load <2 x i32>, <2 x i32>* %p 1726 %v2 = sext <2 x i32> %v to <2 x i64> 1727 ret <2 x i64> %v2 1728} 1729 1730define <2 x i64> @load_zext_v2i64(<2 x i32>* %p) { 1731; CHECK-LABEL: load_zext_v2i64: 1732; CHECK: .functype load_zext_v2i64 (i32) -> (v128) 1733; CHECK-NEXT: # %bb.0: 1734; CHECK-NEXT: local.get 0 1735; CHECK-NEXT: i64x2.load32x2_u 0 1736; CHECK-NEXT: # fallthrough-return 1737 %v = load <2 x i32>, <2 x i32>* %p 1738 %v2 = zext <2 x i32> %v to <2 x i64> 1739 ret <2 x i64> %v2 1740} 1741 1742define <2 x i32> @load_ext_v2i64(<2 x i32>* %p) { 1743; CHECK-LABEL: load_ext_v2i64: 1744; CHECK: .functype load_ext_v2i64 (i32) -> (v128) 1745; CHECK-NEXT: # %bb.0: 1746; CHECK-NEXT: local.get 0 1747; CHECK-NEXT: i64x2.load32x2_u 0 1748; CHECK-NEXT: # fallthrough-return 1749 %v = load <2 x i32>, <2 x i32>* %p 1750 ret <2 x i32> %v 1751} 1752 1753define <2 x i64> @load_v2i64_with_folded_offset(<2 x i64>* %p) { 1754; CHECK-LABEL: load_v2i64_with_folded_offset: 1755; CHECK: .functype load_v2i64_with_folded_offset (i32) -> (v128) 1756; CHECK-NEXT: # %bb.0: 1757; CHECK-NEXT: local.get 0 1758; CHECK-NEXT: v128.load 16 1759; CHECK-NEXT: # fallthrough-return 1760 %q = ptrtoint <2 x i64>* %p to i32 1761 %r = add nuw i32 %q, 16 1762 %s = inttoptr i32 %r to <2 x i64>* 1763 %v = load <2 x i64>, <2 x i64>* %s 1764 ret <2 x i64> %v 1765} 1766 1767define <2 x i64> @load_splat_v2i64_with_folded_offset(i64* %p) { 1768; CHECK-LABEL: load_splat_v2i64_with_folded_offset: 1769; CHECK: .functype load_splat_v2i64_with_folded_offset (i32) -> (v128) 1770; CHECK-NEXT: # %bb.0: 1771; CHECK-NEXT: local.get 0 1772; CHECK-NEXT: v64x2.load_splat 16 1773; CHECK-NEXT: # fallthrough-return 1774 %q = ptrtoint i64* %p to i32 1775 %r = add nuw i32 %q, 16 1776 %s = inttoptr i32 %r to i64* 1777 %e = load i64, i64* %s 1778 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 1779 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, 
<2 x i32> zeroinitializer 1780 ret <2 x i64> %v2 1781} 1782 1783define <2 x i64> @load_sext_v2i64_with_folded_offset(<2 x i32>* %p) { 1784; CHECK-LABEL: load_sext_v2i64_with_folded_offset: 1785; CHECK: .functype load_sext_v2i64_with_folded_offset (i32) -> (v128) 1786; CHECK-NEXT: # %bb.0: 1787; CHECK-NEXT: local.get 0 1788; CHECK-NEXT: i64x2.load32x2_s 16 1789; CHECK-NEXT: # fallthrough-return 1790 %q = ptrtoint <2 x i32>* %p to i32 1791 %r = add nuw i32 %q, 16 1792 %s = inttoptr i32 %r to <2 x i32>* 1793 %v = load <2 x i32>, <2 x i32>* %s 1794 %v2 = sext <2 x i32> %v to <2 x i64> 1795 ret <2 x i64> %v2 1796} 1797 1798define <2 x i64> @load_zext_v2i64_with_folded_offset(<2 x i32>* %p) { 1799; CHECK-LABEL: load_zext_v2i64_with_folded_offset: 1800; CHECK: .functype load_zext_v2i64_with_folded_offset (i32) -> (v128) 1801; CHECK-NEXT: # %bb.0: 1802; CHECK-NEXT: local.get 0 1803; CHECK-NEXT: i64x2.load32x2_u 16 1804; CHECK-NEXT: # fallthrough-return 1805 %q = ptrtoint <2 x i32>* %p to i32 1806 %r = add nuw i32 %q, 16 1807 %s = inttoptr i32 %r to <2 x i32>* 1808 %v = load <2 x i32>, <2 x i32>* %s 1809 %v2 = zext <2 x i32> %v to <2 x i64> 1810 ret <2 x i64> %v2 1811} 1812 1813define <2 x i32> @load_ext_v2i64_with_folded_offset(<2 x i32>* %p) { 1814; CHECK-LABEL: load_ext_v2i64_with_folded_offset: 1815; CHECK: .functype load_ext_v2i64_with_folded_offset (i32) -> (v128) 1816; CHECK-NEXT: # %bb.0: 1817; CHECK-NEXT: local.get 0 1818; CHECK-NEXT: i64x2.load32x2_u 16 1819; CHECK-NEXT: # fallthrough-return 1820 %q = ptrtoint <2 x i32>* %p to i32 1821 %r = add nuw i32 %q, 16 1822 %s = inttoptr i32 %r to <2 x i32>* 1823 %v = load <2 x i32>, <2 x i32>* %s 1824 ret <2 x i32> %v 1825} 1826 1827define <2 x i64> @load_v2i64_with_folded_gep_offset(<2 x i64>* %p) { 1828; CHECK-LABEL: load_v2i64_with_folded_gep_offset: 1829; CHECK: .functype load_v2i64_with_folded_gep_offset (i32) -> (v128) 1830; CHECK-NEXT: # %bb.0: 1831; CHECK-NEXT: local.get 0 1832; CHECK-NEXT: v128.load 16 1833; 
CHECK-NEXT: # fallthrough-return 1834 %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1 1835 %v = load <2 x i64>, <2 x i64>* %s 1836 ret <2 x i64> %v 1837} 1838 1839define <2 x i64> @load_splat_v2i64_with_folded_gep_offset(i64* %p) { 1840; CHECK-LABEL: load_splat_v2i64_with_folded_gep_offset: 1841; CHECK: .functype load_splat_v2i64_with_folded_gep_offset (i32) -> (v128) 1842; CHECK-NEXT: # %bb.0: 1843; CHECK-NEXT: local.get 0 1844; CHECK-NEXT: v64x2.load_splat 8 1845; CHECK-NEXT: # fallthrough-return 1846 %s = getelementptr inbounds i64, i64* %p, i32 1 1847 %e = load i64, i64* %s 1848 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 1849 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 1850 ret <2 x i64> %v2 1851} 1852 1853define <2 x i64> @load_sext_v2i64_with_folded_gep_offset(<2 x i32>* %p) { 1854; CHECK-LABEL: load_sext_v2i64_with_folded_gep_offset: 1855; CHECK: .functype load_sext_v2i64_with_folded_gep_offset (i32) -> (v128) 1856; CHECK-NEXT: # %bb.0: 1857; CHECK-NEXT: local.get 0 1858; CHECK-NEXT: i64x2.load32x2_s 8 1859; CHECK-NEXT: # fallthrough-return 1860 %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1 1861 %v = load <2 x i32>, <2 x i32>* %s 1862 %v2 = sext <2 x i32> %v to <2 x i64> 1863 ret <2 x i64> %v2 1864} 1865 1866define <2 x i64> @load_zext_v2i64_with_folded_gep_offset(<2 x i32>* %p) { 1867; CHECK-LABEL: load_zext_v2i64_with_folded_gep_offset: 1868; CHECK: .functype load_zext_v2i64_with_folded_gep_offset (i32) -> (v128) 1869; CHECK-NEXT: # %bb.0: 1870; CHECK-NEXT: local.get 0 1871; CHECK-NEXT: i64x2.load32x2_u 8 1872; CHECK-NEXT: # fallthrough-return 1873 %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1 1874 %v = load <2 x i32>, <2 x i32>* %s 1875 %v2 = zext <2 x i32> %v to <2 x i64> 1876 ret <2 x i64> %v2 1877} 1878 1879define <2 x i32> @load_ext_v2i64_with_folded_gep_offset(<2 x i32>* %p) { 1880; CHECK-LABEL: load_ext_v2i64_with_folded_gep_offset: 1881; CHECK: .functype 
load_ext_v2i64_with_folded_gep_offset (i32) -> (v128) 1882; CHECK-NEXT: # %bb.0: 1883; CHECK-NEXT: local.get 0 1884; CHECK-NEXT: i64x2.load32x2_u 8 1885; CHECK-NEXT: # fallthrough-return 1886 %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1 1887 %v = load <2 x i32>, <2 x i32>* %s 1888 ret <2 x i32> %v 1889} 1890 1891define <2 x i64> @load_v2i64_with_unfolded_gep_negative_offset(<2 x i64>* %p) { 1892; CHECK-LABEL: load_v2i64_with_unfolded_gep_negative_offset: 1893; CHECK: .functype load_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) 1894; CHECK-NEXT: # %bb.0: 1895; CHECK-NEXT: local.get 0 1896; CHECK-NEXT: i32.const -16 1897; CHECK-NEXT: i32.add 1898; CHECK-NEXT: v128.load 0 1899; CHECK-NEXT: # fallthrough-return 1900 %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1 1901 %v = load <2 x i64>, <2 x i64>* %s 1902 ret <2 x i64> %v 1903} 1904 1905define <2 x i64> @load_splat_v2i64_with_unfolded_gep_negative_offset(i64* %p) { 1906; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_negative_offset: 1907; CHECK: .functype load_splat_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) 1908; CHECK-NEXT: # %bb.0: 1909; CHECK-NEXT: local.get 0 1910; CHECK-NEXT: i32.const -8 1911; CHECK-NEXT: i32.add 1912; CHECK-NEXT: v64x2.load_splat 0 1913; CHECK-NEXT: # fallthrough-return 1914 %s = getelementptr inbounds i64, i64* %p, i32 -1 1915 %e = load i64, i64* %s 1916 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 1917 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 1918 ret <2 x i64> %v2 1919} 1920 1921define <2 x i64> @load_sext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) { 1922; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_negative_offset: 1923; CHECK: .functype load_sext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) 1924; CHECK-NEXT: # %bb.0: 1925; CHECK-NEXT: local.get 0 1926; CHECK-NEXT: i32.const -8 1927; CHECK-NEXT: i32.add 1928; CHECK-NEXT: i64x2.load32x2_s 0 1929; CHECK-NEXT: # 
fallthrough-return 1930 %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1 1931 %v = load <2 x i32>, <2 x i32>* %s 1932 %v2 = sext <2 x i32> %v to <2 x i64> 1933 ret <2 x i64> %v2 1934} 1935 1936define <2 x i64> @load_zext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) { 1937; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_negative_offset: 1938; CHECK: .functype load_zext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) 1939; CHECK-NEXT: # %bb.0: 1940; CHECK-NEXT: local.get 0 1941; CHECK-NEXT: i32.const -8 1942; CHECK-NEXT: i32.add 1943; CHECK-NEXT: i64x2.load32x2_u 0 1944; CHECK-NEXT: # fallthrough-return 1945 %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1 1946 %v = load <2 x i32>, <2 x i32>* %s 1947 %v2 = zext <2 x i32> %v to <2 x i64> 1948 ret <2 x i64> %v2 1949} 1950 1951define <2 x i32> @load_ext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) { 1952; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_negative_offset: 1953; CHECK: .functype load_ext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) 1954; CHECK-NEXT: # %bb.0: 1955; CHECK-NEXT: local.get 0 1956; CHECK-NEXT: i32.const -8 1957; CHECK-NEXT: i32.add 1958; CHECK-NEXT: i64x2.load32x2_u 0 1959; CHECK-NEXT: # fallthrough-return 1960 %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1 1961 %v = load <2 x i32>, <2 x i32>* %s 1962 ret <2 x i32> %v 1963} 1964 1965define <2 x i64> @load_v2i64_with_unfolded_offset(<2 x i64>* %p) { 1966; CHECK-LABEL: load_v2i64_with_unfolded_offset: 1967; CHECK: .functype load_v2i64_with_unfolded_offset (i32) -> (v128) 1968; CHECK-NEXT: # %bb.0: 1969; CHECK-NEXT: local.get 0 1970; CHECK-NEXT: i32.const 16 1971; CHECK-NEXT: i32.add 1972; CHECK-NEXT: v128.load 0 1973; CHECK-NEXT: # fallthrough-return 1974 %q = ptrtoint <2 x i64>* %p to i32 1975 %r = add nsw i32 %q, 16 1976 %s = inttoptr i32 %r to <2 x i64>* 1977 %v = load <2 x i64>, <2 x i64>* %s 1978 ret <2 x i64> %v 1979} 1980 1981define <2 x i64> 
@load_splat_v2i64_with_unfolded_offset(i64* %p) { 1982; CHECK-LABEL: load_splat_v2i64_with_unfolded_offset: 1983; CHECK: .functype load_splat_v2i64_with_unfolded_offset (i32) -> (v128) 1984; CHECK-NEXT: # %bb.0: 1985; CHECK-NEXT: local.get 0 1986; CHECK-NEXT: i32.const 16 1987; CHECK-NEXT: i32.add 1988; CHECK-NEXT: v64x2.load_splat 0 1989; CHECK-NEXT: # fallthrough-return 1990 %q = ptrtoint i64* %p to i32 1991 %r = add nsw i32 %q, 16 1992 %s = inttoptr i32 %r to i64* 1993 %e = load i64, i64* %s 1994 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 1995 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 1996 ret <2 x i64> %v2 1997} 1998 1999define <2 x i64> @load_sext_v2i64_with_unfolded_offset(<2 x i32>* %p) { 2000; CHECK-LABEL: load_sext_v2i64_with_unfolded_offset: 2001; CHECK: .functype load_sext_v2i64_with_unfolded_offset (i32) -> (v128) 2002; CHECK-NEXT: # %bb.0: 2003; CHECK-NEXT: local.get 0 2004; CHECK-NEXT: i32.const 16 2005; CHECK-NEXT: i32.add 2006; CHECK-NEXT: i64x2.load32x2_s 0 2007; CHECK-NEXT: # fallthrough-return 2008 %q = ptrtoint <2 x i32>* %p to i32 2009 %r = add nsw i32 %q, 16 2010 %s = inttoptr i32 %r to <2 x i32>* 2011 %v = load <2 x i32>, <2 x i32>* %s 2012 %v2 = sext <2 x i32> %v to <2 x i64> 2013 ret <2 x i64> %v2 2014} 2015 2016define <2 x i64> @load_zext_v2i64_with_unfolded_offset(<2 x i32>* %p) { 2017; CHECK-LABEL: load_zext_v2i64_with_unfolded_offset: 2018; CHECK: .functype load_zext_v2i64_with_unfolded_offset (i32) -> (v128) 2019; CHECK-NEXT: # %bb.0: 2020; CHECK-NEXT: local.get 0 2021; CHECK-NEXT: i32.const 16 2022; CHECK-NEXT: i32.add 2023; CHECK-NEXT: i64x2.load32x2_u 0 2024; CHECK-NEXT: # fallthrough-return 2025 %q = ptrtoint <2 x i32>* %p to i32 2026 %r = add nsw i32 %q, 16 2027 %s = inttoptr i32 %r to <2 x i32>* 2028 %v = load <2 x i32>, <2 x i32>* %s 2029 %v2 = zext <2 x i32> %v to <2 x i64> 2030 ret <2 x i64> %v2 2031} 2032 2033define <2 x i32> @load_ext_v2i64_with_unfolded_offset(<2 x i32>* %p) { 
2034; CHECK-LABEL: load_ext_v2i64_with_unfolded_offset: 2035; CHECK: .functype load_ext_v2i64_with_unfolded_offset (i32) -> (v128) 2036; CHECK-NEXT: # %bb.0: 2037; CHECK-NEXT: local.get 0 2038; CHECK-NEXT: i32.const 16 2039; CHECK-NEXT: i32.add 2040; CHECK-NEXT: i64x2.load32x2_u 0 2041; CHECK-NEXT: # fallthrough-return 2042 %q = ptrtoint <2 x i32>* %p to i32 2043 %r = add nsw i32 %q, 16 2044 %s = inttoptr i32 %r to <2 x i32>* 2045 %v = load <2 x i32>, <2 x i32>* %s 2046 ret <2 x i32> %v 2047} 2048 2049define <2 x i64> @load_v2i64_with_unfolded_gep_offset(<2 x i64>* %p) { 2050; CHECK-LABEL: load_v2i64_with_unfolded_gep_offset: 2051; CHECK: .functype load_v2i64_with_unfolded_gep_offset (i32) -> (v128) 2052; CHECK-NEXT: # %bb.0: 2053; CHECK-NEXT: local.get 0 2054; CHECK-NEXT: i32.const 16 2055; CHECK-NEXT: i32.add 2056; CHECK-NEXT: v128.load 0 2057; CHECK-NEXT: # fallthrough-return 2058 %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1 2059 %v = load <2 x i64>, <2 x i64>* %s 2060 ret <2 x i64> %v 2061} 2062 2063define <2 x i64> @load_splat_v2i64_with_unfolded_gep_offset(i64* %p) { 2064; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_offset: 2065; CHECK: .functype load_splat_v2i64_with_unfolded_gep_offset (i32) -> (v128) 2066; CHECK-NEXT: # %bb.0: 2067; CHECK-NEXT: local.get 0 2068; CHECK-NEXT: i32.const 8 2069; CHECK-NEXT: i32.add 2070; CHECK-NEXT: v64x2.load_splat 0 2071; CHECK-NEXT: # fallthrough-return 2072 %s = getelementptr i64, i64* %p, i32 1 2073 %e = load i64, i64* %s 2074 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2075 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2076 ret <2 x i64> %v2 2077} 2078 2079define <2 x i64> @load_sext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) { 2080; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_offset: 2081; CHECK: .functype load_sext_v2i64_with_unfolded_gep_offset (i32) -> (v128) 2082; CHECK-NEXT: # %bb.0: 2083; CHECK-NEXT: local.get 0 2084; CHECK-NEXT: i32.const 8 2085; CHECK-NEXT: 
i32.add 2086; CHECK-NEXT: i64x2.load32x2_s 0 2087; CHECK-NEXT: # fallthrough-return 2088 %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1 2089 %v = load <2 x i32>, <2 x i32>* %s 2090 %v2 = sext <2 x i32> %v to <2 x i64> 2091 ret <2 x i64> %v2 2092} 2093 2094define <2 x i64> @load_zext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) { 2095; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_offset: 2096; CHECK: .functype load_zext_v2i64_with_unfolded_gep_offset (i32) -> (v128) 2097; CHECK-NEXT: # %bb.0: 2098; CHECK-NEXT: local.get 0 2099; CHECK-NEXT: i32.const 8 2100; CHECK-NEXT: i32.add 2101; CHECK-NEXT: i64x2.load32x2_u 0 2102; CHECK-NEXT: # fallthrough-return 2103 %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1 2104 %v = load <2 x i32>, <2 x i32>* %s 2105 %v2 = zext <2 x i32> %v to <2 x i64> 2106 ret <2 x i64> %v2 2107} 2108 2109define <2 x i32> @load_ext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) { 2110; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_offset: 2111; CHECK: .functype load_ext_v2i64_with_unfolded_gep_offset (i32) -> (v128) 2112; CHECK-NEXT: # %bb.0: 2113; CHECK-NEXT: local.get 0 2114; CHECK-NEXT: i32.const 8 2115; CHECK-NEXT: i32.add 2116; CHECK-NEXT: i64x2.load32x2_u 0 2117; CHECK-NEXT: # fallthrough-return 2118 %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1 2119 %v = load <2 x i32>, <2 x i32>* %s 2120 ret <2 x i32> %v 2121} 2122 2123define <2 x i64> @load_v2i64_from_numeric_address() { 2124; CHECK-LABEL: load_v2i64_from_numeric_address: 2125; CHECK: .functype load_v2i64_from_numeric_address () -> (v128) 2126; CHECK-NEXT: # %bb.0: 2127; CHECK-NEXT: i32.const 0 2128; CHECK-NEXT: v128.load 32 2129; CHECK-NEXT: # fallthrough-return 2130 %s = inttoptr i32 32 to <2 x i64>* 2131 %v = load <2 x i64>, <2 x i64>* %s 2132 ret <2 x i64> %v 2133} 2134 2135define <2 x i64> @load_splat_v2i64_from_numeric_address() { 2136; CHECK-LABEL: load_splat_v2i64_from_numeric_address: 2137; CHECK: .functype load_splat_v2i64_from_numeric_address () -> (v128) 2138; 
CHECK-NEXT: # %bb.0: 2139; CHECK-NEXT: i32.const 0 2140; CHECK-NEXT: v64x2.load_splat 32 2141; CHECK-NEXT: # fallthrough-return 2142 %s = inttoptr i32 32 to i64* 2143 %e = load i64, i64* %s 2144 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2145 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2146 ret <2 x i64> %v2 2147} 2148 2149define <2 x i64> @load_sext_v2i64_from_numeric_address() { 2150; CHECK-LABEL: load_sext_v2i64_from_numeric_address: 2151; CHECK: .functype load_sext_v2i64_from_numeric_address () -> (v128) 2152; CHECK-NEXT: # %bb.0: 2153; CHECK-NEXT: i32.const 0 2154; CHECK-NEXT: i64x2.load32x2_s 32 2155; CHECK-NEXT: # fallthrough-return 2156 %s = inttoptr i32 32 to <2 x i32>* 2157 %v = load <2 x i32>, <2 x i32>* %s 2158 %v2 = sext <2 x i32> %v to <2 x i64> 2159 ret <2 x i64> %v2 2160} 2161 2162define <2 x i64> @load_zext_v2i64_from_numeric_address() { 2163; CHECK-LABEL: load_zext_v2i64_from_numeric_address: 2164; CHECK: .functype load_zext_v2i64_from_numeric_address () -> (v128) 2165; CHECK-NEXT: # %bb.0: 2166; CHECK-NEXT: i32.const 0 2167; CHECK-NEXT: i64x2.load32x2_u 32 2168; CHECK-NEXT: # fallthrough-return 2169 %s = inttoptr i32 32 to <2 x i32>* 2170 %v = load <2 x i32>, <2 x i32>* %s 2171 %v2 = zext <2 x i32> %v to <2 x i64> 2172 ret <2 x i64> %v2 2173} 2174 2175define <2 x i32> @load_ext_v2i64_from_numeric_address() { 2176; CHECK-LABEL: load_ext_v2i64_from_numeric_address: 2177; CHECK: .functype load_ext_v2i64_from_numeric_address () -> (v128) 2178; CHECK-NEXT: # %bb.0: 2179; CHECK-NEXT: i32.const 0 2180; CHECK-NEXT: i64x2.load32x2_u 32 2181; CHECK-NEXT: # fallthrough-return 2182 %s = inttoptr i32 32 to <2 x i32>* 2183 %v = load <2 x i32>, <2 x i32>* %s 2184 ret <2 x i32> %v 2185} 2186 2187@gv_v2i64 = global <2 x i64> <i64 42, i64 42> 2188define <2 x i64> @load_v2i64_from_global_address() { 2189; CHECK-LABEL: load_v2i64_from_global_address: 2190; CHECK: .functype load_v2i64_from_global_address () -> (v128) 2191; 
CHECK-NEXT: # %bb.0: 2192; CHECK-NEXT: i32.const 0 2193; CHECK-NEXT: v128.load gv_v2i64 2194; CHECK-NEXT: # fallthrough-return 2195 %v = load <2 x i64>, <2 x i64>* @gv_v2i64 2196 ret <2 x i64> %v 2197} 2198 2199@gv_i64 = global i64 42 2200define <2 x i64> @load_splat_v2i64_from_global_address() { 2201; CHECK-LABEL: load_splat_v2i64_from_global_address: 2202; CHECK: .functype load_splat_v2i64_from_global_address () -> (v128) 2203; CHECK-NEXT: # %bb.0: 2204; CHECK-NEXT: i32.const 0 2205; CHECK-NEXT: v64x2.load_splat gv_i64 2206; CHECK-NEXT: # fallthrough-return 2207 %e = load i64, i64* @gv_i64 2208 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2209 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2210 ret <2 x i64> %v2 2211} 2212 2213@gv_v2i32 = global <2 x i32> <i32 42, i32 42> 2214define <2 x i64> @load_sext_v2i64_from_global_address() { 2215; CHECK-LABEL: load_sext_v2i64_from_global_address: 2216; CHECK: .functype load_sext_v2i64_from_global_address () -> (v128) 2217; CHECK-NEXT: # %bb.0: 2218; CHECK-NEXT: i32.const 0 2219; CHECK-NEXT: i64x2.load32x2_s gv_v2i32 2220; CHECK-NEXT: # fallthrough-return 2221 %v = load <2 x i32>, <2 x i32>* @gv_v2i32 2222 %v2 = sext <2 x i32> %v to <2 x i64> 2223 ret <2 x i64> %v2 2224} 2225 2226define <2 x i64> @load_zext_v2i64_from_global_address() { 2227; CHECK-LABEL: load_zext_v2i64_from_global_address: 2228; CHECK: .functype load_zext_v2i64_from_global_address () -> (v128) 2229; CHECK-NEXT: # %bb.0: 2230; CHECK-NEXT: i32.const 0 2231; CHECK-NEXT: i64x2.load32x2_u gv_v2i32 2232; CHECK-NEXT: # fallthrough-return 2233 %v = load <2 x i32>, <2 x i32>* @gv_v2i32 2234 %v2 = zext <2 x i32> %v to <2 x i64> 2235 ret <2 x i64> %v2 2236} 2237 2238define <2 x i32> @load_ext_v2i64_from_global_address() { 2239; CHECK-LABEL: load_ext_v2i64_from_global_address: 2240; CHECK: .functype load_ext_v2i64_from_global_address () -> (v128) 2241; CHECK-NEXT: # %bb.0: 2242; CHECK-NEXT: i32.const 0 2243; CHECK-NEXT: 
i64x2.load32x2_u gv_v2i32 2244; CHECK-NEXT: # fallthrough-return 2245 %v = load <2 x i32>, <2 x i32>* @gv_v2i32 2246 ret <2 x i32> %v 2247} 2248 2249define void @store_v2i64(<2 x i64> %v, <2 x i64>* %p) { 2250; CHECK-LABEL: store_v2i64: 2251; CHECK: .functype store_v2i64 (v128, i32) -> () 2252; CHECK-NEXT: # %bb.0: 2253; CHECK-NEXT: local.get 1 2254; CHECK-NEXT: local.get 0 2255; CHECK-NEXT: v128.store 0 2256; CHECK-NEXT: # fallthrough-return 2257 store <2 x i64> %v , <2 x i64>* %p 2258 ret void 2259} 2260 2261define void @store_v2i64_with_folded_offset(<2 x i64> %v, <2 x i64>* %p) { 2262; CHECK-LABEL: store_v2i64_with_folded_offset: 2263; CHECK: .functype store_v2i64_with_folded_offset (v128, i32) -> () 2264; CHECK-NEXT: # %bb.0: 2265; CHECK-NEXT: local.get 1 2266; CHECK-NEXT: local.get 0 2267; CHECK-NEXT: v128.store 16 2268; CHECK-NEXT: # fallthrough-return 2269 %q = ptrtoint <2 x i64>* %p to i32 2270 %r = add nuw i32 %q, 16 2271 %s = inttoptr i32 %r to <2 x i64>* 2272 store <2 x i64> %v , <2 x i64>* %s 2273 ret void 2274} 2275 2276define void @store_v2i64_with_folded_gep_offset(<2 x i64> %v, <2 x i64>* %p) { 2277; CHECK-LABEL: store_v2i64_with_folded_gep_offset: 2278; CHECK: .functype store_v2i64_with_folded_gep_offset (v128, i32) -> () 2279; CHECK-NEXT: # %bb.0: 2280; CHECK-NEXT: local.get 1 2281; CHECK-NEXT: local.get 0 2282; CHECK-NEXT: v128.store 16 2283; CHECK-NEXT: # fallthrough-return 2284 %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1 2285 store <2 x i64> %v , <2 x i64>* %s 2286 ret void 2287} 2288 2289define void @store_v2i64_with_unfolded_gep_negative_offset(<2 x i64> %v, <2 x i64>* %p) { 2290; CHECK-LABEL: store_v2i64_with_unfolded_gep_negative_offset: 2291; CHECK: .functype store_v2i64_with_unfolded_gep_negative_offset (v128, i32) -> () 2292; CHECK-NEXT: # %bb.0: 2293; CHECK-NEXT: local.get 1 2294; CHECK-NEXT: i32.const -16 2295; CHECK-NEXT: i32.add 2296; CHECK-NEXT: local.get 0 2297; CHECK-NEXT: v128.store 0 2298; CHECK-NEXT: # 
fallthrough-return 2299 %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1 2300 store <2 x i64> %v , <2 x i64>* %s 2301 ret void 2302} 2303 2304define void @store_v2i64_with_unfolded_offset(<2 x i64> %v, <2 x i64>* %p) { 2305; CHECK-LABEL: store_v2i64_with_unfolded_offset: 2306; CHECK: .functype store_v2i64_with_unfolded_offset (v128, i32) -> () 2307; CHECK-NEXT: # %bb.0: 2308; CHECK-NEXT: local.get 1 2309; CHECK-NEXT: i32.const 16 2310; CHECK-NEXT: i32.add 2311; CHECK-NEXT: local.get 0 2312; CHECK-NEXT: v128.store 0 2313; CHECK-NEXT: # fallthrough-return 2314 %q = ptrtoint <2 x i64>* %p to i32 2315 %r = add nsw i32 %q, 16 2316 %s = inttoptr i32 %r to <2 x i64>* 2317 store <2 x i64> %v , <2 x i64>* %s 2318 ret void 2319} 2320 2321define void @store_v2i64_with_unfolded_gep_offset(<2 x i64> %v, <2 x i64>* %p) { 2322; CHECK-LABEL: store_v2i64_with_unfolded_gep_offset: 2323; CHECK: .functype store_v2i64_with_unfolded_gep_offset (v128, i32) -> () 2324; CHECK-NEXT: # %bb.0: 2325; CHECK-NEXT: local.get 1 2326; CHECK-NEXT: i32.const 16 2327; CHECK-NEXT: i32.add 2328; CHECK-NEXT: local.get 0 2329; CHECK-NEXT: v128.store 0 2330; CHECK-NEXT: # fallthrough-return 2331 %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1 2332 store <2 x i64> %v , <2 x i64>* %s 2333 ret void 2334} 2335 2336define void @store_v2i64_to_numeric_address(<2 x i64> %v) { 2337; CHECK-LABEL: store_v2i64_to_numeric_address: 2338; CHECK: .functype store_v2i64_to_numeric_address (v128) -> () 2339; CHECK-NEXT: # %bb.0: 2340; CHECK-NEXT: i32.const 0 2341; CHECK-NEXT: local.get 0 2342; CHECK-NEXT: v128.store 32 2343; CHECK-NEXT: # fallthrough-return 2344 %s = inttoptr i32 32 to <2 x i64>* 2345 store <2 x i64> %v , <2 x i64>* %s 2346 ret void 2347} 2348 2349define void @store_v2i64_to_global_address(<2 x i64> %v) { 2350; CHECK-LABEL: store_v2i64_to_global_address: 2351; CHECK: .functype store_v2i64_to_global_address (v128) -> () 2352; CHECK-NEXT: # %bb.0: 2353; CHECK-NEXT: i32.const 0 2354; CHECK-NEXT: 
local.get 0 2355; CHECK-NEXT: v128.store gv_v2i64 2356; CHECK-NEXT: # fallthrough-return 2357 store <2 x i64> %v , <2 x i64>* @gv_v2i64 2358 ret void 2359} 2360 2361; ============================================================================== 2362; 4 x float 2363; ============================================================================== 2364define <4 x float> @load_v4f32(<4 x float>* %p) { 2365; CHECK-LABEL: load_v4f32: 2366; CHECK: .functype load_v4f32 (i32) -> (v128) 2367; CHECK-NEXT: # %bb.0: 2368; CHECK-NEXT: local.get 0 2369; CHECK-NEXT: v128.load 0 2370; CHECK-NEXT: # fallthrough-return 2371 %v = load <4 x float>, <4 x float>* %p 2372 ret <4 x float> %v 2373} 2374 2375define <4 x float> @load_splat_v4f32(float* %p) { 2376; CHECK-LABEL: load_splat_v4f32: 2377; CHECK: .functype load_splat_v4f32 (i32) -> (v128) 2378; CHECK-NEXT: # %bb.0: 2379; CHECK-NEXT: local.get 0 2380; CHECK-NEXT: v32x4.load_splat 0 2381; CHECK-NEXT: # fallthrough-return 2382 %e = load float, float* %p 2383 %v1 = insertelement <4 x float> undef, float %e, i32 0 2384 %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer 2385 ret <4 x float> %v2 2386} 2387 2388define <4 x float> @load_v4f32_with_folded_offset(<4 x float>* %p) { 2389; CHECK-LABEL: load_v4f32_with_folded_offset: 2390; CHECK: .functype load_v4f32_with_folded_offset (i32) -> (v128) 2391; CHECK-NEXT: # %bb.0: 2392; CHECK-NEXT: local.get 0 2393; CHECK-NEXT: v128.load 16 2394; CHECK-NEXT: # fallthrough-return 2395 %q = ptrtoint <4 x float>* %p to i32 2396 %r = add nuw i32 %q, 16 2397 %s = inttoptr i32 %r to <4 x float>* 2398 %v = load <4 x float>, <4 x float>* %s 2399 ret <4 x float> %v 2400} 2401 2402define <4 x float> @load_splat_v4f32_with_folded_offset(float* %p) { 2403; CHECK-LABEL: load_splat_v4f32_with_folded_offset: 2404; CHECK: .functype load_splat_v4f32_with_folded_offset (i32) -> (v128) 2405; CHECK-NEXT: # %bb.0: 2406; CHECK-NEXT: local.get 0 2407; CHECK-NEXT: v32x4.load_splat 16 2408; 
CHECK-NEXT: # fallthrough-return 2409 %q = ptrtoint float* %p to i32 2410 %r = add nuw i32 %q, 16 2411 %s = inttoptr i32 %r to float* 2412 %e = load float, float* %s 2413 %v1 = insertelement <4 x float> undef, float %e, i32 0 2414 %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer 2415 ret <4 x float> %v2 2416} 2417 2418define <4 x float> @load_v4f32_with_folded_gep_offset(<4 x float>* %p) { 2419; CHECK-LABEL: load_v4f32_with_folded_gep_offset: 2420; CHECK: .functype load_v4f32_with_folded_gep_offset (i32) -> (v128) 2421; CHECK-NEXT: # %bb.0: 2422; CHECK-NEXT: local.get 0 2423; CHECK-NEXT: v128.load 16 2424; CHECK-NEXT: # fallthrough-return 2425 %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1 2426 %v = load <4 x float>, <4 x float>* %s 2427 ret <4 x float> %v 2428} 2429 2430define <4 x float> @load_splat_v4f32_with_folded_gep_offset(float* %p) { 2431; CHECK-LABEL: load_splat_v4f32_with_folded_gep_offset: 2432; CHECK: .functype load_splat_v4f32_with_folded_gep_offset (i32) -> (v128) 2433; CHECK-NEXT: # %bb.0: 2434; CHECK-NEXT: local.get 0 2435; CHECK-NEXT: v32x4.load_splat 4 2436; CHECK-NEXT: # fallthrough-return 2437 %s = getelementptr inbounds float, float* %p, i32 1 2438 %e = load float, float* %s 2439 %v1 = insertelement <4 x float> undef, float %e, i32 0 2440 %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer 2441 ret <4 x float> %v2 2442} 2443 2444define <4 x float> @load_v4f32_with_unfolded_gep_negative_offset(<4 x float>* %p) { 2445; CHECK-LABEL: load_v4f32_with_unfolded_gep_negative_offset: 2446; CHECK: .functype load_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128) 2447; CHECK-NEXT: # %bb.0: 2448; CHECK-NEXT: local.get 0 2449; CHECK-NEXT: i32.const -16 2450; CHECK-NEXT: i32.add 2451; CHECK-NEXT: v128.load 0 2452; CHECK-NEXT: # fallthrough-return 2453 %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1 2454 %v = load <4 x float>, <4 x float>* %s 2455 ret <4 x 
float> %v 2456} 2457 2458define <4 x float> @load_splat_v4f32_with_unfolded_gep_negative_offset(float* %p) { 2459; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_negative_offset: 2460; CHECK: .functype load_splat_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128) 2461; CHECK-NEXT: # %bb.0: 2462; CHECK-NEXT: local.get 0 2463; CHECK-NEXT: i32.const -4 2464; CHECK-NEXT: i32.add 2465; CHECK-NEXT: v32x4.load_splat 0 2466; CHECK-NEXT: # fallthrough-return 2467 %s = getelementptr inbounds float, float* %p, i32 -1 2468 %e = load float, float* %s 2469 %v1 = insertelement <4 x float> undef, float %e, i32 0 2470 %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer 2471 ret <4 x float> %v2 2472} 2473 2474define <4 x float> @load_v4f32_with_unfolded_offset(<4 x float>* %p) { 2475; CHECK-LABEL: load_v4f32_with_unfolded_offset: 2476; CHECK: .functype load_v4f32_with_unfolded_offset (i32) -> (v128) 2477; CHECK-NEXT: # %bb.0: 2478; CHECK-NEXT: local.get 0 2479; CHECK-NEXT: i32.const 16 2480; CHECK-NEXT: i32.add 2481; CHECK-NEXT: v128.load 0 2482; CHECK-NEXT: # fallthrough-return 2483 %q = ptrtoint <4 x float>* %p to i32 2484 %r = add nsw i32 %q, 16 2485 %s = inttoptr i32 %r to <4 x float>* 2486 %v = load <4 x float>, <4 x float>* %s 2487 ret <4 x float> %v 2488} 2489 2490define <4 x float> @load_splat_v4f32_with_unfolded_offset(float* %p) { 2491; CHECK-LABEL: load_splat_v4f32_with_unfolded_offset: 2492; CHECK: .functype load_splat_v4f32_with_unfolded_offset (i32) -> (v128) 2493; CHECK-NEXT: # %bb.0: 2494; CHECK-NEXT: local.get 0 2495; CHECK-NEXT: i32.const 16 2496; CHECK-NEXT: i32.add 2497; CHECK-NEXT: v32x4.load_splat 0 2498; CHECK-NEXT: # fallthrough-return 2499 %q = ptrtoint float* %p to i32 2500 %r = add nsw i32 %q, 16 2501 %s = inttoptr i32 %r to float* 2502 %e = load float, float* %s 2503 %v1 = insertelement <4 x float> undef, float %e, i32 0 2504 %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer 2505 ret <4 x 
float> %v2 2506} 2507 2508define <4 x float> @load_v4f32_with_unfolded_gep_offset(<4 x float>* %p) { 2509; CHECK-LABEL: load_v4f32_with_unfolded_gep_offset: 2510; CHECK: .functype load_v4f32_with_unfolded_gep_offset (i32) -> (v128) 2511; CHECK-NEXT: # %bb.0: 2512; CHECK-NEXT: local.get 0 2513; CHECK-NEXT: i32.const 16 2514; CHECK-NEXT: i32.add 2515; CHECK-NEXT: v128.load 0 2516; CHECK-NEXT: # fallthrough-return 2517 %s = getelementptr <4 x float>, <4 x float>* %p, i32 1 2518 %v = load <4 x float>, <4 x float>* %s 2519 ret <4 x float> %v 2520} 2521 2522define <4 x float> @load_splat_v4f32_with_unfolded_gep_offset(float* %p) { 2523; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_offset: 2524; CHECK: .functype load_splat_v4f32_with_unfolded_gep_offset (i32) -> (v128) 2525; CHECK-NEXT: # %bb.0: 2526; CHECK-NEXT: local.get 0 2527; CHECK-NEXT: i32.const 4 2528; CHECK-NEXT: i32.add 2529; CHECK-NEXT: v32x4.load_splat 0 2530; CHECK-NEXT: # fallthrough-return 2531 %s = getelementptr float, float* %p, i32 1 2532 %e = load float, float* %s 2533 %v1 = insertelement <4 x float> undef, float %e, i32 0 2534 %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer 2535 ret <4 x float> %v2 2536} 2537 2538define <4 x float> @load_v4f32_from_numeric_address() { 2539; CHECK-LABEL: load_v4f32_from_numeric_address: 2540; CHECK: .functype load_v4f32_from_numeric_address () -> (v128) 2541; CHECK-NEXT: # %bb.0: 2542; CHECK-NEXT: i32.const 0 2543; CHECK-NEXT: v128.load 32 2544; CHECK-NEXT: # fallthrough-return 2545 %s = inttoptr i32 32 to <4 x float>* 2546 %v = load <4 x float>, <4 x float>* %s 2547 ret <4 x float> %v 2548} 2549 2550define <4 x float> @load_splat_v4f32_from_numeric_address() { 2551; CHECK-LABEL: load_splat_v4f32_from_numeric_address: 2552; CHECK: .functype load_splat_v4f32_from_numeric_address () -> (v128) 2553; CHECK-NEXT: # %bb.0: 2554; CHECK-NEXT: i32.const 0 2555; CHECK-NEXT: v32x4.load_splat 32 2556; CHECK-NEXT: # fallthrough-return 2557 %s = 
inttoptr i32 32 to float* 2558 %e = load float, float* %s 2559 %v1 = insertelement <4 x float> undef, float %e, i32 0 2560 %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer 2561 ret <4 x float> %v2 2562} 2563 2564@gv_v4f32 = global <4 x float> <float 42., float 42., float 42., float 42.> 2565define <4 x float> @load_v4f32_from_global_address() { 2566; CHECK-LABEL: load_v4f32_from_global_address: 2567; CHECK: .functype load_v4f32_from_global_address () -> (v128) 2568; CHECK-NEXT: # %bb.0: 2569; CHECK-NEXT: i32.const 0 2570; CHECK-NEXT: v128.load gv_v4f32 2571; CHECK-NEXT: # fallthrough-return 2572 %v = load <4 x float>, <4 x float>* @gv_v4f32 2573 ret <4 x float> %v 2574} 2575 2576@gv_f32 = global float 42. 2577define <4 x float> @load_splat_v4f32_from_global_address() { 2578; CHECK-LABEL: load_splat_v4f32_from_global_address: 2579; CHECK: .functype load_splat_v4f32_from_global_address () -> (v128) 2580; CHECK-NEXT: # %bb.0: 2581; CHECK-NEXT: i32.const 0 2582; CHECK-NEXT: v32x4.load_splat gv_f32 2583; CHECK-NEXT: # fallthrough-return 2584 %e = load float, float* @gv_f32 2585 %v1 = insertelement <4 x float> undef, float %e, i32 0 2586 %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer 2587 ret <4 x float> %v2 2588} 2589 2590define void @store_v4f32(<4 x float> %v, <4 x float>* %p) { 2591; CHECK-LABEL: store_v4f32: 2592; CHECK: .functype store_v4f32 (v128, i32) -> () 2593; CHECK-NEXT: # %bb.0: 2594; CHECK-NEXT: local.get 1 2595; CHECK-NEXT: local.get 0 2596; CHECK-NEXT: v128.store 0 2597; CHECK-NEXT: # fallthrough-return 2598 store <4 x float> %v , <4 x float>* %p 2599 ret void 2600} 2601 2602define void @store_v4f32_with_folded_offset(<4 x float> %v, <4 x float>* %p) { 2603; CHECK-LABEL: store_v4f32_with_folded_offset: 2604; CHECK: .functype store_v4f32_with_folded_offset (v128, i32) -> () 2605; CHECK-NEXT: # %bb.0: 2606; CHECK-NEXT: local.get 1 2607; CHECK-NEXT: local.get 0 2608; CHECK-NEXT: v128.store 
16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_with_folded_gep_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_folded_gep_offset:
; CHECK:         .functype store_v4f32_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_with_unfolded_gep_negative_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v4f32_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_with_unfolded_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_offset:
; CHECK:         .functype store_v4f32_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_with_unfolded_gep_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_gep_offset:
; CHECK:         .functype store_v4f32_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_to_numeric_address(<4 x float> %v) {
; CHECK-LABEL: store_v4f32_to_numeric_address:
; CHECK:         .functype store_v4f32_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_to_global_address(<4 x float> %v) {
; CHECK-LABEL: store_v4f32_to_global_address:
; CHECK:         .functype store_v4f32_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v4f32
; CHECK-NEXT:    # fallthrough-return
  store <4 x float> %v , <4 x float>* @gv_v4f32
  ret void
}

; ==============================================================================
; 2 x double
; ==============================================================================
define <2 x double> @load_v2f64(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64:
; CHECK:         .functype load_v2f64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x double>, <2 x double>* %p
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64(double* %p) {
; CHECK-LABEL: load_splat_v2f64:
; CHECK:         .functype load_splat_v2f64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v64x2.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load double, double* %p
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_v2f64_with_folded_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_folded_offset:
; CHECK:         .functype load_v2f64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_folded_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_folded_offset:
; CHECK:         .functype load_splat_v2f64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v64x2.load_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint double* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to double*
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_v2f64_with_folded_gep_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_folded_gep_offset:
; CHECK:         .functype load_v2f64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_folded_gep_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_folded_gep_offset:
; CHECK:         .functype load_splat_v2f64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v64x2.load_splat 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds double, double* %p, i32 1
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_v2f64_with_unfolded_gep_negative_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_unfolded_gep_negative_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v64x2.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds double, double* %p, i32 -1
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_v2f64_with_unfolded_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_offset:
; CHECK:         .functype load_v2f64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_unfolded_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_offset:
; CHECK:         .functype load_splat_v2f64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v64x2.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint double* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to double*
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_v2f64_with_unfolded_gep_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype load_v2f64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x double>, <2 x double>* %p, i32 1
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_unfolded_gep_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v2f64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v64x2.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr double, double* %p, i32 1
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_v2f64_from_numeric_address() {
; CHECK-LABEL: load_v2f64_from_numeric_address:
; CHECK:         .functype load_v2f64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x double>*
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_from_numeric_address() {
; CHECK-LABEL: load_splat_v2f64_from_numeric_address:
; CHECK:         .functype load_splat_v2f64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v64x2.load_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to double*
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

@gv_v2f64 = global <2 x double> <double 42., double 42.>
define <2 x double> @load_v2f64_from_global_address() {
; CHECK-LABEL: load_v2f64_from_global_address:
; CHECK:         .functype load_v2f64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v2f64
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x double>, <2 x double>* @gv_v2f64
  ret <2 x double> %v
}

@gv_f64 = global double 42.
define <2 x double> @load_splat_v2f64_from_global_address() {
; CHECK-LABEL: load_splat_v2f64_from_global_address:
; CHECK:         .functype load_splat_v2f64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v64x2.load_splat gv_f64
; CHECK-NEXT:    # fallthrough-return
  %e = load double, double* @gv_f64
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define void @store_v2f64(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64:
; CHECK:         .functype store_v2f64 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <2 x double> %v , <2 x double>* %p
  ret void
}

define void @store_v2f64_with_folded_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_folded_offset:
; CHECK:         .functype store_v2f64_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  store <2 x double> %v , <2 x double>* %s
  ret void
}

define void @store_v2f64_with_folded_gep_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_folded_gep_offset:
; CHECK:         .functype store_v2f64_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
  store <2 x double> %v , <2 x double>* %s
  ret void
}

define void @store_v2f64_with_unfolded_gep_negative_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v2f64_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
  store <2 x double> %v , <2 x double>* %s
  ret void
}

define void @store_v2f64_with_unfolded_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_offset:
; CHECK:         .functype store_v2f64_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  store <2 x double> %v , <2 x double>* %s
  ret void
}

define void @store_v2f64_with_unfolded_gep_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype store_v2f64_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x double>, <2 x double>* %p, i32 1
  store <2 x double> %v , <2 x double>* %s
  ret void
}
define void @store_v2f64_to_numeric_address(<2 x double> %v) {
; CHECK-LABEL: store_v2f64_to_numeric_address:
; CHECK:         .functype store_v2f64_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x double>*
  store <2 x double> %v , <2 x double>* %s
  ret void
}

define void @store_v2f64_to_global_address(<2 x double> %v) {
; CHECK-LABEL: store_v2f64_to_global_address:
; CHECK:         .functype store_v2f64_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v2f64
; CHECK-NEXT:    # fallthrough-return
  store <2 x double> %v , <2 x double>* @gv_v2f64
  ret void
}