; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s

; Test SIMD loads and stores

target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"

; ==============================================================================
; 16 x i8
; ==============================================================================
define <16 x i8> @load_v16i8(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8:
; CHECK:         .functype load_v16i8 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <16 x i8>, <16 x i8>* %p
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8(i8* %p) {
; CHECK-LABEL: load_splat_v16i8:
; CHECK:         .functype load_splat_v16i8 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v8x16.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i8, i8* %p
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_with_folded_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_folded_offset:
; CHECK:         .functype load_v16i8_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_folded_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_folded_offset:
; CHECK:         .functype load_splat_v16i8_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v8x16.load_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i8* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to i8*
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_with_folded_gep_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_folded_gep_offset:
; CHECK:         .functype load_v16i8_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_folded_gep_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_folded_gep_offset:
; CHECK:         .functype load_splat_v16i8_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v8x16.load_splat 1
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, i8* %p, i32 1
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_with_unfolded_gep_negative_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_unfolded_gep_negative_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -1
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v8x16.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, i8* %p, i32 -1
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_with_unfolded_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_offset:
; CHECK:         .functype load_v16i8_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_unfolded_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v8x16.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i8* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to i8*
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_with_unfolded_gep_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype load_v16i8_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_unfolded_gep_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v8x16.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i8, i8* %p, i32 1
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_from_numeric_address() {
; CHECK-LABEL: load_v16i8_from_numeric_address:
; CHECK:         .functype load_v16i8_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <16 x i8>*
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_from_numeric_address() {
; CHECK-LABEL: load_splat_v16i8_from_numeric_address:
; CHECK:         .functype load_splat_v16i8_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v8x16.load_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to i8*
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

@gv_v16i8 = global <16 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
define <16 x i8> @load_v16i8_from_global_address() {
; CHECK-LABEL: load_v16i8_from_global_address:
; CHECK:         .functype load_v16i8_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v16i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <16 x i8>, <16 x i8>* @gv_v16i8
  ret <16 x i8> %v
}

@gv_i8 = global i8 42
define <16 x i8> @load_splat_v16i8_from_global_address() {
; CHECK-LABEL: load_splat_v16i8_from_global_address:
; CHECK:         .functype load_splat_v16i8_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v8x16.load_splat gv_i8
; CHECK-NEXT:    # fallthrough-return
  %e = load i8, i8* @gv_i8
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define void @store_v16i8(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8:
; CHECK:         .functype store_v16i8 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <16 x i8> %v , <16 x i8>* %p
  ret void
}

define void @store_v16i8_with_folded_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_folded_offset:
; CHECK:         .functype store_v16i8_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_with_folded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_folded_gep_offset:
; CHECK:         .functype store_v16i8_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_with_unfolded_gep_negative_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v16i8_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

; FIX(review): this test previously duplicated the unfolded_gep_negative_offset
; test above. It now matches the load_v16i8_with_unfolded_offset pattern: an
; `add nsw` (no nuw) offset cannot be folded into the store's offset immediate.
define void @store_v16i8_with_unfolded_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_offset:
; CHECK:         .functype store_v16i8_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_with_unfolded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype store_v16i8_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_to_numeric_address(<16 x i8> %v) {
; CHECK-LABEL: store_v16i8_to_numeric_address:
; CHECK:         .functype store_v16i8_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <16 x i8>*
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_to_global_address(<16 x i8> %v) {
; CHECK-LABEL: store_v16i8_to_global_address:
; CHECK:         .functype store_v16i8_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v16i8
; CHECK-NEXT:    # fallthrough-return
  store <16 x i8> %v , <16 x i8>* @gv_v16i8
  ret void
}

; ==============================================================================
; 8 x i16
; ==============================================================================
define <8 x i16> @load_v8i16(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16:
; CHECK:         .functype load_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i16>, <8 x i16>* %p
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16(i16* %p) {
; CHECK-LABEL: load_splat_v8i16:
; CHECK:         .functype load_splat_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v16x8.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i16, i16* %p
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16:
; CHECK:         .functype load_sext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* %p
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16:
; CHECK:         .functype load_zext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* %p
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16:
; CHECK:         .functype load_ext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* %p
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_folded_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_folded_offset:
; CHECK:         .functype load_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_folded_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_folded_offset:
; CHECK:         .functype load_splat_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v16x8.load_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i16* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to i16*
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_folded_offset:
; CHECK:         .functype load_sext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_folded_offset:
; CHECK:         .functype load_zext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_folded_offset:
; CHECK:         .functype load_ext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_folded_gep_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_folded_gep_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_splat_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v16x8.load_splat 2
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, i16* %p, i32 1
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_sext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_zext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_ext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_unfolded_gep_negative_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_unfolded_gep_negative_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -2
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v16x8.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, i16* %p, i32 -1
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_unfolded_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_offset:
; CHECK:         .functype load_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_unfolded_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v16x8.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i16* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to i16*
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_unfolded_gep_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_unfolded_gep_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 2
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v16x8.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i16, i16* %p, i32 1
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_from_numeric_address() {
; CHECK-LABEL: load_v8i16_from_numeric_address:
; CHECK:         .functype load_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_from_numeric_address() {
; CHECK-LABEL: load_splat_v8i16_from_numeric_address:
; CHECK:         .functype load_splat_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v16x8.load_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to i16*
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_sext_v8i16_from_numeric_address:
; CHECK:         .functype load_sext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_s 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_zext_v8i16_from_numeric_address:
; CHECK:         .functype load_zext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_ext_v8i16_from_numeric_address:
; CHECK:         .functype load_ext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

@gv_v8i16 = global <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
define <8 x i16> @load_v8i16_from_global_address() {
; CHECK-LABEL: load_v8i16_from_global_address:
; CHECK:         .functype load_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v8i16
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i16>, <8 x i16>* @gv_v8i16
  ret <8 x i16> %v
}

@gv_i16 = global i16 42
define <8 x i16> @load_splat_v8i16_from_global_address() {
; CHECK-LABEL: load_splat_v8i16_from_global_address:
; CHECK:         .functype load_splat_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v16x8.load_splat gv_i16
; CHECK-NEXT:    # fallthrough-return
  %e = load i16, i16* @gv_i16
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

@gv_v8i8 = global <8 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
define <8 x i16> @load_sext_v8i16_from_global_address() {
; CHECK-LABEL: load_sext_v8i16_from_global_address:
; CHECK:         .functype load_sext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_s gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_from_global_address() {
; CHECK-LABEL: load_zext_v8i16_from_global_address:
; CHECK:         .functype load_zext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_from_global_address() {
; CHECK-LABEL: load_ext_v8i16_from_global_address:
; CHECK:         .functype load_ext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  ret <8 x i8> %v
}


define void @store_v8i16(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16:
; CHECK:         .functype store_v8i16 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <8 x i16> %v , <8 x i16>* %p
  ret void
}

define void @store_v8i16_with_folded_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_folded_offset:
; CHECK:         .functype store_v8i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

define void @store_v8i16_with_folded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_folded_gep_offset:
; CHECK:         .functype store_v8i16_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

define void @store_v8i16_with_unfolded_gep_negative_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

; FIX(review): this test previously duplicated the unfolded_gep_negative_offset
; test above. It now matches the load_v8i16_with_unfolded_offset pattern: an
; `add nsw` (no nuw) offset cannot be folded into the store's offset immediate.
define void @store_v8i16_with_unfolded_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_offset:
; CHECK:         .functype store_v8i16_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

define void @store_v8i16_with_unfolded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype store_v8i16_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

define void @store_v8i16_to_numeric_address(<8 x i16> %v) {
; CHECK-LABEL: store_v8i16_to_numeric_address:
; CHECK:         .functype store_v8i16_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i16>*
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

define void @store_v8i16_to_global_address(<8 x i16> %v) {
; CHECK-LABEL: store_v8i16_to_global_address:
; CHECK:         .functype store_v8i16_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v8i16
; CHECK-NEXT:    # fallthrough-return
  store <8 x i16> %v , <8 x i16>* @gv_v8i16
  ret void
}

; ==============================================================================
; 4 x i32
; ==============================================================================
define <4 x i32> @load_v4i32(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32:
; CHECK:         .functype load_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i32>, <4 x i32>* %p
  ret <4 x i32> %v
}

define <4 x i32> @load_splat_v4i32(i32* %addr) {
; CHECK-LABEL: load_splat_v4i32:
; CHECK:         .functype load_splat_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v32x4.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i32, i32* %addr, align 4
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i32(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32:
; CHECK:         .functype load_sext_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_s 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* %p
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i32(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32:
; CHECK:         .functype load_zext_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* %p
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i16> @load_ext_v4i32(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32:
; CHECK:         .functype load_ext_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* %p
  ret <4 x i16> %v
}

define <4 x i32> @load_v4i32_with_folded_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_folded_offset:
; CHECK:         .functype load_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i32>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i32>*
  %v = load <4 x i32>, <4 x i32>* %s
ret <4 x i32> %v 1091} 1092 1093define <4 x i32> @load_splat_v4i32_with_folded_offset(i32* %p) { 1094; CHECK-LABEL: load_splat_v4i32_with_folded_offset: 1095; CHECK: .functype load_splat_v4i32_with_folded_offset (i32) -> (v128) 1096; CHECK-NEXT: # %bb.0: 1097; CHECK-NEXT: local.get 0 1098; CHECK-NEXT: v32x4.load_splat 16 1099; CHECK-NEXT: # fallthrough-return 1100 %q = ptrtoint i32* %p to i32 1101 %r = add nuw i32 %q, 16 1102 %s = inttoptr i32 %r to i32* 1103 %e = load i32, i32* %s 1104 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1105 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1106 ret <4 x i32> %v2 1107} 1108 1109define <4 x i32> @load_sext_v4i32_with_folded_offset(<4 x i16>* %p) { 1110; CHECK-LABEL: load_sext_v4i32_with_folded_offset: 1111; CHECK: .functype load_sext_v4i32_with_folded_offset (i32) -> (v128) 1112; CHECK-NEXT: # %bb.0: 1113; CHECK-NEXT: local.get 0 1114; CHECK-NEXT: i32x4.load16x4_s 16 1115; CHECK-NEXT: # fallthrough-return 1116 %q = ptrtoint <4 x i16>* %p to i32 1117 %r = add nuw i32 %q, 16 1118 %s = inttoptr i32 %r to <4 x i16>* 1119 %v = load <4 x i16>, <4 x i16>* %s 1120 %v2 = sext <4 x i16> %v to <4 x i32> 1121 ret <4 x i32> %v2 1122} 1123 1124define <4 x i32> @load_zext_v4i32_with_folded_offset(<4 x i16>* %p) { 1125; CHECK-LABEL: load_zext_v4i32_with_folded_offset: 1126; CHECK: .functype load_zext_v4i32_with_folded_offset (i32) -> (v128) 1127; CHECK-NEXT: # %bb.0: 1128; CHECK-NEXT: local.get 0 1129; CHECK-NEXT: i32x4.load16x4_u 16 1130; CHECK-NEXT: # fallthrough-return 1131 %q = ptrtoint <4 x i16>* %p to i32 1132 %r = add nuw i32 %q, 16 1133 %s = inttoptr i32 %r to <4 x i16>* 1134 %v = load <4 x i16>, <4 x i16>* %s 1135 %v2 = zext <4 x i16> %v to <4 x i32> 1136 ret <4 x i32> %v2 1137} 1138 1139define <4 x i16> @load_ext_v4i32_with_folded_offset(<4 x i16>* %p) { 1140; CHECK-LABEL: load_ext_v4i32_with_folded_offset: 1141; CHECK: .functype load_ext_v4i32_with_folded_offset (i32) -> (v128) 1142; 
CHECK-NEXT: # %bb.0: 1143; CHECK-NEXT: local.get 0 1144; CHECK-NEXT: i32x4.load16x4_u 16 1145; CHECK-NEXT: # fallthrough-return 1146 %q = ptrtoint <4 x i16>* %p to i32 1147 %r = add nuw i32 %q, 16 1148 %s = inttoptr i32 %r to <4 x i16>* 1149 %v = load <4 x i16>, <4 x i16>* %s 1150 ret <4 x i16> %v 1151} 1152 1153define <4 x i32> @load_v4i32_with_folded_gep_offset(<4 x i32>* %p) { 1154; CHECK-LABEL: load_v4i32_with_folded_gep_offset: 1155; CHECK: .functype load_v4i32_with_folded_gep_offset (i32) -> (v128) 1156; CHECK-NEXT: # %bb.0: 1157; CHECK-NEXT: local.get 0 1158; CHECK-NEXT: v128.load 16 1159; CHECK-NEXT: # fallthrough-return 1160 %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1 1161 %v = load <4 x i32>, <4 x i32>* %s 1162 ret <4 x i32> %v 1163} 1164 1165define <4 x i32> @load_splat_v4i32_with_folded_gep_offset(i32* %p) { 1166; CHECK-LABEL: load_splat_v4i32_with_folded_gep_offset: 1167; CHECK: .functype load_splat_v4i32_with_folded_gep_offset (i32) -> (v128) 1168; CHECK-NEXT: # %bb.0: 1169; CHECK-NEXT: local.get 0 1170; CHECK-NEXT: v32x4.load_splat 4 1171; CHECK-NEXT: # fallthrough-return 1172 %s = getelementptr inbounds i32, i32* %p, i32 1 1173 %e = load i32, i32* %s 1174 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1175 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1176 ret <4 x i32> %v2 1177} 1178 1179define <4 x i32> @load_sext_v4i32_with_folded_gep_offset(<4 x i16>* %p) { 1180; CHECK-LABEL: load_sext_v4i32_with_folded_gep_offset: 1181; CHECK: .functype load_sext_v4i32_with_folded_gep_offset (i32) -> (v128) 1182; CHECK-NEXT: # %bb.0: 1183; CHECK-NEXT: local.get 0 1184; CHECK-NEXT: i32x4.load16x4_s 8 1185; CHECK-NEXT: # fallthrough-return 1186 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1 1187 %v = load <4 x i16>, <4 x i16>* %s 1188 %v2 = sext <4 x i16> %v to <4 x i32> 1189 ret <4 x i32> %v2 1190} 1191 1192define <4 x i32> @load_zext_v4i32_with_folded_gep_offset(<4 x i16>* %p) { 1193; CHECK-LABEL: 
load_zext_v4i32_with_folded_gep_offset: 1194; CHECK: .functype load_zext_v4i32_with_folded_gep_offset (i32) -> (v128) 1195; CHECK-NEXT: # %bb.0: 1196; CHECK-NEXT: local.get 0 1197; CHECK-NEXT: i32x4.load16x4_u 8 1198; CHECK-NEXT: # fallthrough-return 1199 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1 1200 %v = load <4 x i16>, <4 x i16>* %s 1201 %v2 = zext <4 x i16> %v to <4 x i32> 1202 ret <4 x i32> %v2 1203} 1204 1205define <4 x i16> @load_ext_v4i32_with_folded_gep_offset(<4 x i16>* %p) { 1206; CHECK-LABEL: load_ext_v4i32_with_folded_gep_offset: 1207; CHECK: .functype load_ext_v4i32_with_folded_gep_offset (i32) -> (v128) 1208; CHECK-NEXT: # %bb.0: 1209; CHECK-NEXT: local.get 0 1210; CHECK-NEXT: i32x4.load16x4_u 8 1211; CHECK-NEXT: # fallthrough-return 1212 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1 1213 %v = load <4 x i16>, <4 x i16>* %s 1214 ret <4 x i16> %v 1215} 1216 1217define <4 x i32> @load_v4i32_with_unfolded_gep_negative_offset(<4 x i32>* %p) { 1218; CHECK-LABEL: load_v4i32_with_unfolded_gep_negative_offset: 1219; CHECK: .functype load_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1220; CHECK-NEXT: # %bb.0: 1221; CHECK-NEXT: local.get 0 1222; CHECK-NEXT: i32.const -16 1223; CHECK-NEXT: i32.add 1224; CHECK-NEXT: v128.load 0 1225; CHECK-NEXT: # fallthrough-return 1226 %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1 1227 %v = load <4 x i32>, <4 x i32>* %s 1228 ret <4 x i32> %v 1229} 1230 1231define <4 x i32> @load_splat_v4i32_with_unfolded_gep_negative_offset(i32* %p) { 1232; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_negative_offset: 1233; CHECK: .functype load_splat_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1234; CHECK-NEXT: # %bb.0: 1235; CHECK-NEXT: local.get 0 1236; CHECK-NEXT: i32.const -4 1237; CHECK-NEXT: i32.add 1238; CHECK-NEXT: v32x4.load_splat 0 1239; CHECK-NEXT: # fallthrough-return 1240 %s = getelementptr inbounds i32, i32* %p, i32 -1 1241 %e = load i32, i32* %s 1242 %v1 = 
insertelement <4 x i32> undef, i32 %e, i32 0 1243 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1244 ret <4 x i32> %v2 1245} 1246 1247define <4 x i32> @load_sext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) { 1248; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_negative_offset: 1249; CHECK: .functype load_sext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1250; CHECK-NEXT: # %bb.0: 1251; CHECK-NEXT: local.get 0 1252; CHECK-NEXT: i32.const -8 1253; CHECK-NEXT: i32.add 1254; CHECK-NEXT: i32x4.load16x4_s 0 1255; CHECK-NEXT: # fallthrough-return 1256 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1 1257 %v = load <4 x i16>, <4 x i16>* %s 1258 %v2 = sext <4 x i16> %v to <4 x i32> 1259 ret <4 x i32> %v2 1260} 1261 1262define <4 x i32> @load_zext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) { 1263; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_negative_offset: 1264; CHECK: .functype load_zext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1265; CHECK-NEXT: # %bb.0: 1266; CHECK-NEXT: local.get 0 1267; CHECK-NEXT: i32.const -8 1268; CHECK-NEXT: i32.add 1269; CHECK-NEXT: i32x4.load16x4_u 0 1270; CHECK-NEXT: # fallthrough-return 1271 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1 1272 %v = load <4 x i16>, <4 x i16>* %s 1273 %v2 = zext <4 x i16> %v to <4 x i32> 1274 ret <4 x i32> %v2 1275} 1276 1277define <4 x i16> @load_ext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) { 1278; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_negative_offset: 1279; CHECK: .functype load_ext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1280; CHECK-NEXT: # %bb.0: 1281; CHECK-NEXT: local.get 0 1282; CHECK-NEXT: i32.const -8 1283; CHECK-NEXT: i32.add 1284; CHECK-NEXT: i32x4.load16x4_u 0 1285; CHECK-NEXT: # fallthrough-return 1286 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1 1287 %v = load <4 x i16>, <4 x i16>* %s 1288 ret <4 x i16> %v 1289} 1290 1291define <4 x i32> 
@load_v4i32_with_unfolded_offset(<4 x i32>* %p) { 1292; CHECK-LABEL: load_v4i32_with_unfolded_offset: 1293; CHECK: .functype load_v4i32_with_unfolded_offset (i32) -> (v128) 1294; CHECK-NEXT: # %bb.0: 1295; CHECK-NEXT: local.get 0 1296; CHECK-NEXT: i32.const 16 1297; CHECK-NEXT: i32.add 1298; CHECK-NEXT: v128.load 0 1299; CHECK-NEXT: # fallthrough-return 1300 %q = ptrtoint <4 x i32>* %p to i32 1301 %r = add nsw i32 %q, 16 1302 %s = inttoptr i32 %r to <4 x i32>* 1303 %v = load <4 x i32>, <4 x i32>* %s 1304 ret <4 x i32> %v 1305} 1306 1307define <4 x i32> @load_splat_v4i32_with_unfolded_offset(i32* %p) { 1308; CHECK-LABEL: load_splat_v4i32_with_unfolded_offset: 1309; CHECK: .functype load_splat_v4i32_with_unfolded_offset (i32) -> (v128) 1310; CHECK-NEXT: # %bb.0: 1311; CHECK-NEXT: local.get 0 1312; CHECK-NEXT: i32.const 16 1313; CHECK-NEXT: i32.add 1314; CHECK-NEXT: v32x4.load_splat 0 1315; CHECK-NEXT: # fallthrough-return 1316 %q = ptrtoint i32* %p to i32 1317 %r = add nsw i32 %q, 16 1318 %s = inttoptr i32 %r to i32* 1319 %e = load i32, i32* %s 1320 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1321 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1322 ret <4 x i32> %v2 1323} 1324 1325define <4 x i32> @load_sext_v4i32_with_unfolded_offset(<4 x i16>* %p) { 1326; CHECK-LABEL: load_sext_v4i32_with_unfolded_offset: 1327; CHECK: .functype load_sext_v4i32_with_unfolded_offset (i32) -> (v128) 1328; CHECK-NEXT: # %bb.0: 1329; CHECK-NEXT: local.get 0 1330; CHECK-NEXT: i32.const 16 1331; CHECK-NEXT: i32.add 1332; CHECK-NEXT: i32x4.load16x4_s 0 1333; CHECK-NEXT: # fallthrough-return 1334 %q = ptrtoint <4 x i16>* %p to i32 1335 %r = add nsw i32 %q, 16 1336 %s = inttoptr i32 %r to <4 x i16>* 1337 %v = load <4 x i16>, <4 x i16>* %s 1338 %v2 = sext <4 x i16> %v to <4 x i32> 1339 ret <4 x i32> %v2 1340} 1341 1342define <4 x i32> @load_zext_v4i32_with_unfolded_offset(<4 x i16>* %p) { 1343; CHECK-LABEL: load_zext_v4i32_with_unfolded_offset: 1344; 
CHECK: .functype load_zext_v4i32_with_unfolded_offset (i32) -> (v128) 1345; CHECK-NEXT: # %bb.0: 1346; CHECK-NEXT: local.get 0 1347; CHECK-NEXT: i32.const 16 1348; CHECK-NEXT: i32.add 1349; CHECK-NEXT: i32x4.load16x4_u 0 1350; CHECK-NEXT: # fallthrough-return 1351 %q = ptrtoint <4 x i16>* %p to i32 1352 %r = add nsw i32 %q, 16 1353 %s = inttoptr i32 %r to <4 x i16>* 1354 %v = load <4 x i16>, <4 x i16>* %s 1355 %v2 = zext <4 x i16> %v to <4 x i32> 1356 ret <4 x i32> %v2 1357} 1358 1359define <4 x i16> @load_ext_v4i32_with_unfolded_offset(<4 x i16>* %p) { 1360; CHECK-LABEL: load_ext_v4i32_with_unfolded_offset: 1361; CHECK: .functype load_ext_v4i32_with_unfolded_offset (i32) -> (v128) 1362; CHECK-NEXT: # %bb.0: 1363; CHECK-NEXT: local.get 0 1364; CHECK-NEXT: i32.const 16 1365; CHECK-NEXT: i32.add 1366; CHECK-NEXT: i32x4.load16x4_u 0 1367; CHECK-NEXT: # fallthrough-return 1368 %q = ptrtoint <4 x i16>* %p to i32 1369 %r = add nsw i32 %q, 16 1370 %s = inttoptr i32 %r to <4 x i16>* 1371 %v = load <4 x i16>, <4 x i16>* %s 1372 ret <4 x i16> %v 1373} 1374 1375define <4 x i32> @load_v4i32_with_unfolded_gep_offset(<4 x i32>* %p) { 1376; CHECK-LABEL: load_v4i32_with_unfolded_gep_offset: 1377; CHECK: .functype load_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1378; CHECK-NEXT: # %bb.0: 1379; CHECK-NEXT: local.get 0 1380; CHECK-NEXT: i32.const 16 1381; CHECK-NEXT: i32.add 1382; CHECK-NEXT: v128.load 0 1383; CHECK-NEXT: # fallthrough-return 1384 %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1 1385 %v = load <4 x i32>, <4 x i32>* %s 1386 ret <4 x i32> %v 1387} 1388 1389define <4 x i32> @load_splat_v4i32_with_unfolded_gep_offset(i32* %p) { 1390; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_offset: 1391; CHECK: .functype load_splat_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1392; CHECK-NEXT: # %bb.0: 1393; CHECK-NEXT: local.get 0 1394; CHECK-NEXT: i32.const 4 1395; CHECK-NEXT: i32.add 1396; CHECK-NEXT: v32x4.load_splat 0 1397; CHECK-NEXT: # fallthrough-return 1398 %s = 
getelementptr i32, i32* %p, i32 1 1399 %e = load i32, i32* %s 1400 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1401 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1402 ret <4 x i32> %v2 1403} 1404 1405define <4 x i32> @load_sext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) { 1406; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_offset: 1407; CHECK: .functype load_sext_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1408; CHECK-NEXT: # %bb.0: 1409; CHECK-NEXT: local.get 0 1410; CHECK-NEXT: i32.const 8 1411; CHECK-NEXT: i32.add 1412; CHECK-NEXT: i32x4.load16x4_s 0 1413; CHECK-NEXT: # fallthrough-return 1414 %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1 1415 %v = load <4 x i16>, <4 x i16>* %s 1416 %v2 = sext <4 x i16> %v to <4 x i32> 1417 ret <4 x i32> %v2 1418} 1419 1420define <4 x i32> @load_zext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) { 1421; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_offset: 1422; CHECK: .functype load_zext_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1423; CHECK-NEXT: # %bb.0: 1424; CHECK-NEXT: local.get 0 1425; CHECK-NEXT: i32.const 8 1426; CHECK-NEXT: i32.add 1427; CHECK-NEXT: i32x4.load16x4_u 0 1428; CHECK-NEXT: # fallthrough-return 1429 %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1 1430 %v = load <4 x i16>, <4 x i16>* %s 1431 %v2 = zext <4 x i16> %v to <4 x i32> 1432 ret <4 x i32> %v2 1433} 1434 1435define <4 x i16> @load_ext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) { 1436; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_offset: 1437; CHECK: .functype load_ext_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1438; CHECK-NEXT: # %bb.0: 1439; CHECK-NEXT: local.get 0 1440; CHECK-NEXT: i32.const 8 1441; CHECK-NEXT: i32.add 1442; CHECK-NEXT: i32x4.load16x4_u 0 1443; CHECK-NEXT: # fallthrough-return 1444 %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1 1445 %v = load <4 x i16>, <4 x i16>* %s 1446 ret <4 x i16> %v 1447} 1448 1449define <4 x i32> @load_v4i32_from_numeric_address() { 1450; 
CHECK-LABEL: load_v4i32_from_numeric_address: 1451; CHECK: .functype load_v4i32_from_numeric_address () -> (v128) 1452; CHECK-NEXT: # %bb.0: 1453; CHECK-NEXT: i32.const 0 1454; CHECK-NEXT: v128.load 32 1455; CHECK-NEXT: # fallthrough-return 1456 %s = inttoptr i32 32 to <4 x i32>* 1457 %v = load <4 x i32>, <4 x i32>* %s 1458 ret <4 x i32> %v 1459} 1460 1461define <4 x i32> @load_splat_v4i32_from_numeric_address() { 1462; CHECK-LABEL: load_splat_v4i32_from_numeric_address: 1463; CHECK: .functype load_splat_v4i32_from_numeric_address () -> (v128) 1464; CHECK-NEXT: # %bb.0: 1465; CHECK-NEXT: i32.const 0 1466; CHECK-NEXT: v32x4.load_splat 32 1467; CHECK-NEXT: # fallthrough-return 1468 %s = inttoptr i32 32 to i32* 1469 %e = load i32, i32* %s 1470 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1471 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1472 ret <4 x i32> %v2 1473} 1474 1475define <4 x i32> @load_sext_v4i32_from_numeric_address() { 1476; CHECK-LABEL: load_sext_v4i32_from_numeric_address: 1477; CHECK: .functype load_sext_v4i32_from_numeric_address () -> (v128) 1478; CHECK-NEXT: # %bb.0: 1479; CHECK-NEXT: i32.const 0 1480; CHECK-NEXT: i32x4.load16x4_s 32 1481; CHECK-NEXT: # fallthrough-return 1482 %s = inttoptr i32 32 to <4 x i16>* 1483 %v = load <4 x i16>, <4 x i16>* %s 1484 %v2 = sext <4 x i16> %v to <4 x i32> 1485 ret <4 x i32> %v2 1486} 1487 1488define <4 x i32> @load_zext_v4i32_from_numeric_address() { 1489; CHECK-LABEL: load_zext_v4i32_from_numeric_address: 1490; CHECK: .functype load_zext_v4i32_from_numeric_address () -> (v128) 1491; CHECK-NEXT: # %bb.0: 1492; CHECK-NEXT: i32.const 0 1493; CHECK-NEXT: i32x4.load16x4_u 32 1494; CHECK-NEXT: # fallthrough-return 1495 %s = inttoptr i32 32 to <4 x i16>* 1496 %v = load <4 x i16>, <4 x i16>* %s 1497 %v2 = zext <4 x i16> %v to <4 x i32> 1498 ret <4 x i32> %v2 1499} 1500 1501define <4 x i16> @load_ext_v4i32_from_numeric_address() { 1502; CHECK-LABEL: 
load_ext_v4i32_from_numeric_address: 1503; CHECK: .functype load_ext_v4i32_from_numeric_address () -> (v128) 1504; CHECK-NEXT: # %bb.0: 1505; CHECK-NEXT: i32.const 0 1506; CHECK-NEXT: i32x4.load16x4_u 32 1507; CHECK-NEXT: # fallthrough-return 1508 %s = inttoptr i32 32 to <4 x i16>* 1509 %v = load <4 x i16>, <4 x i16>* %s 1510 ret <4 x i16> %v 1511} 1512 1513@gv_v4i32 = global <4 x i32> <i32 42, i32 42, i32 42, i32 42> 1514define <4 x i32> @load_v4i32_from_global_address() { 1515; CHECK-LABEL: load_v4i32_from_global_address: 1516; CHECK: .functype load_v4i32_from_global_address () -> (v128) 1517; CHECK-NEXT: # %bb.0: 1518; CHECK-NEXT: i32.const 0 1519; CHECK-NEXT: v128.load gv_v4i32 1520; CHECK-NEXT: # fallthrough-return 1521 %v = load <4 x i32>, <4 x i32>* @gv_v4i32 1522 ret <4 x i32> %v 1523} 1524 1525@gv_i32 = global i32 42 1526define <4 x i32> @load_splat_v4i32_from_global_address() { 1527; CHECK-LABEL: load_splat_v4i32_from_global_address: 1528; CHECK: .functype load_splat_v4i32_from_global_address () -> (v128) 1529; CHECK-NEXT: # %bb.0: 1530; CHECK-NEXT: i32.const 0 1531; CHECK-NEXT: v32x4.load_splat gv_i32 1532; CHECK-NEXT: # fallthrough-return 1533 %e = load i32, i32* @gv_i32 1534 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1535 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1536 ret <4 x i32> %v2 1537} 1538 1539@gv_v4i16 = global <4 x i16> <i16 42, i16 42, i16 42, i16 42> 1540define <4 x i32> @load_sext_v4i32_from_global_address() { 1541; CHECK-LABEL: load_sext_v4i32_from_global_address: 1542; CHECK: .functype load_sext_v4i32_from_global_address () -> (v128) 1543; CHECK-NEXT: # %bb.0: 1544; CHECK-NEXT: i32.const 0 1545; CHECK-NEXT: i32x4.load16x4_s gv_v4i16 1546; CHECK-NEXT: # fallthrough-return 1547 %v = load <4 x i16>, <4 x i16>* @gv_v4i16 1548 %v2 = sext <4 x i16> %v to <4 x i32> 1549 ret <4 x i32> %v2 1550} 1551 1552define <4 x i32> @load_zext_v4i32_from_global_address() { 1553; CHECK-LABEL: 
load_zext_v4i32_from_global_address: 1554; CHECK: .functype load_zext_v4i32_from_global_address () -> (v128) 1555; CHECK-NEXT: # %bb.0: 1556; CHECK-NEXT: i32.const 0 1557; CHECK-NEXT: i32x4.load16x4_u gv_v4i16 1558; CHECK-NEXT: # fallthrough-return 1559 %v = load <4 x i16>, <4 x i16>* @gv_v4i16 1560 %v2 = zext <4 x i16> %v to <4 x i32> 1561 ret <4 x i32> %v2 1562} 1563 1564define <4 x i16> @load_ext_v4i32_from_global_address() { 1565; CHECK-LABEL: load_ext_v4i32_from_global_address: 1566; CHECK: .functype load_ext_v4i32_from_global_address () -> (v128) 1567; CHECK-NEXT: # %bb.0: 1568; CHECK-NEXT: i32.const 0 1569; CHECK-NEXT: i32x4.load16x4_u gv_v4i16 1570; CHECK-NEXT: # fallthrough-return 1571 %v = load <4 x i16>, <4 x i16>* @gv_v4i16 1572 ret <4 x i16> %v 1573} 1574 1575define void @store_v4i32(<4 x i32> %v, <4 x i32>* %p) { 1576; CHECK-LABEL: store_v4i32: 1577; CHECK: .functype store_v4i32 (v128, i32) -> () 1578; CHECK-NEXT: # %bb.0: 1579; CHECK-NEXT: local.get 1 1580; CHECK-NEXT: local.get 0 1581; CHECK-NEXT: v128.store 0 1582; CHECK-NEXT: # fallthrough-return 1583 store <4 x i32> %v , <4 x i32>* %p 1584 ret void 1585} 1586 1587define void @store_v4i32_with_folded_offset(<4 x i32> %v, <4 x i32>* %p) { 1588; CHECK-LABEL: store_v4i32_with_folded_offset: 1589; CHECK: .functype store_v4i32_with_folded_offset (v128, i32) -> () 1590; CHECK-NEXT: # %bb.0: 1591; CHECK-NEXT: local.get 1 1592; CHECK-NEXT: local.get 0 1593; CHECK-NEXT: v128.store 16 1594; CHECK-NEXT: # fallthrough-return 1595 %q = ptrtoint <4 x i32>* %p to i32 1596 %r = add nuw i32 %q, 16 1597 %s = inttoptr i32 %r to <4 x i32>* 1598 store <4 x i32> %v , <4 x i32>* %s 1599 ret void 1600} 1601 1602define void @store_v4i32_with_folded_gep_offset(<4 x i32> %v, <4 x i32>* %p) { 1603; CHECK-LABEL: store_v4i32_with_folded_gep_offset: 1604; CHECK: .functype store_v4i32_with_folded_gep_offset (v128, i32) -> () 1605; CHECK-NEXT: # %bb.0: 1606; CHECK-NEXT: local.get 1 1607; CHECK-NEXT: local.get 0 1608; CHECK-NEXT: 
v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
  store <4 x i32> %v , <4 x i32>* %s
  ret void
}

define void @store_v4i32_with_unfolded_gep_negative_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
  store <4 x i32> %v , <4 x i32>* %s
  ret void
}

; FIX(review): this test previously duplicated the negative-gep body above and
; never exercised its named scenario. An `add nsw` (not nuw) offset cannot be
; folded into the store's immediate, so the add must stay explicit, mirroring
; the load_*_with_unfolded_offset tests elsewhere in this file.
define void @store_v4i32_with_unfolded_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_unfolded_offset:
; CHECK:         .functype store_v4i32_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i32>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i32>*
  store <4 x i32> %v , <4 x i32>* %s
  ret void
}

define void @store_v4i32_with_unfolded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_unfolded_gep_offset:
; CHECK:         .functype store_v4i32_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
  store <4 x i32> %v , <4 x i32>* %s
  ret void
}

define void @store_v4i32_to_numeric_address(<4 x i32> %v) {
; CHECK-LABEL: store_v4i32_to_numeric_address:
1662; CHECK: .functype store_v4i32_to_numeric_address (v128) -> () 1663; CHECK-NEXT: # %bb.0: 1664; CHECK-NEXT: i32.const 0 1665; CHECK-NEXT: local.get 0 1666; CHECK-NEXT: v128.store 32 1667; CHECK-NEXT: # fallthrough-return 1668 %s = inttoptr i32 32 to <4 x i32>* 1669 store <4 x i32> %v , <4 x i32>* %s 1670 ret void 1671} 1672 1673define void @store_v4i32_to_global_address(<4 x i32> %v) { 1674; CHECK-LABEL: store_v4i32_to_global_address: 1675; CHECK: .functype store_v4i32_to_global_address (v128) -> () 1676; CHECK-NEXT: # %bb.0: 1677; CHECK-NEXT: i32.const 0 1678; CHECK-NEXT: local.get 0 1679; CHECK-NEXT: v128.store gv_v4i32 1680; CHECK-NEXT: # fallthrough-return 1681 store <4 x i32> %v , <4 x i32>* @gv_v4i32 1682 ret void 1683} 1684 1685; ============================================================================== 1686; 2 x i64 1687; ============================================================================== 1688define <2 x i64> @load_v2i64(<2 x i64>* %p) { 1689; CHECK-LABEL: load_v2i64: 1690; CHECK: .functype load_v2i64 (i32) -> (v128) 1691; CHECK-NEXT: # %bb.0: 1692; CHECK-NEXT: local.get 0 1693; CHECK-NEXT: v128.load 0 1694; CHECK-NEXT: # fallthrough-return 1695 %v = load <2 x i64>, <2 x i64>* %p 1696 ret <2 x i64> %v 1697} 1698 1699define <2 x i64> @load_splat_v2i64(i64* %p) { 1700; CHECK-LABEL: load_splat_v2i64: 1701; CHECK: .functype load_splat_v2i64 (i32) -> (v128) 1702; CHECK-NEXT: # %bb.0: 1703; CHECK-NEXT: local.get 0 1704; CHECK-NEXT: v64x2.load_splat 0 1705; CHECK-NEXT: # fallthrough-return 1706 %e = load i64, i64* %p 1707 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 1708 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 1709 ret <2 x i64> %v2 1710} 1711 1712define <2 x i64> @load_sext_v2i64(<2 x i32>* %p) { 1713; CHECK-LABEL: load_sext_v2i64: 1714; CHECK: .functype load_sext_v2i64 (i32) -> (v128) 1715; CHECK-NEXT: # %bb.0: 1716; CHECK-NEXT: local.get 0 1717; CHECK-NEXT: i64x2.load32x2_s 0 1718; CHECK-NEXT: # 
fallthrough-return 1719 %v = load <2 x i32>, <2 x i32>* %p 1720 %v2 = sext <2 x i32> %v to <2 x i64> 1721 ret <2 x i64> %v2 1722} 1723 1724define <2 x i64> @load_zext_v2i64(<2 x i32>* %p) { 1725; CHECK-LABEL: load_zext_v2i64: 1726; CHECK: .functype load_zext_v2i64 (i32) -> (v128) 1727; CHECK-NEXT: # %bb.0: 1728; CHECK-NEXT: local.get 0 1729; CHECK-NEXT: i64x2.load32x2_u 0 1730; CHECK-NEXT: # fallthrough-return 1731 %v = load <2 x i32>, <2 x i32>* %p 1732 %v2 = zext <2 x i32> %v to <2 x i64> 1733 ret <2 x i64> %v2 1734} 1735 1736define <2 x i32> @load_ext_v2i64(<2 x i32>* %p) { 1737; CHECK-LABEL: load_ext_v2i64: 1738; CHECK: .functype load_ext_v2i64 (i32) -> (v128) 1739; CHECK-NEXT: # %bb.0: 1740; CHECK-NEXT: local.get 0 1741; CHECK-NEXT: i64x2.load32x2_u 0 1742; CHECK-NEXT: # fallthrough-return 1743 %v = load <2 x i32>, <2 x i32>* %p 1744 ret <2 x i32> %v 1745} 1746 1747define <2 x i64> @load_v2i64_with_folded_offset(<2 x i64>* %p) { 1748; CHECK-LABEL: load_v2i64_with_folded_offset: 1749; CHECK: .functype load_v2i64_with_folded_offset (i32) -> (v128) 1750; CHECK-NEXT: # %bb.0: 1751; CHECK-NEXT: local.get 0 1752; CHECK-NEXT: v128.load 16 1753; CHECK-NEXT: # fallthrough-return 1754 %q = ptrtoint <2 x i64>* %p to i32 1755 %r = add nuw i32 %q, 16 1756 %s = inttoptr i32 %r to <2 x i64>* 1757 %v = load <2 x i64>, <2 x i64>* %s 1758 ret <2 x i64> %v 1759} 1760 1761define <2 x i64> @load_splat_v2i64_with_folded_offset(i64* %p) { 1762; CHECK-LABEL: load_splat_v2i64_with_folded_offset: 1763; CHECK: .functype load_splat_v2i64_with_folded_offset (i32) -> (v128) 1764; CHECK-NEXT: # %bb.0: 1765; CHECK-NEXT: local.get 0 1766; CHECK-NEXT: v64x2.load_splat 16 1767; CHECK-NEXT: # fallthrough-return 1768 %q = ptrtoint i64* %p to i32 1769 %r = add nuw i32 %q, 16 1770 %s = inttoptr i32 %r to i64* 1771 %e = load i64, i64* %s 1772 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 1773 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 1774 ret <2 x i64> %v2 
}

define <2 x i64> @load_sext_v2i64_with_folded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_folded_offset:
; CHECK:         .functype load_sext_v2i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_s 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i32>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

define <2 x i64> @load_zext_v2i64_with_folded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_folded_offset:
; CHECK:         .functype load_zext_v2i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i32>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

define <2 x i32> @load_ext_v2i64_with_folded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_folded_offset:
; CHECK:         .functype load_ext_v2i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i32>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  ret <2 x i32> %v
}

define <2 x i64> @load_v2i64_with_folded_gep_offset(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64_with_folded_gep_offset:
; CHECK:         .functype load_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
  %v = load <2 x i64>, <2 x i64>* %s
  ret <2 x i64> %v
}

define <2 x i64> @load_splat_v2i64_with_folded_gep_offset(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_with_folded_gep_offset:
; CHECK:         .functype load_splat_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v64x2.load_splat 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i64, i64* %p, i32 1
  %e = load i64, i64* %s
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}

define <2 x i64> @load_sext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_folded_gep_offset:
; CHECK:         .functype load_sext_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_s 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

define <2 x i64> @load_zext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_folded_gep_offset:
; CHECK:         .functype load_zext_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

define <2 x i32> @load_ext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_folded_gep_offset:
; CHECK:         .functype load_ext_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
  %v = load <2 x i32>, <2 x i32>* %s
  ret <2 x i32> %v
}

define <2 x i64> @load_v2i64_with_unfolded_gep_negative_offset(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
  %v = load <2 x i64>, <2 x i64>* %s
  ret <2 x i64> %v
}

define <2 x i64> @load_splat_v2i64_with_unfolded_gep_negative_offset(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v64x2.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i64, i64* %p, i32 -1
  %e = load i64, i64* %s
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}

define <2 x i64> @load_sext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_sext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

define <2 x i64> @load_zext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_zext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

define <2 x i32> @load_ext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_ext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
  %v = load <2 x i32>, <2 x i32>* %s
  ret <2 x i32> %v
}

define <2 x i64> @load_v2i64_with_unfolded_offset(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64_with_unfolded_offset:
; CHECK:         .functype load_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i64>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i64>*
  %v = load <2 x i64>, <2 x i64>* %s
  ret <2 x i64> %v
}

define <2 x i64> @load_splat_v2i64_with_unfolded_offset(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_with_unfolded_offset:
; CHECK:         .functype load_splat_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v64x2.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i64* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to i64*
  %e = load i64, i64* %s
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}

define <2 x i64> @load_sext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_unfolded_offset:
; CHECK:         .functype load_sext_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_s 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i32>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

define <2 x i64> @load_zext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_unfolded_offset:
; CHECK:         .functype load_zext_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i32>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

define <2 x i32> @load_ext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_unfolded_offset:
; CHECK:         .functype load_ext_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i32>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  ret <2 x i32> %v
}

define <2 x i64> @load_v2i64_with_unfolded_gep_offset(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype load_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
  %v = load <2 x i64>, <2 x i64>* %s
  ret <2 x i64> %v
}

define <2 x i64> @load_splat_v2i64_with_unfolded_gep_offset(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v64x2.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i64, i64* %p, i32 1
  %e = load i64, i64* %s
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}

define <2 x i64> @load_sext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype load_sext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

define <2 x i64> @load_zext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype load_zext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

define <2 x i32> @load_ext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype load_ext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
  %v = load <2 x i32>, <2 x i32>* %s
  ret <2 x i32> %v
}

define <2 x i64> @load_v2i64_from_numeric_address() {
; CHECK-LABEL: load_v2i64_from_numeric_address:
; CHECK:         .functype load_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x i64>*
  %v = load <2 x i64>, <2 x i64>* %s
  ret <2 x i64> %v
}

define <2 x i64> @load_splat_v2i64_from_numeric_address() {
; CHECK-LABEL: load_splat_v2i64_from_numeric_address:
; CHECK:         .functype load_splat_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v64x2.load_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to i64*
  %e = load i64, i64* %s
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}

define <2 x i64> @load_sext_v2i64_from_numeric_address() {
; CHECK-LABEL: load_sext_v2i64_from_numeric_address:
; CHECK:         .functype load_sext_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i64x2.load32x2_s 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

define <2 x i64> @load_zext_v2i64_from_numeric_address() {
; CHECK-LABEL: load_zext_v2i64_from_numeric_address:
; CHECK:         .functype load_zext_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i64x2.load32x2_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

define <2 x i32> @load_ext_v2i64_from_numeric_address() {
; CHECK-LABEL: load_ext_v2i64_from_numeric_address:
; CHECK:         .functype load_ext_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i64x2.load32x2_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  ret <2 x i32> %v
}

@gv_v2i64 = global <2 x i64> <i64 42, i64 42>
define <2 x i64> @load_v2i64_from_global_address() {
; CHECK-LABEL: load_v2i64_from_global_address:
; CHECK:         .functype load_v2i64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v2i64
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i64>, <2 x i64>* @gv_v2i64
  ret <2 x i64> %v
}

@gv_i64 = global i64 42
define <2 x i64> @load_splat_v2i64_from_global_address() {
; CHECK-LABEL: load_splat_v2i64_from_global_address:
; CHECK:         .functype load_splat_v2i64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v64x2.load_splat gv_i64
; CHECK-NEXT:    # fallthrough-return
  %e = load i64, i64* @gv_i64
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}

@gv_v2i32 = global <2 x i32> <i32 42, i32 42>
define <2 x i64> @load_sext_v2i64_from_global_address() {
; CHECK-LABEL: load_sext_v2i64_from_global_address:
; CHECK:         .functype load_sext_v2i64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i64x2.load32x2_s gv_v2i32
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i32>, <2 x i32>* @gv_v2i32
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

define <2 x i64> @load_zext_v2i64_from_global_address() {
; CHECK-LABEL: load_zext_v2i64_from_global_address:
; CHECK:         .functype load_zext_v2i64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i64x2.load32x2_u gv_v2i32
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i32>, <2 x i32>* @gv_v2i32
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

define <2 x i32> @load_ext_v2i64_from_global_address() {
; CHECK-LABEL: load_ext_v2i64_from_global_address:
; CHECK:         .functype load_ext_v2i64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i64x2.load32x2_u gv_v2i32
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i32>, <2 x i32>* @gv_v2i32
  ret <2 x i32> %v
}

define void @store_v2i64(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64:
; CHECK:         .functype store_v2i64 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <2 x i64> %v , <2 x i64>* %p
  ret void
}

define void @store_v2i64_with_folded_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_folded_offset:
; CHECK:         .functype store_v2i64_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i64>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i64>*
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}

define void @store_v2i64_with_folded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_folded_gep_offset:
; CHECK:         .functype store_v2i64_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}

define void @store_v2i64_with_unfolded_gep_negative_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v2i64_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}

; NOTE(review): this test previously duplicated the negative-gep test above
; (gep -1 / i32.const -16) instead of exercising an unfolded nsw constant
; offset; rewritten to match the load_v2i64_with_unfolded_offset pattern.
define void @store_v2i64_with_unfolded_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_unfolded_offset:
; CHECK:         .functype store_v2i64_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i64>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i64>*
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}

define void @store_v2i64_with_unfolded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype store_v2i64_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}

define void @store_v2i64_to_numeric_address(<2 x i64> %v) {
; CHECK-LABEL: store_v2i64_to_numeric_address:
; CHECK:         .functype store_v2i64_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x i64>*
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}

define void @store_v2i64_to_global_address(<2 x i64> %v) {
; CHECK-LABEL: store_v2i64_to_global_address:
; CHECK:         .functype store_v2i64_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v2i64
; CHECK-NEXT:    # fallthrough-return
  store <2 x i64> %v , <2 x i64>* @gv_v2i64
  ret void
}

; ==============================================================================
; 4 x float
; ==============================================================================
define <4 x float> @load_v4f32(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32:
; CHECK:         .functype load_v4f32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x float>, <4 x float>* %p
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32(float* %p) {
; CHECK-LABEL: load_splat_v4f32:
; CHECK:         .functype load_splat_v4f32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v32x4.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load float, float* %p
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define <4 x float> @load_v4f32_with_folded_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_folded_offset:
; CHECK:         .functype load_v4f32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_folded_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_folded_offset:
; CHECK:         .functype load_splat_v4f32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v32x4.load_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint float* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to float*
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define <4 x float> @load_v4f32_with_folded_gep_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_folded_gep_offset:
; CHECK:         .functype load_v4f32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_folded_gep_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_folded_gep_offset:
; CHECK:         .functype load_splat_v4f32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v32x4.load_splat 4
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds float, float* %p, i32 1
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define <4 x float> @load_v4f32_with_unfolded_gep_negative_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_unfolded_gep_negative_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -4
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v32x4.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds float, float* %p, i32 -1
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define <4 x float> @load_v4f32_with_unfolded_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_offset:
; CHECK:         .functype load_v4f32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_unfolded_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_offset:
; CHECK:         .functype load_splat_v4f32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v32x4.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint float* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to float*
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define <4 x float> @load_v4f32_with_unfolded_gep_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_gep_offset:
; CHECK:         .functype load_v4f32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_unfolded_gep_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v4f32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 4
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v32x4.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr float, float* %p, i32 1
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define <4 x float> @load_v4f32_from_numeric_address() {
; CHECK-LABEL: load_v4f32_from_numeric_address:
; CHECK:         .functype load_v4f32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x float>*
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_from_numeric_address() {
; CHECK-LABEL: load_splat_v4f32_from_numeric_address:
; CHECK:         .functype load_splat_v4f32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v32x4.load_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to float*
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

@gv_v4f32 = global <4 x float> <float 42., float 42., float 42., float 42.>
define <4 x float> @load_v4f32_from_global_address() {
; CHECK-LABEL: load_v4f32_from_global_address:
; CHECK:         .functype load_v4f32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v4f32
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x float>, <4 x float>* @gv_v4f32
  ret <4 x float> %v
}

@gv_f32 = global float 42.
define <4 x float> @load_splat_v4f32_from_global_address() {
; CHECK-LABEL: load_splat_v4f32_from_global_address:
; CHECK:         .functype load_splat_v4f32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v32x4.load_splat gv_f32
; CHECK-NEXT:    # fallthrough-return
  %e = load float, float* @gv_f32
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define void @store_v4f32(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32:
; CHECK:         .functype store_v4f32 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <4 x float> %v , <4 x float>* %p
  ret void
}

define void @store_v4f32_with_folded_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_folded_offset:
; CHECK:         .functype store_v4f32_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_with_folded_gep_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_folded_gep_offset:
; CHECK:         .functype store_v4f32_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_with_unfolded_gep_negative_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v4f32_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
  store <4 x float> %v , <4 x float>* %s
  ret void
}

; NOTE(review): this test previously duplicated the negative-gep test above
; (gep -1 / i32.const -16) instead of exercising an unfolded nsw constant
; offset; rewritten to match the load_v4f32_with_unfolded_offset pattern.
define void @store_v4f32_with_unfolded_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_offset:
; CHECK:         .functype store_v4f32_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_with_unfolded_gep_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_gep_offset:
; CHECK:         .functype store_v4f32_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_to_numeric_address(<4 x float> %v) {
; CHECK-LABEL: store_v4f32_to_numeric_address:
; CHECK:         .functype store_v4f32_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_to_global_address(<4 x float> %v) {
; CHECK-LABEL: store_v4f32_to_global_address:
; CHECK:         .functype store_v4f32_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v4f32
; CHECK-NEXT:    # fallthrough-return
  store <4 x float> %v , <4 x float>* @gv_v4f32
  ret void
}

; ==============================================================================
; 2 x double
; ==============================================================================
define <2 x double> @load_v2f64(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64:
; CHECK:         .functype load_v2f64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x double>, <2 x double>* %p
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64(double* %p) {
; CHECK-LABEL: load_splat_v2f64:
; CHECK:         .functype load_splat_v2f64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v64x2.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load double, double* %p
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_v2f64_with_folded_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_folded_offset:
; CHECK:         .functype load_v2f64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_folded_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_folded_offset:
; CHECK:         .functype load_splat_v2f64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v64x2.load_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint double* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to double*
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_v2f64_with_folded_gep_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_folded_gep_offset:
; CHECK:         .functype load_v2f64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_folded_gep_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_folded_gep_offset:
; CHECK:         .functype load_splat_v2f64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v64x2.load_splat 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds double, double* %p, i32 1
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_v2f64_with_unfolded_gep_negative_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_unfolded_gep_negative_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v64x2.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds double, double* %p, i32 -1
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_v2f64_with_unfolded_offset(<2 x double>* %p) {
;
CHECK-LABEL: load_v2f64_with_unfolded_offset: 2807; CHECK: .functype load_v2f64_with_unfolded_offset (i32) -> (v128) 2808; CHECK-NEXT: # %bb.0: 2809; CHECK-NEXT: local.get 0 2810; CHECK-NEXT: i32.const 16 2811; CHECK-NEXT: i32.add 2812; CHECK-NEXT: v128.load 0 2813; CHECK-NEXT: # fallthrough-return 2814 %q = ptrtoint <2 x double>* %p to i32 2815 %r = add nsw i32 %q, 16 2816 %s = inttoptr i32 %r to <2 x double>* 2817 %v = load <2 x double>, <2 x double>* %s 2818 ret <2 x double> %v 2819} 2820 2821define <2 x double> @load_splat_v2f64_with_unfolded_offset(double* %p) { 2822; CHECK-LABEL: load_splat_v2f64_with_unfolded_offset: 2823; CHECK: .functype load_splat_v2f64_with_unfolded_offset (i32) -> (v128) 2824; CHECK-NEXT: # %bb.0: 2825; CHECK-NEXT: local.get 0 2826; CHECK-NEXT: i32.const 16 2827; CHECK-NEXT: i32.add 2828; CHECK-NEXT: v64x2.load_splat 0 2829; CHECK-NEXT: # fallthrough-return 2830 %q = ptrtoint double* %p to i32 2831 %r = add nsw i32 %q, 16 2832 %s = inttoptr i32 %r to double* 2833 %e = load double, double* %s 2834 %v1 = insertelement <2 x double> undef, double %e, i32 0 2835 %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer 2836 ret <2 x double> %v2 2837} 2838 2839define <2 x double> @load_v2f64_with_unfolded_gep_offset(<2 x double>* %p) { 2840; CHECK-LABEL: load_v2f64_with_unfolded_gep_offset: 2841; CHECK: .functype load_v2f64_with_unfolded_gep_offset (i32) -> (v128) 2842; CHECK-NEXT: # %bb.0: 2843; CHECK-NEXT: local.get 0 2844; CHECK-NEXT: i32.const 16 2845; CHECK-NEXT: i32.add 2846; CHECK-NEXT: v128.load 0 2847; CHECK-NEXT: # fallthrough-return 2848 %s = getelementptr <2 x double>, <2 x double>* %p, i32 1 2849 %v = load <2 x double>, <2 x double>* %s 2850 ret <2 x double> %v 2851} 2852 2853define <2 x double> @load_splat_v2f64_with_unfolded_gep_offset(double* %p) { 2854; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_offset: 2855; CHECK: .functype load_splat_v2f64_with_unfolded_gep_offset (i32) -> (v128) 2856; 
CHECK-NEXT: # %bb.0: 2857; CHECK-NEXT: local.get 0 2858; CHECK-NEXT: i32.const 8 2859; CHECK-NEXT: i32.add 2860; CHECK-NEXT: v64x2.load_splat 0 2861; CHECK-NEXT: # fallthrough-return 2862 %s = getelementptr double, double* %p, i32 1 2863 %e = load double, double* %s 2864 %v1 = insertelement <2 x double> undef, double %e, i32 0 2865 %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer 2866 ret <2 x double> %v2 2867} 2868 2869define <2 x double> @load_v2f64_from_numeric_address() { 2870; CHECK-LABEL: load_v2f64_from_numeric_address: 2871; CHECK: .functype load_v2f64_from_numeric_address () -> (v128) 2872; CHECK-NEXT: # %bb.0: 2873; CHECK-NEXT: i32.const 0 2874; CHECK-NEXT: v128.load 32 2875; CHECK-NEXT: # fallthrough-return 2876 %s = inttoptr i32 32 to <2 x double>* 2877 %v = load <2 x double>, <2 x double>* %s 2878 ret <2 x double> %v 2879} 2880 2881define <2 x double> @load_splat_v2f64_from_numeric_address() { 2882; CHECK-LABEL: load_splat_v2f64_from_numeric_address: 2883; CHECK: .functype load_splat_v2f64_from_numeric_address () -> (v128) 2884; CHECK-NEXT: # %bb.0: 2885; CHECK-NEXT: i32.const 0 2886; CHECK-NEXT: v64x2.load_splat 32 2887; CHECK-NEXT: # fallthrough-return 2888 %s = inttoptr i32 32 to double* 2889 %e = load double, double* %s 2890 %v1 = insertelement <2 x double> undef, double %e, i32 0 2891 %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer 2892 ret <2 x double> %v2 2893} 2894 2895@gv_v2f64 = global <2 x double> <double 42., double 42.> 2896define <2 x double> @load_v2f64_from_global_address() { 2897; CHECK-LABEL: load_v2f64_from_global_address: 2898; CHECK: .functype load_v2f64_from_global_address () -> (v128) 2899; CHECK-NEXT: # %bb.0: 2900; CHECK-NEXT: i32.const 0 2901; CHECK-NEXT: v128.load gv_v2f64 2902; CHECK-NEXT: # fallthrough-return 2903 %v = load <2 x double>, <2 x double>* @gv_v2f64 2904 ret <2 x double> %v 2905} 2906 2907@gv_f64 = global double 42. 
define <2 x double> @load_splat_v2f64_from_global_address() {
; CHECK-LABEL: load_splat_v2f64_from_global_address:
; CHECK:         .functype load_splat_v2f64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v64x2.load_splat gv_f64
; CHECK-NEXT:    # fallthrough-return
 %e = load double, double* @gv_f64
 %v1 = insertelement <2 x double> undef, double %e, i32 0
 %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
 ret <2 x double> %v2
}

define void @store_v2f64(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64:
; CHECK:         .functype store_v2f64 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
 store <2 x double> %v , <2 x double>* %p
 ret void
}

define void @store_v2f64_with_folded_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_folded_offset:
; CHECK:         .functype store_v2f64_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
 %q = ptrtoint <2 x double>* %p to i32
 %r = add nuw i32 %q, 16
 %s = inttoptr i32 %r to <2 x double>*
 store <2 x double> %v , <2 x double>* %s
 ret void
}

define void @store_v2f64_with_folded_gep_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_folded_gep_offset:
; CHECK:         .functype store_v2f64_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
 %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
 store <2 x double> %v , <2 x double>* %s
 ret void
}

define void @store_v2f64_with_unfolded_gep_negative_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v2f64_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
 %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
 store <2 x double> %v , <2 x double>* %s
 ret void
}

; NOTE(review): this test previously duplicated the body and CHECK lines of
; store_v2f64_with_unfolded_gep_negative_offset above, so the "unfolded
; offset" case was never actually covered for v2f64 stores. It now matches
; the pattern of every other *_with_unfolded_offset test in this file:
; `add nsw` does not imply no unsigned wrap, so the +16 cannot be folded
; into the store's offset immediate and must stay an explicit i32.add.
define void @store_v2f64_with_unfolded_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_offset:
; CHECK:         .functype store_v2f64_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
 %q = ptrtoint <2 x double>* %p to i32
 %r = add nsw i32 %q, 16
 %s = inttoptr i32 %r to <2 x double>*
 store <2 x double> %v , <2 x double>* %s
 ret void
}

define void @store_v2f64_with_unfolded_gep_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype store_v2f64_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
 %s = getelementptr <2 x double>, <2 x double>* %p, i32 1
 store <2 x double> %v , <2 x double>* %s
 ret void
}

define void @store_v2f64_to_numeric_address(<2 x double> %v) {
; CHECK-LABEL: store_v2f64_to_numeric_address:
; CHECK:         .functype store_v2f64_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
 %s = inttoptr i32 32 to <2 x double>*
 store <2 x double> %v , <2 x double>* %s
 ret void
}

define void @store_v2f64_to_global_address(<2 x double> %v) {
; CHECK-LABEL: store_v2f64_to_global_address:
; CHECK:         .functype store_v2f64_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v2f64
; CHECK-NEXT:    # fallthrough-return
 store <2 x double> %v , <2 x double>* @gv_v2f64
 ret void
}