; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s

; Test SIMD loads and stores

target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"

; ==============================================================================
; 16 x i8
; ==============================================================================
define <16 x i8> @load_v16i8(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8:
; CHECK:         .functype load_v16i8 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <16 x i8>, <16 x i8>* %p
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8(i8* %p) {
; CHECK-LABEL: load_splat_v16i8:
; CHECK:         .functype load_splat_v16i8 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load8_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i8, i8* %p
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_with_folded_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_folded_offset:
; CHECK:         .functype load_v16i8_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_folded_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_folded_offset:
; CHECK:         .functype load_splat_v16i8_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load8_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i8* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to i8*
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_with_folded_gep_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_folded_gep_offset:
; CHECK:         .functype load_v16i8_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_folded_gep_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_folded_gep_offset:
; CHECK:         .functype load_splat_v16i8_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load8_splat 1
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, i8* %p, i32 1
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_with_unfolded_gep_negative_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_unfolded_gep_negative_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -1
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load8_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, i8* %p, i32 -1
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_with_unfolded_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_offset:
; CHECK:         .functype load_v16i8_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_unfolded_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load8_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i8* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to i8*
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_with_unfolded_gep_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype load_v16i8_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_unfolded_gep_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load8_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i8, i8* %p, i32 1
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_from_numeric_address() {
; CHECK-LABEL: load_v16i8_from_numeric_address:
; CHECK:         .functype load_v16i8_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <16 x i8>*
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_from_numeric_address() {
; CHECK-LABEL: load_splat_v16i8_from_numeric_address:
; CHECK:         .functype load_splat_v16i8_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load8_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to i8*
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

@gv_v16i8 = global <16 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
define <16 x i8> @load_v16i8_from_global_address() {
; CHECK-LABEL: load_v16i8_from_global_address:
; CHECK:         .functype load_v16i8_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v16i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <16 x i8>, <16 x i8>* @gv_v16i8
  ret <16 x i8> %v
}

@gv_i8 = global i8 42
define <16 x i8> @load_splat_v16i8_from_global_address() {
; CHECK-LABEL: load_splat_v16i8_from_global_address:
; CHECK:         .functype load_splat_v16i8_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load8_splat gv_i8
; CHECK-NEXT:    # fallthrough-return
  %e = load i8, i8* @gv_i8
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define void @store_v16i8(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8:
; CHECK:         .functype store_v16i8 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <16 x i8> %v , <16 x i8>* %p
  ret void
}

define void @store_v16i8_with_folded_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_folded_offset:
; CHECK:         .functype store_v16i8_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_with_folded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_folded_gep_offset:
; CHECK:         .functype store_v16i8_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_with_unfolded_gep_negative_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v16i8_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_with_unfolded_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_offset:
; CHECK:         .functype store_v16i8_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_with_unfolded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype store_v16i8_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_to_numeric_address(<16 x i8> %v) {
; CHECK-LABEL: store_v16i8_to_numeric_address:
; CHECK:         .functype store_v16i8_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <16 x i8>*
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_to_global_address(<16 x i8> %v) {
; CHECK-LABEL: store_v16i8_to_global_address:
; CHECK:         .functype store_v16i8_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v16i8
; CHECK-NEXT:    # fallthrough-return
  store <16 x i8> %v , <16 x i8>* @gv_v16i8
  ret void
}

; ==============================================================================
; 8 x i16
; ==============================================================================
define <8 x i16> @load_v8i16(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16:
; CHECK:         .functype load_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i16>, <8 x i16>* %p
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16(i16* %p) {
; CHECK-LABEL: load_splat_v8i16:
; CHECK:         .functype load_splat_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load16_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i16, i16* %p
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16:
; CHECK:         .functype load_sext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* %p
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16:
; CHECK:         .functype load_zext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* %p
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16:
; CHECK:         .functype load_ext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* %p
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_folded_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_folded_offset:
; CHECK:         .functype load_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_folded_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_folded_offset:
; CHECK:         .functype load_splat_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load16_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i16* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to i16*
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_folded_offset:
; CHECK:         .functype load_sext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_folded_offset:
; CHECK:         .functype load_zext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_folded_offset:
; CHECK:         .functype load_ext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_folded_gep_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_folded_gep_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_splat_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load16_splat 2
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, i16* %p, i32 1
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_sext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_zext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_ext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_unfolded_gep_negative_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_unfolded_gep_negative_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -2
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load16_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, i16* %p, i32 -1
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_unfolded_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_offset:
; CHECK:         .functype load_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_unfolded_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load16_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i16* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to i16*
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_unfolded_gep_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_unfolded_gep_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 2
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load16_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i16, i16* %p, i32 1
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_from_numeric_address() {
; CHECK-LABEL: load_v8i16_from_numeric_address:
; CHECK:         .functype load_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_from_numeric_address() {
; CHECK-LABEL: load_splat_v8i16_from_numeric_address:
; CHECK:         .functype load_splat_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load16_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to i16*
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_sext_v8i16_from_numeric_address:
; CHECK:         .functype load_sext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_s 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_zext_v8i16_from_numeric_address:
; CHECK:         .functype load_zext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_ext_v8i16_from_numeric_address:
; CHECK:         .functype load_ext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

@gv_v8i16 = global <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
define <8 x i16> @load_v8i16_from_global_address() {
; CHECK-LABEL: load_v8i16_from_global_address:
; CHECK:         .functype load_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v8i16
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i16>, <8 x i16>* @gv_v8i16
  ret <8 x i16> %v
}

@gv_i16 = global i16 42
define <8 x i16> @load_splat_v8i16_from_global_address() {
; CHECK-LABEL: load_splat_v8i16_from_global_address:
; CHECK:         .functype load_splat_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load16_splat gv_i16
; CHECK-NEXT:    # fallthrough-return
  %e = load i16, i16* @gv_i16
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

@gv_v8i8 = global <8 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
define <8 x i16> @load_sext_v8i16_from_global_address() {
; CHECK-LABEL: load_sext_v8i16_from_global_address:
; CHECK:         .functype load_sext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_s gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_from_global_address() {
; CHECK-LABEL: load_zext_v8i16_from_global_address:
; CHECK:         .functype load_zext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_from_global_address() {
; CHECK-LABEL: load_ext_v8i16_from_global_address:
; CHECK:         .functype load_ext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  ret <8 x i8> %v
}


define void @store_v8i16(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16:
; CHECK:         .functype store_v8i16 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <8 x i16> %v , <8 x i16>* %p
  ret void
}

define void @store_narrowing_v8i16(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16:
; CHECK:         .functype store_narrowing_v8i16 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.narrow_i16x8_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store 0
; CHECK-NEXT:    # fallthrough-return
  store <8 x i8> %v, <8 x i8>* %p
  ret void
}

define void @store_v8i16_with_folded_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_folded_offset:
; CHECK:         .functype store_v8i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

define void @store_narrowing_v8i16_with_folded_offset(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_folded_offset:
; CHECK:         .functype store_narrowing_v8i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.narrow_i16x8_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  store <8 x i8> %v , <8 x i8>* %s
  ret void
}

define void @store_v8i16_with_folded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_folded_gep_offset:
; CHECK:         .functype store_v8i16_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store
16 980; CHECK-NEXT: # fallthrough-return 981 %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1 982 store <8 x i16> %v , <8 x i16>* %s 983 ret void 984} 985 986define void @store_narrowing_v8i16_with_folded_gep_offset(<8 x i8> %v, <8 x i8>* %p) { 987; CHECK-LABEL: store_narrowing_v8i16_with_folded_gep_offset: 988; CHECK: .functype store_narrowing_v8i16_with_folded_gep_offset (v128, i32) -> () 989; CHECK-NEXT: # %bb.0: 990; CHECK-NEXT: local.get 1 991; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255 992; CHECK-NEXT: local.get 0 993; CHECK-NEXT: v128.and 994; CHECK-NEXT: local.get 0 995; CHECK-NEXT: i8x16.narrow_i16x8_u 996; CHECK-NEXT: i64x2.extract_lane 0 997; CHECK-NEXT: i64.store 8 998; CHECK-NEXT: # fallthrough-return 999 %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1 1000 store <8 x i8> %v , <8 x i8>* %s 1001 ret void 1002} 1003 1004define void @store_v8i16_with_unfolded_gep_negative_offset(<8 x i16> %v, <8 x i16>* %p) { 1005; CHECK-LABEL: store_v8i16_with_unfolded_gep_negative_offset: 1006; CHECK: .functype store_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> () 1007; CHECK-NEXT: # %bb.0: 1008; CHECK-NEXT: local.get 1 1009; CHECK-NEXT: i32.const -16 1010; CHECK-NEXT: i32.add 1011; CHECK-NEXT: local.get 0 1012; CHECK-NEXT: v128.store 0 1013; CHECK-NEXT: # fallthrough-return 1014 %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1 1015 store <8 x i16> %v , <8 x i16>* %s 1016 ret void 1017} 1018 1019define void @store_narrowing_v8i16_with_unfolded_gep_negative_offset(<8 x i8> %v, <8 x i8>* %p) { 1020; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_negative_offset: 1021; CHECK: .functype store_narrowing_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> () 1022; CHECK-NEXT: # %bb.0: 1023; CHECK-NEXT: local.get 1 1024; CHECK-NEXT: i32.const -8 1025; CHECK-NEXT: i32.add 1026; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255 1027; CHECK-NEXT: local.get 0 1028; CHECK-NEXT: v128.and 1029; 
CHECK-NEXT: local.get 0 1030; CHECK-NEXT: i8x16.narrow_i16x8_u 1031; CHECK-NEXT: i64x2.extract_lane 0 1032; CHECK-NEXT: i64.store 0 1033; CHECK-NEXT: # fallthrough-return 1034 %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1 1035 store <8 x i8> %v , <8 x i8>* %s 1036 ret void 1037} 1038 1039define void @store_v8i16_with_unfolded_offset(<8 x i16> %v, <8 x i16>* %p) { 1040; CHECK-LABEL: store_v8i16_with_unfolded_offset: 1041; CHECK: .functype store_v8i16_with_unfolded_offset (v128, i32) -> () 1042; CHECK-NEXT: # %bb.0: 1043; CHECK-NEXT: local.get 1 1044; CHECK-NEXT: i32.const 16 1045; CHECK-NEXT: i32.add 1046; CHECK-NEXT: local.get 0 1047; CHECK-NEXT: v128.store 0 1048; CHECK-NEXT: # fallthrough-return 1049 %q = ptrtoint <8 x i16>* %p to i32 1050 %r = add nsw i32 %q, 16 1051 %s = inttoptr i32 %r to <8 x i16>* 1052 store <8 x i16> %v , <8 x i16>* %s 1053 ret void 1054} 1055 1056define void @store_narrowing_v8i16_with_unfolded_offset(<8 x i8> %v, <8 x i8>* %p) { 1057; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_offset: 1058; CHECK: .functype store_narrowing_v8i16_with_unfolded_offset (v128, i32) -> () 1059; CHECK-NEXT: # %bb.0: 1060; CHECK-NEXT: local.get 1 1061; CHECK-NEXT: i32.const 16 1062; CHECK-NEXT: i32.add 1063; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255 1064; CHECK-NEXT: local.get 0 1065; CHECK-NEXT: v128.and 1066; CHECK-NEXT: local.get 0 1067; CHECK-NEXT: i8x16.narrow_i16x8_u 1068; CHECK-NEXT: i64x2.extract_lane 0 1069; CHECK-NEXT: i64.store 0 1070; CHECK-NEXT: # fallthrough-return 1071 %q = ptrtoint <8 x i8>* %p to i32 1072 %r = add nsw i32 %q, 16 1073 %s = inttoptr i32 %r to <8 x i8>* 1074 store <8 x i8> %v , <8 x i8>* %s 1075 ret void 1076} 1077 1078define void @store_v8i16_with_unfolded_gep_offset(<8 x i16> %v, <8 x i16>* %p) { 1079; CHECK-LABEL: store_v8i16_with_unfolded_gep_offset: 1080; CHECK: .functype store_v8i16_with_unfolded_gep_offset (v128, i32) -> () 1081; CHECK-NEXT: # %bb.0: 1082; CHECK-NEXT: local.get 1 1083; 
CHECK-NEXT: i32.const 16 1084; CHECK-NEXT: i32.add 1085; CHECK-NEXT: local.get 0 1086; CHECK-NEXT: v128.store 0 1087; CHECK-NEXT: # fallthrough-return 1088 %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1 1089 store <8 x i16> %v , <8 x i16>* %s 1090 ret void 1091} 1092 1093define void @store_narrowing_v8i16_with_unfolded_gep_offset(<8 x i8> %v, <8 x i8>* %p) { 1094; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_offset: 1095; CHECK: .functype store_narrowing_v8i16_with_unfolded_gep_offset (v128, i32) -> () 1096; CHECK-NEXT: # %bb.0: 1097; CHECK-NEXT: local.get 1 1098; CHECK-NEXT: i32.const 8 1099; CHECK-NEXT: i32.add 1100; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255 1101; CHECK-NEXT: local.get 0 1102; CHECK-NEXT: v128.and 1103; CHECK-NEXT: local.get 0 1104; CHECK-NEXT: i8x16.narrow_i16x8_u 1105; CHECK-NEXT: i64x2.extract_lane 0 1106; CHECK-NEXT: i64.store 0 1107; CHECK-NEXT: # fallthrough-return 1108 %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1 1109 store <8 x i8> %v , <8 x i8>* %s 1110 ret void 1111} 1112 1113define void @store_v8i16_to_numeric_address(<8 x i16> %v) { 1114; CHECK-LABEL: store_v8i16_to_numeric_address: 1115; CHECK: .functype store_v8i16_to_numeric_address (v128) -> () 1116; CHECK-NEXT: # %bb.0: 1117; CHECK-NEXT: i32.const 0 1118; CHECK-NEXT: local.get 0 1119; CHECK-NEXT: v128.store 32 1120; CHECK-NEXT: # fallthrough-return 1121 %s = inttoptr i32 32 to <8 x i16>* 1122 store <8 x i16> %v , <8 x i16>* %s 1123 ret void 1124} 1125 1126define void @store_narrowing_v8i16_to_numeric_address(<8 x i8> %v, <8 x i8>* %p) { 1127; CHECK-LABEL: store_narrowing_v8i16_to_numeric_address: 1128; CHECK: .functype store_narrowing_v8i16_to_numeric_address (v128, i32) -> () 1129; CHECK-NEXT: # %bb.0: 1130; CHECK-NEXT: i32.const 0 1131; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255 1132; CHECK-NEXT: local.get 0 1133; CHECK-NEXT: v128.and 1134; CHECK-NEXT: local.get 0 1135; CHECK-NEXT: i8x16.narrow_i16x8_u 1136; CHECK-NEXT: 
i64x2.extract_lane 0 1137; CHECK-NEXT: i64.store 32 1138; CHECK-NEXT: # fallthrough-return 1139 %s = inttoptr i32 32 to <8 x i8>* 1140 store <8 x i8> %v , <8 x i8>* %s 1141 ret void 1142} 1143 1144define void @store_v8i16_to_global_address(<8 x i16> %v) { 1145; CHECK-LABEL: store_v8i16_to_global_address: 1146; CHECK: .functype store_v8i16_to_global_address (v128) -> () 1147; CHECK-NEXT: # %bb.0: 1148; CHECK-NEXT: i32.const 0 1149; CHECK-NEXT: local.get 0 1150; CHECK-NEXT: v128.store gv_v8i16 1151; CHECK-NEXT: # fallthrough-return 1152 store <8 x i16> %v , <8 x i16>* @gv_v8i16 1153 ret void 1154} 1155 1156define void @store_narrowing_v8i16_to_global_address(<8 x i8> %v) { 1157; CHECK-LABEL: store_narrowing_v8i16_to_global_address: 1158; CHECK: .functype store_narrowing_v8i16_to_global_address (v128) -> () 1159; CHECK-NEXT: # %bb.0: 1160; CHECK-NEXT: i32.const 0 1161; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255 1162; CHECK-NEXT: local.get 0 1163; CHECK-NEXT: v128.and 1164; CHECK-NEXT: local.get 0 1165; CHECK-NEXT: i8x16.narrow_i16x8_u 1166; CHECK-NEXT: i64x2.extract_lane 0 1167; CHECK-NEXT: i64.store gv_v8i8 1168; CHECK-NEXT: # fallthrough-return 1169 store <8 x i8> %v , <8 x i8>* @gv_v8i8 1170 ret void 1171} 1172 1173; ============================================================================== 1174; 4 x i32 1175; ============================================================================== 1176define <4 x i32> @load_v4i32(<4 x i32>* %p) { 1177; CHECK-LABEL: load_v4i32: 1178; CHECK: .functype load_v4i32 (i32) -> (v128) 1179; CHECK-NEXT: # %bb.0: 1180; CHECK-NEXT: local.get 0 1181; CHECK-NEXT: v128.load 0 1182; CHECK-NEXT: # fallthrough-return 1183 %v = load <4 x i32>, <4 x i32>* %p 1184 ret <4 x i32> %v 1185} 1186 1187define <4 x i32> @load_splat_v4i32(i32* %addr) { 1188; CHECK-LABEL: load_splat_v4i32: 1189; CHECK: .functype load_splat_v4i32 (i32) -> (v128) 1190; CHECK-NEXT: # %bb.0: 1191; CHECK-NEXT: local.get 0 1192; CHECK-NEXT: 
v128.load32_splat 0 1193; CHECK-NEXT: # fallthrough-return 1194 %e = load i32, i32* %addr, align 4 1195 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1196 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1197 ret <4 x i32> %v2 1198} 1199 1200define <4 x i32> @load_sext_v4i32(<4 x i16>* %p) { 1201; CHECK-LABEL: load_sext_v4i32: 1202; CHECK: .functype load_sext_v4i32 (i32) -> (v128) 1203; CHECK-NEXT: # %bb.0: 1204; CHECK-NEXT: local.get 0 1205; CHECK-NEXT: i32x4.load16x4_s 0 1206; CHECK-NEXT: # fallthrough-return 1207 %v = load <4 x i16>, <4 x i16>* %p 1208 %v2 = sext <4 x i16> %v to <4 x i32> 1209 ret <4 x i32> %v2 1210} 1211 1212define <4 x i32> @load_zext_v4i32(<4 x i16>* %p) { 1213; CHECK-LABEL: load_zext_v4i32: 1214; CHECK: .functype load_zext_v4i32 (i32) -> (v128) 1215; CHECK-NEXT: # %bb.0: 1216; CHECK-NEXT: local.get 0 1217; CHECK-NEXT: i32x4.load16x4_u 0 1218; CHECK-NEXT: # fallthrough-return 1219 %v = load <4 x i16>, <4 x i16>* %p 1220 %v2 = zext <4 x i16> %v to <4 x i32> 1221 ret <4 x i32> %v2 1222} 1223 1224define <4 x i16> @load_ext_v4i32(<4 x i16>* %p) { 1225; CHECK-LABEL: load_ext_v4i32: 1226; CHECK: .functype load_ext_v4i32 (i32) -> (v128) 1227; CHECK-NEXT: # %bb.0: 1228; CHECK-NEXT: local.get 0 1229; CHECK-NEXT: i32x4.load16x4_u 0 1230; CHECK-NEXT: # fallthrough-return 1231 %v = load <4 x i16>, <4 x i16>* %p 1232 ret <4 x i16> %v 1233} 1234 1235define <4 x i32> @load_v4i32_with_folded_offset(<4 x i32>* %p) { 1236; CHECK-LABEL: load_v4i32_with_folded_offset: 1237; CHECK: .functype load_v4i32_with_folded_offset (i32) -> (v128) 1238; CHECK-NEXT: # %bb.0: 1239; CHECK-NEXT: local.get 0 1240; CHECK-NEXT: v128.load 16 1241; CHECK-NEXT: # fallthrough-return 1242 %q = ptrtoint <4 x i32>* %p to i32 1243 %r = add nuw i32 %q, 16 1244 %s = inttoptr i32 %r to <4 x i32>* 1245 %v = load <4 x i32>, <4 x i32>* %s 1246 ret <4 x i32> %v 1247} 1248 1249define <4 x i32> @load_splat_v4i32_with_folded_offset(i32* %p) { 1250; CHECK-LABEL: 
load_splat_v4i32_with_folded_offset: 1251; CHECK: .functype load_splat_v4i32_with_folded_offset (i32) -> (v128) 1252; CHECK-NEXT: # %bb.0: 1253; CHECK-NEXT: local.get 0 1254; CHECK-NEXT: v128.load32_splat 16 1255; CHECK-NEXT: # fallthrough-return 1256 %q = ptrtoint i32* %p to i32 1257 %r = add nuw i32 %q, 16 1258 %s = inttoptr i32 %r to i32* 1259 %e = load i32, i32* %s 1260 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1261 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1262 ret <4 x i32> %v2 1263} 1264 1265define <4 x i32> @load_sext_v4i32_with_folded_offset(<4 x i16>* %p) { 1266; CHECK-LABEL: load_sext_v4i32_with_folded_offset: 1267; CHECK: .functype load_sext_v4i32_with_folded_offset (i32) -> (v128) 1268; CHECK-NEXT: # %bb.0: 1269; CHECK-NEXT: local.get 0 1270; CHECK-NEXT: i32x4.load16x4_s 16 1271; CHECK-NEXT: # fallthrough-return 1272 %q = ptrtoint <4 x i16>* %p to i32 1273 %r = add nuw i32 %q, 16 1274 %s = inttoptr i32 %r to <4 x i16>* 1275 %v = load <4 x i16>, <4 x i16>* %s 1276 %v2 = sext <4 x i16> %v to <4 x i32> 1277 ret <4 x i32> %v2 1278} 1279 1280define <4 x i32> @load_zext_v4i32_with_folded_offset(<4 x i16>* %p) { 1281; CHECK-LABEL: load_zext_v4i32_with_folded_offset: 1282; CHECK: .functype load_zext_v4i32_with_folded_offset (i32) -> (v128) 1283; CHECK-NEXT: # %bb.0: 1284; CHECK-NEXT: local.get 0 1285; CHECK-NEXT: i32x4.load16x4_u 16 1286; CHECK-NEXT: # fallthrough-return 1287 %q = ptrtoint <4 x i16>* %p to i32 1288 %r = add nuw i32 %q, 16 1289 %s = inttoptr i32 %r to <4 x i16>* 1290 %v = load <4 x i16>, <4 x i16>* %s 1291 %v2 = zext <4 x i16> %v to <4 x i32> 1292 ret <4 x i32> %v2 1293} 1294 1295define <4 x i16> @load_ext_v4i32_with_folded_offset(<4 x i16>* %p) { 1296; CHECK-LABEL: load_ext_v4i32_with_folded_offset: 1297; CHECK: .functype load_ext_v4i32_with_folded_offset (i32) -> (v128) 1298; CHECK-NEXT: # %bb.0: 1299; CHECK-NEXT: local.get 0 1300; CHECK-NEXT: i32x4.load16x4_u 16 1301; CHECK-NEXT: # 
fallthrough-return 1302 %q = ptrtoint <4 x i16>* %p to i32 1303 %r = add nuw i32 %q, 16 1304 %s = inttoptr i32 %r to <4 x i16>* 1305 %v = load <4 x i16>, <4 x i16>* %s 1306 ret <4 x i16> %v 1307} 1308 1309define <4 x i32> @load_v4i32_with_folded_gep_offset(<4 x i32>* %p) { 1310; CHECK-LABEL: load_v4i32_with_folded_gep_offset: 1311; CHECK: .functype load_v4i32_with_folded_gep_offset (i32) -> (v128) 1312; CHECK-NEXT: # %bb.0: 1313; CHECK-NEXT: local.get 0 1314; CHECK-NEXT: v128.load 16 1315; CHECK-NEXT: # fallthrough-return 1316 %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1 1317 %v = load <4 x i32>, <4 x i32>* %s 1318 ret <4 x i32> %v 1319} 1320 1321define <4 x i32> @load_splat_v4i32_with_folded_gep_offset(i32* %p) { 1322; CHECK-LABEL: load_splat_v4i32_with_folded_gep_offset: 1323; CHECK: .functype load_splat_v4i32_with_folded_gep_offset (i32) -> (v128) 1324; CHECK-NEXT: # %bb.0: 1325; CHECK-NEXT: local.get 0 1326; CHECK-NEXT: v128.load32_splat 4 1327; CHECK-NEXT: # fallthrough-return 1328 %s = getelementptr inbounds i32, i32* %p, i32 1 1329 %e = load i32, i32* %s 1330 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1331 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1332 ret <4 x i32> %v2 1333} 1334 1335define <4 x i32> @load_sext_v4i32_with_folded_gep_offset(<4 x i16>* %p) { 1336; CHECK-LABEL: load_sext_v4i32_with_folded_gep_offset: 1337; CHECK: .functype load_sext_v4i32_with_folded_gep_offset (i32) -> (v128) 1338; CHECK-NEXT: # %bb.0: 1339; CHECK-NEXT: local.get 0 1340; CHECK-NEXT: i32x4.load16x4_s 8 1341; CHECK-NEXT: # fallthrough-return 1342 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1 1343 %v = load <4 x i16>, <4 x i16>* %s 1344 %v2 = sext <4 x i16> %v to <4 x i32> 1345 ret <4 x i32> %v2 1346} 1347 1348define <4 x i32> @load_zext_v4i32_with_folded_gep_offset(<4 x i16>* %p) { 1349; CHECK-LABEL: load_zext_v4i32_with_folded_gep_offset: 1350; CHECK: .functype load_zext_v4i32_with_folded_gep_offset (i32) 
-> (v128) 1351; CHECK-NEXT: # %bb.0: 1352; CHECK-NEXT: local.get 0 1353; CHECK-NEXT: i32x4.load16x4_u 8 1354; CHECK-NEXT: # fallthrough-return 1355 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1 1356 %v = load <4 x i16>, <4 x i16>* %s 1357 %v2 = zext <4 x i16> %v to <4 x i32> 1358 ret <4 x i32> %v2 1359} 1360 1361define <4 x i16> @load_ext_v4i32_with_folded_gep_offset(<4 x i16>* %p) { 1362; CHECK-LABEL: load_ext_v4i32_with_folded_gep_offset: 1363; CHECK: .functype load_ext_v4i32_with_folded_gep_offset (i32) -> (v128) 1364; CHECK-NEXT: # %bb.0: 1365; CHECK-NEXT: local.get 0 1366; CHECK-NEXT: i32x4.load16x4_u 8 1367; CHECK-NEXT: # fallthrough-return 1368 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1 1369 %v = load <4 x i16>, <4 x i16>* %s 1370 ret <4 x i16> %v 1371} 1372 1373define <4 x i32> @load_v4i32_with_unfolded_gep_negative_offset(<4 x i32>* %p) { 1374; CHECK-LABEL: load_v4i32_with_unfolded_gep_negative_offset: 1375; CHECK: .functype load_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1376; CHECK-NEXT: # %bb.0: 1377; CHECK-NEXT: local.get 0 1378; CHECK-NEXT: i32.const -16 1379; CHECK-NEXT: i32.add 1380; CHECK-NEXT: v128.load 0 1381; CHECK-NEXT: # fallthrough-return 1382 %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1 1383 %v = load <4 x i32>, <4 x i32>* %s 1384 ret <4 x i32> %v 1385} 1386 1387define <4 x i32> @load_splat_v4i32_with_unfolded_gep_negative_offset(i32* %p) { 1388; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_negative_offset: 1389; CHECK: .functype load_splat_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1390; CHECK-NEXT: # %bb.0: 1391; CHECK-NEXT: local.get 0 1392; CHECK-NEXT: i32.const -4 1393; CHECK-NEXT: i32.add 1394; CHECK-NEXT: v128.load32_splat 0 1395; CHECK-NEXT: # fallthrough-return 1396 %s = getelementptr inbounds i32, i32* %p, i32 -1 1397 %e = load i32, i32* %s 1398 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1399 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x 
i32> zeroinitializer 1400 ret <4 x i32> %v2 1401} 1402 1403define <4 x i32> @load_sext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) { 1404; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_negative_offset: 1405; CHECK: .functype load_sext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1406; CHECK-NEXT: # %bb.0: 1407; CHECK-NEXT: local.get 0 1408; CHECK-NEXT: i32.const -8 1409; CHECK-NEXT: i32.add 1410; CHECK-NEXT: i32x4.load16x4_s 0 1411; CHECK-NEXT: # fallthrough-return 1412 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1 1413 %v = load <4 x i16>, <4 x i16>* %s 1414 %v2 = sext <4 x i16> %v to <4 x i32> 1415 ret <4 x i32> %v2 1416} 1417 1418define <4 x i32> @load_zext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) { 1419; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_negative_offset: 1420; CHECK: .functype load_zext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1421; CHECK-NEXT: # %bb.0: 1422; CHECK-NEXT: local.get 0 1423; CHECK-NEXT: i32.const -8 1424; CHECK-NEXT: i32.add 1425; CHECK-NEXT: i32x4.load16x4_u 0 1426; CHECK-NEXT: # fallthrough-return 1427 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1 1428 %v = load <4 x i16>, <4 x i16>* %s 1429 %v2 = zext <4 x i16> %v to <4 x i32> 1430 ret <4 x i32> %v2 1431} 1432 1433define <4 x i16> @load_ext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) { 1434; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_negative_offset: 1435; CHECK: .functype load_ext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1436; CHECK-NEXT: # %bb.0: 1437; CHECK-NEXT: local.get 0 1438; CHECK-NEXT: i32.const -8 1439; CHECK-NEXT: i32.add 1440; CHECK-NEXT: i32x4.load16x4_u 0 1441; CHECK-NEXT: # fallthrough-return 1442 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1 1443 %v = load <4 x i16>, <4 x i16>* %s 1444 ret <4 x i16> %v 1445} 1446 1447define <4 x i32> @load_v4i32_with_unfolded_offset(<4 x i32>* %p) { 1448; CHECK-LABEL: load_v4i32_with_unfolded_offset: 1449; 
CHECK: .functype load_v4i32_with_unfolded_offset (i32) -> (v128) 1450; CHECK-NEXT: # %bb.0: 1451; CHECK-NEXT: local.get 0 1452; CHECK-NEXT: i32.const 16 1453; CHECK-NEXT: i32.add 1454; CHECK-NEXT: v128.load 0 1455; CHECK-NEXT: # fallthrough-return 1456 %q = ptrtoint <4 x i32>* %p to i32 1457 %r = add nsw i32 %q, 16 1458 %s = inttoptr i32 %r to <4 x i32>* 1459 %v = load <4 x i32>, <4 x i32>* %s 1460 ret <4 x i32> %v 1461} 1462 1463define <4 x i32> @load_splat_v4i32_with_unfolded_offset(i32* %p) { 1464; CHECK-LABEL: load_splat_v4i32_with_unfolded_offset: 1465; CHECK: .functype load_splat_v4i32_with_unfolded_offset (i32) -> (v128) 1466; CHECK-NEXT: # %bb.0: 1467; CHECK-NEXT: local.get 0 1468; CHECK-NEXT: i32.const 16 1469; CHECK-NEXT: i32.add 1470; CHECK-NEXT: v128.load32_splat 0 1471; CHECK-NEXT: # fallthrough-return 1472 %q = ptrtoint i32* %p to i32 1473 %r = add nsw i32 %q, 16 1474 %s = inttoptr i32 %r to i32* 1475 %e = load i32, i32* %s 1476 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1477 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1478 ret <4 x i32> %v2 1479} 1480 1481define <4 x i32> @load_sext_v4i32_with_unfolded_offset(<4 x i16>* %p) { 1482; CHECK-LABEL: load_sext_v4i32_with_unfolded_offset: 1483; CHECK: .functype load_sext_v4i32_with_unfolded_offset (i32) -> (v128) 1484; CHECK-NEXT: # %bb.0: 1485; CHECK-NEXT: local.get 0 1486; CHECK-NEXT: i32.const 16 1487; CHECK-NEXT: i32.add 1488; CHECK-NEXT: i32x4.load16x4_s 0 1489; CHECK-NEXT: # fallthrough-return 1490 %q = ptrtoint <4 x i16>* %p to i32 1491 %r = add nsw i32 %q, 16 1492 %s = inttoptr i32 %r to <4 x i16>* 1493 %v = load <4 x i16>, <4 x i16>* %s 1494 %v2 = sext <4 x i16> %v to <4 x i32> 1495 ret <4 x i32> %v2 1496} 1497 1498define <4 x i32> @load_zext_v4i32_with_unfolded_offset(<4 x i16>* %p) { 1499; CHECK-LABEL: load_zext_v4i32_with_unfolded_offset: 1500; CHECK: .functype load_zext_v4i32_with_unfolded_offset (i32) -> (v128) 1501; CHECK-NEXT: # %bb.0: 1502; 
CHECK-NEXT: local.get 0 1503; CHECK-NEXT: i32.const 16 1504; CHECK-NEXT: i32.add 1505; CHECK-NEXT: i32x4.load16x4_u 0 1506; CHECK-NEXT: # fallthrough-return 1507 %q = ptrtoint <4 x i16>* %p to i32 1508 %r = add nsw i32 %q, 16 1509 %s = inttoptr i32 %r to <4 x i16>* 1510 %v = load <4 x i16>, <4 x i16>* %s 1511 %v2 = zext <4 x i16> %v to <4 x i32> 1512 ret <4 x i32> %v2 1513} 1514 1515define <4 x i16> @load_ext_v4i32_with_unfolded_offset(<4 x i16>* %p) { 1516; CHECK-LABEL: load_ext_v4i32_with_unfolded_offset: 1517; CHECK: .functype load_ext_v4i32_with_unfolded_offset (i32) -> (v128) 1518; CHECK-NEXT: # %bb.0: 1519; CHECK-NEXT: local.get 0 1520; CHECK-NEXT: i32.const 16 1521; CHECK-NEXT: i32.add 1522; CHECK-NEXT: i32x4.load16x4_u 0 1523; CHECK-NEXT: # fallthrough-return 1524 %q = ptrtoint <4 x i16>* %p to i32 1525 %r = add nsw i32 %q, 16 1526 %s = inttoptr i32 %r to <4 x i16>* 1527 %v = load <4 x i16>, <4 x i16>* %s 1528 ret <4 x i16> %v 1529} 1530 1531define <4 x i32> @load_v4i32_with_unfolded_gep_offset(<4 x i32>* %p) { 1532; CHECK-LABEL: load_v4i32_with_unfolded_gep_offset: 1533; CHECK: .functype load_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1534; CHECK-NEXT: # %bb.0: 1535; CHECK-NEXT: local.get 0 1536; CHECK-NEXT: i32.const 16 1537; CHECK-NEXT: i32.add 1538; CHECK-NEXT: v128.load 0 1539; CHECK-NEXT: # fallthrough-return 1540 %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1 1541 %v = load <4 x i32>, <4 x i32>* %s 1542 ret <4 x i32> %v 1543} 1544 1545define <4 x i32> @load_splat_v4i32_with_unfolded_gep_offset(i32* %p) { 1546; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_offset: 1547; CHECK: .functype load_splat_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1548; CHECK-NEXT: # %bb.0: 1549; CHECK-NEXT: local.get 0 1550; CHECK-NEXT: i32.const 4 1551; CHECK-NEXT: i32.add 1552; CHECK-NEXT: v128.load32_splat 0 1553; CHECK-NEXT: # fallthrough-return 1554 %s = getelementptr i32, i32* %p, i32 1 1555 %e = load i32, i32* %s 1556 %v1 = insertelement <4 x i32> undef, 
i32 %e, i32 0 1557 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1558 ret <4 x i32> %v2 1559} 1560 1561define <4 x i32> @load_sext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) { 1562; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_offset: 1563; CHECK: .functype load_sext_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1564; CHECK-NEXT: # %bb.0: 1565; CHECK-NEXT: local.get 0 1566; CHECK-NEXT: i32.const 8 1567; CHECK-NEXT: i32.add 1568; CHECK-NEXT: i32x4.load16x4_s 0 1569; CHECK-NEXT: # fallthrough-return 1570 %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1 1571 %v = load <4 x i16>, <4 x i16>* %s 1572 %v2 = sext <4 x i16> %v to <4 x i32> 1573 ret <4 x i32> %v2 1574} 1575 1576define <4 x i32> @load_zext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) { 1577; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_offset: 1578; CHECK: .functype load_zext_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1579; CHECK-NEXT: # %bb.0: 1580; CHECK-NEXT: local.get 0 1581; CHECK-NEXT: i32.const 8 1582; CHECK-NEXT: i32.add 1583; CHECK-NEXT: i32x4.load16x4_u 0 1584; CHECK-NEXT: # fallthrough-return 1585 %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1 1586 %v = load <4 x i16>, <4 x i16>* %s 1587 %v2 = zext <4 x i16> %v to <4 x i32> 1588 ret <4 x i32> %v2 1589} 1590 1591define <4 x i16> @load_ext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) { 1592; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_offset: 1593; CHECK: .functype load_ext_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1594; CHECK-NEXT: # %bb.0: 1595; CHECK-NEXT: local.get 0 1596; CHECK-NEXT: i32.const 8 1597; CHECK-NEXT: i32.add 1598; CHECK-NEXT: i32x4.load16x4_u 0 1599; CHECK-NEXT: # fallthrough-return 1600 %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1 1601 %v = load <4 x i16>, <4 x i16>* %s 1602 ret <4 x i16> %v 1603} 1604 1605define <4 x i32> @load_v4i32_from_numeric_address() { 1606; CHECK-LABEL: load_v4i32_from_numeric_address: 1607; CHECK: .functype load_v4i32_from_numeric_address () 
-> (v128) 1608; CHECK-NEXT: # %bb.0: 1609; CHECK-NEXT: i32.const 0 1610; CHECK-NEXT: v128.load 32 1611; CHECK-NEXT: # fallthrough-return 1612 %s = inttoptr i32 32 to <4 x i32>* 1613 %v = load <4 x i32>, <4 x i32>* %s 1614 ret <4 x i32> %v 1615} 1616 1617define <4 x i32> @load_splat_v4i32_from_numeric_address() { 1618; CHECK-LABEL: load_splat_v4i32_from_numeric_address: 1619; CHECK: .functype load_splat_v4i32_from_numeric_address () -> (v128) 1620; CHECK-NEXT: # %bb.0: 1621; CHECK-NEXT: i32.const 0 1622; CHECK-NEXT: v128.load32_splat 32 1623; CHECK-NEXT: # fallthrough-return 1624 %s = inttoptr i32 32 to i32* 1625 %e = load i32, i32* %s 1626 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1627 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1628 ret <4 x i32> %v2 1629} 1630 1631define <4 x i32> @load_sext_v4i32_from_numeric_address() { 1632; CHECK-LABEL: load_sext_v4i32_from_numeric_address: 1633; CHECK: .functype load_sext_v4i32_from_numeric_address () -> (v128) 1634; CHECK-NEXT: # %bb.0: 1635; CHECK-NEXT: i32.const 0 1636; CHECK-NEXT: i32x4.load16x4_s 32 1637; CHECK-NEXT: # fallthrough-return 1638 %s = inttoptr i32 32 to <4 x i16>* 1639 %v = load <4 x i16>, <4 x i16>* %s 1640 %v2 = sext <4 x i16> %v to <4 x i32> 1641 ret <4 x i32> %v2 1642} 1643 1644define <4 x i32> @load_zext_v4i32_from_numeric_address() { 1645; CHECK-LABEL: load_zext_v4i32_from_numeric_address: 1646; CHECK: .functype load_zext_v4i32_from_numeric_address () -> (v128) 1647; CHECK-NEXT: # %bb.0: 1648; CHECK-NEXT: i32.const 0 1649; CHECK-NEXT: i32x4.load16x4_u 32 1650; CHECK-NEXT: # fallthrough-return 1651 %s = inttoptr i32 32 to <4 x i16>* 1652 %v = load <4 x i16>, <4 x i16>* %s 1653 %v2 = zext <4 x i16> %v to <4 x i32> 1654 ret <4 x i32> %v2 1655} 1656 1657define <4 x i16> @load_ext_v4i32_from_numeric_address() { 1658; CHECK-LABEL: load_ext_v4i32_from_numeric_address: 1659; CHECK: .functype load_ext_v4i32_from_numeric_address () -> (v128) 1660; CHECK-NEXT: # %bb.0: 
1661; CHECK-NEXT: i32.const 0 1662; CHECK-NEXT: i32x4.load16x4_u 32 1663; CHECK-NEXT: # fallthrough-return 1664 %s = inttoptr i32 32 to <4 x i16>* 1665 %v = load <4 x i16>, <4 x i16>* %s 1666 ret <4 x i16> %v 1667} 1668 1669@gv_v4i32 = global <4 x i32> <i32 42, i32 42, i32 42, i32 42> 1670define <4 x i32> @load_v4i32_from_global_address() { 1671; CHECK-LABEL: load_v4i32_from_global_address: 1672; CHECK: .functype load_v4i32_from_global_address () -> (v128) 1673; CHECK-NEXT: # %bb.0: 1674; CHECK-NEXT: i32.const 0 1675; CHECK-NEXT: v128.load gv_v4i32 1676; CHECK-NEXT: # fallthrough-return 1677 %v = load <4 x i32>, <4 x i32>* @gv_v4i32 1678 ret <4 x i32> %v 1679} 1680 1681@gv_i32 = global i32 42 1682define <4 x i32> @load_splat_v4i32_from_global_address() { 1683; CHECK-LABEL: load_splat_v4i32_from_global_address: 1684; CHECK: .functype load_splat_v4i32_from_global_address () -> (v128) 1685; CHECK-NEXT: # %bb.0: 1686; CHECK-NEXT: i32.const 0 1687; CHECK-NEXT: v128.load32_splat gv_i32 1688; CHECK-NEXT: # fallthrough-return 1689 %e = load i32, i32* @gv_i32 1690 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1691 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1692 ret <4 x i32> %v2 1693} 1694 1695@gv_v4i16 = global <4 x i16> <i16 42, i16 42, i16 42, i16 42> 1696define <4 x i32> @load_sext_v4i32_from_global_address() { 1697; CHECK-LABEL: load_sext_v4i32_from_global_address: 1698; CHECK: .functype load_sext_v4i32_from_global_address () -> (v128) 1699; CHECK-NEXT: # %bb.0: 1700; CHECK-NEXT: i32.const 0 1701; CHECK-NEXT: i32x4.load16x4_s gv_v4i16 1702; CHECK-NEXT: # fallthrough-return 1703 %v = load <4 x i16>, <4 x i16>* @gv_v4i16 1704 %v2 = sext <4 x i16> %v to <4 x i32> 1705 ret <4 x i32> %v2 1706} 1707 1708define <4 x i32> @load_zext_v4i32_from_global_address() { 1709; CHECK-LABEL: load_zext_v4i32_from_global_address: 1710; CHECK: .functype load_zext_v4i32_from_global_address () -> (v128) 1711; CHECK-NEXT: # %bb.0: 1712; CHECK-NEXT: 
i32.const 0 1713; CHECK-NEXT: i32x4.load16x4_u gv_v4i16 1714; CHECK-NEXT: # fallthrough-return 1715 %v = load <4 x i16>, <4 x i16>* @gv_v4i16 1716 %v2 = zext <4 x i16> %v to <4 x i32> 1717 ret <4 x i32> %v2 1718} 1719 1720define <4 x i16> @load_ext_v4i32_from_global_address() { 1721; CHECK-LABEL: load_ext_v4i32_from_global_address: 1722; CHECK: .functype load_ext_v4i32_from_global_address () -> (v128) 1723; CHECK-NEXT: # %bb.0: 1724; CHECK-NEXT: i32.const 0 1725; CHECK-NEXT: i32x4.load16x4_u gv_v4i16 1726; CHECK-NEXT: # fallthrough-return 1727 %v = load <4 x i16>, <4 x i16>* @gv_v4i16 1728 ret <4 x i16> %v 1729} 1730 1731define void @store_v4i32(<4 x i32> %v, <4 x i32>* %p) { 1732; CHECK-LABEL: store_v4i32: 1733; CHECK: .functype store_v4i32 (v128, i32) -> () 1734; CHECK-NEXT: # %bb.0: 1735; CHECK-NEXT: local.get 1 1736; CHECK-NEXT: local.get 0 1737; CHECK-NEXT: v128.store 0 1738; CHECK-NEXT: # fallthrough-return 1739 store <4 x i32> %v , <4 x i32>* %p 1740 ret void 1741} 1742 1743define void @store_narrowing_v4i32(<4 x i16> %v, <4 x i16>* %p) { 1744; CHECK-LABEL: store_narrowing_v4i32: 1745; CHECK: .functype store_narrowing_v4i32 (v128, i32) -> () 1746; CHECK-NEXT: # %bb.0: 1747; CHECK-NEXT: local.get 1 1748; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 1749; CHECK-NEXT: local.get 0 1750; CHECK-NEXT: v128.and 1751; CHECK-NEXT: local.get 0 1752; CHECK-NEXT: i16x8.narrow_i32x4_u 1753; CHECK-NEXT: i64x2.extract_lane 0 1754; CHECK-NEXT: i64.store 0 1755; CHECK-NEXT: # fallthrough-return 1756 store <4 x i16> %v , <4 x i16>* %p 1757 ret void 1758} 1759 1760define void @store_v4i32_with_folded_offset(<4 x i32> %v, <4 x i32>* %p) { 1761; CHECK-LABEL: store_v4i32_with_folded_offset: 1762; CHECK: .functype store_v4i32_with_folded_offset (v128, i32) -> () 1763; CHECK-NEXT: # %bb.0: 1764; CHECK-NEXT: local.get 1 1765; CHECK-NEXT: local.get 0 1766; CHECK-NEXT: v128.store 16 1767; CHECK-NEXT: # fallthrough-return 1768 %q = ptrtoint <4 x i32>* %p to i32 1769 %r = add nuw 
i32 %q, 16 1770 %s = inttoptr i32 %r to <4 x i32>* 1771 store <4 x i32> %v , <4 x i32>* %s 1772 ret void 1773} 1774 1775define void @store_narrowing_v4i32_with_folded_offset(<4 x i16> %v, <4 x i16>* %p) { 1776; CHECK-LABEL: store_narrowing_v4i32_with_folded_offset: 1777; CHECK: .functype store_narrowing_v4i32_with_folded_offset (v128, i32) -> () 1778; CHECK-NEXT: # %bb.0: 1779; CHECK-NEXT: local.get 1 1780; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 1781; CHECK-NEXT: local.get 0 1782; CHECK-NEXT: v128.and 1783; CHECK-NEXT: local.get 0 1784; CHECK-NEXT: i16x8.narrow_i32x4_u 1785; CHECK-NEXT: i64x2.extract_lane 0 1786; CHECK-NEXT: i64.store 16 1787; CHECK-NEXT: # fallthrough-return 1788 %q = ptrtoint <4 x i16>* %p to i32 1789 %r = add nuw i32 %q, 16 1790 %s = inttoptr i32 %r to <4 x i16>* 1791 store <4 x i16> %v , <4 x i16>* %s 1792 ret void 1793} 1794 1795define void @store_v4i32_with_folded_gep_offset(<4 x i32> %v, <4 x i32>* %p) { 1796; CHECK-LABEL: store_v4i32_with_folded_gep_offset: 1797; CHECK: .functype store_v4i32_with_folded_gep_offset (v128, i32) -> () 1798; CHECK-NEXT: # %bb.0: 1799; CHECK-NEXT: local.get 1 1800; CHECK-NEXT: local.get 0 1801; CHECK-NEXT: v128.store 16 1802; CHECK-NEXT: # fallthrough-return 1803 %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1 1804 store <4 x i32> %v , <4 x i32>* %s 1805 ret void 1806} 1807 1808define void @store_narrowing_v4i32_with_folded_gep_offset(<4 x i16> %v, <4 x i16>* %p) { 1809; CHECK-LABEL: store_narrowing_v4i32_with_folded_gep_offset: 1810; CHECK: .functype store_narrowing_v4i32_with_folded_gep_offset (v128, i32) -> () 1811; CHECK-NEXT: # %bb.0: 1812; CHECK-NEXT: local.get 1 1813; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 1814; CHECK-NEXT: local.get 0 1815; CHECK-NEXT: v128.and 1816; CHECK-NEXT: local.get 0 1817; CHECK-NEXT: i16x8.narrow_i32x4_u 1818; CHECK-NEXT: i64x2.extract_lane 0 1819; CHECK-NEXT: i64.store 8 1820; CHECK-NEXT: # fallthrough-return 1821 %s = getelementptr inbounds <4 
x i16>, <4 x i16>* %p, i32 1 1822 store <4 x i16> %v , <4 x i16>* %s 1823 ret void 1824} 1825 1826define void @store_v4i32_with_unfolded_gep_negative_offset(<4 x i32> %v, <4 x i32>* %p) { 1827; CHECK-LABEL: store_v4i32_with_unfolded_gep_negative_offset: 1828; CHECK: .functype store_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> () 1829; CHECK-NEXT: # %bb.0: 1830; CHECK-NEXT: local.get 1 1831; CHECK-NEXT: i32.const -16 1832; CHECK-NEXT: i32.add 1833; CHECK-NEXT: local.get 0 1834; CHECK-NEXT: v128.store 0 1835; CHECK-NEXT: # fallthrough-return 1836 %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1 1837 store <4 x i32> %v , <4 x i32>* %s 1838 ret void 1839} 1840 1841define void @store_narrowing_v4i32_with_unfolded_gep_negative_offset(<4 x i16> %v, <4 x i16>* %p) { 1842; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_negative_offset: 1843; CHECK: .functype store_narrowing_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> () 1844; CHECK-NEXT: # %bb.0: 1845; CHECK-NEXT: local.get 1 1846; CHECK-NEXT: i32.const -8 1847; CHECK-NEXT: i32.add 1848; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 1849; CHECK-NEXT: local.get 0 1850; CHECK-NEXT: v128.and 1851; CHECK-NEXT: local.get 0 1852; CHECK-NEXT: i16x8.narrow_i32x4_u 1853; CHECK-NEXT: i64x2.extract_lane 0 1854; CHECK-NEXT: i64.store 0 1855; CHECK-NEXT: # fallthrough-return 1856 %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1 1857 store <4 x i16> %v , <4 x i16>* %s 1858 ret void 1859} 1860 1861define void @store_v4i32_with_unfolded_offset(<4 x i32> %v, <4 x i32>* %p) { 1862; CHECK-LABEL: store_v4i32_with_unfolded_offset: 1863; CHECK: .functype store_v4i32_with_unfolded_offset (v128, i32) -> () 1864; CHECK-NEXT: # %bb.0: 1865; CHECK-NEXT: local.get 1 1866; CHECK-NEXT: i32.const 16 1867; CHECK-NEXT: i32.add 1868; CHECK-NEXT: local.get 0 1869; CHECK-NEXT: v128.store 0 1870; CHECK-NEXT: # fallthrough-return 1871 %q = ptrtoint <4 x i32>* %p to i32 1872 %r = add nsw i32 %q, 16 1873 %s = 
inttoptr i32 %r to <4 x i32>* 1874 store <4 x i32> %v , <4 x i32>* %s 1875 ret void 1876} 1877 1878define void @store_narrowing_v4i32_with_unfolded_offset(<4 x i16> %v, <4 x i16>* %p) { 1879; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_offset: 1880; CHECK: .functype store_narrowing_v4i32_with_unfolded_offset (v128, i32) -> () 1881; CHECK-NEXT: # %bb.0: 1882; CHECK-NEXT: local.get 1 1883; CHECK-NEXT: i32.const 16 1884; CHECK-NEXT: i32.add 1885; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 1886; CHECK-NEXT: local.get 0 1887; CHECK-NEXT: v128.and 1888; CHECK-NEXT: local.get 0 1889; CHECK-NEXT: i16x8.narrow_i32x4_u 1890; CHECK-NEXT: i64x2.extract_lane 0 1891; CHECK-NEXT: i64.store 0 1892; CHECK-NEXT: # fallthrough-return 1893 %q = ptrtoint <4 x i16>* %p to i32 1894 %r = add nsw i32 %q, 16 1895 %s = inttoptr i32 %r to <4 x i16>* 1896 store <4 x i16> %v , <4 x i16>* %s 1897 ret void 1898} 1899 1900define void @store_v4i32_with_unfolded_gep_offset(<4 x i32> %v, <4 x i32>* %p) { 1901; CHECK-LABEL: store_v4i32_with_unfolded_gep_offset: 1902; CHECK: .functype store_v4i32_with_unfolded_gep_offset (v128, i32) -> () 1903; CHECK-NEXT: # %bb.0: 1904; CHECK-NEXT: local.get 1 1905; CHECK-NEXT: i32.const 16 1906; CHECK-NEXT: i32.add 1907; CHECK-NEXT: local.get 0 1908; CHECK-NEXT: v128.store 0 1909; CHECK-NEXT: # fallthrough-return 1910 %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1 1911 store <4 x i32> %v , <4 x i32>* %s 1912 ret void 1913} 1914 1915define void @store_narrowing_v4i32_with_unfolded_gep_offset(<4 x i16> %v, <4 x i16>* %p) { 1916; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_offset: 1917; CHECK: .functype store_narrowing_v4i32_with_unfolded_gep_offset (v128, i32) -> () 1918; CHECK-NEXT: # %bb.0: 1919; CHECK-NEXT: local.get 1 1920; CHECK-NEXT: i32.const 8 1921; CHECK-NEXT: i32.add 1922; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 1923; CHECK-NEXT: local.get 0 1924; CHECK-NEXT: v128.and 1925; CHECK-NEXT: local.get 0 1926; CHECK-NEXT: 
i16x8.narrow_i32x4_u 1927; CHECK-NEXT: i64x2.extract_lane 0 1928; CHECK-NEXT: i64.store 0 1929; CHECK-NEXT: # fallthrough-return 1930 %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1 1931 store <4 x i16> %v , <4 x i16>* %s 1932 ret void 1933} 1934 1935define void @store_v4i32_to_numeric_address(<4 x i32> %v) { 1936; CHECK-LABEL: store_v4i32_to_numeric_address: 1937; CHECK: .functype store_v4i32_to_numeric_address (v128) -> () 1938; CHECK-NEXT: # %bb.0: 1939; CHECK-NEXT: i32.const 0 1940; CHECK-NEXT: local.get 0 1941; CHECK-NEXT: v128.store 32 1942; CHECK-NEXT: # fallthrough-return 1943 %s = inttoptr i32 32 to <4 x i32>* 1944 store <4 x i32> %v , <4 x i32>* %s 1945 ret void 1946} 1947 1948define void @store_narrowing_v4i32_to_numeric_address(<4 x i16> %v) { 1949; CHECK-LABEL: store_narrowing_v4i32_to_numeric_address: 1950; CHECK: .functype store_narrowing_v4i32_to_numeric_address (v128) -> () 1951; CHECK-NEXT: # %bb.0: 1952; CHECK-NEXT: i32.const 0 1953; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 1954; CHECK-NEXT: local.get 0 1955; CHECK-NEXT: v128.and 1956; CHECK-NEXT: local.get 0 1957; CHECK-NEXT: i16x8.narrow_i32x4_u 1958; CHECK-NEXT: i64x2.extract_lane 0 1959; CHECK-NEXT: i64.store 32 1960; CHECK-NEXT: # fallthrough-return 1961 %s = inttoptr i32 32 to <4 x i16>* 1962 store <4 x i16> %v , <4 x i16>* %s 1963 ret void 1964} 1965 1966define void @store_v4i32_to_global_address(<4 x i32> %v) { 1967; CHECK-LABEL: store_v4i32_to_global_address: 1968; CHECK: .functype store_v4i32_to_global_address (v128) -> () 1969; CHECK-NEXT: # %bb.0: 1970; CHECK-NEXT: i32.const 0 1971; CHECK-NEXT: local.get 0 1972; CHECK-NEXT: v128.store gv_v4i32 1973; CHECK-NEXT: # fallthrough-return 1974 store <4 x i32> %v , <4 x i32>* @gv_v4i32 1975 ret void 1976} 1977 1978define void @store_narrowing_v4i32_to_global_address(<4 x i16> %v) { 1979; CHECK-LABEL: store_narrowing_v4i32_to_global_address: 1980; CHECK: .functype store_narrowing_v4i32_to_global_address (v128) -> () 1981; 
CHECK-NEXT: # %bb.0: 1982; CHECK-NEXT: i32.const 0 1983; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 1984; CHECK-NEXT: local.get 0 1985; CHECK-NEXT: v128.and 1986; CHECK-NEXT: local.get 0 1987; CHECK-NEXT: i16x8.narrow_i32x4_u 1988; CHECK-NEXT: i64x2.extract_lane 0 1989; CHECK-NEXT: i64.store gv_v4i16 1990; CHECK-NEXT: # fallthrough-return 1991 store <4 x i16> %v , <4 x i16>* @gv_v4i16 1992 ret void 1993} 1994 1995; ============================================================================== 1996; 2 x i64 1997; ============================================================================== 1998define <2 x i64> @load_v2i64(<2 x i64>* %p) { 1999; CHECK-LABEL: load_v2i64: 2000; CHECK: .functype load_v2i64 (i32) -> (v128) 2001; CHECK-NEXT: # %bb.0: 2002; CHECK-NEXT: local.get 0 2003; CHECK-NEXT: v128.load 0 2004; CHECK-NEXT: # fallthrough-return 2005 %v = load <2 x i64>, <2 x i64>* %p 2006 ret <2 x i64> %v 2007} 2008 2009define <2 x i64> @load_splat_v2i64(i64* %p) { 2010; CHECK-LABEL: load_splat_v2i64: 2011; CHECK: .functype load_splat_v2i64 (i32) -> (v128) 2012; CHECK-NEXT: # %bb.0: 2013; CHECK-NEXT: local.get 0 2014; CHECK-NEXT: v128.load64_splat 0 2015; CHECK-NEXT: # fallthrough-return 2016 %e = load i64, i64* %p 2017 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2018 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2019 ret <2 x i64> %v2 2020} 2021 2022define <2 x i64> @load_sext_v2i64(<2 x i32>* %p) { 2023; CHECK-LABEL: load_sext_v2i64: 2024; CHECK: .functype load_sext_v2i64 (i32) -> (v128) 2025; CHECK-NEXT: # %bb.0: 2026; CHECK-NEXT: local.get 0 2027; CHECK-NEXT: i64x2.load32x2_s 0 2028; CHECK-NEXT: # fallthrough-return 2029 %v = load <2 x i32>, <2 x i32>* %p 2030 %v2 = sext <2 x i32> %v to <2 x i64> 2031 ret <2 x i64> %v2 2032} 2033 2034define <2 x i64> @load_zext_v2i64(<2 x i32>* %p) { 2035; CHECK-LABEL: load_zext_v2i64: 2036; CHECK: .functype load_zext_v2i64 (i32) -> (v128) 2037; CHECK-NEXT: # %bb.0: 2038; CHECK-NEXT: 
local.get 0 2039; CHECK-NEXT: i64x2.load32x2_u 0 2040; CHECK-NEXT: # fallthrough-return 2041 %v = load <2 x i32>, <2 x i32>* %p 2042 %v2 = zext <2 x i32> %v to <2 x i64> 2043 ret <2 x i64> %v2 2044} 2045 2046define <2 x i32> @load_ext_v2i64(<2 x i32>* %p) { 2047; CHECK-LABEL: load_ext_v2i64: 2048; CHECK: .functype load_ext_v2i64 (i32) -> (v128) 2049; CHECK-NEXT: # %bb.0: 2050; CHECK-NEXT: local.get 0 2051; CHECK-NEXT: i64x2.load32x2_u 0 2052; CHECK-NEXT: # fallthrough-return 2053 %v = load <2 x i32>, <2 x i32>* %p 2054 ret <2 x i32> %v 2055} 2056 2057define <2 x i64> @load_v2i64_with_folded_offset(<2 x i64>* %p) { 2058; CHECK-LABEL: load_v2i64_with_folded_offset: 2059; CHECK: .functype load_v2i64_with_folded_offset (i32) -> (v128) 2060; CHECK-NEXT: # %bb.0: 2061; CHECK-NEXT: local.get 0 2062; CHECK-NEXT: v128.load 16 2063; CHECK-NEXT: # fallthrough-return 2064 %q = ptrtoint <2 x i64>* %p to i32 2065 %r = add nuw i32 %q, 16 2066 %s = inttoptr i32 %r to <2 x i64>* 2067 %v = load <2 x i64>, <2 x i64>* %s 2068 ret <2 x i64> %v 2069} 2070 2071define <2 x i64> @load_splat_v2i64_with_folded_offset(i64* %p) { 2072; CHECK-LABEL: load_splat_v2i64_with_folded_offset: 2073; CHECK: .functype load_splat_v2i64_with_folded_offset (i32) -> (v128) 2074; CHECK-NEXT: # %bb.0: 2075; CHECK-NEXT: local.get 0 2076; CHECK-NEXT: v128.load64_splat 16 2077; CHECK-NEXT: # fallthrough-return 2078 %q = ptrtoint i64* %p to i32 2079 %r = add nuw i32 %q, 16 2080 %s = inttoptr i32 %r to i64* 2081 %e = load i64, i64* %s 2082 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2083 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2084 ret <2 x i64> %v2 2085} 2086 2087define <2 x i64> @load_sext_v2i64_with_folded_offset(<2 x i32>* %p) { 2088; CHECK-LABEL: load_sext_v2i64_with_folded_offset: 2089; CHECK: .functype load_sext_v2i64_with_folded_offset (i32) -> (v128) 2090; CHECK-NEXT: # %bb.0: 2091; CHECK-NEXT: local.get 0 2092; CHECK-NEXT: i64x2.load32x2_s 16 2093; CHECK-NEXT: 
# fallthrough-return 2094 %q = ptrtoint <2 x i32>* %p to i32 2095 %r = add nuw i32 %q, 16 2096 %s = inttoptr i32 %r to <2 x i32>* 2097 %v = load <2 x i32>, <2 x i32>* %s 2098 %v2 = sext <2 x i32> %v to <2 x i64> 2099 ret <2 x i64> %v2 2100} 2101 2102define <2 x i64> @load_zext_v2i64_with_folded_offset(<2 x i32>* %p) { 2103; CHECK-LABEL: load_zext_v2i64_with_folded_offset: 2104; CHECK: .functype load_zext_v2i64_with_folded_offset (i32) -> (v128) 2105; CHECK-NEXT: # %bb.0: 2106; CHECK-NEXT: local.get 0 2107; CHECK-NEXT: i64x2.load32x2_u 16 2108; CHECK-NEXT: # fallthrough-return 2109 %q = ptrtoint <2 x i32>* %p to i32 2110 %r = add nuw i32 %q, 16 2111 %s = inttoptr i32 %r to <2 x i32>* 2112 %v = load <2 x i32>, <2 x i32>* %s 2113 %v2 = zext <2 x i32> %v to <2 x i64> 2114 ret <2 x i64> %v2 2115} 2116 2117define <2 x i32> @load_ext_v2i64_with_folded_offset(<2 x i32>* %p) { 2118; CHECK-LABEL: load_ext_v2i64_with_folded_offset: 2119; CHECK: .functype load_ext_v2i64_with_folded_offset (i32) -> (v128) 2120; CHECK-NEXT: # %bb.0: 2121; CHECK-NEXT: local.get 0 2122; CHECK-NEXT: i64x2.load32x2_u 16 2123; CHECK-NEXT: # fallthrough-return 2124 %q = ptrtoint <2 x i32>* %p to i32 2125 %r = add nuw i32 %q, 16 2126 %s = inttoptr i32 %r to <2 x i32>* 2127 %v = load <2 x i32>, <2 x i32>* %s 2128 ret <2 x i32> %v 2129} 2130 2131define <2 x i64> @load_v2i64_with_folded_gep_offset(<2 x i64>* %p) { 2132; CHECK-LABEL: load_v2i64_with_folded_gep_offset: 2133; CHECK: .functype load_v2i64_with_folded_gep_offset (i32) -> (v128) 2134; CHECK-NEXT: # %bb.0: 2135; CHECK-NEXT: local.get 0 2136; CHECK-NEXT: v128.load 16 2137; CHECK-NEXT: # fallthrough-return 2138 %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1 2139 %v = load <2 x i64>, <2 x i64>* %s 2140 ret <2 x i64> %v 2141} 2142 2143define <2 x i64> @load_splat_v2i64_with_folded_gep_offset(i64* %p) { 2144; CHECK-LABEL: load_splat_v2i64_with_folded_gep_offset: 2145; CHECK: .functype load_splat_v2i64_with_folded_gep_offset (i32) -> 
(v128) 2146; CHECK-NEXT: # %bb.0: 2147; CHECK-NEXT: local.get 0 2148; CHECK-NEXT: v128.load64_splat 8 2149; CHECK-NEXT: # fallthrough-return 2150 %s = getelementptr inbounds i64, i64* %p, i32 1 2151 %e = load i64, i64* %s 2152 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2153 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2154 ret <2 x i64> %v2 2155} 2156 2157define <2 x i64> @load_sext_v2i64_with_folded_gep_offset(<2 x i32>* %p) { 2158; CHECK-LABEL: load_sext_v2i64_with_folded_gep_offset: 2159; CHECK: .functype load_sext_v2i64_with_folded_gep_offset (i32) -> (v128) 2160; CHECK-NEXT: # %bb.0: 2161; CHECK-NEXT: local.get 0 2162; CHECK-NEXT: i64x2.load32x2_s 8 2163; CHECK-NEXT: # fallthrough-return 2164 %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1 2165 %v = load <2 x i32>, <2 x i32>* %s 2166 %v2 = sext <2 x i32> %v to <2 x i64> 2167 ret <2 x i64> %v2 2168} 2169 2170define <2 x i64> @load_zext_v2i64_with_folded_gep_offset(<2 x i32>* %p) { 2171; CHECK-LABEL: load_zext_v2i64_with_folded_gep_offset: 2172; CHECK: .functype load_zext_v2i64_with_folded_gep_offset (i32) -> (v128) 2173; CHECK-NEXT: # %bb.0: 2174; CHECK-NEXT: local.get 0 2175; CHECK-NEXT: i64x2.load32x2_u 8 2176; CHECK-NEXT: # fallthrough-return 2177 %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1 2178 %v = load <2 x i32>, <2 x i32>* %s 2179 %v2 = zext <2 x i32> %v to <2 x i64> 2180 ret <2 x i64> %v2 2181} 2182 2183define <2 x i32> @load_ext_v2i64_with_folded_gep_offset(<2 x i32>* %p) { 2184; CHECK-LABEL: load_ext_v2i64_with_folded_gep_offset: 2185; CHECK: .functype load_ext_v2i64_with_folded_gep_offset (i32) -> (v128) 2186; CHECK-NEXT: # %bb.0: 2187; CHECK-NEXT: local.get 0 2188; CHECK-NEXT: i64x2.load32x2_u 8 2189; CHECK-NEXT: # fallthrough-return 2190 %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1 2191 %v = load <2 x i32>, <2 x i32>* %s 2192 ret <2 x i32> %v 2193} 2194 2195define <2 x i64> 
@load_v2i64_with_unfolded_gep_negative_offset(<2 x i64>* %p) { 2196; CHECK-LABEL: load_v2i64_with_unfolded_gep_negative_offset: 2197; CHECK: .functype load_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) 2198; CHECK-NEXT: # %bb.0: 2199; CHECK-NEXT: local.get 0 2200; CHECK-NEXT: i32.const -16 2201; CHECK-NEXT: i32.add 2202; CHECK-NEXT: v128.load 0 2203; CHECK-NEXT: # fallthrough-return 2204 %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1 2205 %v = load <2 x i64>, <2 x i64>* %s 2206 ret <2 x i64> %v 2207} 2208 2209define <2 x i64> @load_splat_v2i64_with_unfolded_gep_negative_offset(i64* %p) { 2210; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_negative_offset: 2211; CHECK: .functype load_splat_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) 2212; CHECK-NEXT: # %bb.0: 2213; CHECK-NEXT: local.get 0 2214; CHECK-NEXT: i32.const -8 2215; CHECK-NEXT: i32.add 2216; CHECK-NEXT: v128.load64_splat 0 2217; CHECK-NEXT: # fallthrough-return 2218 %s = getelementptr inbounds i64, i64* %p, i32 -1 2219 %e = load i64, i64* %s 2220 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2221 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2222 ret <2 x i64> %v2 2223} 2224 2225define <2 x i64> @load_sext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) { 2226; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_negative_offset: 2227; CHECK: .functype load_sext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) 2228; CHECK-NEXT: # %bb.0: 2229; CHECK-NEXT: local.get 0 2230; CHECK-NEXT: i32.const -8 2231; CHECK-NEXT: i32.add 2232; CHECK-NEXT: i64x2.load32x2_s 0 2233; CHECK-NEXT: # fallthrough-return 2234 %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1 2235 %v = load <2 x i32>, <2 x i32>* %s 2236 %v2 = sext <2 x i32> %v to <2 x i64> 2237 ret <2 x i64> %v2 2238} 2239 2240define <2 x i64> @load_zext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) { 2241; CHECK-LABEL: 
load_zext_v2i64_with_unfolded_gep_negative_offset: 2242; CHECK: .functype load_zext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) 2243; CHECK-NEXT: # %bb.0: 2244; CHECK-NEXT: local.get 0 2245; CHECK-NEXT: i32.const -8 2246; CHECK-NEXT: i32.add 2247; CHECK-NEXT: i64x2.load32x2_u 0 2248; CHECK-NEXT: # fallthrough-return 2249 %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1 2250 %v = load <2 x i32>, <2 x i32>* %s 2251 %v2 = zext <2 x i32> %v to <2 x i64> 2252 ret <2 x i64> %v2 2253} 2254 2255define <2 x i32> @load_ext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) { 2256; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_negative_offset: 2257; CHECK: .functype load_ext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) 2258; CHECK-NEXT: # %bb.0: 2259; CHECK-NEXT: local.get 0 2260; CHECK-NEXT: i32.const -8 2261; CHECK-NEXT: i32.add 2262; CHECK-NEXT: i64x2.load32x2_u 0 2263; CHECK-NEXT: # fallthrough-return 2264 %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1 2265 %v = load <2 x i32>, <2 x i32>* %s 2266 ret <2 x i32> %v 2267} 2268 2269define <2 x i64> @load_v2i64_with_unfolded_offset(<2 x i64>* %p) { 2270; CHECK-LABEL: load_v2i64_with_unfolded_offset: 2271; CHECK: .functype load_v2i64_with_unfolded_offset (i32) -> (v128) 2272; CHECK-NEXT: # %bb.0: 2273; CHECK-NEXT: local.get 0 2274; CHECK-NEXT: i32.const 16 2275; CHECK-NEXT: i32.add 2276; CHECK-NEXT: v128.load 0 2277; CHECK-NEXT: # fallthrough-return 2278 %q = ptrtoint <2 x i64>* %p to i32 2279 %r = add nsw i32 %q, 16 2280 %s = inttoptr i32 %r to <2 x i64>* 2281 %v = load <2 x i64>, <2 x i64>* %s 2282 ret <2 x i64> %v 2283} 2284 2285define <2 x i64> @load_splat_v2i64_with_unfolded_offset(i64* %p) { 2286; CHECK-LABEL: load_splat_v2i64_with_unfolded_offset: 2287; CHECK: .functype load_splat_v2i64_with_unfolded_offset (i32) -> (v128) 2288; CHECK-NEXT: # %bb.0: 2289; CHECK-NEXT: local.get 0 2290; CHECK-NEXT: i32.const 16 2291; CHECK-NEXT: i32.add 2292; CHECK-NEXT: 
v128.load64_splat 0 2293; CHECK-NEXT: # fallthrough-return 2294 %q = ptrtoint i64* %p to i32 2295 %r = add nsw i32 %q, 16 2296 %s = inttoptr i32 %r to i64* 2297 %e = load i64, i64* %s 2298 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2299 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2300 ret <2 x i64> %v2 2301} 2302 2303define <2 x i64> @load_sext_v2i64_with_unfolded_offset(<2 x i32>* %p) { 2304; CHECK-LABEL: load_sext_v2i64_with_unfolded_offset: 2305; CHECK: .functype load_sext_v2i64_with_unfolded_offset (i32) -> (v128) 2306; CHECK-NEXT: # %bb.0: 2307; CHECK-NEXT: local.get 0 2308; CHECK-NEXT: i32.const 16 2309; CHECK-NEXT: i32.add 2310; CHECK-NEXT: i64x2.load32x2_s 0 2311; CHECK-NEXT: # fallthrough-return 2312 %q = ptrtoint <2 x i32>* %p to i32 2313 %r = add nsw i32 %q, 16 2314 %s = inttoptr i32 %r to <2 x i32>* 2315 %v = load <2 x i32>, <2 x i32>* %s 2316 %v2 = sext <2 x i32> %v to <2 x i64> 2317 ret <2 x i64> %v2 2318} 2319 2320define <2 x i64> @load_zext_v2i64_with_unfolded_offset(<2 x i32>* %p) { 2321; CHECK-LABEL: load_zext_v2i64_with_unfolded_offset: 2322; CHECK: .functype load_zext_v2i64_with_unfolded_offset (i32) -> (v128) 2323; CHECK-NEXT: # %bb.0: 2324; CHECK-NEXT: local.get 0 2325; CHECK-NEXT: i32.const 16 2326; CHECK-NEXT: i32.add 2327; CHECK-NEXT: i64x2.load32x2_u 0 2328; CHECK-NEXT: # fallthrough-return 2329 %q = ptrtoint <2 x i32>* %p to i32 2330 %r = add nsw i32 %q, 16 2331 %s = inttoptr i32 %r to <2 x i32>* 2332 %v = load <2 x i32>, <2 x i32>* %s 2333 %v2 = zext <2 x i32> %v to <2 x i64> 2334 ret <2 x i64> %v2 2335} 2336 2337define <2 x i32> @load_ext_v2i64_with_unfolded_offset(<2 x i32>* %p) { 2338; CHECK-LABEL: load_ext_v2i64_with_unfolded_offset: 2339; CHECK: .functype load_ext_v2i64_with_unfolded_offset (i32) -> (v128) 2340; CHECK-NEXT: # %bb.0: 2341; CHECK-NEXT: local.get 0 2342; CHECK-NEXT: i32.const 16 2343; CHECK-NEXT: i32.add 2344; CHECK-NEXT: i64x2.load32x2_u 0 2345; CHECK-NEXT: # 
fallthrough-return 2346 %q = ptrtoint <2 x i32>* %p to i32 2347 %r = add nsw i32 %q, 16 2348 %s = inttoptr i32 %r to <2 x i32>* 2349 %v = load <2 x i32>, <2 x i32>* %s 2350 ret <2 x i32> %v 2351} 2352 2353define <2 x i64> @load_v2i64_with_unfolded_gep_offset(<2 x i64>* %p) { 2354; CHECK-LABEL: load_v2i64_with_unfolded_gep_offset: 2355; CHECK: .functype load_v2i64_with_unfolded_gep_offset (i32) -> (v128) 2356; CHECK-NEXT: # %bb.0: 2357; CHECK-NEXT: local.get 0 2358; CHECK-NEXT: i32.const 16 2359; CHECK-NEXT: i32.add 2360; CHECK-NEXT: v128.load 0 2361; CHECK-NEXT: # fallthrough-return 2362 %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1 2363 %v = load <2 x i64>, <2 x i64>* %s 2364 ret <2 x i64> %v 2365} 2366 2367define <2 x i64> @load_splat_v2i64_with_unfolded_gep_offset(i64* %p) { 2368; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_offset: 2369; CHECK: .functype load_splat_v2i64_with_unfolded_gep_offset (i32) -> (v128) 2370; CHECK-NEXT: # %bb.0: 2371; CHECK-NEXT: local.get 0 2372; CHECK-NEXT: i32.const 8 2373; CHECK-NEXT: i32.add 2374; CHECK-NEXT: v128.load64_splat 0 2375; CHECK-NEXT: # fallthrough-return 2376 %s = getelementptr i64, i64* %p, i32 1 2377 %e = load i64, i64* %s 2378 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2379 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2380 ret <2 x i64> %v2 2381} 2382 2383define <2 x i64> @load_sext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) { 2384; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_offset: 2385; CHECK: .functype load_sext_v2i64_with_unfolded_gep_offset (i32) -> (v128) 2386; CHECK-NEXT: # %bb.0: 2387; CHECK-NEXT: local.get 0 2388; CHECK-NEXT: i32.const 8 2389; CHECK-NEXT: i32.add 2390; CHECK-NEXT: i64x2.load32x2_s 0 2391; CHECK-NEXT: # fallthrough-return 2392 %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1 2393 %v = load <2 x i32>, <2 x i32>* %s 2394 %v2 = sext <2 x i32> %v to <2 x i64> 2395 ret <2 x i64> %v2 2396} 2397 2398define <2 x i64> 
@load_zext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) { 2399; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_offset: 2400; CHECK: .functype load_zext_v2i64_with_unfolded_gep_offset (i32) -> (v128) 2401; CHECK-NEXT: # %bb.0: 2402; CHECK-NEXT: local.get 0 2403; CHECK-NEXT: i32.const 8 2404; CHECK-NEXT: i32.add 2405; CHECK-NEXT: i64x2.load32x2_u 0 2406; CHECK-NEXT: # fallthrough-return 2407 %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1 2408 %v = load <2 x i32>, <2 x i32>* %s 2409 %v2 = zext <2 x i32> %v to <2 x i64> 2410 ret <2 x i64> %v2 2411} 2412 2413define <2 x i32> @load_ext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) { 2414; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_offset: 2415; CHECK: .functype load_ext_v2i64_with_unfolded_gep_offset (i32) -> (v128) 2416; CHECK-NEXT: # %bb.0: 2417; CHECK-NEXT: local.get 0 2418; CHECK-NEXT: i32.const 8 2419; CHECK-NEXT: i32.add 2420; CHECK-NEXT: i64x2.load32x2_u 0 2421; CHECK-NEXT: # fallthrough-return 2422 %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1 2423 %v = load <2 x i32>, <2 x i32>* %s 2424 ret <2 x i32> %v 2425} 2426 2427define <2 x i64> @load_v2i64_from_numeric_address() { 2428; CHECK-LABEL: load_v2i64_from_numeric_address: 2429; CHECK: .functype load_v2i64_from_numeric_address () -> (v128) 2430; CHECK-NEXT: # %bb.0: 2431; CHECK-NEXT: i32.const 0 2432; CHECK-NEXT: v128.load 32 2433; CHECK-NEXT: # fallthrough-return 2434 %s = inttoptr i32 32 to <2 x i64>* 2435 %v = load <2 x i64>, <2 x i64>* %s 2436 ret <2 x i64> %v 2437} 2438 2439define <2 x i64> @load_splat_v2i64_from_numeric_address() { 2440; CHECK-LABEL: load_splat_v2i64_from_numeric_address: 2441; CHECK: .functype load_splat_v2i64_from_numeric_address () -> (v128) 2442; CHECK-NEXT: # %bb.0: 2443; CHECK-NEXT: i32.const 0 2444; CHECK-NEXT: v128.load64_splat 32 2445; CHECK-NEXT: # fallthrough-return 2446 %s = inttoptr i32 32 to i64* 2447 %e = load i64, i64* %s 2448 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2449 %v2 = shufflevector <2 x i64> 
%v1, <2 x i64> undef, <2 x i32> zeroinitializer 2450 ret <2 x i64> %v2 2451} 2452 2453define <2 x i64> @load_sext_v2i64_from_numeric_address() { 2454; CHECK-LABEL: load_sext_v2i64_from_numeric_address: 2455; CHECK: .functype load_sext_v2i64_from_numeric_address () -> (v128) 2456; CHECK-NEXT: # %bb.0: 2457; CHECK-NEXT: i32.const 0 2458; CHECK-NEXT: i64x2.load32x2_s 32 2459; CHECK-NEXT: # fallthrough-return 2460 %s = inttoptr i32 32 to <2 x i32>* 2461 %v = load <2 x i32>, <2 x i32>* %s 2462 %v2 = sext <2 x i32> %v to <2 x i64> 2463 ret <2 x i64> %v2 2464} 2465 2466define <2 x i64> @load_zext_v2i64_from_numeric_address() { 2467; CHECK-LABEL: load_zext_v2i64_from_numeric_address: 2468; CHECK: .functype load_zext_v2i64_from_numeric_address () -> (v128) 2469; CHECK-NEXT: # %bb.0: 2470; CHECK-NEXT: i32.const 0 2471; CHECK-NEXT: i64x2.load32x2_u 32 2472; CHECK-NEXT: # fallthrough-return 2473 %s = inttoptr i32 32 to <2 x i32>* 2474 %v = load <2 x i32>, <2 x i32>* %s 2475 %v2 = zext <2 x i32> %v to <2 x i64> 2476 ret <2 x i64> %v2 2477} 2478 2479define <2 x i32> @load_ext_v2i64_from_numeric_address() { 2480; CHECK-LABEL: load_ext_v2i64_from_numeric_address: 2481; CHECK: .functype load_ext_v2i64_from_numeric_address () -> (v128) 2482; CHECK-NEXT: # %bb.0: 2483; CHECK-NEXT: i32.const 0 2484; CHECK-NEXT: i64x2.load32x2_u 32 2485; CHECK-NEXT: # fallthrough-return 2486 %s = inttoptr i32 32 to <2 x i32>* 2487 %v = load <2 x i32>, <2 x i32>* %s 2488 ret <2 x i32> %v 2489} 2490 2491@gv_v2i64 = global <2 x i64> <i64 42, i64 42> 2492define <2 x i64> @load_v2i64_from_global_address() { 2493; CHECK-LABEL: load_v2i64_from_global_address: 2494; CHECK: .functype load_v2i64_from_global_address () -> (v128) 2495; CHECK-NEXT: # %bb.0: 2496; CHECK-NEXT: i32.const 0 2497; CHECK-NEXT: v128.load gv_v2i64 2498; CHECK-NEXT: # fallthrough-return 2499 %v = load <2 x i64>, <2 x i64>* @gv_v2i64 2500 ret <2 x i64> %v 2501} 2502 2503@gv_i64 = global i64 42 2504define <2 x i64> 
@load_splat_v2i64_from_global_address() { 2505; CHECK-LABEL: load_splat_v2i64_from_global_address: 2506; CHECK: .functype load_splat_v2i64_from_global_address () -> (v128) 2507; CHECK-NEXT: # %bb.0: 2508; CHECK-NEXT: i32.const 0 2509; CHECK-NEXT: v128.load64_splat gv_i64 2510; CHECK-NEXT: # fallthrough-return 2511 %e = load i64, i64* @gv_i64 2512 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2513 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2514 ret <2 x i64> %v2 2515} 2516 2517@gv_v2i32 = global <2 x i32> <i32 42, i32 42> 2518define <2 x i64> @load_sext_v2i64_from_global_address() { 2519; CHECK-LABEL: load_sext_v2i64_from_global_address: 2520; CHECK: .functype load_sext_v2i64_from_global_address () -> (v128) 2521; CHECK-NEXT: # %bb.0: 2522; CHECK-NEXT: i32.const 0 2523; CHECK-NEXT: i64x2.load32x2_s gv_v2i32 2524; CHECK-NEXT: # fallthrough-return 2525 %v = load <2 x i32>, <2 x i32>* @gv_v2i32 2526 %v2 = sext <2 x i32> %v to <2 x i64> 2527 ret <2 x i64> %v2 2528} 2529 2530define <2 x i64> @load_zext_v2i64_from_global_address() { 2531; CHECK-LABEL: load_zext_v2i64_from_global_address: 2532; CHECK: .functype load_zext_v2i64_from_global_address () -> (v128) 2533; CHECK-NEXT: # %bb.0: 2534; CHECK-NEXT: i32.const 0 2535; CHECK-NEXT: i64x2.load32x2_u gv_v2i32 2536; CHECK-NEXT: # fallthrough-return 2537 %v = load <2 x i32>, <2 x i32>* @gv_v2i32 2538 %v2 = zext <2 x i32> %v to <2 x i64> 2539 ret <2 x i64> %v2 2540} 2541 2542define <2 x i32> @load_ext_v2i64_from_global_address() { 2543; CHECK-LABEL: load_ext_v2i64_from_global_address: 2544; CHECK: .functype load_ext_v2i64_from_global_address () -> (v128) 2545; CHECK-NEXT: # %bb.0: 2546; CHECK-NEXT: i32.const 0 2547; CHECK-NEXT: i64x2.load32x2_u gv_v2i32 2548; CHECK-NEXT: # fallthrough-return 2549 %v = load <2 x i32>, <2 x i32>* @gv_v2i32 2550 ret <2 x i32> %v 2551} 2552 2553define void @store_v2i64(<2 x i64> %v, <2 x i64>* %p) { 2554; CHECK-LABEL: store_v2i64: 2555; CHECK: .functype 
store_v2i64 (v128, i32) -> () 2556; CHECK-NEXT: # %bb.0: 2557; CHECK-NEXT: local.get 1 2558; CHECK-NEXT: local.get 0 2559; CHECK-NEXT: v128.store 0 2560; CHECK-NEXT: # fallthrough-return 2561 store <2 x i64> %v , <2 x i64>* %p 2562 ret void 2563} 2564 2565define void @store_v2i64_with_folded_offset(<2 x i64> %v, <2 x i64>* %p) { 2566; CHECK-LABEL: store_v2i64_with_folded_offset: 2567; CHECK: .functype store_v2i64_with_folded_offset (v128, i32) -> () 2568; CHECK-NEXT: # %bb.0: 2569; CHECK-NEXT: local.get 1 2570; CHECK-NEXT: local.get 0 2571; CHECK-NEXT: v128.store 16 2572; CHECK-NEXT: # fallthrough-return 2573 %q = ptrtoint <2 x i64>* %p to i32 2574 %r = add nuw i32 %q, 16 2575 %s = inttoptr i32 %r to <2 x i64>* 2576 store <2 x i64> %v , <2 x i64>* %s 2577 ret void 2578} 2579 2580define void @store_v2i64_with_folded_gep_offset(<2 x i64> %v, <2 x i64>* %p) { 2581; CHECK-LABEL: store_v2i64_with_folded_gep_offset: 2582; CHECK: .functype store_v2i64_with_folded_gep_offset (v128, i32) -> () 2583; CHECK-NEXT: # %bb.0: 2584; CHECK-NEXT: local.get 1 2585; CHECK-NEXT: local.get 0 2586; CHECK-NEXT: v128.store 16 2587; CHECK-NEXT: # fallthrough-return 2588 %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1 2589 store <2 x i64> %v , <2 x i64>* %s 2590 ret void 2591} 2592 2593define void @store_v2i64_with_unfolded_gep_negative_offset(<2 x i64> %v, <2 x i64>* %p) { 2594; CHECK-LABEL: store_v2i64_with_unfolded_gep_negative_offset: 2595; CHECK: .functype store_v2i64_with_unfolded_gep_negative_offset (v128, i32) -> () 2596; CHECK-NEXT: # %bb.0: 2597; CHECK-NEXT: local.get 1 2598; CHECK-NEXT: i32.const -16 2599; CHECK-NEXT: i32.add 2600; CHECK-NEXT: local.get 0 2601; CHECK-NEXT: v128.store 0 2602; CHECK-NEXT: # fallthrough-return 2603 %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1 2604 store <2 x i64> %v , <2 x i64>* %s 2605 ret void 2606} 2607 2608define void @store_v2i64_with_unfolded_offset(<2 x i64> %v, <2 x i64>* %p) { 2609; CHECK-LABEL: 
store_v2i64_with_unfolded_offset: 2610; CHECK: .functype store_v2i64_with_unfolded_offset (v128, i32) -> () 2611; CHECK-NEXT: # %bb.0: 2612; CHECK-NEXT: local.get 1 2613; CHECK-NEXT: i32.const 16 2614; CHECK-NEXT: i32.add 2615; CHECK-NEXT: local.get 0 2616; CHECK-NEXT: v128.store 0 2617; CHECK-NEXT: # fallthrough-return 2618 %q = ptrtoint <2 x i64>* %p to i32 2619 %r = add nsw i32 %q, 16 2620 %s = inttoptr i32 %r to <2 x i64>* 2621 store <2 x i64> %v , <2 x i64>* %s 2622 ret void 2623} 2624 2625define void @store_v2i64_with_unfolded_gep_offset(<2 x i64> %v, <2 x i64>* %p) { 2626; CHECK-LABEL: store_v2i64_with_unfolded_gep_offset: 2627; CHECK: .functype store_v2i64_with_unfolded_gep_offset (v128, i32) -> () 2628; CHECK-NEXT: # %bb.0: 2629; CHECK-NEXT: local.get 1 2630; CHECK-NEXT: i32.const 16 2631; CHECK-NEXT: i32.add 2632; CHECK-NEXT: local.get 0 2633; CHECK-NEXT: v128.store 0 2634; CHECK-NEXT: # fallthrough-return 2635 %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1 2636 store <2 x i64> %v , <2 x i64>* %s 2637 ret void 2638} 2639 2640define void @store_v2i64_to_numeric_address(<2 x i64> %v) { 2641; CHECK-LABEL: store_v2i64_to_numeric_address: 2642; CHECK: .functype store_v2i64_to_numeric_address (v128) -> () 2643; CHECK-NEXT: # %bb.0: 2644; CHECK-NEXT: i32.const 0 2645; CHECK-NEXT: local.get 0 2646; CHECK-NEXT: v128.store 32 2647; CHECK-NEXT: # fallthrough-return 2648 %s = inttoptr i32 32 to <2 x i64>* 2649 store <2 x i64> %v , <2 x i64>* %s 2650 ret void 2651} 2652 2653define void @store_v2i64_to_global_address(<2 x i64> %v) { 2654; CHECK-LABEL: store_v2i64_to_global_address: 2655; CHECK: .functype store_v2i64_to_global_address (v128) -> () 2656; CHECK-NEXT: # %bb.0: 2657; CHECK-NEXT: i32.const 0 2658; CHECK-NEXT: local.get 0 2659; CHECK-NEXT: v128.store gv_v2i64 2660; CHECK-NEXT: # fallthrough-return 2661 store <2 x i64> %v , <2 x i64>* @gv_v2i64 2662 ret void 2663} 2664 2665; ============================================================================== 
; 4 x float
; ==============================================================================
define <4 x float> @load_v4f32(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32:
; CHECK: .functype load_v4f32 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %v = load <4 x float>, <4 x float>* %p
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32(float* %p) {
; CHECK-LABEL: load_splat_v4f32:
; CHECK: .functype load_splat_v4f32 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load32_splat 0
; CHECK-NEXT: # fallthrough-return
  %e = load float, float* %p
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define <4 x float> @load_v4f32_with_folded_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_folded_offset:
; CHECK: .functype load_v4f32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_folded_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_folded_offset:
; CHECK: .functype load_splat_v4f32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load32_splat 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint float* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to float*
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define <4 x float> @load_v4f32_with_folded_gep_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_folded_gep_offset:
; CHECK: .functype load_v4f32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_folded_gep_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_folded_gep_offset:
; CHECK: .functype load_splat_v4f32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load32_splat 4
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds float, float* %p, i32 1
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define <4 x float> @load_v4f32_with_unfolded_gep_negative_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_gep_negative_offset:
; CHECK: .functype load_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_unfolded_gep_negative_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_negative_offset:
; CHECK: .functype load_splat_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -4
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load32_splat 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds float, float* %p, i32 -1
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define <4 x float> @load_v4f32_with_unfolded_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_offset:
; CHECK: .functype load_v4f32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_unfolded_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_offset:
; CHECK: .functype load_splat_v4f32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load32_splat 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint float* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to float*
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define <4 x float> @load_v4f32_with_unfolded_gep_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_gep_offset:
; CHECK: .functype load_v4f32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_unfolded_gep_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_offset:
; CHECK: .functype load_splat_v4f32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 4
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load32_splat 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr float, float* %p, i32 1
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define <4 x float> @load_v4f32_from_numeric_address() {
; CHECK-LABEL: load_v4f32_from_numeric_address:
; CHECK: .functype load_v4f32_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load 32
; CHECK-NEXT: # fallthrough-return
  %s = inttoptr i32 32 to <4 x float>*
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_from_numeric_address() {
; CHECK-LABEL: load_splat_v4f32_from_numeric_address:
; CHECK: .functype load_splat_v4f32_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load32_splat 32
; CHECK-NEXT: # fallthrough-return
  %s = inttoptr i32 32 to float*
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

@gv_v4f32 = global <4 x float> <float 42., float 42., float 42., float 42.>
define <4 x float> @load_v4f32_from_global_address() {
; CHECK-LABEL: load_v4f32_from_global_address:
; CHECK: .functype load_v4f32_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load gv_v4f32
; CHECK-NEXT: # fallthrough-return
  %v = load <4 x float>, <4 x float>* @gv_v4f32
  ret <4 x float> %v
}

@gv_f32 = global float 42.
define <4 x float> @load_splat_v4f32_from_global_address() {
; CHECK-LABEL: load_splat_v4f32_from_global_address:
; CHECK: .functype load_splat_v4f32_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load32_splat gv_f32
; CHECK-NEXT: # fallthrough-return
  %e = load float, float* @gv_f32
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define void @store_v4f32(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32:
; CHECK: .functype store_v4f32 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
  store <4 x float> %v , <4 x float>* %p
  ret void
}

define void @store_v4f32_with_folded_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_folded_offset:
; CHECK: .functype store_v4f32_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_with_folded_gep_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_folded_gep_offset:
; CHECK: .functype store_v4f32_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_with_unfolded_gep_negative_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_gep_negative_offset:
; CHECK: .functype store_v4f32_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_with_unfolded_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_offset:
; CHECK: .functype store_v4f32_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_with_unfolded_gep_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_gep_offset:
; CHECK: .functype store_v4f32_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_to_numeric_address(<4 x float> %v) {
; CHECK-LABEL: store_v4f32_to_numeric_address:
; CHECK: .functype store_v4f32_to_numeric_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 32
; CHECK-NEXT: # fallthrough-return
  %s = inttoptr i32 32 to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}

define void @store_v4f32_to_global_address(<4 x float> %v) {
; CHECK-LABEL: store_v4f32_to_global_address:
; CHECK: .functype store_v4f32_to_global_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store gv_v4f32
; CHECK-NEXT: # fallthrough-return
  store <4 x float> %v , <4 x float>* @gv_v4f32
  ret void
}

; ==============================================================================
; 2 x double
; ==============================================================================
define <2 x double> @load_v2f64(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64:
; CHECK: .functype load_v2f64 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %v = load <2 x double>, <2 x double>* %p
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64(double* %p) {
; CHECK-LABEL: load_splat_v2f64:
; CHECK: .functype load_splat_v2f64 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_splat 0
; CHECK-NEXT: # fallthrough-return
  %e = load double, double* %p
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_v2f64_with_folded_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_folded_offset:
; CHECK: .functype load_v2f64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_folded_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_folded_offset:
; CHECK: .functype load_splat_v2f64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_splat 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint double* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to double*
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_v2f64_with_folded_gep_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_folded_gep_offset:
; CHECK: .functype load_v2f64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 16
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_folded_gep_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_folded_gep_offset:
; CHECK: .functype load_splat_v2f64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_splat 8
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds double, double* %p, i32 1
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_v2f64_with_unfolded_gep_negative_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_gep_negative_offset:
; CHECK: .functype load_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_unfolded_gep_negative_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_negative_offset:
; CHECK: .functype load_splat_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load64_splat 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds double, double* %p, i32 -1
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_v2f64_with_unfolded_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_offset:
; CHECK: .functype load_v2f64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_unfolded_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_offset:
; CHECK: .functype load_splat_v2f64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load64_splat 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint double* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to double*
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_v2f64_with_unfolded_gep_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_gep_offset:
; CHECK: .functype load_v2f64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr <2 x double>, <2 x double>* %p, i32 1
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_unfolded_gep_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_offset:
; CHECK: .functype load_splat_v2f64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: v128.load64_splat 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr double, double* %p, i32 1
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_v2f64_from_numeric_address() {
; CHECK-LABEL: load_v2f64_from_numeric_address:
; CHECK: .functype load_v2f64_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load 32
; CHECK-NEXT: # fallthrough-return
  %s = inttoptr i32 32 to <2 x double>*
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_from_numeric_address() {
; CHECK-LABEL: load_splat_v2f64_from_numeric_address:
; CHECK: .functype load_splat_v2f64_from_numeric_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load64_splat 32
; CHECK-NEXT: # fallthrough-return
  %s = inttoptr i32 32 to double*
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

@gv_v2f64 = global <2 x double> <double 42., double 42.>
define <2 x double> @load_v2f64_from_global_address() {
; CHECK-LABEL: load_v2f64_from_global_address:
; CHECK: .functype load_v2f64_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load gv_v2f64
; CHECK-NEXT: # fallthrough-return
  %v = load <2 x double>, <2 x double>* @gv_v2f64
  ret <2 x double> %v
}

@gv_f64 = global double 42.
define <2 x double> @load_splat_v2f64_from_global_address() {
; CHECK-LABEL: load_splat_v2f64_from_global_address:
; CHECK: .functype load_splat_v2f64_from_global_address () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: v128.load64_splat gv_f64
; CHECK-NEXT: # fallthrough-return
  %e = load double, double* @gv_f64
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define void @store_v2f64(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64:
; CHECK: .functype store_v2f64 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
  store <2 x double> %v , <2 x double>* %p
  ret void
}

define void @store_v2f64_with_folded_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_folded_offset:
; CHECK: .functype store_v2f64_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  store <2 x double> %v , <2 x double>* %s
  ret void
}

define void @store_v2f64_with_folded_gep_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_folded_gep_offset:
; CHECK: .functype store_v2f64_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 16
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
  store <2 x double> %v , <2 x double>* %s
  ret void
}

define void @store_v2f64_with_unfolded_gep_negative_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_gep_negative_offset:
; CHECK: .functype store_v2f64_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const -16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
  store <2 x double> %v , <2 x double>* %s
  ret void
}

define void @store_v2f64_with_unfolded_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_offset:
; CHECK: .functype store_v2f64_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  store <2 x double> %v , <2 x double>* %s
  ret void
}

define void @store_v2f64_with_unfolded_gep_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_gep_offset:
; CHECK: .functype store_v2f64_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
  %s = getelementptr <2 x double>, <2 x double>* %p, i32 1
  store <2 x double> %v , <2 x double>* %s
  ret void
}

define void @store_v2f64_to_numeric_address(<2 x double> %v) {
; CHECK-LABEL: store_v2f64_to_numeric_address:
; CHECK: .functype store_v2f64_to_numeric_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store 32
; CHECK-NEXT: # fallthrough-return
  %s = inttoptr i32 32 to <2 x double>*
  store <2 x double> %v , <2 x double>* %s
  ret void
}

define void @store_v2f64_to_global_address(<2 x double> %v) {
; CHECK-LABEL: store_v2f64_to_global_address:
; CHECK: .functype store_v2f64_to_global_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store gv_v2f64
; CHECK-NEXT: # fallthrough-return
  store <2 x double> %v , <2 x double>* @gv_v2f64
  ret void
}