; RUN: opt -mtriple=amdgcn-amd-amdhsa -basic-aa -load-store-vectorizer -S -o - %s | FileCheck %s

; Tests for the load-store vectorizer on adjacent accesses where at least one
; element is a pointer. Per the datalayout below, addrspace(1) pointers are
; 64 bits wide and addrspace(3) pointers are 32 bits wide. The merge_* tests
; expect a single <2 x iN> access with inttoptr/ptrtoint (or bitcast for
; double) conversions on the elements; the no_merge_* tests expect the two
; scalar accesses to be left alone.

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"

; NOTE(review): this intrinsic is declared but not called by any function
; visible in this file; kept so the module is unchanged.
declare i32 @llvm.amdgcn.workitem.id.x() #1

; Two adjacent 64-bit pointer loads and two adjacent null-pointer stores:
; both pairs are expected to become <2 x i64> accesses.
; CHECK-LABEL: @merge_v2p1i8(
; CHECK: load <2 x i64>
; CHECK: inttoptr i64 %{{[^ ]+}} to i8 addrspace(1)*
; CHECK: inttoptr i64 %{{[^ ]+}} to i8 addrspace(1)*
; CHECK: store <2 x i64> zeroinitializer
define amdgpu_kernel void @merge_v2p1i8(i8 addrspace(1)* addrspace(1)* nocapture %a, i8 addrspace(1)* addrspace(1)* nocapture readonly %b) #0 {
entry:
  %a.1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a, i64 1
  %b.1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %b, i64 1

  %ld.c = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %b, align 4
  %ld.c.idx.1 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %b.1, align 4

  store i8 addrspace(1)* null, i8 addrspace(1)* addrspace(1)* %a, align 4
  store i8 addrspace(1)* null, i8 addrspace(1)* addrspace(1)* %a.1, align 4

  ret void
}

; Same shape as above with 32-bit addrspace(3) pointers: expected to become
; <2 x i32> accesses.
; CHECK-LABEL: @merge_v2p3i8(
; CHECK: load <2 x i32>
; CHECK: inttoptr i32 %{{[^ ]+}} to i8 addrspace(3)*
; CHECK: inttoptr i32 %{{[^ ]+}} to i8 addrspace(3)*
; CHECK: store <2 x i32> zeroinitializer
define amdgpu_kernel void @merge_v2p3i8(i8 addrspace(3)* addrspace(3)* nocapture %a, i8 addrspace(3)* addrspace(3)* nocapture readonly %b) #0 {
entry:
  %a.1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %a, i64 1
  %b.1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %b, i64 1

  %ld.c = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %b, align 4
  %ld.c.idx.1 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %b.1, align 4

  store i8 addrspace(3)* null, i8 addrspace(3)* addrspace(3)* %a, align 4
  store i8 addrspace(3)* null, i8 addrspace(3)* addrspace(3)* %a.1, align 4

  ret void
}

; i64 load at element 0 followed by a 64-bit pointer load at element 1.
; CHECK-LABEL: @merge_load_i64_ptr64(
; CHECK: load <2 x i64>
; CHECK: [[ELT1:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 1
; CHECK: inttoptr i64 [[ELT1]] to i8 addrspace(1)*
define amdgpu_kernel void @merge_load_i64_ptr64(i64 addrspace(1)* nocapture %a) #0 {
entry:
  %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1
  %a.1.cast = bitcast i64 addrspace(1)* %a.1 to i8 addrspace(1)* addrspace(1)*

  %ld.0 = load i64, i64 addrspace(1)* %a
  %ld.1 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a.1.cast

  ret void
}

; 64-bit pointer load at element 0 followed by an i64 load at element 1.
; CHECK-LABEL: @merge_load_ptr64_i64(
; CHECK: load <2 x i64>
; CHECK: [[ELT0:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 0
; CHECK: inttoptr i64 [[ELT0]] to i8 addrspace(1)*
define amdgpu_kernel void @merge_load_ptr64_i64(i64 addrspace(1)* nocapture %a) #0 {
entry:
  %a.cast = bitcast i64 addrspace(1)* %a to i8 addrspace(1)* addrspace(1)*
  %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1

  %ld.0 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a.cast
  %ld.1 = load i64, i64 addrspace(1)* %a.1

  ret void
}

; 64-bit pointer store at element 0 followed by an i64 store at element 1.
; CHECK-LABEL: @merge_store_ptr64_i64(
; CHECK: [[ELT0:%[^ ]+]] = ptrtoint i8 addrspace(1)* %ptr0 to i64
; CHECK: insertelement <2 x i64> poison, i64 [[ELT0]], i32 0
; CHECK: store <2 x i64>
define amdgpu_kernel void @merge_store_ptr64_i64(i64 addrspace(1)* nocapture %a, i8 addrspace(1)* %ptr0, i64 %val1) #0 {
entry:
  %a.cast = bitcast i64 addrspace(1)* %a to i8 addrspace(1)* addrspace(1)*
  %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1


  store i8 addrspace(1)* %ptr0, i8 addrspace(1)* addrspace(1)* %a.cast
  store i64 %val1, i64 addrspace(1)* %a.1

  ret void
}

; i64 store at element 0 followed by a 64-bit pointer store at element 1.
; CHECK-LABEL: @merge_store_i64_ptr64(
; CHECK: [[ELT1:%[^ ]+]] = ptrtoint i8 addrspace(1)* %ptr1 to i64
; CHECK: insertelement <2 x i64> %{{[^ ]+}}, i64 [[ELT1]], i32 1
; CHECK: store <2 x i64>
define amdgpu_kernel void @merge_store_i64_ptr64(i8 addrspace(1)* addrspace(1)* nocapture %a, i64 %val0, i8 addrspace(1)* %ptr1) #0 {
entry:
  %a.1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a, i64 1
  %a.cast = bitcast i8 addrspace(1)* addrspace(1)* %a to i64 addrspace(1)*

  store i64 %val0, i64 addrspace(1)* %a.cast
  store i8 addrspace(1)* %ptr1, i8 addrspace(1)* addrspace(1)* %a.1

  ret void
}

; i32 load at element 0 followed by a 32-bit pointer load at element 1.
; CHECK-LABEL: @merge_load_i32_ptr32(
; CHECK: load <2 x i32>
; CHECK: [[ELT1:%[^ ]+]] = extractelement <2 x i32> %{{[^ ]+}}, i32 1
; CHECK: inttoptr i32 [[ELT1]] to i8 addrspace(3)*
define amdgpu_kernel void @merge_load_i32_ptr32(i32 addrspace(3)* nocapture %a) #0 {
entry:
  %a.1 = getelementptr inbounds i32, i32 addrspace(3)* %a, i32 1
  %a.1.cast = bitcast i32 addrspace(3)* %a.1 to i8 addrspace(3)* addrspace(3)*

  %ld.0 = load i32, i32 addrspace(3)* %a
  %ld.1 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %a.1.cast

  ret void
}

; 32-bit pointer load at element 0 followed by an i32 load at element 1.
; CHECK-LABEL: @merge_load_ptr32_i32(
; CHECK: load <2 x i32>
; CHECK: [[ELT0:%[^ ]+]] = extractelement <2 x i32> %{{[^ ]+}}, i32 0
; CHECK: inttoptr i32 [[ELT0]] to i8 addrspace(3)*
define amdgpu_kernel void @merge_load_ptr32_i32(i32 addrspace(3)* nocapture %a) #0 {
entry:
  %a.cast = bitcast i32 addrspace(3)* %a to i8 addrspace(3)* addrspace(3)*
  %a.1 = getelementptr inbounds i32, i32 addrspace(3)* %a, i32 1

  %ld.0 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %a.cast
  %ld.1 = load i32, i32 addrspace(3)* %a.1

  ret void
}

; 32-bit pointer store at element 0 followed by an i32 store at element 1.
; CHECK-LABEL: @merge_store_ptr32_i32(
; CHECK: [[ELT0:%[^ ]+]] = ptrtoint i8 addrspace(3)* %ptr0 to i32
; CHECK: insertelement <2 x i32> poison, i32 [[ELT0]], i32 0
; CHECK: store <2 x i32>
define amdgpu_kernel void @merge_store_ptr32_i32(i32 addrspace(3)* nocapture %a, i8 addrspace(3)* %ptr0, i32 %val1) #0 {
entry:
  %a.cast = bitcast i32 addrspace(3)* %a to i8 addrspace(3)* addrspace(3)*
  %a.1 = getelementptr inbounds i32, i32 addrspace(3)* %a, i32 1

  store i8 addrspace(3)* %ptr0, i8 addrspace(3)* addrspace(3)* %a.cast
  store i32 %val1, i32 addrspace(3)* %a.1

  ret void
}

; i32 store at element 0 followed by a 32-bit pointer store at element 1.
; CHECK-LABEL: @merge_store_i32_ptr32(
; CHECK: [[ELT1:%[^ ]+]] = ptrtoint i8 addrspace(3)* %ptr1 to i32
; CHECK: insertelement <2 x i32> %{{[^ ]+}}, i32 [[ELT1]], i32 1
; CHECK: store <2 x i32>
define amdgpu_kernel void @merge_store_i32_ptr32(i8 addrspace(3)* addrspace(3)* nocapture %a, i32 %val0, i8 addrspace(3)* %ptr1) #0 {
entry:
  %a.1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %a, i32 1
  %a.cast = bitcast i8 addrspace(3)* addrspace(3)* %a to i32 addrspace(3)*

  store i32 %val0, i32 addrspace(3)* %a.cast
  store i8 addrspace(3)* %ptr1, i8 addrspace(3)* addrspace(3)* %a.1

  ret void
}

; Negative test: a 32-bit addrspace(3) pointer value and an i64 value are
; stored to addrspace(1) memory; both stores are expected to stay scalar.
; CHECK-LABEL: @no_merge_store_ptr32_i64(
; CHECK: store i8 addrspace(3)*
; CHECK: store i64
define amdgpu_kernel void @no_merge_store_ptr32_i64(i64 addrspace(1)* nocapture %a, i8 addrspace(3)* %ptr0, i64 %val1) #0 {
entry:
  %a.cast = bitcast i64 addrspace(1)* %a to i8 addrspace(3)* addrspace(1)*
  %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1


  store i8 addrspace(3)* %ptr0, i8 addrspace(3)* addrspace(1)* %a.cast
  store i64 %val1, i64 addrspace(1)* %a.1

  ret void
}

; Negative test: i64 store followed by a 32-bit addrspace(3) pointer store;
; both stores are expected to stay scalar.
; CHECK-LABEL: @no_merge_store_i64_ptr32(
; CHECK: store i64
; CHECK: store i8 addrspace(3)*
define amdgpu_kernel void @no_merge_store_i64_ptr32(i8 addrspace(3)* addrspace(1)* nocapture %a, i64 %val0, i8 addrspace(3)* %ptr1) #0 {
entry:
  %a.1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(1)* %a, i64 1
  %a.cast = bitcast i8 addrspace(3)* addrspace(1)* %a to i64 addrspace(1)*

  store i64 %val0, i64 addrspace(1)* %a.cast
  store i8 addrspace(3)* %ptr1, i8 addrspace(3)* addrspace(1)* %a.1

  ret void
}

; Negative test: i64 load followed by a 32-bit addrspace(3) pointer load;
; both loads are expected to stay scalar.
; CHECK-LABEL: @no_merge_load_i64_ptr32(
; CHECK: load i64,
; CHECK: load i8 addrspace(3)*,
define amdgpu_kernel void @no_merge_load_i64_ptr32(i64 addrspace(1)* nocapture %a) #0 {
entry:
  %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1
  %a.1.cast = bitcast i64 addrspace(1)* %a.1 to i8 addrspace(3)* addrspace(1)*

  %ld.0 = load i64, i64 addrspace(1)* %a
  %ld.1 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(1)* %a.1.cast

  ret void
}

; Negative test: 32-bit addrspace(3) pointer load followed by an i64 load;
; both loads are expected to stay scalar.
; CHECK-LABEL: @no_merge_load_ptr32_i64(
; CHECK: load i8 addrspace(3)*,
; CHECK: load i64,
define amdgpu_kernel void @no_merge_load_ptr32_i64(i64 addrspace(1)* nocapture %a) #0 {
entry:
  %a.cast = bitcast i64 addrspace(1)* %a to i8 addrspace(3)* addrspace(1)*
  %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1

  %ld.0 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(1)* %a.cast
  %ld.1 = load i64, i64 addrspace(1)* %a.1

  ret void
}

; XXX - This isn't merged for some reason
; The check lines below pin the current output: two separate
; <2 x i8 addrspace(1)*> loads and two separate <2 x i8 addrspace(1)*> stores.
; CHECK-LABEL: @merge_v2p1i8_v2p1i8(
; CHECK: load <2 x i8 addrspace(1)*>
; CHECK: load <2 x i8 addrspace(1)*>
; CHECK: store <2 x i8 addrspace(1)*>
; CHECK: store <2 x i8 addrspace(1)*>
define amdgpu_kernel void @merge_v2p1i8_v2p1i8(<2 x i8 addrspace(1)*> addrspace(1)* nocapture noalias %a, <2 x i8 addrspace(1)*> addrspace(1)* nocapture readonly noalias %b) #0 {
entry:
  %a.1 = getelementptr inbounds <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*> addrspace(1)* %a, i64 1
  %b.1 = getelementptr inbounds <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*> addrspace(1)* %b, i64 1

  %ld.c = load <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*> addrspace(1)* %b, align 4
  %ld.c.idx.1 = load <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*> addrspace(1)* %b.1, align 4

  store <2 x i8 addrspace(1)*> zeroinitializer, <2 x i8 addrspace(1)*> addrspace(1)* %a, align 4
  store <2 x i8 addrspace(1)*> zeroinitializer, <2 x i8 addrspace(1)*> addrspace(1)* %a.1, align 4
  ret void
}

; 64-bit pointer load at element 0 followed by a double load at element 1:
; the pointer is recovered with inttoptr and the double with bitcast.
; CHECK-LABEL: @merge_load_ptr64_f64(
; CHECK: load <2 x i64>
; CHECK: [[ELT0:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 0
; CHECK: [[ELT0_INT:%[^ ]+]] = inttoptr i64 [[ELT0]] to i8 addrspace(1)*
; CHECK: [[ELT1_INT:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 1
; CHECK: bitcast i64 [[ELT1_INT]] to double
define amdgpu_kernel void @merge_load_ptr64_f64(double addrspace(1)* nocapture %a) #0 {
entry:
  %a.cast = bitcast double addrspace(1)* %a to i8 addrspace(1)* addrspace(1)*
  %a.1 = getelementptr inbounds double, double addrspace(1)* %a, i64 1

  %ld.0 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a.cast
  %ld.1 = load double, double addrspace(1)* %a.1

  ret void
}

; double load at element 0 followed by a 64-bit pointer load at element 1.
; CHECK-LABEL: @merge_load_f64_ptr64(
; CHECK: load <2 x i64>
; CHECK: [[ELT0:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 0
; CHECK: bitcast i64 [[ELT0]] to double
; CHECK: [[ELT1:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 1
; CHECK: inttoptr i64 [[ELT1]] to i8 addrspace(1)*
define amdgpu_kernel void @merge_load_f64_ptr64(double addrspace(1)* nocapture %a) #0 {
entry:
  %a.1 = getelementptr inbounds double, double addrspace(1)* %a, i64 1
  %a.1.cast = bitcast double addrspace(1)* %a.1 to i8 addrspace(1)* addrspace(1)*

  %ld.0 = load double, double addrspace(1)* %a
  %ld.1 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a.1.cast

  ret void
}

; 64-bit pointer store at element 0 followed by a double store at element 1:
; the pointer is converted with ptrtoint and the double with bitcast.
; CHECK-LABEL: @merge_store_ptr64_f64(
; CHECK: [[ELT0_INT:%[^ ]+]] = ptrtoint i8 addrspace(1)* %ptr0 to i64
; CHECK: insertelement <2 x i64> poison, i64 [[ELT0_INT]], i32 0
; CHECK: [[ELT1_INT:%[^ ]+]] = bitcast double %val1 to i64
; CHECK: insertelement <2 x i64> %{{[^ ]+}}, i64 [[ELT1_INT]], i32 1
; CHECK: store <2 x i64>
define amdgpu_kernel void @merge_store_ptr64_f64(double addrspace(1)* nocapture %a, i8 addrspace(1)* %ptr0, double %val1) #0 {
entry:
  %a.cast = bitcast double addrspace(1)* %a to i8 addrspace(1)* addrspace(1)*
  %a.1 = getelementptr inbounds double, double addrspace(1)* %a, i64 1

  store i8 addrspace(1)* %ptr0, i8 addrspace(1)* addrspace(1)* %a.cast
  store double %val1, double addrspace(1)* %a.1

  ret void
}

; double store at element 0 followed by a 64-bit pointer store at element 1.
; CHECK-LABEL: @merge_store_f64_ptr64(
; CHECK: [[ELT0_INT:%[^ ]+]] = bitcast double %val0 to i64
; CHECK: insertelement <2 x i64> poison, i64 [[ELT0_INT]], i32 0
; CHECK: [[ELT1_INT:%[^ ]+]] = ptrtoint i8 addrspace(1)* %ptr1 to i64
; CHECK: insertelement <2 x i64> %{{[^ ]+}}, i64 [[ELT1_INT]], i32 1
; CHECK: store <2 x i64>
define amdgpu_kernel void @merge_store_f64_ptr64(i8 addrspace(1)* addrspace(1)* nocapture %a, double %val0, i8 addrspace(1)* %ptr1) #0 {
entry:
  %a.1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a, i64 1
  %a.cast = bitcast i8 addrspace(1)* addrspace(1)* %a to double addrspace(1)*

  store double %val0, double addrspace(1)* %a.cast
  store i8 addrspace(1)* %ptr1, i8 addrspace(1)* addrspace(1)* %a.1

  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }