; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck %s

; Purpose: compile the kernel below with llc for the amdgcn-amd-amdhsa triple
; (with machine-instruction verification enabled) and verify that the single
; llvm.dbg.value call in @wobble produces exactly one ";DEBUG_VALUE:" comment
; in the output. The arithmetic in the kernel body is not matched by any
; FileCheck directive; only the debug-value emission is.

%struct.wombat = type { [4 x i32], [4 x i32], [4 x i32] }

; Kernel under test. It takes one global (addrspace(1)) byte pointer, carries
; attribute group #0 (target-cpu gfx900) and is attached to subprogram !4,
; whose name "foo" is what appears in the expected ";DEBUG_VALUE:" line.
define amdgpu_kernel void @wobble(i8 addrspace(1)* nocapture readonly %arg) #0 !dbg !4 {
bb:
  %tmp = load i32, i32 addrspace(1)* undef, align 4
  %tmp1 = load <4 x float>, <4 x float> addrspace(1)* undef, align 16
  %tmp2 = sext i32 %tmp to i64
  %tmp3 = shufflevector <4 x float> undef, <4 x float> %tmp1, <2 x i32> <i32 3, i32 7>
  %tmp4 = call float @barney() #2
  %tmp5 = getelementptr inbounds i8, i8 addrspace(1)* %arg, i64 0
  %tmp6 = bitcast i8 addrspace(1)* %tmp5 to <2 x float> addrspace(1)*
  %tmp7 = getelementptr inbounds i8, i8 addrspace(1)* %arg, i64 0
  %tmp8 = bitcast i8 addrspace(1)* %tmp7 to %struct.wombat addrspace(1)*
  %tmp9 = getelementptr inbounds %struct.wombat, %struct.wombat addrspace(1)* %tmp8, i64 %tmp2, i32 2, i64 0
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp11 = sext i32 %tmp10 to i64
  %tmp12 = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %tmp6, i64 %tmp11
  %tmp13 = bitcast <2 x float> addrspace(1)* %tmp12 to i64 addrspace(1)*
  %tmp14 = getelementptr inbounds i8, i8 addrspace(1)* %arg, i64 undef
  %tmp15 = bitcast i8 addrspace(1)* %tmp14 to <4 x float> addrspace(1)*
  %tmp16 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %tmp15, i64 undef
  %tmp17 = load <4 x float>, <4 x float> addrspace(1)* %tmp16, align 16
  %tmp18 = fsub <4 x float> %tmp17, %tmp17
  %ext = extractelement <4 x float> %tmp18, i32 1
  %tmp19 = fadd float %ext, 0.000000e+00
  %tmp20 = fcmp oeq float %tmp19, 0.000000e+00
  br i1 %tmp20, label %bb21, label %bb25

bb21:                                             ; preds = %bb
  %tmp22 = fmul <4 x float> %tmp18, %tmp18
  %tmp23 = fadd <4 x float> %tmp22, %tmp22
  %tmp24 = fmul <4 x float> %tmp23, %tmp23
  br label %bb28

bb25:                                             ; preds = %bb
  %tmp26 = insertelement <4 x float> undef, float 0.000000e+00, i32 1
  %tmp27 = insertelement <4 x float> %tmp26, float undef, i32 2
  br label %bb28

bb28:                                             ; preds = %bb25, %bb21
  ; %tmp29 merges the vectors built on the two paths above; it is the value
  ; that the dbg.value call further down describes.
  %tmp29 = phi <4 x float> [ %tmp27, %bb25 ], [ %tmp24, %bb21 ]
  store <4 x float> %tmp29, <4 x float> addrspace(5)* undef, align 16
  %tmp30 = getelementptr inbounds %struct.wombat, %struct.wombat addrspace(1)* %tmp8, i64 %tmp2, i32 2, i64 2
  %tmp31 = load i32, i32 addrspace(1)* %tmp30, align 4
  %tmp32 = sext i32 %tmp31 to i64
  %tmp33 = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %tmp6, i64 %tmp32
  %tmp34 = bitcast <2 x float> addrspace(1)* %tmp33 to i64 addrspace(1)*
  %tmp35 = load i64, i64 addrspace(1)* %tmp34, align 8
  %tmp36 = load i32, i32 addrspace(1)* undef, align 4
  %tmp37 = sext i32 %tmp36 to i64
  %tmp38 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* null, i64 %tmp37
  %tmp39 = load <4 x float>, <4 x float> addrspace(1)* %tmp38, align 16
  %tmp40 = load <4 x float>, <4 x float> addrspace(1)* undef, align 16
  %tmp41 = fsub <4 x float> zeroinitializer, %tmp40
  %tmp42 = fsub <4 x float> %tmp39, %tmp40
  %tmp43 = extractelement <4 x float> %tmp40, i32 1
  %tmp44 = fsub float %tmp43, undef
  %tmp45 = fadd float undef, undef
  %tmp46 = fdiv float %tmp44, %tmp45
  %tmp47 = insertelement <4 x float> undef, float %tmp46, i32 0
  %tmp48 = shufflevector <4 x float> %tmp47, <4 x float> undef, <4 x i32> zeroinitializer
  %tmp49 = fsub <4 x float> %tmp48, %tmp40
  %tmp50 = extractelement <4 x float> %tmp41, i32 1
  %tmp51 = extractelement <4 x float> %tmp42, i32 2
  %tmp52 = fmul float undef, undef
  %tmp53 = fadd float %tmp52, undef
  %tmp54 = fadd float %tmp51, %tmp53
  %tmp55 = extractelement <4 x float> %tmp49, i32 1
  %tmp56 = fmul float %tmp55, %tmp50
  %tmp57 = fmul float %tmp54, %tmp56
  %tmp58 = fdiv float %tmp57, 0.000000e+00
  ; Make sure this isn't double emitted
  ; The positive match below is bracketed by two negative matches, so the
  ; test fails if any other ";DEBUG_VALUE:" comment appears before or after
  ; the expected one.
  ; CHECK-NOT: ;DEBUG_VALUE:
  ; CHECK: ;DEBUG_VALUE: foo:var <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef]
  ; CHECK-NOT: ;DEBUG_VALUE:
  ; Binds local variable !3 ("var") to %tmp29 with an expression that ends in
  ; DW_OP_xderef; this is the only debug intrinsic call in the module.
  call void @llvm.dbg.value(metadata <4 x float> %tmp29, metadata !3, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)) #2, !dbg !5
  %tmp59 = bitcast i64 %tmp35 to <2 x float>
  %tmp60 = insertelement <2 x float> undef, float %tmp58, i32 0
  %tmp61 = shufflevector <2 x float> %tmp60, <2 x float> undef, <2 x i32> zeroinitializer
  %tmp62 = fmul <2 x float> %tmp61, undef
  %tmp63 = fsub <2 x float> %tmp62, %tmp59
  %tmp64 = extractelement <2 x float> %tmp63, i64 0
  call void @eggs(float %tmp64) #2
  store <2 x float> %tmp3, <2 x float> addrspace(1)* undef, align 8
  store float 0.000000e+00, float addrspace(1)* undef, align 4
  ret void
}

; External callees used by @wobble; no bodies are provided in this module.
declare float @barney() #2
declare void @eggs(float) #2
declare void @llvm.dbg.value(metadata, metadata, metadata) #1

; #0: attributes of the kernel, including the gfx900 target CPU.
; #1: attributes of the llvm.dbg.value declaration.
; #2: attributes of the external callees and of the call sites in @wobble.
attributes #0 = { convergent nounwind "target-cpu"="gfx900" }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { nounwind }

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2}

; Debug metadata: one compile unit (!0) for file foo.cl (!1), the subprogram
; "foo" (!4) attached to @wobble, the local variable "var" (!3) scoped to it,
; and the source location (!5) used by the dbg.value call.
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
!1 = !DIFile(filename: "foo.cl", directory: "/tmp")
!2 = !{i32 2, !"Debug Info Version", i32 3}
!3 = !DILocalVariable(name: "var", arg: 8, scope: !4)
!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, type: !12, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, unit: !0)
!5 = !DILocation(line: 69, scope: !4)
!12 = !DISubroutineType(types: !13)
!13 = !{null, !14}
!14 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)