; Test that floating-point strict compares are omitted if CC already has the
; right value.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN:   -enable-misched=0 -no-integrated-as | FileCheck %s
;
; We need -enable-misched=0 to make sure f12 and following routines really
; test the compare elimination pass.
;
; Every function below is marked strictfp (attribute group #0) and performs
; its compare through the constrained fcmp intrinsic with "fpexcept.strict".


declare float @llvm.fabs.f32(float %f)

; Test addition followed by EQ, which can use the CC result of the addition.
; The expected output has the conditional return (BER) immediately after
; AEBR, i.e. no separate compare instruction is emitted for the "oeq" test.
define float @f1(float %a, float %b, float *%dest) #0 {
; CHECK-LABEL: f1:
; CHECK: aebr %f0, %f2
; CHECK-NEXT: ber %r14
; CHECK: br %r14
entry:
  ; Strict addition with dynamic rounding; its result feeds the compare.
  %res = call float @llvm.experimental.constrained.fadd.f32(
                        float %a, float %b,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ; Strict compare of the sum against 0.0.
  %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
                        float %res, float 0.0,
                        metadata !"oeq",
                        metadata !"fpexcept.strict") #0
  br i1 %cmp, label %exit, label %store

store:
  ; Only reached when the compare is false.
  store float %b, float *%dest
  br label %exit

exit:
  ret float %res
}

; ...and again with LT.
; Same shape as f1 with predicate "olt"; the expected conditional return
; directly after AEBR is BLR.
define float @f2(float %a, float %b, float *%dest) #0 {
; CHECK-LABEL: f2:
; CHECK: aebr %f0, %f2
; CHECK-NEXT: blr %r14
; CHECK: br %r14
entry:
  %res = call float @llvm.experimental.constrained.fadd.f32(
                        float %a, float %b,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
                        float %res, float 0.0,
                        metadata !"olt",
                        metadata !"fpexcept.strict") #0
  br i1 %cmp, label %exit, label %store

store:
  store float %b, float *%dest
  br label %exit

exit:
  ret float %res
}

; ...and again with GT.
; Strict fadd followed by a strict "ogt" compare against 0.0; the expected
; conditional return directly after AEBR is BHR.
define float @f3(float %a, float %b, float *%dest) #0 {
; CHECK-LABEL: f3:
; CHECK: aebr %f0, %f2
; CHECK-NEXT: bhr %r14
; CHECK: br %r14
entry:
  %res = call float @llvm.experimental.constrained.fadd.f32(
                        float %a, float %b,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
                        float %res, float 0.0,
                        metadata !"ogt",
                        metadata !"fpexcept.strict") #0
  br i1 %cmp, label %exit, label %store

store:
  store float %b, float *%dest
  br label %exit

exit:
  ret float %res
}

; ...and again with UEQ.
; The unordered-or-equal predicate is expected to map to BNLHR directly
; after AEBR.
define float @f4(float %a, float %b, float *%dest) #0 {
; CHECK-LABEL: f4:
; CHECK: aebr %f0, %f2
; CHECK-NEXT: bnlhr %r14
; CHECK: br %r14
entry:
  %res = call float @llvm.experimental.constrained.fadd.f32(
                        float %a, float %b,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
                        float %res, float 0.0,
                        metadata !"ueq",
                        metadata !"fpexcept.strict") #0
  br i1 %cmp, label %exit, label %store

store:
  store float %b, float *%dest
  br label %exit

exit:
  ret float %res
}

; Subtraction also provides a zero-based CC value.
; The subtrahend is loaded from %dest, and the expected output folds that
; load into SEB with a memory operand; the "ult" branch (BNHER) follows
; directly with no separate compare.
define float @f5(float %a, float %b, float *%dest) #0 {
; CHECK-LABEL: f5:
; CHECK: seb %f0, 0(%r2)
; CHECK-NEXT: bnher %r14
; CHECK: br %r14
entry:
  %cur = load float, float *%dest
  %res = call float @llvm.experimental.constrained.fsub.f32(
                        float %a, float %cur,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
                        float %res, float 0.0,
                        metadata !"ult",
                        metadata !"fpexcept.strict") #0
  br i1 %cmp, label %exit, label %store

store:
  store float %b, float *%dest
  br label %exit

exit:
  ret float %res
}

; Test the result of LOAD POSITIVE.  We cannot omit the LTEBR.
; The fabs call is expected to become LPDFR, and an explicit LTEBR must
; still appear between it and the "ogt" conditional return.
define float @f6(float %dummy, float %a, float *%dest) #0 {
; CHECK-LABEL: f6:
; CHECK: lpdfr %f0, %f2
; CHECK-NEXT: ltebr %f0, %f0
; CHECK-NEXT: bhr %r14
; CHECK: br %r14
entry:
  %res = call float @llvm.fabs.f32(float %a) #0
  %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
                        float %res, float 0.0,
                        metadata !"ogt",
                        metadata !"fpexcept.strict") #0
  br i1 %cmp, label %exit, label %store

store:
  ; Unlike f1-f5, these tests store the computed result itself.
  store float %res, float *%dest
  br label %exit

exit:
  ret float %res
}

; Test the result of LOAD NEGATIVE.  We cannot omit the LTEBR.
; The negated absolute value is expected to become a single LNDFR, followed
; by an explicit LTEBR before the "olt" conditional return.
define float @f7(float %dummy, float %a, float *%dest) #0 {
; CHECK-LABEL: f7:
; CHECK: lndfr %f0, %f2
; CHECK-NEXT: ltebr %f0, %f0
; CHECK-NEXT: blr %r14
; CHECK: br %r14
entry:
  %abs = call float @llvm.fabs.f32(float %a) #0
  %res = fneg float %abs
  %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
                        float %res, float 0.0,
                        metadata !"olt",
                        metadata !"fpexcept.strict") #0
  br i1 %cmp, label %exit, label %store

store:
  store float %res, float *%dest
  br label %exit

exit:
  ret float %res
}

; Test the result of LOAD COMPLEMENT.  We cannot omit the LTEBR.
; The fneg is expected to become LCDFR, followed by an explicit LTEBR before
; the "ole" conditional return.
define float @f8(float %dummy, float %a, float *%dest) #0 {
; CHECK-LABEL: f8:
; CHECK: lcdfr %f0, %f2
; CHECK-NEXT: ltebr %f0, %f0
; CHECK-NEXT: bler %r14
; CHECK: br %r14
entry:
  %res = fneg float %a
  %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
                        float %res, float 0.0,
                        metadata !"ole",
                        metadata !"fpexcept.strict") #0
  br i1 %cmp, label %exit, label %store

store:
  store float %res, float *%dest
  br label %exit

exit:
  ret float %res
}

; Multiplication (for example) does not modify CC.
; Strict fmul followed by a strict "one" compare against 0.0; the expected
; output keeps an explicit LTEBR between MEEBR and the conditional return.
define float @f9(float %a, float %b, float *%dest) #0 {
; CHECK-LABEL: f9:
; CHECK: meebr %f0, %f2
; CHECK-NEXT: ltebr %f0, %f0
; CHECK-NEXT: blhr %r14
; CHECK: br %r14
entry:
  %res = call float @llvm.experimental.constrained.fmul.f32(
                        float %a, float %b,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
                        float %res, float 0.0,
                        metadata !"one",
                        metadata !"fpexcept.strict") #0
  br i1 %cmp, label %exit, label %store

store:
  store float %b, float *%dest
  br label %exit

exit:
  ret float %res
}

; Test a combination involving a CC-setting instruction followed by
; a non-CC-setting instruction.
; The compared value is the quotient, not the sum, so the expected output
; has an explicit LTEBR after DEBR before the "une" conditional return.
define float @f10(float %a, float %b, float %c, float *%dest) #0 {
; CHECK-LABEL: f10:
; CHECK: aebr %f0, %f2
; CHECK-NEXT: debr %f0, %f4
; CHECK-NEXT: ltebr %f0, %f0
; CHECK-NEXT: bner %r14
; CHECK: br %r14
entry:
  %add = call float @llvm.experimental.constrained.fadd.f32(
                        float %a, float %b,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  %res = call float @llvm.experimental.constrained.fdiv.f32(
                        float %add, float %c,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
                        float %res, float 0.0,
                        metadata !"une",
                        metadata !"fpexcept.strict") #0
  br i1 %cmp, label %exit, label %store

store:
  store float %b, float *%dest
  br label %exit

exit:
  ret float %res
}

; Test a case where CC is set based on a different register from the
; compare input.
; The compare tests %add (in %f0 per the expected output) while the last
; arithmetic instruction, SEBR, writes %f4, so an explicit LTEBR on %f0 is
; expected.  The store of %sub and the LTEBR may appear in either order.
define float @f11(float %a, float %b, float %c, float *%dest1, float *%dest2) #0 {
; CHECK-LABEL: f11:
; CHECK: aebr %f0, %f2
; CHECK-NEXT: sebr %f4, %f0
; CHECK-DAG: ste %f4, 0(%r2)
; CHECK-DAG: ltebr %f0, %f0
; CHECK-NEXT: ber %r14
; CHECK: br %r14
entry:
  %add = call float @llvm.experimental.constrained.fadd.f32(
                        float %a, float %b,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  %sub = call float @llvm.experimental.constrained.fsub.f32(
                        float %c, float %add,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  store float %sub, float *%dest1
  ; Note: the compare input is %add, not the more recent %sub.
  %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
                        float %add, float 0.0,
                        metadata !"oeq",
                        metadata !"fpexcept.strict") #0
  br i1 %cmp, label %exit, label %store

store:
  store float %sub, float *%dest2
  br label %exit

exit:
  ret float %add
}

; Test that LER gets converted to LTEBR where useful.
; The inline asm input is tied to %f0 by the "{f0}" constraint; the expected
; output performs the copy into %f0 with LTEBR and has no further compare
; before the "olt" conditional return.
define float @f12(float %dummy, float %val) #0 {
; CHECK-LABEL: f12:
; CHECK: ltebr %f0, %f2
; CHECK-NEXT: #APP
; CHECK-NEXT: blah %f0
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: blr %r14
; CHECK: br %r14
entry:
  ; This asm is not marked sideeffect (contrast with f19).
  %ret = call float asm "blah $1", "=f,{f0}"(float %val) #0
  %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
                        float %val, float 0.0,
                        metadata !"olt",
                        metadata !"fpexcept.strict") #0
  br i1 %cmp, label %exit, label %store

store:
  call void asm sideeffect "blah", ""() #0
  br label %exit

exit:
  ret float %ret
}

; Test that LDR gets converted to LTDBR where useful.
; Double-precision counterpart of f12: the copy into %f0 required by the
; "{f0}" constraint is expected to be an LTDBR, with no further compare.
define double @f13(double %dummy, double %val) #0 {
; CHECK-LABEL: f13:
; CHECK: ltdbr %f0, %f2
; CHECK-NEXT: #APP
; CHECK-NEXT: blah %f0
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: blr %r14
; CHECK: br %r14
entry:
  %ret = call double asm "blah $1", "=f,{f0}"(double %val) #0
  %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(
                        double %val, double 0.0,
                        metadata !"olt",
                        metadata !"fpexcept.strict") #0
  br i1 %cmp, label %exit, label %store

store:
  call void asm sideeffect "blah", ""() #0
  br label %exit

exit:
  ret double %ret
}

; Test that LXR gets converted to LTXBR where useful.
; %val1 is used by both the division and the multiplication and is then
; compared against zero; the expected output starts with LTXBR and has no
; further compare before the "olt" conditional return.
; NOTE(review): the fdiv and fmul below are plain (non-constrained)
; operations inside a strictfp function - confirm this is intentional.
define void @f14(fp128 *%ptr1, fp128 *%ptr2) #0 {
; CHECK-LABEL: f14:
; CHECK: ltxbr
; CHECK-NEXT: dxbr
; CHECK-NEXT: std
; CHECK-NEXT: std
; CHECK-NEXT: mxbr
; CHECK-NEXT: std
; CHECK-NEXT: std
; CHECK-NEXT: blr %r14
; CHECK: br %r14
entry:
  %val1 = load fp128, fp128 *%ptr1
  %val2 = load fp128, fp128 *%ptr2
  %div = fdiv fp128 %val1, %val2
  store fp128 %div, fp128 *%ptr1
  %mul = fmul fp128 %val1, %val2
  store fp128 %mul, fp128 *%ptr2
  %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(
                        fp128 %val1, fp128 0xL00000000000000000000000000000000,
                        metadata !"olt",
                        metadata !"fpexcept.strict") #0
  br i1 %cmp, label %exit, label %store

store:
  call void asm sideeffect "blah", ""() #0
  br label %exit

exit:
  ret void
}

; Test a case where it is the source rather than destination of LER that
; we need.
; Here %val is the first argument and the asm input is tied to %f2, so the
; copy goes from %f0 to %f2 in the expected output.  The compared value is
; the copy's source; LTEBR is still expected, with no further compare.
define float @f15(float %val, float %dummy) #0 {
; CHECK-LABEL: f15:
; CHECK: ltebr %f2, %f0
; CHECK-NEXT: #APP
; CHECK-NEXT: blah %f2
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: blr %r14
; CHECK: br %r14
entry:
  %ret = call float asm "blah $1", "=f,{f2}"(float %val) #0
  %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
                        float %val, float 0.0,
                        metadata !"olt",
                        metadata !"fpexcept.strict") #0
  br i1 %cmp, label %exit, label %store

store:
  call void asm sideeffect "blah", ""() #0
  br label %exit

exit:
  ret float %ret
}

; Test a case where it is the source rather than destination of LDR that
; we need.
; Double-precision counterpart of f15; the expected copy is an LTDBR from
; %f0 to %f2.
define double @f16(double %val, double %dummy) #0 {
; CHECK-LABEL: f16:
; CHECK: ltdbr %f2, %f0
; CHECK-NEXT: #APP
; CHECK-NEXT: blah %f2
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: blr %r14
; CHECK: br %r14
entry:
  %ret = call double asm "blah $1", "=f,{f2}"(double %val) #0
  %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(
                        double %val, double 0.0,
                        metadata !"olt",
                        metadata !"fpexcept.strict") #0
  br i1 %cmp, label %exit, label %store

store:
  call void asm sideeffect "blah", ""() #0
  br label %exit

exit:
  ret double %ret
}

; Repeat f2 with a comparison against -0.
; The only difference from f2 is the -0.0 compare operand; the expected
; output is the same AEBR followed directly by BLR.
define float @f17(float %a, float %b, float *%dest) #0 {
; CHECK-LABEL: f17:
; CHECK: aebr %f0, %f2
; CHECK-NEXT: blr %r14
; CHECK: br %r14
entry:
  %res = call float @llvm.experimental.constrained.fadd.f32(
                        float %a, float %b,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
                        float %res, float -0.0,
                        metadata !"olt",
                        metadata !"fpexcept.strict") #0
  br i1 %cmp, label %exit, label %store

store:
  store float %b, float *%dest
  br label %exit

exit:
  ret float %res
}

; Verify that we cannot omit the compare if there may be an intervening
; change to the exception flags.
; A sideeffect inline asm sits between the addition and the compare, and the
; expected output retains an explicit LTEBR before the conditional return.
define float @f18(float %a, float %b, float *%dest) #0 {
; CHECK-LABEL: f18:
; CHECK: aebr %f0, %f2
; CHECK: ltebr %f0, %f0
; CHECK-NEXT: ber %r14
; CHECK: br %r14
entry:
  %res = call float @llvm.experimental.constrained.fadd.f32(
                        float %a, float %b,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ; Intervening sideeffect asm between the add and the compare.
  call void asm sideeffect "blah", ""() #0
  %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
                        float %res, float 0.0,
                        metadata !"oeq",
                        metadata !"fpexcept.strict") #0
  br i1 %cmp, label %exit, label %store

store:
  store float %b, float *%dest
  br label %exit

exit:
  ret float %res
}

; Verify that we cannot convert LER to LTEBR and omit the compare if
; there may be an intervening change to the exception flags.
; Same shape as f12, except that the asm producing %ret is marked
; sideeffect.  The expected output keeps a plain LER for the copy and
; performs the compare as a separate LTEBR on %f2 after the asm.
define float @f19(float %dummy, float %val) #0 {
; CHECK-LABEL: f19:
; CHECK: ler %f0, %f2
; CHECK-NEXT: #APP
; CHECK-NEXT: blah %f0
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: ltebr %f2, %f2
; CHECK-NEXT: blr %r14
; CHECK: br %r14
entry:
  %ret = call float asm sideeffect "blah $1", "=f,{f0}"(float %val) #0
  %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
                        float %val, float 0.0,
                        metadata !"olt",
                        metadata !"fpexcept.strict") #0
  br i1 %cmp, label %exit, label %store

store:
  call void asm sideeffect "blah", ""() #0
  br label %exit

exit:
  ret float %ret
}

; Attribute group applied to every function and call site in this file.
attributes #0 = { strictfp }

; Constrained floating-point intrinsics used by the tests above.
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata)
declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata)
declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata)
declare i1 @llvm.experimental.constrained.fcmp.f128(fp128, fp128, metadata, metadata)