; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s

; Every function in this file selects between a (usually extended) vector and
; zeroinitializer, using a vector compare of the narrow source as the
; condition. The expected assembly records whether the compare is performed on
; the narrow lanes (mask then sign-extended with sshll) or on the widened lanes.

; The selected value %op is an unrelated argument; %a itself is never extended
; in the IR. Expected code compares on .16b lanes and sign-extends the mask.
define <16 x i32> @no_existing_zext(<16 x i8> %a, <16 x i32> %op) {
; CHECK-LABEL: no_existing_zext:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: movi.16b v5, #10
; CHECK-NEXT: cmhi.16b v0, v0, v5
; CHECK-NEXT: sshll.8h v5, v0, #0
; CHECK-NEXT: sshll2.8h v0, v0, #0
; CHECK-NEXT: sshll.4s v6, v5, #0
; CHECK-NEXT: sshll.4s v7, v0, #0
; CHECK-NEXT: sshll2.4s v0, v0, #0
; CHECK-NEXT: sshll2.4s v5, v5, #0
; CHECK-NEXT: and.16b v4, v4, v0
; CHECK-NEXT: and.16b v5, v2, v5
; CHECK-NEXT: and.16b v2, v3, v7
; CHECK-NEXT: and.16b v0, v1, v6
; CHECK-NEXT: mov.16b v1, v5
; CHECK-NEXT: mov.16b v3, v4
; CHECK-NEXT: ret
entry:
  %cmp = icmp ugt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
  %sel = select <16 x i1> %cmp, <16 x i32> %op, <16 x i32> zeroinitializer
  ret <16 x i32> %sel
}

; zext of %a is selected on a signed compare whose second operand is the
; argument %b rather than a constant splat. Expected code compares on .16b
; lanes (cmgt.16b).
define <16 x i32> @second_compare_operand_not_splat(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: second_compare_operand_not_splat:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: ushll.8h v2, v0, #0
; CHECK-NEXT: ushll2.8h v3, v0, #0
; CHECK-NEXT: cmgt.16b v0, v0, v1
; CHECK-NEXT: ushll.4s v4, v2, #0
; CHECK-NEXT: ushll.4s v5, v3, #0
; CHECK-NEXT: ushll2.4s v1, v2, #0
; CHECK-NEXT: ushll2.4s v2, v3, #0
; CHECK-NEXT: sshll.8h v3, v0, #0
; CHECK-NEXT: sshll2.8h v0, v0, #0
; CHECK-NEXT: sshll.4s v6, v3, #0
; CHECK-NEXT: sshll.4s v7, v0, #0
; CHECK-NEXT: sshll2.4s v0, v0, #0
; CHECK-NEXT: sshll2.4s v16, v3, #0
; CHECK-NEXT: and.16b v3, v2, v0
; CHECK-NEXT: and.16b v1, v1, v16
; CHECK-NEXT: and.16b v2, v5, v7
; CHECK-NEXT: and.16b v0, v4, v6
; CHECK-NEXT: ret
entry:
  %ext = zext <16 x i8> %a to <16 x i32>
  %cmp = icmp sgt <16 x i8> %a, %b
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  ret <16 x i32> %sel
}

; zext of %a is selected on a signed compare (sgt) of %a against splat 10.
; Expected code compares on .16b lanes (cmgt.16b).
define <16 x i32> @same_zext_used_in_cmp_signed_pred_and_select(<16 x i8> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_signed_pred_and_select:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: movi.16b v1, #10
; CHECK-NEXT: ushll.8h v2, v0, #0
; CHECK-NEXT: ushll2.8h v3, v0, #0
; CHECK-NEXT: ushll.4s v4, v2, #0
; CHECK-NEXT: cmgt.16b v0, v0, v1
; CHECK-NEXT: ushll.4s v5, v3, #0
; CHECK-NEXT: ushll2.4s v1, v3, #0
; CHECK-NEXT: sshll.8h v3, v0, #0
; CHECK-NEXT: sshll2.8h v0, v0, #0
; CHECK-NEXT: ushll2.4s v2, v2, #0
; CHECK-NEXT: sshll.4s v6, v3, #0
; CHECK-NEXT: sshll.4s v7, v0, #0
; CHECK-NEXT: sshll2.4s v0, v0, #0
; CHECK-NEXT: sshll2.4s v16, v3, #0
; CHECK-NEXT: and.16b v3, v1, v0
; CHECK-NEXT: and.16b v1, v2, v16
; CHECK-NEXT: and.16b v2, v5, v7
; CHECK-NEXT: and.16b v0, v4, v6
; CHECK-NEXT: ret
entry:
  %ext = zext <16 x i8> %a to <16 x i32>
  %cmp = icmp sgt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  ret <16 x i32> %sel
}

; <8 x i8> zero-extended to <8 x i64> and selected on an unsigned compare (ugt)
; of the same value against splat 10. Expected code compares on the widened
; .2d lanes (cmhi.2d).
define <8 x i64> @same_zext_used_in_cmp_unsigned_pred_and_select_v8i64(<8 x i8> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v8i64:
; CHECK: ; %bb.0:
; CHECK-NEXT: ushll.8h v0, v0, #0
; CHECK-NEXT: mov w8, #10
; CHECK-NEXT: ushll2.4s v2, v0, #0
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: dup.2d v1, x8
; CHECK-NEXT: ushll2.2d v3, v2, #0
; CHECK-NEXT: ushll2.2d v4, v0, #0
; CHECK-NEXT: ushll.2d v0, v0, #0
; CHECK-NEXT: ushll.2d v2, v2, #0
; CHECK-NEXT: cmhi.2d v5, v0, v1
; CHECK-NEXT: cmhi.2d v6, v2, v1
; CHECK-NEXT: cmhi.2d v7, v3, v1
; CHECK-NEXT: cmhi.2d v1, v4, v1
; CHECK-NEXT: and.16b v3, v3, v7
; CHECK-NEXT: and.16b v1, v4, v1
; CHECK-NEXT: and.16b v2, v2, v6
; CHECK-NEXT: and.16b v0, v0, v5
; CHECK-NEXT: ret
  %ext = zext <8 x i8> %a to <8 x i64>
  %cmp = icmp ugt <8 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
  %sel = select <8 x i1> %cmp, <8 x i64> %ext, <8 x i64> zeroinitializer
  ret <8 x i64> %sel
}


; <16 x i8> zero-extended to <16 x i32>, unsigned compare (ugt) against
; splat 10. Expected code compares on the widened .4s lanes (cmhi.4s).
define <16 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v16i32(<16 x i8> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v16i32:
; CHECK: ; %bb.0:
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: ushll2.8h v2, v0, #0
; CHECK-NEXT: ushll.8h v0, v0, #0
; CHECK-NEXT: ushll2.4s v3, v2, #0
; CHECK-NEXT: ushll2.4s v4, v0, #0
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: ushll.4s v2, v2, #0
; CHECK-NEXT: cmhi.4s v5, v0, v1
; CHECK-NEXT: cmhi.4s v6, v2, v1
; CHECK-NEXT: cmhi.4s v7, v3, v1
; CHECK-NEXT: cmhi.4s v1, v4, v1
; CHECK-NEXT: and.16b v3, v3, v7
; CHECK-NEXT: and.16b v1, v4, v1
; CHECK-NEXT: and.16b v2, v2, v6
; CHECK-NEXT: and.16b v0, v0, v5
; CHECK-NEXT: ret
  %ext = zext <16 x i8> %a to <16 x i32>
  %cmp = icmp ugt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  ret <16 x i32> %sel
}

; <8 x i8> zero-extended to <8 x i32>, unsigned compare (ugt) against splat 10.
; Expected code compares on .4s lanes.
define <8 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v8i32(<8 x i8> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v8i32:
; CHECK: ; %bb.0:
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: ushll.8h v0, v0, #0
; CHECK-NEXT: ushll2.4s v2, v0, #0
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: cmhi.4s v3, v2, v1
; CHECK-NEXT: cmhi.4s v4, v0, v1
; CHECK-NEXT: and.16b v1, v2, v3
; CHECK-NEXT: and.16b v0, v0, v4
; CHECK-NEXT: ret
  %ext = zext <8 x i8> %a to <8 x i32>
  %cmp = icmp ugt <8 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
  ret <8 x i32> %sel
}

; Same shape as the previous function, but the source is <8 x i16>, so only a
; single widening step to <8 x i32> is involved.
define <8 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v8i32_2(<8 x i16> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v8i32_2:
; CHECK: ; %bb.0:
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: ushll2.4s v2, v0, #0
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: cmhi.4s v3, v2, v1
; CHECK-NEXT: cmhi.4s v4, v0, v1
; CHECK-NEXT: and.16b v1, v2, v3
; CHECK-NEXT: and.16b v0, v0, v4
; CHECK-NEXT: ret
  %ext = zext <8 x i16> %a to <8 x i32>
  %cmp = icmp ugt <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
  ret <8 x i32> %sel
}


; Source element type is the non-power-of-two width i15. Expected code clears
; the top bit of each .8h lane (bic.8h ... #128, lsl #8) before widening and
; comparing on .4s lanes.
define <8 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15(<8 x i15> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15:
; CHECK: ; %bb.0:
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: bic.8h v0, #128, lsl #8
; CHECK-NEXT: ushll2.4s v2, v0, #0
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: cmhi.4s v3, v2, v1
; CHECK-NEXT: cmhi.4s v4, v0, v1
; CHECK-NEXT: and.16b v1, v2, v3
; CHECK-NEXT: and.16b v0, v0, v4
; CHECK-NEXT: ret
  %ext = zext <8 x i15> %a to <8 x i32>
  %cmp = icmp ugt <8 x i15> %a, <i15 10, i15 10, i15 10, i15 10, i15 10, i15 10, i15 10, i15 10>
  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
  ret <8 x i32> %sel
}

; Seven-element vector (<7 x i16> to <7 x i32>). Expected code compares on the
; narrow .8h lanes (cmhi.8h), sign-extends the mask, and returns the seven
; results in w0-w6.
define <7 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v7i32(<7 x i16> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v7i32:
; CHECK: ; %bb.0:
; CHECK-NEXT: movi.8h v1, #10
; CHECK-NEXT: ushll2.4s v2, v0, #0
; CHECK-NEXT: cmhi.8h v1, v0, v1
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: sshll.4s v3, v1, #0
; CHECK-NEXT: sshll2.4s v1, v1, #0
; CHECK-NEXT: and.16b v0, v0, v3
; CHECK-NEXT: and.16b v1, v2, v1
; CHECK-NEXT: mov.s w1, v0[1]
; CHECK-NEXT: mov.s w2, v0[2]
; CHECK-NEXT: mov.s w3, v0[3]
; CHECK-NEXT: mov.s w5, v1[1]
; CHECK-NEXT: mov.s w6, v1[2]
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: fmov w4, s1
; CHECK-NEXT: ret
  %ext = zext <7 x i16> %a to <7 x i32>
  %cmp = icmp ugt <7 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
  %sel = select <7 x i1> %cmp, <7 x i32> %ext, <7 x i32> zeroinitializer
  ret <7 x i32> %sel
}

; Three-element vector (<3 x i8> to <3 x i32>); the elements arrive in w0-w2.
; Expected code compares on .4h lanes (cmhi.4h) against a constant-pool value.
; NOTE(review): the function name ends in v3i16 although the select produces
; <3 x i32> - confirm whether the name is intentional.
define <3 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v3i16(<3 x i8> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v3i16:
; CHECK: ; %bb.0:
; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: Lloh0:
; CHECK-NEXT: adrp x8, lCPI9_0@PAGE
; CHECK-NEXT: mov.h v0[1], w1
; CHECK-NEXT: Lloh1:
; CHECK-NEXT: ldr d2, [x8, lCPI9_0@PAGEOFF]
; CHECK-NEXT: mov.h v0[2], w2
; CHECK-NEXT: fmov d1, d0
; CHECK-NEXT: bic.4h v1, #255, lsl #8
; CHECK-NEXT: cmhi.4h v1, v1, v2
; CHECK-NEXT: movi.2d v2, #0x0000ff000000ff
; CHECK-NEXT: and.8b v0, v0, v1
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: and.16b v0, v0, v2
; CHECK-NEXT: ret
; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1
  %ext = zext <3 x i8> %a to <3 x i32>
  %cmp = icmp ugt <3 x i8> %a, <i8 10, i8 10, i8 10>
  %sel = select <3 x i1> %cmp, <3 x i32> %ext, <3 x i32> zeroinitializer
  ret <3 x i32> %sel
}

; <4 x i16> zero-extended to <4 x i32>: the result fits one .4s register.
; Expected code compares on .4s lanes.
define <4 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v4i32(<4 x i16> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v4i32:
; CHECK: ; %bb.0:
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: cmhi.4s v1, v0, v1
; CHECK-NEXT: and.16b v0, v0, v1
; CHECK-NEXT: ret
  %ext = zext <4 x i16> %a to <4 x i32>
  %cmp = icmp ugt <4 x i16> %a, <i16 10, i16 10, i16 10, i16 10>
  %sel = select <4 x i1> %cmp, <4 x i32> %ext, <4 x i32> zeroinitializer
  ret <4 x i32> %sel
}

; <2 x i16> zero-extended to <2 x i32>. Expected code masks each .2s lane to
; its low 16 bits and compares on .2s lanes (cmhi.2s).
define <2 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v2i32(<2 x i16> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v2i32:
; CHECK: ; %bb.0:
; CHECK-NEXT: movi d1, #0x00ffff0000ffff
; CHECK-NEXT: movi.2s v2, #10
; CHECK-NEXT: and.8b v0, v0, v1
; CHECK-NEXT: cmhi.2s v1, v0, v2
; CHECK-NEXT: and.8b v0, v0, v1
; CHECK-NEXT: ret
  %ext = zext <2 x i16> %a to <2 x i32>
  %cmp = icmp ugt <2 x i16> %a, <i16 10, i16 10>
  %sel = select <2 x i1> %cmp, <2 x i32> %ext, <2 x i32> zeroinitializer
  ret <2 x i32> %sel
}

; Equality compare (eq) against splat 10 combined with zext <8 x i16> to
; <8 x i32>. Expected code compares on .4s lanes (cmeq.4s).
define <8 x i32> @same_zext_used_in_cmp_eq_and_select_v8i32(<8 x i16> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_eq_and_select_v8i32:
; CHECK: ; %bb.0:
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: ushll2.4s v2, v0, #0
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: cmeq.4s v3, v2, v1
; CHECK-NEXT: cmeq.4s v4, v0, v1
; CHECK-NEXT: and.16b v1, v2, v3
; CHECK-NEXT: and.16b v0, v0, v4
; CHECK-NEXT: ret
  %ext = zext <8 x i16> %a to <8 x i32>
  %cmp = icmp eq <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
  ret <8 x i32> %sel
}

; Equality compare with the non-power-of-two element type i13. Expected code
; clears the top three bits of each .8h lane (bic.8h ... #224, lsl #8) before
; widening and comparing on .4s lanes.
define <8 x i32> @same_zext_used_in_cmp_eq_and_select_v8i32_from_v8i13(<8 x i13> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_eq_and_select_v8i32_from_v8i13:
; CHECK: ; %bb.0:
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: bic.8h v0, #224, lsl #8
; CHECK-NEXT: ushll2.4s v2, v0, #0
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: cmeq.4s v3, v2, v1
; CHECK-NEXT: cmeq.4s v4, v0, v1
; CHECK-NEXT: and.16b v1, v2, v3
; CHECK-NEXT: and.16b v0, v0, v4
; CHECK-NEXT: ret
  %ext = zext <8 x i13> %a to <8 x i32>
  %cmp = icmp eq <8 x i13> %a, <i13 10, i13 10, i13 10, i13 10, i13 10, i13 10, i13 10, i13 10>
  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
  ret <8 x i32> %sel
}

; Inequality compare (ne) against splat 10 combined with zext <16 x i8> to
; <16 x i32>. Expected code compares for equality on .4s lanes (cmeq.4s) and
; clears the matching lanes with bic.
; NOTE(review): the function name ends in v8i32 although the types are
; <16 x i32> - confirm whether the name is intentional.
define <16 x i32> @same_zext_used_in_cmp_ne_and_select_v8i32(<16 x i8> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_ne_and_select_v8i32:
; CHECK: ; %bb.0:
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: ushll2.8h v2, v0, #0
; CHECK-NEXT: ushll.8h v0, v0, #0
; CHECK-NEXT: ushll2.4s v3, v2, #0
; CHECK-NEXT: ushll2.4s v4, v0, #0
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: ushll.4s v2, v2, #0
; CHECK-NEXT: cmeq.4s v5, v0, v1
; CHECK-NEXT: cmeq.4s v6, v2, v1
; CHECK-NEXT: cmeq.4s v7, v3, v1
; CHECK-NEXT: cmeq.4s v1, v4, v1
; CHECK-NEXT: bic.16b v3, v3, v7
; CHECK-NEXT: bic.16b v1, v4, v1
; CHECK-NEXT: bic.16b v2, v2, v6
; CHECK-NEXT: bic.16b v0, v0, v5
; CHECK-NEXT: ret
  %ext = zext <16 x i8> %a to <16 x i32>
  %cmp = icmp ne <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  ret <16 x i32> %sel
}

; A variation of @same_zext_used_in_cmp_unsigned_pred_and_select_v16i32, with
; multiple users of the compare.
; The compare result %cmp feeds two selects: one over the zext of %a (returned)
; and one over the <16 x i64> argument %v (stored through %ptr). Expected code
; compares on .16b lanes (cmhi.16b) and sign-extends the mask for both uses.
define <16 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_other_use(<16 x i8> %a, <16 x i64> %v, <16 x i64>* %ptr) {
; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_other_use:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: mov.16b v16, v2
; CHECK-NEXT: movi.16b v2, #10
; CHECK-NEXT: ushll.8h v18, v0, #0
; CHECK-NEXT: ushll2.8h v20, v0, #0
; CHECK-NEXT: mov.16b v17, v1
; CHECK-NEXT: ldr q1, [sp]
; CHECK-NEXT: cmhi.16b v0, v0, v2
; CHECK-NEXT: ushll.4s v19, v18, #0
; CHECK-NEXT: sshll2.8h v21, v0, #0
; CHECK-NEXT: sshll.8h v0, v0, #0
; CHECK-NEXT: sshll2.4s v22, v21, #0
; CHECK-NEXT: sshll.4s v21, v21, #0
; CHECK-NEXT: sshll2.2d v23, v22, #0
; CHECK-NEXT: sshll.2d v24, v22, #0
; CHECK-NEXT: sshll2.4s v25, v0, #0
; CHECK-NEXT: sshll2.2d v26, v21, #0
; CHECK-NEXT: sshll.2d v28, v21, #0
; CHECK-NEXT: sshll2.2d v27, v25, #0
; CHECK-NEXT: sshll.4s v0, v0, #0
; CHECK-NEXT: and.16b v1, v1, v23
; CHECK-NEXT: and.16b v7, v7, v24
; CHECK-NEXT: sshll.2d v29, v25, #0
; CHECK-NEXT: stp q7, q1, [x0, #96]
; CHECK-NEXT: and.16b v1, v6, v26
; CHECK-NEXT: and.16b v5, v5, v28
; CHECK-NEXT: ushll.4s v2, v20, #0
; CHECK-NEXT: stp q5, q1, [x0, #64]
; CHECK-NEXT: ushll2.4s v18, v18, #0
; CHECK-NEXT: ushll2.4s v20, v20, #0
; CHECK-NEXT: and.16b v1, v4, v27
; CHECK-NEXT: sshll2.2d v4, v0, #0
; CHECK-NEXT: sshll.2d v5, v0, #0
; CHECK-NEXT: and.16b v3, v3, v29
; CHECK-NEXT: stp q3, q1, [x0, #32]
; CHECK-NEXT: and.16b v3, v20, v22
; CHECK-NEXT: and.16b v1, v18, v25
; CHECK-NEXT: and.16b v2, v2, v21
; CHECK-NEXT: and.16b v0, v19, v0
; CHECK-NEXT: and.16b v4, v16, v4
; CHECK-NEXT: and.16b v5, v17, v5
; CHECK-NEXT: stp q5, q4, [x0]
; CHECK-NEXT: ret
entry:
  %ext = zext <16 x i8> %a to <16 x i32>
  %cmp = icmp ugt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  %sel.2 = select <16 x i1> %cmp, <16 x i64> %v, <16 x i64> zeroinitializer
  store <16 x i64> %sel.2, <16 x i64>* %ptr
  ret <16 x i32> %sel
}

; Sign-extension counterpart: sext <16 x i8> to <16 x i32> selected on a signed
; compare (sgt) against splat 10. Expected code compares on the widened .4s
; lanes (cmgt.4s).
define <16 x i32> @same_sext_used_in_cmp_signed_pred_and_select_v16i32(<16 x i8> %a) {
; CHECK-LABEL: same_sext_used_in_cmp_signed_pred_and_select_v16i32:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: sshll2.8h v2, v0, #0
; CHECK-NEXT: sshll.8h v0, v0, #0
; CHECK-NEXT: sshll2.4s v3, v2, #0
; CHECK-NEXT: sshll2.4s v4, v0, #0
; CHECK-NEXT: sshll.4s v0, v0, #0
; CHECK-NEXT: sshll.4s v2, v2, #0
; CHECK-NEXT: cmgt.4s v5, v0, v1
; CHECK-NEXT: cmgt.4s v6, v2, v1
; CHECK-NEXT: cmgt.4s v7, v3, v1
; CHECK-NEXT: cmgt.4s v1, v4, v1
; CHECK-NEXT: and.16b v3, v3, v7
; CHECK-NEXT: and.16b v1, v4, v1
; CHECK-NEXT: and.16b v2, v2, v6
; CHECK-NEXT: and.16b v0, v0, v5
; CHECK-NEXT: ret
entry:
  %ext = sext <16 x i8> %a to <16 x i32>
  %cmp = icmp sgt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  ret <16 x i32> %sel
}

; sext <8 x i16> to <8 x i32> with an equality compare against splat 10.
; Expected code compares on .4s lanes (cmeq.4s).
define <8 x i32> @same_sext_used_in_cmp_eq_and_select_v8i32(<8 x i16> %a) {
; CHECK-LABEL: same_sext_used_in_cmp_eq_and_select_v8i32:
; CHECK: ; %bb.0:
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: sshll2.4s v2, v0, #0
; CHECK-NEXT: sshll.4s v0, v0, #0
; CHECK-NEXT: cmeq.4s v3, v2, v1
; CHECK-NEXT: cmeq.4s v4, v0, v1
; CHECK-NEXT: and.16b v1, v2, v3
; CHECK-NEXT: and.16b v0, v0, v4
; CHECK-NEXT: ret
  %ext = sext <8 x i16> %a to <8 x i32>
  %cmp = icmp eq <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
  ret <8 x i32> %sel
}

; sext from the non-power-of-two element type i13 with an equality compare.
; Expected code sign-extends in .4s lanes with a shl/sshr pair by 19 and then
; compares on .4s lanes.
define <8 x i32> @same_sext_used_in_cmp_eq_and_select_v8i32_from_v8i13(<8 x i13> %a) {
; CHECK-LABEL: same_sext_used_in_cmp_eq_and_select_v8i32_from_v8i13:
; CHECK: ; %bb.0:
; CHECK-NEXT: ushll2.4s v2, v0, #0
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: shl.4s v2, v2, #19
; CHECK-NEXT: shl.4s v0, v0, #19
; CHECK-NEXT: sshr.4s v2, v2, #19
; CHECK-NEXT: sshr.4s v0, v0, #19
; CHECK-NEXT: cmeq.4s v3, v2, v1
; CHECK-NEXT: cmeq.4s v4, v0, v1
; CHECK-NEXT: and.16b v1, v2, v3
; CHECK-NEXT: and.16b v0, v0, v4
; CHECK-NEXT: ret
  %ext = sext <8 x i13> %a to <8 x i32>
  %cmp = icmp eq <8 x i13> %a, <i13 10, i13 10, i13 10, i13 10, i13 10, i13 10, i13 10, i13 10>
  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
  ret <8 x i32> %sel
}

; sext <16 x i8> to <16 x i32> with an inequality compare (ne) against
; splat 10. Expected code compares for equality on .4s lanes and clears the
; matching lanes with bic.
; NOTE(review): the function name ends in v8i32 although the types are
; <16 x i32> - confirm whether the name is intentional.
define <16 x i32> @same_sext_used_in_cmp_ne_and_select_v8i32(<16 x i8> %a) {
; CHECK-LABEL: same_sext_used_in_cmp_ne_and_select_v8i32:
; CHECK: ; %bb.0:
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: sshll2.8h v2, v0, #0
; CHECK-NEXT: sshll.8h v0, v0, #0
; CHECK-NEXT: sshll2.4s v3, v2, #0
; CHECK-NEXT: sshll2.4s v4, v0, #0
; CHECK-NEXT: sshll.4s v0, v0, #0
; CHECK-NEXT: sshll.4s v2, v2, #0
; CHECK-NEXT: cmeq.4s v5, v0, v1
; CHECK-NEXT: cmeq.4s v6, v2, v1
; CHECK-NEXT: cmeq.4s v7, v3, v1
; CHECK-NEXT: cmeq.4s v1, v4, v1
; CHECK-NEXT: bic.16b v3, v3, v7
; CHECK-NEXT: bic.16b v1, v4, v1
; CHECK-NEXT: bic.16b v2, v2, v6
; CHECK-NEXT: bic.16b v0, v0, v5
; CHECK-NEXT: ret
  %ext = sext <16 x i8> %a to <16 x i32>
  %cmp = icmp ne <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  ret <16 x i32> %sel
}

; sext <8 x i16> to <8 x i32> with a signed compare (sgt) against splat 10.
; Expected code compares on .4s lanes (cmgt.4s).
define <8 x i32> @same_sext_used_in_cmp_signed_pred_and_select_v8i32(<8 x i16> %a) {
; CHECK-LABEL: same_sext_used_in_cmp_signed_pred_and_select_v8i32:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: sshll2.4s v2, v0, #0
; CHECK-NEXT: sshll.4s v0, v0, #0
; CHECK-NEXT: cmgt.4s v3, v2, v1
; CHECK-NEXT: cmgt.4s v4, v0, v1
; CHECK-NEXT: and.16b v1, v2, v3
; CHECK-NEXT: and.16b v0, v0, v4
; CHECK-NEXT: ret
entry:
  %ext = sext <8 x i16> %a to <8 x i32>
  %cmp = icmp sgt <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
  ret <8 x i32> %sel
}

; sext from the non-power-of-two element type i15 with compare predicate sge.
; Expected code sign-extends in .4s lanes with a shl/sshr pair by 17 and
; compares with cmge.4s.
; NOTE(review): the function name says "unsigned_pred" but the IR uses the
; signed predicate sge - confirm which one is intended.
define <8 x i32> @same_sext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15(<8 x i15> %a) {
; CHECK-LABEL: same_sext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15:
; CHECK: ; %bb.0:
; CHECK-NEXT: ushll2.4s v2, v0, #0
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: shl.4s v2, v2, #17
; CHECK-NEXT: shl.4s v0, v0, #17
; CHECK-NEXT: sshr.4s v2, v2, #17
; CHECK-NEXT: sshr.4s v0, v0, #17
; CHECK-NEXT: cmge.4s v3, v2, v1
; CHECK-NEXT: cmge.4s v4, v0, v1
; CHECK-NEXT: and.16b v1, v2, v3
; CHECK-NEXT: and.16b v0, v0, v4
; CHECK-NEXT: ret
  %ext = sext <8 x i15> %a to <8 x i32>
  %cmp = icmp sge <8 x i15> %a, <i15 10, i15 10, i15 10, i15 10, i15 10, i15 10, i15 10, i15 10>
  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
  ret <8 x i32> %sel
}

; sext of %a combined with an unsigned compare (ugt) of %a. Expected code
; compares on the narrow .16b lanes (cmhi.16b) and applies the mask before the
; final widening to .4s.
define <16 x i32> @same_sext_used_in_cmp_unsigned_pred_and_select(<16 x i8> %a) {
; CHECK-LABEL: same_sext_used_in_cmp_unsigned_pred_and_select:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: movi.16b v1, #10
; CHECK-NEXT: sshll.8h v3, v0, #0
; CHECK-NEXT: sshll2.8h v2, v0, #0
; CHECK-NEXT: cmhi.16b v0, v0, v1
; CHECK-NEXT: ext.16b v1, v3, v3, #8
; CHECK-NEXT: sshll.8h v5, v0, #0
; CHECK-NEXT: sshll2.8h v0, v0, #0
; CHECK-NEXT: ext.16b v4, v2, v2, #8
; CHECK-NEXT: ext.16b v6, v5, v5, #8
; CHECK-NEXT: ext.16b v7, v0, v0, #8
; CHECK-NEXT: and.8b v0, v2, v0
; CHECK-NEXT: sshll.4s v2, v0, #0
; CHECK-NEXT: and.8b v0, v3, v5
; CHECK-NEXT: and.8b v1, v1, v6
; CHECK-NEXT: and.8b v3, v4, v7
; CHECK-NEXT: sshll.4s v0, v0, #0
; CHECK-NEXT: sshll.4s v1, v1, #0
; CHECK-NEXT: sshll.4s v3, v3, #0
; CHECK-NEXT: ret
entry:
  %ext = sext <16 x i8> %a to <16 x i32>
  %cmp = icmp ugt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  ret <16 x i32> %sel
}

; zext of %a combined with the signed compare "sgt splat -1". Expected code
; currently compares on .16b lanes against an all-ones vector (cmgt.16b).
define <16 x i32> @same_zext_used_in_cmp_signed_pred_and_select_can_convert_to_unsigned_pred(<16 x i8> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_signed_pred_and_select_can_convert_to_unsigned_pred:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: movi.2d v1, #0xffffffffffffffff
; CHECK-NEXT: ushll.8h v2, v0, #0
; CHECK-NEXT: ushll2.8h v3, v0, #0
; CHECK-NEXT: ushll.4s v4, v2, #0
; CHECK-NEXT: cmgt.16b v0, v0, v1
; CHECK-NEXT: ushll.4s v5, v3, #0
; CHECK-NEXT: ushll2.4s v1, v3, #0
; CHECK-NEXT: sshll.8h v3, v0, #0
; CHECK-NEXT: sshll2.8h v0, v0, #0
; CHECK-NEXT: ushll2.4s v2, v2, #0
; CHECK-NEXT: sshll.4s v6, v3, #0
; CHECK-NEXT: sshll.4s v7, v0, #0
; CHECK-NEXT: sshll2.4s v0, v0, #0
; CHECK-NEXT: sshll2.4s v16, v3, #0
; CHECK-NEXT: and.16b v3, v1, v0
; CHECK-NEXT: and.16b v1, v2, v16
; CHECK-NEXT: and.16b v2, v5, v7
; CHECK-NEXT: and.16b v0, v4, v6
; CHECK-NEXT: ret
entry:
  %ext = zext <16 x i8> %a to <16 x i32>
  %cmp = icmp sgt <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  ret <16 x i32> %sel
}

; Loop form: each iteration loads 16 bytes at %src + %iv, zero-extends them to
; <16 x i32>, keeps the lanes where the byte is "sgt -1", and stores the result
; at %dst + %iv (i32 elements). %iv advances by 16 and the loop exits when it
; reaches 128, i.e. after 8 iterations.
define void @extension_in_loop_v16i8_to_v16i32(i8* %src, i32* %dst) {
; CHECK-LABEL: extension_in_loop_v16i8_to_v16i32:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: movi.2d v0, #0xffffffffffffffff
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: LBB24_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr q1, [x0, x8]
; CHECK-NEXT: add x8, x8, #16
; CHECK-NEXT: cmp x8, #128
; CHECK-NEXT: cmgt.16b v2, v1, v0
; CHECK-NEXT: ushll2.8h v3, v1, #0
; CHECK-NEXT: sshll2.8h v4, v2, #0
; CHECK-NEXT: ushll2.4s v5, v3, #0
; CHECK-NEXT: ushll.4s v3, v3, #0
; CHECK-NEXT: sshll2.4s v6, v4, #0
; CHECK-NEXT: sshll.4s v4, v4, #0
; CHECK-NEXT: ushll.8h v1, v1, #0
; CHECK-NEXT: sshll.8h v2, v2, #0
; CHECK-NEXT: and.16b v5, v5, v6
; CHECK-NEXT: and.16b v3, v3, v4
; CHECK-NEXT: stp q3, q5, [x1, #32]
; CHECK-NEXT: sshll2.4s v4, v2, #0
; CHECK-NEXT: sshll.4s v2, v2, #0
; CHECK-NEXT: ushll2.4s v3, v1, #0
; CHECK-NEXT: ushll.4s v1, v1, #0
; CHECK-NEXT: and.16b v3, v3, v4
; CHECK-NEXT: and.16b v1, v1, v2
; CHECK-NEXT: stp q1, q3, [x1], #64
; CHECK-NEXT: b.ne LBB24_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %src.gep = getelementptr i8, i8* %src, i64 %iv
  %src.gep.cast = bitcast i8* %src.gep to <16 x i8>*
  %load = load <16 x i8>, <16 x i8>* %src.gep.cast
  %cmp = icmp sgt <16 x i8> %load, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %ext = zext <16 x i8> %load to <16 x i32>
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  %dst.gep = getelementptr i32, i32* %dst, i64 %iv
  %dst.gep.cast = bitcast i32* %dst.gep to <16 x i32>*
  store <16 x i32> %sel, <16 x i32>* %dst.gep.cast
  %iv.next = add nuw i64 %iv, 16
  %ec = icmp eq i64 %iv.next, 128
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}

; Same loop, but the widened value is built with a shufflevector against a zero
; vector followed by a bitcast to <16 x i32>: every group of four mask entries
; is three zero bytes (index 16) followed by one byte of %load (indices 0-15 in
; order). Expected code uses four tbl.16b lookups with constant-pool masks.
define void @extension_in_loop_as_shuffle_v16i8_to_v16i32(i8* %src, i32* %dst) {
; CHECK-LABEL: extension_in_loop_as_shuffle_v16i8_to_v16i32:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: Lloh2:
; CHECK-NEXT: adrp x9, lCPI25_0@PAGE
; CHECK-NEXT: Lloh3:
; CHECK-NEXT: adrp x10, lCPI25_1@PAGE
; CHECK-NEXT: Lloh4:
; CHECK-NEXT: adrp x11, lCPI25_2@PAGE
; CHECK-NEXT: Lloh5:
; CHECK-NEXT: adrp x12, lCPI25_3@PAGE
; CHECK-NEXT: movi.2d v2, #0xffffffffffffffff
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: Lloh6:
; CHECK-NEXT: ldr q0, [x9, lCPI25_0@PAGEOFF]
; CHECK-NEXT: Lloh7:
; CHECK-NEXT: ldr q1, [x10, lCPI25_1@PAGEOFF]
; CHECK-NEXT: Lloh8:
; CHECK-NEXT: ldr q3, [x11, lCPI25_2@PAGEOFF]
; CHECK-NEXT: Lloh9:
; CHECK-NEXT: ldr q4, [x12, lCPI25_3@PAGEOFF]
; CHECK-NEXT: LBB25_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr q5, [x0, x8]
; CHECK-NEXT: add x8, x8, #16
; CHECK-NEXT: cmp x8, #128
; CHECK-NEXT: cmgt.16b v6, v5, v2
; CHECK-NEXT: tbl.16b v7, { v5 }, v0
; CHECK-NEXT: tbl.16b v16, { v5 }, v1
; CHECK-NEXT: sshll2.8h v18, v6, #0
; CHECK-NEXT: tbl.16b v17, { v5 }, v3
; CHECK-NEXT: sshll2.4s v19, v18, #0
; CHECK-NEXT: sshll.4s v18, v18, #0
; CHECK-NEXT: tbl.16b v5, { v5 }, v4
; CHECK-NEXT: sshll.8h v6, v6, #0
; CHECK-NEXT: and.16b v7, v7, v19
; CHECK-NEXT: and.16b v16, v16, v18
; CHECK-NEXT: stp q16, q7, [x1, #32]
; CHECK-NEXT: sshll2.4s v7, v6, #0
; CHECK-NEXT: sshll.4s v6, v6, #0
; CHECK-NEXT: and.16b v7, v17, v7
; CHECK-NEXT: and.16b v5, v5, v6
; CHECK-NEXT: stp q5, q7, [x1], #64
; CHECK-NEXT: b.ne LBB25_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
; CHECK-NEXT: .loh AdrpLdr Lloh5, Lloh9
; CHECK-NEXT: .loh AdrpLdr Lloh4, Lloh8
; CHECK-NEXT: .loh AdrpLdr Lloh3, Lloh7
; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh6
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %src.gep = getelementptr i8, i8* %src, i64 %iv
  %src.gep.cast = bitcast i8* %src.gep to <16 x i8>*
  %load = load <16 x i8>, <16 x i8>* %src.gep.cast
  %cmp = icmp sgt <16 x i8> %load, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %ext.shuf = shufflevector <16 x i8> %load, <16 x i8> zeroinitializer, <64 x i32> <i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 5, i32 16, i32 16, i32 16, i32 6, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 9, i32 16, i32 16, i32 16, i32 10, i32 16, i32 16, i32 16, i32 11, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 15>
  %ext = bitcast <64 x i8> %ext.shuf to <16 x i32>
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  %dst.gep = getelementptr i32, i32* %dst, i64 %iv
  %dst.gep.cast = bitcast i32* %dst.gep to <16 x i32>*
  store <16 x i32> %sel, <16 x i32>* %dst.gep.cast
  %iv.next = add nuw i64 %iv, 16
  %ec = icmp eq i64 %iv.next, 128
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}

; Identical to the previous function except for the first shuffle mask entry,
; which is 1 (a byte of %load) instead of 16 (a zero byte), so the first
; four-byte group is no longer "three zero bytes plus one source byte".
define void @shuffle_in_loop_is_no_extend_v16i8_to_v16i32(i8* %src, i32* %dst) {
; CHECK-LABEL: shuffle_in_loop_is_no_extend_v16i8_to_v16i32:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: Lloh10:
; CHECK-NEXT: adrp x9, lCPI26_0@PAGE
; CHECK-NEXT: Lloh11:
; CHECK-NEXT: adrp x10, lCPI26_1@PAGE
; CHECK-NEXT: Lloh12:
; CHECK-NEXT: adrp x11, lCPI26_2@PAGE
; CHECK-NEXT: Lloh13:
; CHECK-NEXT: adrp x12, lCPI26_3@PAGE
; CHECK-NEXT: movi.2d v2, #0xffffffffffffffff
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: Lloh14:
; CHECK-NEXT: ldr q0, [x9, lCPI26_0@PAGEOFF]
; CHECK-NEXT: Lloh15:
; CHECK-NEXT: ldr q1, [x10, lCPI26_1@PAGEOFF]
; CHECK-NEXT: Lloh16:
; CHECK-NEXT: ldr q3, [x11, lCPI26_2@PAGEOFF]
; CHECK-NEXT: Lloh17:
; CHECK-NEXT: ldr q4, [x12, lCPI26_3@PAGEOFF]
; CHECK-NEXT: LBB26_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr q5, [x0, x8]
; CHECK-NEXT: add x8, x8, #16
; CHECK-NEXT: cmp x8, #128
; CHECK-NEXT: cmgt.16b v6, v5, v2
; CHECK-NEXT: tbl.16b v7, { v5 }, v0
; CHECK-NEXT: tbl.16b v16, { v5 }, v1
; CHECK-NEXT: sshll2.8h v18, v6, #0
; CHECK-NEXT: tbl.16b v17, { v5 }, v3
; CHECK-NEXT: sshll2.4s v19, v18, #0
; CHECK-NEXT: sshll.4s v18, v18, #0
; CHECK-NEXT: tbl.16b v5, { v5 }, v4
; CHECK-NEXT: sshll.8h v6, v6, #0
; CHECK-NEXT: and.16b v7, v7, v19
; CHECK-NEXT: and.16b v16, v16, v18
; CHECK-NEXT: stp q16, q7, [x1, #32]
; CHECK-NEXT: sshll2.4s v7, v6, #0
; CHECK-NEXT: sshll.4s v6, v6, #0
; CHECK-NEXT: and.16b v7, v17, v7
; CHECK-NEXT: and.16b v5, v5, v6
; CHECK-NEXT: stp q5, q7, [x1], #64
; CHECK-NEXT: b.ne LBB26_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
; CHECK-NEXT: .loh AdrpLdr Lloh13, Lloh17
; CHECK-NEXT: .loh AdrpLdr Lloh12, Lloh16
; CHECK-NEXT: .loh AdrpLdr Lloh11, Lloh15
; CHECK-NEXT: .loh AdrpLdr Lloh10, Lloh14
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %src.gep = getelementptr i8, i8* %src, i64 %iv
  %src.gep.cast = bitcast i8* %src.gep to <16 x i8>*
  %load = load <16 x i8>, <16 x i8>* %src.gep.cast
  %cmp = icmp sgt <16 x i8> %load, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %ext.shuf = shufflevector <16 x i8> %load, <16 x i8> zeroinitializer, <64 x i32> <i32 1, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 5, i32 16, i32 16, i32 16, i32 6, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 9, i32 16, i32 16, i32 16, i32 10, i32 16, i32 16, i32 16, i32 11, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 15>
  %ext = bitcast <64 x i8> %ext.shuf to <16 x i32>
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  %dst.gep = getelementptr i32, i32* %dst, i64 %iv
  %dst.gep.cast = bitcast i32* %dst.gep to <16 x i32>*
  store <16 x i32> %sel, <16 x i32>* %dst.gep.cast
  %iv.next = add nuw i64 %iv, 16
  %ec = icmp eq i64 %iv.next, 128
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}