; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-- -mcpu=tahiti -amdgpu-codegenprepare %s | FileCheck -check-prefix=SI %s
; RUN: opt -S -mtriple=amdgcn-- -mcpu=fiji -amdgpu-codegenprepare %s | FileCheck -check-prefix=VI %s

; Exercises the rewrite of integer 'mul' into the llvm.amdgcn.mul.i24 and
; llvm.amdgcn.mul.u24 intrinsics performed by -amdgpu-codegenprepare.
; The tahiti run is checked with the SI prefix and the fiji run with the VI
; prefix (see the RUN lines above).
;
; Signed tests narrow both operands with a shl/ashr pair; unsigned tests
; narrow both operands with an 'and' mask. Each operand is narrowed from its
; own argument so that both %lhs and %rhs feed the multiply.

; Plain i16 multiply: the tahiti checks expect zext to i32 + mul.u24 + trunc,
; the fiji checks expect the i16 mul to be left untouched.
define i16 @mul_i16(i16 %lhs, i16 %rhs) {
; SI-LABEL: @mul_i16(
; SI-NEXT:    [[TMP1:%.*]] = zext i16 [[LHS:%.*]] to i32
; SI-NEXT:    [[TMP2:%.*]] = zext i16 [[RHS:%.*]] to i32
; SI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
; SI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; SI-NEXT:    ret i16 [[TMP4]]
;
; VI-LABEL: @mul_i16(
; VI-NEXT:    [[MUL:%.*]] = mul i16 [[LHS:%.*]], [[RHS:%.*]]
; VI-NEXT:    ret i16 [[MUL]]
;
  %mul = mul i16 %lhs, %rhs
  ret i16 %mul
}

; i32 multiply of two operands sign-extended from 24 bits (shl 8 / ashr 8):
; both targets are expected to use mul.i24 directly.
define i32 @smul24_i32(i32 %lhs, i32 %rhs) {
; SI-LABEL: @smul24_i32(
; SI-NEXT:    [[SHL_LHS:%.*]] = shl i32 [[LHS:%.*]], 8
; SI-NEXT:    [[LHS24:%.*]] = ashr i32 [[SHL_LHS]], 8
; SI-NEXT:    [[SHL_RHS:%.*]] = shl i32 [[RHS:%.*]], 8
; SI-NEXT:    [[RHS24:%.*]] = ashr i32 [[SHL_RHS]], 8
; SI-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[LHS24]], i32 [[RHS24]])
; SI-NEXT:    ret i32 [[TMP1]]
;
; VI-LABEL: @smul24_i32(
; VI-NEXT:    [[SHL_LHS:%.*]] = shl i32 [[LHS:%.*]], 8
; VI-NEXT:    [[LHS24:%.*]] = ashr i32 [[SHL_LHS]], 8
; VI-NEXT:    [[SHL_RHS:%.*]] = shl i32 [[RHS:%.*]], 8
; VI-NEXT:    [[RHS24:%.*]] = ashr i32 [[SHL_RHS]], 8
; VI-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[LHS24]], i32 [[RHS24]])
; VI-NEXT:    ret i32 [[TMP1]]
;
  %shl.lhs = shl i32 %lhs, 8
  %lhs24 = ashr i32 %shl.lhs, 8
  %shl.rhs = shl i32 %rhs, 8
  %rhs24 = ashr i32 %shl.rhs, 8
  %mul = mul i32 %lhs24, %rhs24
  ret i32 %mul
}

; <2 x i32> variant of smul24_i32: the checks expect the vector to be split
; into per-element mul.i24 calls and reassembled with insertelement.
define <2 x i32> @smul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; SI-LABEL: @smul24_v2i32(
; SI-NEXT:    [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], <i32 8, i32 8>
; SI-NEXT:    [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], <i32 8, i32 8>
; SI-NEXT:    [[SHL_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], <i32 8, i32 8>
; SI-NEXT:    [[RHS24:%.*]] = ashr <2 x i32> [[SHL_RHS]], <i32 8, i32 8>
; SI-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0
; SI-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1
; SI-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0
; SI-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[RHS24]], i64 1
; SI-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP3]])
; SI-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP2]], i32 [[TMP4]])
; SI-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i64 0
; SI-NEXT:    [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
; SI-NEXT:    ret <2 x i32> [[TMP8]]
;
; VI-LABEL: @smul24_v2i32(
; VI-NEXT:    [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], <i32 8, i32 8>
; VI-NEXT:    [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], <i32 8, i32 8>
; VI-NEXT:    [[SHL_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], <i32 8, i32 8>
; VI-NEXT:    [[RHS24:%.*]] = ashr <2 x i32> [[SHL_RHS]], <i32 8, i32 8>
; VI-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0
; VI-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1
; VI-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0
; VI-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[RHS24]], i64 1
; VI-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP3]])
; VI-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP2]], i32 [[TMP4]])
; VI-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i64 0
; VI-NEXT:    [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
; VI-NEXT:    ret <2 x i32> [[TMP8]]
;
  %shl.lhs = shl <2 x i32> %lhs, <i32 8, i32 8>
  %lhs24 = ashr <2 x i32> %shl.lhs, <i32 8, i32 8>
  %shl.rhs = shl <2 x i32> %rhs, <i32 8, i32 8>
  %rhs24 = ashr <2 x i32> %shl.rhs, <i32 8, i32 8>
  %mul = mul <2 x i32> %lhs24, %rhs24
  ret <2 x i32> %mul
}

; i32 multiply of two operands masked to their low 24 bits (16777215 is
; 2^24 - 1): both targets are expected to use mul.u24.
define i32 @umul24_i32(i32 %lhs, i32 %rhs) {
; SI-LABEL: @umul24_i32(
; SI-NEXT:    [[LHS24:%.*]] = and i32 [[LHS:%.*]], 16777215
; SI-NEXT:    [[RHS24:%.*]] = and i32 [[RHS:%.*]], 16777215
; SI-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[LHS24]], i32 [[RHS24]])
; SI-NEXT:    ret i32 [[TMP1]]
;
; VI-LABEL: @umul24_i32(
; VI-NEXT:    [[LHS24:%.*]] = and i32 [[LHS:%.*]], 16777215
; VI-NEXT:    [[RHS24:%.*]] = and i32 [[RHS:%.*]], 16777215
; VI-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[LHS24]], i32 [[RHS24]])
; VI-NEXT:    ret i32 [[TMP1]]
;
  %lhs24 = and i32 %lhs, 16777215
  %rhs24 = and i32 %rhs, 16777215
  %mul = mul i32 %lhs24, %rhs24
  ret i32 %mul
}

; <2 x i32> variant of umul24_i32: expected to be split into per-element
; mul.u24 calls.
define <2 x i32> @umul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; SI-LABEL: @umul24_v2i32(
; SI-NEXT:    [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], <i32 16777215, i32 16777215>
; SI-NEXT:    [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], <i32 16777215, i32 16777215>
; SI-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0
; SI-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1
; SI-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0
; SI-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[RHS24]], i64 1
; SI-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP3]])
; SI-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP2]], i32 [[TMP4]])
; SI-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i64 0
; SI-NEXT:    [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
; SI-NEXT:    ret <2 x i32> [[TMP8]]
;
; VI-LABEL: @umul24_v2i32(
; VI-NEXT:    [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], <i32 16777215, i32 16777215>
; VI-NEXT:    [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], <i32 16777215, i32 16777215>
; VI-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0
; VI-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1
; VI-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0
; VI-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[RHS24]], i64 1
; VI-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP3]])
; VI-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP2]], i32 [[TMP4]])
; VI-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i64 0
; VI-NEXT:    [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
; VI-NEXT:    ret <2 x i32> [[TMP8]]
;
  %lhs24 = and <2 x i32> %lhs, <i32 16777215, i32 16777215>
  %rhs24 = and <2 x i32> %rhs, <i32 16777215, i32 16777215>
  %mul = mul <2 x i32> %lhs24, %rhs24
  ret <2 x i32> %mul
}

; i64 multiply of 24-bit signed operands: the checks expect trunc to i32,
; mul.i24, then sext of the 32-bit result back to i64.
; NOTE(review): a 24x24-bit product can need up to 48 bits, so sign-extending
; a 32-bit result may not equal the original i64 product - confirm that this
; expected output is intended.
define i64 @smul24_i64(i64 %lhs, i64 %rhs) {
; SI-LABEL: @smul24_i64(
; SI-NEXT:    [[SHL_LHS:%.*]] = shl i64 [[LHS:%.*]], 40
; SI-NEXT:    [[LHS24:%.*]] = ashr i64 [[SHL_LHS]], 40
; SI-NEXT:    [[SHL_RHS:%.*]] = shl i64 [[RHS:%.*]], 40
; SI-NEXT:    [[RHS24:%.*]] = ashr i64 [[SHL_RHS]], 40
; SI-NEXT:    [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
; SI-NEXT:    [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
; SI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
; SI-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
; SI-NEXT:    ret i64 [[TMP4]]
;
; VI-LABEL: @smul24_i64(
; VI-NEXT:    [[SHL_LHS:%.*]] = shl i64 [[LHS:%.*]], 40
; VI-NEXT:    [[LHS24:%.*]] = ashr i64 [[SHL_LHS]], 40
; VI-NEXT:    [[SHL_RHS:%.*]] = shl i64 [[RHS:%.*]], 40
; VI-NEXT:    [[RHS24:%.*]] = ashr i64 [[SHL_RHS]], 40
; VI-NEXT:    [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
; VI-NEXT:    [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
; VI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
; VI-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
; VI-NEXT:    ret i64 [[TMP4]]
;
  %shl.lhs = shl i64 %lhs, 40
  %lhs24 = ashr i64 %shl.lhs, 40
  %shl.rhs = shl i64 %rhs, 40
  %rhs24 = ashr i64 %shl.rhs, 40
  %mul = mul i64 %lhs24, %rhs24
  ret i64 %mul
}

; i64 multiply of 24-bit unsigned operands: the checks expect trunc to i32,
; mul.u24, then zext of the 32-bit result back to i64.
; NOTE(review): same 48-bit product concern as smul24_i64 - confirm.
define i64 @umul24_i64(i64 %lhs, i64 %rhs) {
; SI-LABEL: @umul24_i64(
; SI-NEXT:    [[LHS24:%.*]] = and i64 [[LHS:%.*]], 16777215
; SI-NEXT:    [[RHS24:%.*]] = and i64 [[RHS:%.*]], 16777215
; SI-NEXT:    [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
; SI-NEXT:    [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
; SI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
; SI-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
; SI-NEXT:    ret i64 [[TMP4]]
;
; VI-LABEL: @umul24_i64(
; VI-NEXT:    [[LHS24:%.*]] = and i64 [[LHS:%.*]], 16777215
; VI-NEXT:    [[RHS24:%.*]] = and i64 [[RHS:%.*]], 16777215
; VI-NEXT:    [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
; VI-NEXT:    [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
; VI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
; VI-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
; VI-NEXT:    ret i64 [[TMP4]]
;
  %lhs24 = and i64 %lhs, 16777215
  %rhs24 = and i64 %rhs, 16777215
  %mul = mul i64 %lhs24, %rhs24
  ret i64 %mul
}

; Odd-width i31 signed multiply: the checks expect sext to i32, mul.i24, then
; trunc back to i31.
define i31 @smul24_i31(i31 %lhs, i31 %rhs) {
; SI-LABEL: @smul24_i31(
; SI-NEXT:    [[SHL_LHS:%.*]] = shl i31 [[LHS:%.*]], 7
; SI-NEXT:    [[LHS24:%.*]] = ashr i31 [[SHL_LHS]], 7
; SI-NEXT:    [[SHL_RHS:%.*]] = shl i31 [[RHS:%.*]], 7
; SI-NEXT:    [[RHS24:%.*]] = ashr i31 [[SHL_RHS]], 7
; SI-NEXT:    [[TMP1:%.*]] = sext i31 [[LHS24]] to i32
; SI-NEXT:    [[TMP2:%.*]] = sext i31 [[RHS24]] to i32
; SI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
; SI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i31
; SI-NEXT:    ret i31 [[TMP4]]
;
; VI-LABEL: @smul24_i31(
; VI-NEXT:    [[SHL_LHS:%.*]] = shl i31 [[LHS:%.*]], 7
; VI-NEXT:    [[LHS24:%.*]] = ashr i31 [[SHL_LHS]], 7
; VI-NEXT:    [[SHL_RHS:%.*]] = shl i31 [[RHS:%.*]], 7
; VI-NEXT:    [[RHS24:%.*]] = ashr i31 [[SHL_RHS]], 7
; VI-NEXT:    [[TMP1:%.*]] = sext i31 [[LHS24]] to i32
; VI-NEXT:    [[TMP2:%.*]] = sext i31 [[RHS24]] to i32
; VI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i31
; VI-NEXT:    ret i31 [[TMP4]]
;
  %shl.lhs = shl i31 %lhs, 7
  %lhs24 = ashr i31 %shl.lhs, 7
  %shl.rhs = shl i31 %rhs, 7
  %rhs24 = ashr i31 %shl.rhs, 7
  %mul = mul i31 %lhs24, %rhs24
  ret i31 %mul
}

; Odd-width i31 unsigned multiply: the checks expect zext to i32, mul.u24,
; then trunc back to i31.
define i31 @umul24_i31(i31 %lhs, i31 %rhs) {
; SI-LABEL: @umul24_i31(
; SI-NEXT:    [[LHS24:%.*]] = and i31 [[LHS:%.*]], 16777215
; SI-NEXT:    [[RHS24:%.*]] = and i31 [[RHS:%.*]], 16777215
; SI-NEXT:    [[TMP1:%.*]] = zext i31 [[LHS24]] to i32
; SI-NEXT:    [[TMP2:%.*]] = zext i31 [[RHS24]] to i32
; SI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
; SI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i31
; SI-NEXT:    ret i31 [[TMP4]]
;
; VI-LABEL: @umul24_i31(
; VI-NEXT:    [[LHS24:%.*]] = and i31 [[LHS:%.*]], 16777215
; VI-NEXT:    [[RHS24:%.*]] = and i31 [[RHS:%.*]], 16777215
; VI-NEXT:    [[TMP1:%.*]] = zext i31 [[LHS24]] to i32
; VI-NEXT:    [[TMP2:%.*]] = zext i31 [[RHS24]] to i32
; VI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i31
; VI-NEXT:    ret i31 [[TMP4]]
;
  %lhs24 = and i31 %lhs, 16777215
  %rhs24 = and i31 %rhs, 16777215
  %mul = mul i31 %lhs24, %rhs24
  ret i31 %mul
}

; <2 x i31> unsigned multiply: expected to be scalarized, with each element
; going through zext / mul.u24 / trunc.
define <2 x i31> @umul24_v2i31(<2 x i31> %lhs, <2 x i31> %rhs) {
; SI-LABEL: @umul24_v2i31(
; SI-NEXT:    [[LHS24:%.*]] = and <2 x i31> [[LHS:%.*]], <i31 16777215, i31 16777215>
; SI-NEXT:    [[RHS24:%.*]] = and <2 x i31> [[RHS:%.*]], <i31 16777215, i31 16777215>
; SI-NEXT:    [[TMP1:%.*]] = extractelement <2 x i31> [[LHS24]], i64 0
; SI-NEXT:    [[TMP2:%.*]] = extractelement <2 x i31> [[LHS24]], i64 1
; SI-NEXT:    [[TMP3:%.*]] = extractelement <2 x i31> [[RHS24]], i64 0
; SI-NEXT:    [[TMP4:%.*]] = extractelement <2 x i31> [[RHS24]], i64 1
; SI-NEXT:    [[TMP5:%.*]] = zext i31 [[TMP1]] to i32
; SI-NEXT:    [[TMP6:%.*]] = zext i31 [[TMP3]] to i32
; SI-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP5]], i32 [[TMP6]])
; SI-NEXT:    [[TMP8:%.*]] = trunc i32 [[TMP7]] to i31
; SI-NEXT:    [[TMP9:%.*]] = zext i31 [[TMP2]] to i32
; SI-NEXT:    [[TMP10:%.*]] = zext i31 [[TMP4]] to i32
; SI-NEXT:    [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP9]], i32 [[TMP10]])
; SI-NEXT:    [[TMP12:%.*]] = trunc i32 [[TMP11]] to i31
; SI-NEXT:    [[TMP13:%.*]] = insertelement <2 x i31> undef, i31 [[TMP8]], i64 0
; SI-NEXT:    [[TMP14:%.*]] = insertelement <2 x i31> [[TMP13]], i31 [[TMP12]], i64 1
; SI-NEXT:    ret <2 x i31> [[TMP14]]
;
; VI-LABEL: @umul24_v2i31(
; VI-NEXT:    [[LHS24:%.*]] = and <2 x i31> [[LHS:%.*]], <i31 16777215, i31 16777215>
; VI-NEXT:    [[RHS24:%.*]] = and <2 x i31> [[RHS:%.*]], <i31 16777215, i31 16777215>
; VI-NEXT:    [[TMP1:%.*]] = extractelement <2 x i31> [[LHS24]], i64 0
; VI-NEXT:    [[TMP2:%.*]] = extractelement <2 x i31> [[LHS24]], i64 1
; VI-NEXT:    [[TMP3:%.*]] = extractelement <2 x i31> [[RHS24]], i64 0
; VI-NEXT:    [[TMP4:%.*]] = extractelement <2 x i31> [[RHS24]], i64 1
; VI-NEXT:    [[TMP5:%.*]] = zext i31 [[TMP1]] to i32
; VI-NEXT:    [[TMP6:%.*]] = zext i31 [[TMP3]] to i32
; VI-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP5]], i32 [[TMP6]])
; VI-NEXT:    [[TMP8:%.*]] = trunc i32 [[TMP7]] to i31
; VI-NEXT:    [[TMP9:%.*]] = zext i31 [[TMP2]] to i32
; VI-NEXT:    [[TMP10:%.*]] = zext i31 [[TMP4]] to i32
; VI-NEXT:    [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP9]], i32 [[TMP10]])
; VI-NEXT:    [[TMP12:%.*]] = trunc i32 [[TMP11]] to i31
; VI-NEXT:    [[TMP13:%.*]] = insertelement <2 x i31> undef, i31 [[TMP8]], i64 0
; VI-NEXT:    [[TMP14:%.*]] = insertelement <2 x i31> [[TMP13]], i31 [[TMP12]], i64 1
; VI-NEXT:    ret <2 x i31> [[TMP14]]
;
  %lhs24 = and <2 x i31> %lhs, <i31 16777215, i31 16777215>
  %rhs24 = and <2 x i31> %rhs, <i31 16777215, i31 16777215>
  %mul = mul <2 x i31> %lhs24, %rhs24
  ret <2 x i31> %mul
}

; <2 x i31> signed multiply: expected to be scalarized, with each element
; going through sext / mul.i24 / trunc.
define <2 x i31> @smul24_v2i31(<2 x i31> %lhs, <2 x i31> %rhs) {
; SI-LABEL: @smul24_v2i31(
; SI-NEXT:    [[SHL_LHS:%.*]] = shl <2 x i31> [[LHS:%.*]], <i31 8, i31 8>
; SI-NEXT:    [[LHS24:%.*]] = ashr <2 x i31> [[SHL_LHS]], <i31 8, i31 8>
; SI-NEXT:    [[SHL_RHS:%.*]] = shl <2 x i31> [[RHS:%.*]], <i31 8, i31 8>
; SI-NEXT:    [[RHS24:%.*]] = ashr <2 x i31> [[SHL_RHS]], <i31 8, i31 8>
; SI-NEXT:    [[TMP1:%.*]] = extractelement <2 x i31> [[LHS24]], i64 0
; SI-NEXT:    [[TMP2:%.*]] = extractelement <2 x i31> [[LHS24]], i64 1
; SI-NEXT:    [[TMP3:%.*]] = extractelement <2 x i31> [[RHS24]], i64 0
; SI-NEXT:    [[TMP4:%.*]] = extractelement <2 x i31> [[RHS24]], i64 1
; SI-NEXT:    [[TMP5:%.*]] = sext i31 [[TMP1]] to i32
; SI-NEXT:    [[TMP6:%.*]] = sext i31 [[TMP3]] to i32
; SI-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP5]], i32 [[TMP6]])
; SI-NEXT:    [[TMP8:%.*]] = trunc i32 [[TMP7]] to i31
; SI-NEXT:    [[TMP9:%.*]] = sext i31 [[TMP2]] to i32
; SI-NEXT:    [[TMP10:%.*]] = sext i31 [[TMP4]] to i32
; SI-NEXT:    [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP9]], i32 [[TMP10]])
; SI-NEXT:    [[TMP12:%.*]] = trunc i32 [[TMP11]] to i31
; SI-NEXT:    [[TMP13:%.*]] = insertelement <2 x i31> undef, i31 [[TMP8]], i64 0
; SI-NEXT:    [[TMP14:%.*]] = insertelement <2 x i31> [[TMP13]], i31 [[TMP12]], i64 1
; SI-NEXT:    ret <2 x i31> [[TMP14]]
;
; VI-LABEL: @smul24_v2i31(
; VI-NEXT:    [[SHL_LHS:%.*]] = shl <2 x i31> [[LHS:%.*]], <i31 8, i31 8>
; VI-NEXT:    [[LHS24:%.*]] = ashr <2 x i31> [[SHL_LHS]], <i31 8, i31 8>
; VI-NEXT:    [[SHL_RHS:%.*]] = shl <2 x i31> [[RHS:%.*]], <i31 8, i31 8>
; VI-NEXT:    [[RHS24:%.*]] = ashr <2 x i31> [[SHL_RHS]], <i31 8, i31 8>
; VI-NEXT:    [[TMP1:%.*]] = extractelement <2 x i31> [[LHS24]], i64 0
; VI-NEXT:    [[TMP2:%.*]] = extractelement <2 x i31> [[LHS24]], i64 1
; VI-NEXT:    [[TMP3:%.*]] = extractelement <2 x i31> [[RHS24]], i64 0
; VI-NEXT:    [[TMP4:%.*]] = extractelement <2 x i31> [[RHS24]], i64 1
; VI-NEXT:    [[TMP5:%.*]] = sext i31 [[TMP1]] to i32
; VI-NEXT:    [[TMP6:%.*]] = sext i31 [[TMP3]] to i32
; VI-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP5]], i32 [[TMP6]])
; VI-NEXT:    [[TMP8:%.*]] = trunc i32 [[TMP7]] to i31
; VI-NEXT:    [[TMP9:%.*]] = sext i31 [[TMP2]] to i32
; VI-NEXT:    [[TMP10:%.*]] = sext i31 [[TMP4]] to i32
; VI-NEXT:    [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP9]], i32 [[TMP10]])
; VI-NEXT:    [[TMP12:%.*]] = trunc i32 [[TMP11]] to i31
; VI-NEXT:    [[TMP13:%.*]] = insertelement <2 x i31> undef, i31 [[TMP8]], i64 0
; VI-NEXT:    [[TMP14:%.*]] = insertelement <2 x i31> [[TMP13]], i31 [[TMP12]], i64 1
; VI-NEXT:    ret <2 x i31> [[TMP14]]
;
  %shl.lhs = shl <2 x i31> %lhs, <i31 8, i31 8>
  %lhs24 = ashr <2 x i31> %shl.lhs, <i31 8, i31 8>
  %shl.rhs = shl <2 x i31> %rhs, <i31 8, i31 8>
  %rhs24 = ashr <2 x i31> %shl.rhs, <i31 8, i31 8>
  %mul = mul <2 x i31> %lhs24, %rhs24
  ret <2 x i31> %mul
}

; Odd-width i33 signed multiply: the checks expect trunc to i32, mul.i24,
; then sext back to i33.
define i33 @smul24_i33(i33 %lhs, i33 %rhs) {
; SI-LABEL: @smul24_i33(
; SI-NEXT:    [[SHL_LHS:%.*]] = shl i33 [[LHS:%.*]], 9
; SI-NEXT:    [[LHS24:%.*]] = ashr i33 [[SHL_LHS]], 9
; SI-NEXT:    [[SHL_RHS:%.*]] = shl i33 [[RHS:%.*]], 9
; SI-NEXT:    [[RHS24:%.*]] = ashr i33 [[SHL_RHS]], 9
; SI-NEXT:    [[TMP1:%.*]] = trunc i33 [[LHS24]] to i32
; SI-NEXT:    [[TMP2:%.*]] = trunc i33 [[RHS24]] to i32
; SI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
; SI-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i33
; SI-NEXT:    ret i33 [[TMP4]]
;
; VI-LABEL: @smul24_i33(
; VI-NEXT:    [[SHL_LHS:%.*]] = shl i33 [[LHS:%.*]], 9
; VI-NEXT:    [[LHS24:%.*]] = ashr i33 [[SHL_LHS]], 9
; VI-NEXT:    [[SHL_RHS:%.*]] = shl i33 [[RHS:%.*]], 9
; VI-NEXT:    [[RHS24:%.*]] = ashr i33 [[SHL_RHS]], 9
; VI-NEXT:    [[TMP1:%.*]] = trunc i33 [[LHS24]] to i32
; VI-NEXT:    [[TMP2:%.*]] = trunc i33 [[RHS24]] to i32
; VI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
; VI-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i33
; VI-NEXT:    ret i33 [[TMP4]]
;
  %shl.lhs = shl i33 %lhs, 9
  %lhs24 = ashr i33 %shl.lhs, 9
  %shl.rhs = shl i33 %rhs, 9
  %rhs24 = ashr i33 %shl.rhs, 9
  %mul = mul i33 %lhs24, %rhs24
  ret i33 %mul
}

; Odd-width i33 unsigned multiply: the checks expect trunc to i32, mul.u24,
; then zext back to i33.
define i33 @umul24_i33(i33 %lhs, i33 %rhs) {
; SI-LABEL: @umul24_i33(
; SI-NEXT:    [[LHS24:%.*]] = and i33 [[LHS:%.*]], 16777215
; SI-NEXT:    [[RHS24:%.*]] = and i33 [[RHS:%.*]], 16777215
; SI-NEXT:    [[TMP1:%.*]] = trunc i33 [[LHS24]] to i32
; SI-NEXT:    [[TMP2:%.*]] = trunc i33 [[RHS24]] to i32
; SI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
; SI-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i33
; SI-NEXT:    ret i33 [[TMP4]]
;
; VI-LABEL: @umul24_i33(
; VI-NEXT:    [[LHS24:%.*]] = and i33 [[LHS:%.*]], 16777215
; VI-NEXT:    [[RHS24:%.*]] = and i33 [[RHS:%.*]], 16777215
; VI-NEXT:    [[TMP1:%.*]] = trunc i33 [[LHS24]] to i32
; VI-NEXT:    [[TMP2:%.*]] = trunc i33 [[RHS24]] to i32
; VI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
; VI-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i33
; VI-NEXT:    ret i33 [[TMP4]]
;
  %lhs24 = and i33 %lhs, 16777215
  %rhs24 = and i33 %rhs, 16777215
  %mul = mul i33 %lhs24, %rhs24
  ret i33 %mul
}

; Negative test: shl 7 / ashr 7 on i32 leaves 25 significant bits, so the
; checks expect the plain mul to be kept on both targets.
define i32 @smul25_i32(i32 %lhs, i32 %rhs) {
; SI-LABEL: @smul25_i32(
; SI-NEXT:    [[SHL_LHS:%.*]] = shl i32 [[LHS:%.*]], 7
; SI-NEXT:    [[LHS24:%.*]] = ashr i32 [[SHL_LHS]], 7
; SI-NEXT:    [[SHL_RHS:%.*]] = shl i32 [[RHS:%.*]], 7
; SI-NEXT:    [[RHS24:%.*]] = ashr i32 [[SHL_RHS]], 7
; SI-NEXT:    [[MUL:%.*]] = mul i32 [[LHS24]], [[RHS24]]
; SI-NEXT:    ret i32 [[MUL]]
;
; VI-LABEL: @smul25_i32(
; VI-NEXT:    [[SHL_LHS:%.*]] = shl i32 [[LHS:%.*]], 7
; VI-NEXT:    [[LHS24:%.*]] = ashr i32 [[SHL_LHS]], 7
; VI-NEXT:    [[SHL_RHS:%.*]] = shl i32 [[RHS:%.*]], 7
; VI-NEXT:    [[RHS24:%.*]] = ashr i32 [[SHL_RHS]], 7
; VI-NEXT:    [[MUL:%.*]] = mul i32 [[LHS24]], [[RHS24]]
; VI-NEXT:    ret i32 [[MUL]]
;
  %shl.lhs = shl i32 %lhs, 7
  %lhs24 = ashr i32 %shl.lhs, 7
  %shl.rhs = shl i32 %rhs, 7
  %rhs24 = ashr i32 %shl.rhs, 7
  %mul = mul i32 %lhs24, %rhs24
  ret i32 %mul
}

; Negative test: operands are masked to 25 bits (33554431 is 2^25 - 1), so
; the checks expect the plain mul to be kept on both targets.
define i32 @umul25_i32(i32 %lhs, i32 %rhs) {
; SI-LABEL: @umul25_i32(
; SI-NEXT:    [[LHS24:%.*]] = and i32 [[LHS:%.*]], 33554431
; SI-NEXT:    [[RHS24:%.*]] = and i32 [[RHS:%.*]], 33554431
; SI-NEXT:    [[MUL:%.*]] = mul i32 [[LHS24]], [[RHS24]]
; SI-NEXT:    ret i32 [[MUL]]
;
; VI-LABEL: @umul25_i32(
; VI-NEXT:    [[LHS24:%.*]] = and i32 [[LHS:%.*]], 33554431
; VI-NEXT:    [[RHS24:%.*]] = and i32 [[RHS:%.*]], 33554431
; VI-NEXT:    [[MUL:%.*]] = mul i32 [[LHS24]], [[RHS24]]
; VI-NEXT:    ret i32 [[MUL]]
;
  %lhs24 = and i32 %lhs, 33554431
  %rhs24 = and i32 %rhs, 33554431
  %mul = mul i32 %lhs24, %rhs24
  ret i32 %mul
}

; <2 x i33> signed multiply: expected to be scalarized, with each element
; going through trunc / mul.i24 / sext.
define <2 x i33> @smul24_v2i33(<2 x i33> %lhs, <2 x i33> %rhs) {
; SI-LABEL: @smul24_v2i33(
; SI-NEXT:    [[SHL_LHS:%.*]] = shl <2 x i33> [[LHS:%.*]], <i33 9, i33 9>
; SI-NEXT:    [[LHS24:%.*]] = ashr <2 x i33> [[SHL_LHS]], <i33 9, i33 9>
; SI-NEXT:    [[SHL_RHS:%.*]] = shl <2 x i33> [[RHS:%.*]], <i33 9, i33 9>
; SI-NEXT:    [[RHS24:%.*]] = ashr <2 x i33> [[SHL_RHS]], <i33 9, i33 9>
; SI-NEXT:    [[TMP1:%.*]] = extractelement <2 x i33> [[LHS24]], i64 0
; SI-NEXT:    [[TMP2:%.*]] = extractelement <2 x i33> [[LHS24]], i64 1
; SI-NEXT:    [[TMP3:%.*]] = extractelement <2 x i33> [[RHS24]], i64 0
; SI-NEXT:    [[TMP4:%.*]] = extractelement <2 x i33> [[RHS24]], i64 1
; SI-NEXT:    [[TMP5:%.*]] = trunc i33 [[TMP1]] to i32
; SI-NEXT:    [[TMP6:%.*]] = trunc i33 [[TMP3]] to i32
; SI-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP5]], i32 [[TMP6]])
; SI-NEXT:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i33
; SI-NEXT:    [[TMP9:%.*]] = trunc i33 [[TMP2]] to i32
; SI-NEXT:    [[TMP10:%.*]] = trunc i33 [[TMP4]] to i32
; SI-NEXT:    [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP9]], i32 [[TMP10]])
; SI-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP11]] to i33
; SI-NEXT:    [[TMP13:%.*]] = insertelement <2 x i33> undef, i33 [[TMP8]], i64 0
; SI-NEXT:    [[TMP14:%.*]] = insertelement <2 x i33> [[TMP13]], i33 [[TMP12]], i64 1
; SI-NEXT:    ret <2 x i33> [[TMP14]]
;
; VI-LABEL: @smul24_v2i33(
; VI-NEXT:    [[SHL_LHS:%.*]] = shl <2 x i33> [[LHS:%.*]], <i33 9, i33 9>
; VI-NEXT:    [[LHS24:%.*]] = ashr <2 x i33> [[SHL_LHS]], <i33 9, i33 9>
; VI-NEXT:    [[SHL_RHS:%.*]] = shl <2 x i33> [[RHS:%.*]], <i33 9, i33 9>
; VI-NEXT:    [[RHS24:%.*]] = ashr <2 x i33> [[SHL_RHS]], <i33 9, i33 9>
; VI-NEXT:    [[TMP1:%.*]] = extractelement <2 x i33> [[LHS24]], i64 0
; VI-NEXT:    [[TMP2:%.*]] = extractelement <2 x i33> [[LHS24]], i64 1
; VI-NEXT:    [[TMP3:%.*]] = extractelement <2 x i33> [[RHS24]], i64 0
; VI-NEXT:    [[TMP4:%.*]] = extractelement <2 x i33> [[RHS24]], i64 1
; VI-NEXT:    [[TMP5:%.*]] = trunc i33 [[TMP1]] to i32
; VI-NEXT:    [[TMP6:%.*]] = trunc i33 [[TMP3]] to i32
; VI-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP5]], i32 [[TMP6]])
; VI-NEXT:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i33
; VI-NEXT:    [[TMP9:%.*]] = trunc i33 [[TMP2]] to i32
; VI-NEXT:    [[TMP10:%.*]] = trunc i33 [[TMP4]] to i32
; VI-NEXT:    [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP9]], i32 [[TMP10]])
; VI-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP11]] to i33
; VI-NEXT:    [[TMP13:%.*]] = insertelement <2 x i33> undef, i33 [[TMP8]], i64 0
; VI-NEXT:    [[TMP14:%.*]] = insertelement <2 x i33> [[TMP13]], i33 [[TMP12]], i64 1
; VI-NEXT:    ret <2 x i33> [[TMP14]]
;
  %shl.lhs = shl <2 x i33> %lhs, <i33 9, i33 9>
  %lhs24 = ashr <2 x i33> %shl.lhs, <i33 9, i33 9>
  %shl.rhs = shl <2 x i33> %rhs, <i33 9, i33 9>
  %rhs24 = ashr <2 x i33> %shl.rhs, <i33 9, i33 9>
  %mul = mul <2 x i33> %lhs24, %rhs24
  ret <2 x i33> %mul
}