//===- RISCVMatInt.cpp - Immediate materialisation -------------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "RISCVMatInt.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "llvm/ADT/APInt.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;

static int getInstSeqCost(RISCVMatInt::InstSeq &Res, bool HasRVC) {
  if (!HasRVC)
    return Res.size();

  int Cost = 0;
  for (auto Instr : Res) {
    // Assume instructions that aren't listed aren't compressible.
    bool Compressed = false;
    switch (Instr.Opc) {
    case RISCV::SLLI:
    case RISCV::SRLI:
      Compressed = true;
      break;
    case RISCV::ADDI:
    case RISCV::ADDIW:
    case RISCV::LUI:
      Compressed = isInt<6>(Instr.Imm);
      break;
    }
    // Two RVC instructions take the same space as one RVI instruction, but
    // can take longer to execute than the single RVI instruction. Thus, we
    // consider that two RVC instructions are slightly more costly than one
    // RVI instruction. For longer sequences of RVC instructions the space
    // savings can be worth it, though. The costs below try to model that.
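    // As an illustration of the model, two compressible instructions cost
    // 140 (2 x 70): more than one uncompressed instruction (100) but less
    // than two (200).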
    if (!Compressed)
      Cost += 100; // Baseline cost of one RVI instruction: 100%.
    else
      Cost += 70; // 70% cost of baseline.
  }
  return Cost;
}

// Recursively generate a sequence for materializing an integer.
static void generateInstSeqImpl(int64_t Val,
                                const FeatureBitset &ActiveFeatures,
                                RISCVMatInt::InstSeq &Res) {
  bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];

  if (isInt<32>(Val)) {
    // Depending on the active bits in the immediate Value v, the following
    // instruction sequences are emitted:
    //
    // v == 0                        : ADDI
    // v[0,12) != 0 && v[12,32) == 0 : ADDI
    // v[0,12) == 0 && v[12,32) != 0 : LUI
    // v[0,32) != 0                  : LUI+ADDI(W)
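    //
    // For example (illustrative), v = 0x12345678 gives Hi20 = 0x12345 and
    // Lo12 = 0x678, so the emitted sequence is LUI 0x12345 + ADDI(W) 0x678.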
    int64_t Hi20 = ((Val + 0x800) >> 12) & 0xFFFFF;
    int64_t Lo12 = SignExtend64<12>(Val);

    if (Hi20)
      Res.push_back(RISCVMatInt::Inst(RISCV::LUI, Hi20));

    if (Lo12 || Hi20 == 0) {
      unsigned AddiOpc = (IsRV64 && Hi20) ? RISCV::ADDIW : RISCV::ADDI;
      Res.push_back(RISCVMatInt::Inst(AddiOpc, Lo12));
    }
    return;
  }

  assert(IsRV64 && "Can't emit >32-bit imm for non-RV64 target");

  // Use BSETI for a single bit.
  if (ActiveFeatures[RISCV::FeatureStdExtZbs] && isPowerOf2_64(Val)) {
    Res.push_back(RISCVMatInt::Inst(RISCV::BSETI, Log2_64(Val)));
    return;
  }

  // In the worst case, for a full 64-bit constant, a sequence of 8 instructions
  // (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be emitted. Note
  // that the first two instructions (LUI+ADDIW) can contribute up to 32 bits
  // while the following ADDI instructions contribute up to 12 bits each.
  //
  // At first glance, implementing this seems to be possible by simply
  // emitting the most significant 32 bits (LUI+ADDIW) followed by as many left
  // shifts (SLLI) and immediate additions (ADDI) as needed. However, due to
  // the fact that ADDI performs a sign-extended addition, doing it like that
  // would only be possible when at most 11 bits of the ADDI instructions are
  // used. Using all 12 bits of the ADDI instructions, as GAS does, actually
  // requires that the constant is processed starting with the least
  // significant bit.
  //
  // In the following, constants are processed from LSB to MSB but instruction
  // emission is performed from MSB to LSB by recursively calling
  // generateInstSeq. In each recursion, first the lowest 12 bits are removed
  // from the constant and the optimal shift amount, which can be greater than
  // 12 bits if the constant is sparse, is determined. Then, the shifted
  // remaining constant is processed recursively and gets emitted as soon as it
  // fits into 32 bits. The emission of the shifts and additions is subsequently
  // performed when the recursion returns.
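  //
  // For example (illustrative), for Val = 0x100000001 the low 12 bits (1) are
  // peeled off, the remainder 0x100000000 is shifted right by 32 to give 1,
  // which fits into 32 bits and is emitted first (ADDI 1); the SLLI 32 and the
  // final ADDI 1 are appended as the recursion unwinds.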

  int64_t Lo12 = SignExtend64<12>(Val);
  Val = (uint64_t)Val - (uint64_t)Lo12;

  int ShiftAmount = 0;
  bool Unsigned = false;

  // Val might now be valid for LUI without needing a shift.
  if (!isInt<32>(Val)) {
    ShiftAmount = findFirstSet((uint64_t)Val);
    Val >>= ShiftAmount;

    // If the remaining bits don't fit in 12 bits, we might be able to reduce the
    // shift amount in order to use LUI which will zero the lower 12 bits.
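    // For example (illustrative), 0x1234500000 initially gives ShiftAmount = 20
    // with a remainder of 0x12345 (LUI+ADDIW); reducing the shift to 8 turns
    // the remainder into 0x12345000, which a single LUI can produce, so the
    // constant becomes LUI 0x12345 + SLLI 8.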
    if (ShiftAmount > 12 && !isInt<12>(Val)) {
      if (isInt<32>((uint64_t)Val << 12)) {
        // Reduce the shift amount and add zeros to the LSBs so it will match LUI.
        ShiftAmount -= 12;
        Val = (uint64_t)Val << 12;
      } else if (isUInt<32>((uint64_t)Val << 12) &&
                 ActiveFeatures[RISCV::FeatureStdExtZba]) {
        // Reduce the shift amount and add zeros to the LSBs so it will match
        // LUI, then shift left with SLLI.UW to clear the upper 32 set bits.
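        // For example (illustrative), 0xFFFFF00000 becomes LUI 0xFFFFF followed
        // by SLLI.UW 8: the LUI result (0xFFFFFFFFFFFFF000) is zero-extended
        // from 32 bits before being shifted left by 8.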
        ShiftAmount -= 12;
        Val = ((uint64_t)Val << 12) | (0xffffffffull << 32);
        Unsigned = true;
      }
    }

    // Try to use SLLI_UW for Val when it is uint32 but not int32.
    if (isUInt<32>((uint64_t)Val) && !isInt<32>((uint64_t)Val) &&
        ActiveFeatures[RISCV::FeatureStdExtZba]) {
      // Use LUI+ADDI or LUI to compose, then clear the upper 32 bits with
      // SLLI_UW.
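      // For example (illustrative), for 0x80000001000 the shifted remainder is
      // 0x80000001; it is composed as LUI 0x80000 + ADDIW 1 (materialising
      // 0xFFFFFFFF80000001) and the final SLLI_UW 12 clears the upper 32 bits.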
      Val = ((uint64_t)Val) | (0xffffffffull << 32);
      Unsigned = true;
    }
  }

  generateInstSeqImpl(Val, ActiveFeatures, Res);

  // Skip shift if we were able to use LUI directly.
  if (ShiftAmount) {
    if (Unsigned)
      Res.push_back(RISCVMatInt::Inst(RISCV::SLLI_UW, ShiftAmount));
    else
      Res.push_back(RISCVMatInt::Inst(RISCV::SLLI, ShiftAmount));
  }

  if (Lo12)
    Res.push_back(RISCVMatInt::Inst(RISCV::ADDI, Lo12));
}

static unsigned extractRotateInfo(int64_t Val) {
  // for case: 0b111..1..xxxxxx1..1..
  unsigned LeadingOnes = countLeadingOnes((uint64_t)Val);
  unsigned TrailingOnes = countTrailingOnes((uint64_t)Val);
  if (TrailingOnes > 0 && TrailingOnes < 64 &&
      (LeadingOnes + TrailingOnes) > (64 - 12))
    return 64 - TrailingOnes;

  // for case: 0bxxx1..1..1...xxx
  unsigned UpperTrailingOnes = countTrailingOnes(Hi_32(Val));
  unsigned LowerLeadingOnes = countLeadingOnes(Lo_32(Val));
  if (UpperTrailingOnes < 32 &&
      (UpperTrailingOnes + LowerLeadingOnes) > (64 - 12))
    return 32 - UpperTrailingOnes;

  return 0;
}

namespace llvm {
namespace RISCVMatInt {
InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
  RISCVMatInt::InstSeq Res;
  generateInstSeqImpl(Val, ActiveFeatures, Res);

  // If there are trailing zeros, try generating a sign extended constant with
  // no trailing zeros and use a final SLLI to restore them.
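  // For example (illustrative, without Zba/Zbs), 0x7FFFF7FF0 needs
  // LUI+ADDIW+SLLI+ADDI directly, while materialising 0x7FFFF7FF (LUI+ADDIW)
  // and shifting it left by 4 needs only three instructions.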
  if ((Val & 1) == 0 && Res.size() > 2) {
    unsigned TrailingZeros = countTrailingZeros((uint64_t)Val);
    int64_t ShiftedVal = Val >> TrailingZeros;
    RISCVMatInt::InstSeq TmpSeq;
    generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
    TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SLLI, TrailingZeros));

    // Keep the new sequence if it is an improvement.
    if (TmpSeq.size() < Res.size()) {
      Res = TmpSeq;
      // A 2 instruction sequence is the best we can do.
      if (Res.size() <= 2)
        return Res;
    }
  }

  // If the constant is positive we might be able to generate a shifted constant
  // with no leading zeros and use a final SRLI to restore them.
  if (Val > 0 && Res.size() > 2) {
    assert(ActiveFeatures[RISCV::Feature64Bit] &&
           "Expected RV32 to only need 2 instructions");
    unsigned LeadingZeros = countLeadingZeros((uint64_t)Val);
    uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros;
    // Fill in the bits that will be shifted out with 1s. An example where this
    // helps is trailing one masks with 32 or more ones. This will generate
    // ADDI -1 and an SRLI.
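    // For example (illustrative), 0x3FFFFFFFF shifts up to become -1, so it is
    // materialised as ADDI -1 followed by SRLI 30.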
    ShiftedVal |= maskTrailingOnes<uint64_t>(LeadingZeros);

    RISCVMatInt::InstSeq TmpSeq;
    generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
    TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SRLI, LeadingZeros));

    // Keep the new sequence if it is an improvement.
    if (TmpSeq.size() < Res.size()) {
      Res = TmpSeq;
      // A 2 instruction sequence is the best we can do.
      if (Res.size() <= 2)
        return Res;
    }

    // Some cases can benefit from filling the lower bits with zeros instead.
    ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros);
    TmpSeq.clear();
    generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
    TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SRLI, LeadingZeros));

    // Keep the new sequence if it is an improvement.
    if (TmpSeq.size() < Res.size()) {
      Res = TmpSeq;
      // A 2 instruction sequence is the best we can do.
      if (Res.size() <= 2)
        return Res;
    }

    // If we have exactly 32 leading zeros and Zba, we can try using zext.w at
    // the end of the sequence.
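    // For example (illustrative), 0xAAAAAAAB can be materialised as LUI 0xAAAAB
    // + ADDIW -0x555 (giving 0xFFFFFFFFAAAAAAAB) followed by an ADD_UW with x0
    // (zext.w): three instructions instead of four.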
    if (LeadingZeros == 32 && ActiveFeatures[RISCV::FeatureStdExtZba]) {
      // Try replacing upper bits with 1.
      uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
      TmpSeq.clear();
      generateInstSeqImpl(LeadingOnesVal, ActiveFeatures, TmpSeq);
      TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADD_UW, 0));

      // Keep the new sequence if it is an improvement.
      if (TmpSeq.size() < Res.size()) {
        Res = TmpSeq;
        // A 2 instruction sequence is the best we can do.
        if (Res.size() <= 2)
          return Res;
      }
    }
  }

  // Perform optimization with BCLRI/BSETI in the Zbs extension.
  if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZbs]) {
    assert(ActiveFeatures[RISCV::Feature64Bit] &&
           "Expected RV32 to only need 2 instructions");

    // 1. For values in range 0xffffffff 7fffffff ~ 0xffffffff 00000000,
    //    call generateInstSeqImpl with Val|0x80000000 (which is expected to
    //    be an int32), then emit (BCLRI r, 31).
    // 2. For values in range 0x80000000 ~ 0xffffffff, call generateInstSeqImpl
    //    with Val&~0x80000000 (which is expected to be an int32), then
    //    emit (BSETI r, 31).
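    //
    // For example (illustrative), for 0xffffffff 00001235 this produces
    // LUI 0x80001 + ADDIW 0x235 (giving 0xffffffff 80001235) followed by
    // (BCLRI r, 31): three instructions instead of four.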
    int64_t NewVal;
    unsigned Opc;
    if (Val < 0) {
      Opc = RISCV::BCLRI;
      NewVal = Val | 0x80000000ll;
    } else {
      Opc = RISCV::BSETI;
      NewVal = Val & ~0x80000000ll;
    }
    if (isInt<32>(NewVal)) {
      RISCVMatInt::InstSeq TmpSeq;
      generateInstSeqImpl(NewVal, ActiveFeatures, TmpSeq);
      TmpSeq.push_back(RISCVMatInt::Inst(Opc, 31));
      if (TmpSeq.size() < Res.size())
        Res = TmpSeq;
    }

    // Try to use BCLRI for upper 32 bits if the original lower 32 bits are
    // negative int32, or use BSETI for upper 32 bits if the original lower
    // 32 bits are positive int32.
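    // For example (illustrative), 0x00000010 00000001 becomes ADDI 1 + BSETI 36
    // rather than ADDI 1 + SLLI 36 + ADDI 1.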
    int32_t Lo = Val;
    uint32_t Hi = Val >> 32;
    Opc = 0;
    RISCVMatInt::InstSeq TmpSeq;
    generateInstSeqImpl(Lo, ActiveFeatures, TmpSeq);
    // Check if it is profitable to use BCLRI/BSETI.
    if (Lo > 0 && TmpSeq.size() + countPopulation(Hi) < Res.size()) {
      Opc = RISCV::BSETI;
    } else if (Lo < 0 && TmpSeq.size() + countPopulation(~Hi) < Res.size()) {
      Opc = RISCV::BCLRI;
      Hi = ~Hi;
    }
    // Search for each bit and build corresponding BCLRI/BSETI.
    if (Opc > 0) {
      while (Hi != 0) {
        unsigned Bit = countTrailingZeros(Hi);
        TmpSeq.push_back(RISCVMatInt::Inst(Opc, Bit + 32));
        Hi &= ~(1 << Bit);
      }
      if (TmpSeq.size() < Res.size())
        Res = TmpSeq;
    }
  }

  // Perform optimization with SH*ADD in the Zba extension.
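  // For example (illustrative, with only Zba enabled), 0x255555553
  // (= 5 * 0x77777777) becomes LUI 0x77777 + ADDIW 0x777 + SH2ADD, one
  // instruction shorter than the plain LUI+ADDIW+SLLI+ADDI sequence.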
  if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZba]) {
    assert(ActiveFeatures[RISCV::Feature64Bit] &&
           "Expected RV32 to only need 2 instructions");
    int64_t Div = 0;
    unsigned Opc = 0;
    RISCVMatInt::InstSeq TmpSeq;
    // Select the opcode and divisor.
    if ((Val % 3) == 0 && isInt<32>(Val / 3)) {
      Div = 3;
      Opc = RISCV::SH1ADD;
    } else if ((Val % 5) == 0 && isInt<32>(Val / 5)) {
      Div = 5;
      Opc = RISCV::SH2ADD;
    } else if ((Val % 9) == 0 && isInt<32>(Val / 9)) {
      Div = 9;
      Opc = RISCV::SH3ADD;
    }
    // Build the new instruction sequence.
    if (Div > 0) {
      generateInstSeqImpl(Val / Div, ActiveFeatures, TmpSeq);
      TmpSeq.push_back(RISCVMatInt::Inst(Opc, 0));
      if (TmpSeq.size() < Res.size())
        Res = TmpSeq;
    } else {
      // Try to use LUI+SH*ADD+ADDI.
      int64_t Hi52 = ((uint64_t)Val + 0x800ull) & ~0xfffull;
      int64_t Lo12 = SignExtend64<12>(Val);
      Div = 0;
      if (isInt<32>(Hi52 / 3) && (Hi52 % 3) == 0) {
        Div = 3;
        Opc = RISCV::SH1ADD;
      } else if (isInt<32>(Hi52 / 5) && (Hi52 % 5) == 0) {
        Div = 5;
        Opc = RISCV::SH2ADD;
      } else if (isInt<32>(Hi52 / 9) && (Hi52 % 9) == 0) {
        Div = 9;
        Opc = RISCV::SH3ADD;
      }
      // Build the new instruction sequence.
      if (Div > 0) {
        // A Val with zero Lo12 (i.e. Val equal to Hi52) should already have
        // been turned into LUI+SH*ADD by the previous optimization.
        assert(Lo12 != 0 &&
               "unexpected instruction sequence for immediate materialisation");
        assert(TmpSeq.empty() && "Expected empty TmpSeq");
        generateInstSeqImpl(Hi52 / Div, ActiveFeatures, TmpSeq);
        TmpSeq.push_back(RISCVMatInt::Inst(Opc, 0));
        TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADDI, Lo12));
        if (TmpSeq.size() < Res.size())
          Res = TmpSeq;
      }
    }
  }

  // Perform optimization with rori in the Zbb extension.
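  // For example (illustrative), 0xFFFFFFFF5AFFFFFF rotated left by 40 is
  // 0xFFFFFFFFFFFFFF5A (-166), so it can be materialised as ADDI -166
  // followed by RORI 40.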
  if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZbb]) {
    if (unsigned Rotate = extractRotateInfo(Val)) {
      RISCVMatInt::InstSeq TmpSeq;
      uint64_t NegImm12 =
          ((uint64_t)Val >> (64 - Rotate)) | ((uint64_t)Val << Rotate);
      assert(isInt<12>(NegImm12));
      TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADDI, NegImm12));
      TmpSeq.push_back(RISCVMatInt::Inst(RISCV::RORI, Rotate));
      Res = TmpSeq;
    }
  }
  return Res;
}

int getIntMatCost(const APInt &Val, unsigned Size,
                  const FeatureBitset &ActiveFeatures, bool CompressionCost) {
  bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];
  bool HasRVC = CompressionCost && ActiveFeatures[RISCV::FeatureStdExtC];
  int PlatRegSize = IsRV64 ? 64 : 32;

  // Split the constant into platform register sized chunks, and calculate cost
  // of each chunk.
  int Cost = 0;
  for (unsigned ShiftVal = 0; ShiftVal < Size; ShiftVal += PlatRegSize) {
    APInt Chunk = Val.ashr(ShiftVal).sextOrTrunc(PlatRegSize);
    InstSeq MatSeq = generateInstSeq(Chunk.getSExtValue(), ActiveFeatures);
    Cost += getInstSeqCost(MatSeq, HasRVC);
  }
  return std::max(1, Cost);
}

OpndKind Inst::getOpndKind() const {
  switch (Opc) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case RISCV::LUI:
    return RISCVMatInt::Imm;
  case RISCV::ADD_UW:
    return RISCVMatInt::RegX0;
  case RISCV::SH1ADD:
  case RISCV::SH2ADD:
  case RISCV::SH3ADD:
    return RISCVMatInt::RegReg;
  case RISCV::ADDI:
  case RISCV::ADDIW:
  case RISCV::SLLI:
  case RISCV::SRLI:
  case RISCV::SLLI_UW:
  case RISCV::RORI:
  case RISCV::BSETI:
  case RISCV::BCLRI:
    return RISCVMatInt::RegImm;
  }
}

} // namespace RISCVMatInt
} // namespace llvm