//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Handling of format strings in scanf and friends. The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
//
//===----------------------------------------------------------------------===//
13*0b57cec5SDimitry Andric
14*0b57cec5SDimitry Andric #include "clang/AST/FormatString.h"
15*0b57cec5SDimitry Andric #include "FormatStringParsing.h"
16*0b57cec5SDimitry Andric #include "clang/Basic/TargetInfo.h"
17*0b57cec5SDimitry Andric
18*0b57cec5SDimitry Andric using clang::analyze_format_string::ArgType;
19*0b57cec5SDimitry Andric using clang::analyze_format_string::FormatStringHandler;
20*0b57cec5SDimitry Andric using clang::analyze_format_string::LengthModifier;
21*0b57cec5SDimitry Andric using clang::analyze_format_string::OptionalAmount;
22*0b57cec5SDimitry Andric using clang::analyze_format_string::ConversionSpecifier;
23*0b57cec5SDimitry Andric using clang::analyze_scanf::ScanfConversionSpecifier;
24*0b57cec5SDimitry Andric using clang::analyze_scanf::ScanfSpecifier;
25*0b57cec5SDimitry Andric using clang::UpdateOnReturn;
26*0b57cec5SDimitry Andric using namespace clang;
27*0b57cec5SDimitry Andric
28*0b57cec5SDimitry Andric typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
29*0b57cec5SDimitry Andric ScanfSpecifierResult;
30*0b57cec5SDimitry Andric
ParseScanList(FormatStringHandler & H,ScanfConversionSpecifier & CS,const char * & Beg,const char * E)31*0b57cec5SDimitry Andric static bool ParseScanList(FormatStringHandler &H,
32*0b57cec5SDimitry Andric ScanfConversionSpecifier &CS,
33*0b57cec5SDimitry Andric const char *&Beg, const char *E) {
34*0b57cec5SDimitry Andric const char *I = Beg;
35*0b57cec5SDimitry Andric const char *start = I - 1;
36*0b57cec5SDimitry Andric UpdateOnReturn <const char*> UpdateBeg(Beg, I);
37*0b57cec5SDimitry Andric
38*0b57cec5SDimitry Andric // No more characters?
39*0b57cec5SDimitry Andric if (I == E) {
40*0b57cec5SDimitry Andric H.HandleIncompleteScanList(start, I);
41*0b57cec5SDimitry Andric return true;
42*0b57cec5SDimitry Andric }
43*0b57cec5SDimitry Andric
44*0b57cec5SDimitry Andric // Special case: ']' is the first character.
45*0b57cec5SDimitry Andric if (*I == ']') {
46*0b57cec5SDimitry Andric if (++I == E) {
47*0b57cec5SDimitry Andric H.HandleIncompleteScanList(start, I - 1);
48*0b57cec5SDimitry Andric return true;
49*0b57cec5SDimitry Andric }
50*0b57cec5SDimitry Andric }
51*0b57cec5SDimitry Andric
52*0b57cec5SDimitry Andric // Special case: "^]" are the first characters.
53*0b57cec5SDimitry Andric if (I + 1 != E && I[0] == '^' && I[1] == ']') {
54*0b57cec5SDimitry Andric I += 2;
55*0b57cec5SDimitry Andric if (I == E) {
56*0b57cec5SDimitry Andric H.HandleIncompleteScanList(start, I - 1);
57*0b57cec5SDimitry Andric return true;
58*0b57cec5SDimitry Andric }
59*0b57cec5SDimitry Andric }
60*0b57cec5SDimitry Andric
61*0b57cec5SDimitry Andric // Look for a ']' character which denotes the end of the scan list.
62*0b57cec5SDimitry Andric while (*I != ']') {
63*0b57cec5SDimitry Andric if (++I == E) {
64*0b57cec5SDimitry Andric H.HandleIncompleteScanList(start, I - 1);
65*0b57cec5SDimitry Andric return true;
66*0b57cec5SDimitry Andric }
67*0b57cec5SDimitry Andric }
68*0b57cec5SDimitry Andric
69*0b57cec5SDimitry Andric CS.setEndScanList(I);
70*0b57cec5SDimitry Andric return false;
71*0b57cec5SDimitry Andric }
72*0b57cec5SDimitry Andric
73*0b57cec5SDimitry Andric // FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
74*0b57cec5SDimitry Andric // We can possibly refactor.
ParseScanfSpecifier(FormatStringHandler & H,const char * & Beg,const char * E,unsigned & argIndex,const LangOptions & LO,const TargetInfo & Target)75*0b57cec5SDimitry Andric static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
76*0b57cec5SDimitry Andric const char *&Beg,
77*0b57cec5SDimitry Andric const char *E,
78*0b57cec5SDimitry Andric unsigned &argIndex,
79*0b57cec5SDimitry Andric const LangOptions &LO,
80*0b57cec5SDimitry Andric const TargetInfo &Target) {
81*0b57cec5SDimitry Andric using namespace clang::analyze_format_string;
82*0b57cec5SDimitry Andric using namespace clang::analyze_scanf;
83*0b57cec5SDimitry Andric const char *I = Beg;
84*0b57cec5SDimitry Andric const char *Start = nullptr;
85*0b57cec5SDimitry Andric UpdateOnReturn <const char*> UpdateBeg(Beg, I);
86*0b57cec5SDimitry Andric
87*0b57cec5SDimitry Andric // Look for a '%' character that indicates the start of a format specifier.
88*0b57cec5SDimitry Andric for ( ; I != E ; ++I) {
89*0b57cec5SDimitry Andric char c = *I;
90*0b57cec5SDimitry Andric if (c == '\0') {
91*0b57cec5SDimitry Andric // Detect spurious null characters, which are likely errors.
92*0b57cec5SDimitry Andric H.HandleNullChar(I);
93*0b57cec5SDimitry Andric return true;
94*0b57cec5SDimitry Andric }
95*0b57cec5SDimitry Andric if (c == '%') {
96*0b57cec5SDimitry Andric Start = I++; // Record the start of the format specifier.
97*0b57cec5SDimitry Andric break;
98*0b57cec5SDimitry Andric }
99*0b57cec5SDimitry Andric }
100*0b57cec5SDimitry Andric
101*0b57cec5SDimitry Andric // No format specifier found?
102*0b57cec5SDimitry Andric if (!Start)
103*0b57cec5SDimitry Andric return false;
104*0b57cec5SDimitry Andric
105*0b57cec5SDimitry Andric if (I == E) {
106*0b57cec5SDimitry Andric // No more characters left?
107*0b57cec5SDimitry Andric H.HandleIncompleteSpecifier(Start, E - Start);
108*0b57cec5SDimitry Andric return true;
109*0b57cec5SDimitry Andric }
110*0b57cec5SDimitry Andric
111*0b57cec5SDimitry Andric ScanfSpecifier FS;
112*0b57cec5SDimitry Andric if (ParseArgPosition(H, FS, Start, I, E))
113*0b57cec5SDimitry Andric return true;
114*0b57cec5SDimitry Andric
115*0b57cec5SDimitry Andric if (I == E) {
116*0b57cec5SDimitry Andric // No more characters left?
117*0b57cec5SDimitry Andric H.HandleIncompleteSpecifier(Start, E - Start);
118*0b57cec5SDimitry Andric return true;
119*0b57cec5SDimitry Andric }
120*0b57cec5SDimitry Andric
121*0b57cec5SDimitry Andric // Look for '*' flag if it is present.
122*0b57cec5SDimitry Andric if (*I == '*') {
123*0b57cec5SDimitry Andric FS.setSuppressAssignment(I);
124*0b57cec5SDimitry Andric if (++I == E) {
125*0b57cec5SDimitry Andric H.HandleIncompleteSpecifier(Start, E - Start);
126*0b57cec5SDimitry Andric return true;
127*0b57cec5SDimitry Andric }
128*0b57cec5SDimitry Andric }
129*0b57cec5SDimitry Andric
130*0b57cec5SDimitry Andric // Look for the field width (if any). Unlike printf, this is either
131*0b57cec5SDimitry Andric // a fixed integer or isn't present.
132*0b57cec5SDimitry Andric const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
133*0b57cec5SDimitry Andric if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
134*0b57cec5SDimitry Andric assert(Amt.getHowSpecified() == OptionalAmount::Constant);
135*0b57cec5SDimitry Andric FS.setFieldWidth(Amt);
136*0b57cec5SDimitry Andric
137*0b57cec5SDimitry Andric if (I == E) {
138*0b57cec5SDimitry Andric // No more characters left?
139*0b57cec5SDimitry Andric H.HandleIncompleteSpecifier(Start, E - Start);
140*0b57cec5SDimitry Andric return true;
141*0b57cec5SDimitry Andric }
142*0b57cec5SDimitry Andric }
143*0b57cec5SDimitry Andric
144*0b57cec5SDimitry Andric // Look for the length modifier.
145*0b57cec5SDimitry Andric if (ParseLengthModifier(FS, I, E, LO, /*IsScanf=*/true) && I == E) {
146*0b57cec5SDimitry Andric // No more characters left?
147*0b57cec5SDimitry Andric H.HandleIncompleteSpecifier(Start, E - Start);
148*0b57cec5SDimitry Andric return true;
149*0b57cec5SDimitry Andric }
150*0b57cec5SDimitry Andric
151*0b57cec5SDimitry Andric // Detect spurious null characters, which are likely errors.
152*0b57cec5SDimitry Andric if (*I == '\0') {
153*0b57cec5SDimitry Andric H.HandleNullChar(I);
154*0b57cec5SDimitry Andric return true;
155*0b57cec5SDimitry Andric }
156*0b57cec5SDimitry Andric
157*0b57cec5SDimitry Andric // Finally, look for the conversion specifier.
158*0b57cec5SDimitry Andric const char *conversionPosition = I++;
159*0b57cec5SDimitry Andric ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
160*0b57cec5SDimitry Andric switch (*conversionPosition) {
161*0b57cec5SDimitry Andric default:
162*0b57cec5SDimitry Andric break;
163*0b57cec5SDimitry Andric case '%': k = ConversionSpecifier::PercentArg; break;
164*0b57cec5SDimitry Andric case 'A': k = ConversionSpecifier::AArg; break;
165*0b57cec5SDimitry Andric case 'E': k = ConversionSpecifier::EArg; break;
166*0b57cec5SDimitry Andric case 'F': k = ConversionSpecifier::FArg; break;
167*0b57cec5SDimitry Andric case 'G': k = ConversionSpecifier::GArg; break;
168*0b57cec5SDimitry Andric case 'X': k = ConversionSpecifier::XArg; break;
169*0b57cec5SDimitry Andric case 'a': k = ConversionSpecifier::aArg; break;
170*0b57cec5SDimitry Andric case 'd': k = ConversionSpecifier::dArg; break;
171*0b57cec5SDimitry Andric case 'e': k = ConversionSpecifier::eArg; break;
172*0b57cec5SDimitry Andric case 'f': k = ConversionSpecifier::fArg; break;
173*0b57cec5SDimitry Andric case 'g': k = ConversionSpecifier::gArg; break;
174*0b57cec5SDimitry Andric case 'i': k = ConversionSpecifier::iArg; break;
175*0b57cec5SDimitry Andric case 'n': k = ConversionSpecifier::nArg; break;
176*0b57cec5SDimitry Andric case 'c': k = ConversionSpecifier::cArg; break;
177*0b57cec5SDimitry Andric case 'C': k = ConversionSpecifier::CArg; break;
178*0b57cec5SDimitry Andric case 'S': k = ConversionSpecifier::SArg; break;
179*0b57cec5SDimitry Andric case '[': k = ConversionSpecifier::ScanListArg; break;
180*0b57cec5SDimitry Andric case 'u': k = ConversionSpecifier::uArg; break;
181*0b57cec5SDimitry Andric case 'x': k = ConversionSpecifier::xArg; break;
182*0b57cec5SDimitry Andric case 'o': k = ConversionSpecifier::oArg; break;
183*0b57cec5SDimitry Andric case 's': k = ConversionSpecifier::sArg; break;
184*0b57cec5SDimitry Andric case 'p': k = ConversionSpecifier::pArg; break;
185*0b57cec5SDimitry Andric // Apple extensions
186*0b57cec5SDimitry Andric // Apple-specific
187*0b57cec5SDimitry Andric case 'D':
188*0b57cec5SDimitry Andric if (Target.getTriple().isOSDarwin())
189*0b57cec5SDimitry Andric k = ConversionSpecifier::DArg;
190*0b57cec5SDimitry Andric break;
191*0b57cec5SDimitry Andric case 'O':
192*0b57cec5SDimitry Andric if (Target.getTriple().isOSDarwin())
193*0b57cec5SDimitry Andric k = ConversionSpecifier::OArg;
194*0b57cec5SDimitry Andric break;
195*0b57cec5SDimitry Andric case 'U':
196*0b57cec5SDimitry Andric if (Target.getTriple().isOSDarwin())
197*0b57cec5SDimitry Andric k = ConversionSpecifier::UArg;
198*0b57cec5SDimitry Andric break;
199*0b57cec5SDimitry Andric }
200*0b57cec5SDimitry Andric ScanfConversionSpecifier CS(conversionPosition, k);
201*0b57cec5SDimitry Andric if (k == ScanfConversionSpecifier::ScanListArg) {
202*0b57cec5SDimitry Andric if (ParseScanList(H, CS, I, E))
203*0b57cec5SDimitry Andric return true;
204*0b57cec5SDimitry Andric }
205*0b57cec5SDimitry Andric FS.setConversionSpecifier(CS);
206*0b57cec5SDimitry Andric if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
207*0b57cec5SDimitry Andric && !FS.usesPositionalArg())
208*0b57cec5SDimitry Andric FS.setArgIndex(argIndex++);
209*0b57cec5SDimitry Andric
210*0b57cec5SDimitry Andric // FIXME: '%' and '*' doesn't make sense. Issue a warning.
211*0b57cec5SDimitry Andric // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
212*0b57cec5SDimitry Andric
213*0b57cec5SDimitry Andric if (k == ScanfConversionSpecifier::InvalidSpecifier) {
214*0b57cec5SDimitry Andric unsigned Len = I - Beg;
215*0b57cec5SDimitry Andric if (ParseUTF8InvalidSpecifier(Beg, E, Len)) {
216*0b57cec5SDimitry Andric CS.setEndScanList(Beg + Len);
217*0b57cec5SDimitry Andric FS.setConversionSpecifier(CS);
218*0b57cec5SDimitry Andric }
219*0b57cec5SDimitry Andric // Assume the conversion takes one argument.
220*0b57cec5SDimitry Andric return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len);
221*0b57cec5SDimitry Andric }
222*0b57cec5SDimitry Andric return ScanfSpecifierResult(Start, FS);
223*0b57cec5SDimitry Andric }
224*0b57cec5SDimitry Andric
getArgType(ASTContext & Ctx) const225*0b57cec5SDimitry Andric ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
226*0b57cec5SDimitry Andric const ScanfConversionSpecifier &CS = getConversionSpecifier();
227*0b57cec5SDimitry Andric
228*0b57cec5SDimitry Andric if (!CS.consumesDataArgument())
229*0b57cec5SDimitry Andric return ArgType::Invalid();
230*0b57cec5SDimitry Andric
231*0b57cec5SDimitry Andric switch(CS.getKind()) {
232*0b57cec5SDimitry Andric // Signed int.
233*0b57cec5SDimitry Andric case ConversionSpecifier::dArg:
234*0b57cec5SDimitry Andric case ConversionSpecifier::DArg:
235*0b57cec5SDimitry Andric case ConversionSpecifier::iArg:
236*0b57cec5SDimitry Andric switch (LM.getKind()) {
237*0b57cec5SDimitry Andric case LengthModifier::None:
238*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.IntTy);
239*0b57cec5SDimitry Andric case LengthModifier::AsChar:
240*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType::AnyCharTy);
241*0b57cec5SDimitry Andric case LengthModifier::AsShort:
242*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.ShortTy);
243*0b57cec5SDimitry Andric case LengthModifier::AsLong:
244*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.LongTy);
245*0b57cec5SDimitry Andric case LengthModifier::AsLongLong:
246*0b57cec5SDimitry Andric case LengthModifier::AsQuad:
247*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.LongLongTy);
248*0b57cec5SDimitry Andric case LengthModifier::AsInt64:
249*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
250*0b57cec5SDimitry Andric case LengthModifier::AsIntMax:
251*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
252*0b57cec5SDimitry Andric case LengthModifier::AsSizeT:
253*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
254*0b57cec5SDimitry Andric case LengthModifier::AsPtrDiff:
255*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
256*0b57cec5SDimitry Andric case LengthModifier::AsLongDouble:
257*0b57cec5SDimitry Andric // GNU extension.
258*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.LongLongTy);
259*0b57cec5SDimitry Andric case LengthModifier::AsAllocate:
260*0b57cec5SDimitry Andric case LengthModifier::AsMAllocate:
261*0b57cec5SDimitry Andric case LengthModifier::AsInt32:
262*0b57cec5SDimitry Andric case LengthModifier::AsInt3264:
263*0b57cec5SDimitry Andric case LengthModifier::AsWide:
264*0b57cec5SDimitry Andric case LengthModifier::AsShortLong:
265*0b57cec5SDimitry Andric return ArgType::Invalid();
266*0b57cec5SDimitry Andric }
267*0b57cec5SDimitry Andric llvm_unreachable("Unsupported LengthModifier Type");
268*0b57cec5SDimitry Andric
269*0b57cec5SDimitry Andric // Unsigned int.
270*0b57cec5SDimitry Andric case ConversionSpecifier::oArg:
271*0b57cec5SDimitry Andric case ConversionSpecifier::OArg:
272*0b57cec5SDimitry Andric case ConversionSpecifier::uArg:
273*0b57cec5SDimitry Andric case ConversionSpecifier::UArg:
274*0b57cec5SDimitry Andric case ConversionSpecifier::xArg:
275*0b57cec5SDimitry Andric case ConversionSpecifier::XArg:
276*0b57cec5SDimitry Andric switch (LM.getKind()) {
277*0b57cec5SDimitry Andric case LengthModifier::None:
278*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.UnsignedIntTy);
279*0b57cec5SDimitry Andric case LengthModifier::AsChar:
280*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.UnsignedCharTy);
281*0b57cec5SDimitry Andric case LengthModifier::AsShort:
282*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.UnsignedShortTy);
283*0b57cec5SDimitry Andric case LengthModifier::AsLong:
284*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.UnsignedLongTy);
285*0b57cec5SDimitry Andric case LengthModifier::AsLongLong:
286*0b57cec5SDimitry Andric case LengthModifier::AsQuad:
287*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
288*0b57cec5SDimitry Andric case LengthModifier::AsInt64:
289*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"));
290*0b57cec5SDimitry Andric case LengthModifier::AsIntMax:
291*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
292*0b57cec5SDimitry Andric case LengthModifier::AsSizeT:
293*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
294*0b57cec5SDimitry Andric case LengthModifier::AsPtrDiff:
295*0b57cec5SDimitry Andric return ArgType::PtrTo(
296*0b57cec5SDimitry Andric ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t"));
297*0b57cec5SDimitry Andric case LengthModifier::AsLongDouble:
298*0b57cec5SDimitry Andric // GNU extension.
299*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
300*0b57cec5SDimitry Andric case LengthModifier::AsAllocate:
301*0b57cec5SDimitry Andric case LengthModifier::AsMAllocate:
302*0b57cec5SDimitry Andric case LengthModifier::AsInt32:
303*0b57cec5SDimitry Andric case LengthModifier::AsInt3264:
304*0b57cec5SDimitry Andric case LengthModifier::AsWide:
305*0b57cec5SDimitry Andric case LengthModifier::AsShortLong:
306*0b57cec5SDimitry Andric return ArgType::Invalid();
307*0b57cec5SDimitry Andric }
308*0b57cec5SDimitry Andric llvm_unreachable("Unsupported LengthModifier Type");
309*0b57cec5SDimitry Andric
310*0b57cec5SDimitry Andric // Float.
311*0b57cec5SDimitry Andric case ConversionSpecifier::aArg:
312*0b57cec5SDimitry Andric case ConversionSpecifier::AArg:
313*0b57cec5SDimitry Andric case ConversionSpecifier::eArg:
314*0b57cec5SDimitry Andric case ConversionSpecifier::EArg:
315*0b57cec5SDimitry Andric case ConversionSpecifier::fArg:
316*0b57cec5SDimitry Andric case ConversionSpecifier::FArg:
317*0b57cec5SDimitry Andric case ConversionSpecifier::gArg:
318*0b57cec5SDimitry Andric case ConversionSpecifier::GArg:
319*0b57cec5SDimitry Andric switch (LM.getKind()) {
320*0b57cec5SDimitry Andric case LengthModifier::None:
321*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.FloatTy);
322*0b57cec5SDimitry Andric case LengthModifier::AsLong:
323*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.DoubleTy);
324*0b57cec5SDimitry Andric case LengthModifier::AsLongDouble:
325*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.LongDoubleTy);
326*0b57cec5SDimitry Andric default:
327*0b57cec5SDimitry Andric return ArgType::Invalid();
328*0b57cec5SDimitry Andric }
329*0b57cec5SDimitry Andric
330*0b57cec5SDimitry Andric // Char, string and scanlist.
331*0b57cec5SDimitry Andric case ConversionSpecifier::cArg:
332*0b57cec5SDimitry Andric case ConversionSpecifier::sArg:
333*0b57cec5SDimitry Andric case ConversionSpecifier::ScanListArg:
334*0b57cec5SDimitry Andric switch (LM.getKind()) {
335*0b57cec5SDimitry Andric case LengthModifier::None:
336*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType::AnyCharTy);
337*0b57cec5SDimitry Andric case LengthModifier::AsLong:
338*0b57cec5SDimitry Andric case LengthModifier::AsWide:
339*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
340*0b57cec5SDimitry Andric case LengthModifier::AsAllocate:
341*0b57cec5SDimitry Andric case LengthModifier::AsMAllocate:
342*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType::CStrTy);
343*0b57cec5SDimitry Andric case LengthModifier::AsShort:
344*0b57cec5SDimitry Andric if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
345*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType::AnyCharTy);
346*0b57cec5SDimitry Andric LLVM_FALLTHROUGH;
347*0b57cec5SDimitry Andric default:
348*0b57cec5SDimitry Andric return ArgType::Invalid();
349*0b57cec5SDimitry Andric }
350*0b57cec5SDimitry Andric case ConversionSpecifier::CArg:
351*0b57cec5SDimitry Andric case ConversionSpecifier::SArg:
352*0b57cec5SDimitry Andric // FIXME: Mac OS X specific?
353*0b57cec5SDimitry Andric switch (LM.getKind()) {
354*0b57cec5SDimitry Andric case LengthModifier::None:
355*0b57cec5SDimitry Andric case LengthModifier::AsWide:
356*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
357*0b57cec5SDimitry Andric case LengthModifier::AsAllocate:
358*0b57cec5SDimitry Andric case LengthModifier::AsMAllocate:
359*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
360*0b57cec5SDimitry Andric case LengthModifier::AsShort:
361*0b57cec5SDimitry Andric if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
362*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType::AnyCharTy);
363*0b57cec5SDimitry Andric LLVM_FALLTHROUGH;
364*0b57cec5SDimitry Andric default:
365*0b57cec5SDimitry Andric return ArgType::Invalid();
366*0b57cec5SDimitry Andric }
367*0b57cec5SDimitry Andric
368*0b57cec5SDimitry Andric // Pointer.
369*0b57cec5SDimitry Andric case ConversionSpecifier::pArg:
370*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType::CPointerTy);
371*0b57cec5SDimitry Andric
372*0b57cec5SDimitry Andric // Write-back.
373*0b57cec5SDimitry Andric case ConversionSpecifier::nArg:
374*0b57cec5SDimitry Andric switch (LM.getKind()) {
375*0b57cec5SDimitry Andric case LengthModifier::None:
376*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.IntTy);
377*0b57cec5SDimitry Andric case LengthModifier::AsChar:
378*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.SignedCharTy);
379*0b57cec5SDimitry Andric case LengthModifier::AsShort:
380*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.ShortTy);
381*0b57cec5SDimitry Andric case LengthModifier::AsLong:
382*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.LongTy);
383*0b57cec5SDimitry Andric case LengthModifier::AsLongLong:
384*0b57cec5SDimitry Andric case LengthModifier::AsQuad:
385*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.LongLongTy);
386*0b57cec5SDimitry Andric case LengthModifier::AsInt64:
387*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
388*0b57cec5SDimitry Andric case LengthModifier::AsIntMax:
389*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
390*0b57cec5SDimitry Andric case LengthModifier::AsSizeT:
391*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
392*0b57cec5SDimitry Andric case LengthModifier::AsPtrDiff:
393*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
394*0b57cec5SDimitry Andric case LengthModifier::AsLongDouble:
395*0b57cec5SDimitry Andric return ArgType(); // FIXME: Is this a known extension?
396*0b57cec5SDimitry Andric case LengthModifier::AsAllocate:
397*0b57cec5SDimitry Andric case LengthModifier::AsMAllocate:
398*0b57cec5SDimitry Andric case LengthModifier::AsInt32:
399*0b57cec5SDimitry Andric case LengthModifier::AsInt3264:
400*0b57cec5SDimitry Andric case LengthModifier::AsWide:
401*0b57cec5SDimitry Andric case LengthModifier::AsShortLong:
402*0b57cec5SDimitry Andric return ArgType::Invalid();
403*0b57cec5SDimitry Andric }
404*0b57cec5SDimitry Andric
405*0b57cec5SDimitry Andric default:
406*0b57cec5SDimitry Andric break;
407*0b57cec5SDimitry Andric }
408*0b57cec5SDimitry Andric
409*0b57cec5SDimitry Andric return ArgType();
410*0b57cec5SDimitry Andric }
411*0b57cec5SDimitry Andric
fixType(QualType QT,QualType RawQT,const LangOptions & LangOpt,ASTContext & Ctx)412*0b57cec5SDimitry Andric bool ScanfSpecifier::fixType(QualType QT, QualType RawQT,
413*0b57cec5SDimitry Andric const LangOptions &LangOpt,
414*0b57cec5SDimitry Andric ASTContext &Ctx) {
415*0b57cec5SDimitry Andric
416*0b57cec5SDimitry Andric // %n is different from other conversion specifiers; don't try to fix it.
417*0b57cec5SDimitry Andric if (CS.getKind() == ConversionSpecifier::nArg)
418*0b57cec5SDimitry Andric return false;
419*0b57cec5SDimitry Andric
420*0b57cec5SDimitry Andric if (!QT->isPointerType())
421*0b57cec5SDimitry Andric return false;
422*0b57cec5SDimitry Andric
423*0b57cec5SDimitry Andric QualType PT = QT->getPointeeType();
424*0b57cec5SDimitry Andric
425*0b57cec5SDimitry Andric // If it's an enum, get its underlying type.
426*0b57cec5SDimitry Andric if (const EnumType *ETy = PT->getAs<EnumType>()) {
427*0b57cec5SDimitry Andric // Don't try to fix incomplete enums.
428*0b57cec5SDimitry Andric if (!ETy->getDecl()->isComplete())
429*0b57cec5SDimitry Andric return false;
430*0b57cec5SDimitry Andric PT = ETy->getDecl()->getIntegerType();
431*0b57cec5SDimitry Andric }
432*0b57cec5SDimitry Andric
433*0b57cec5SDimitry Andric const BuiltinType *BT = PT->getAs<BuiltinType>();
434*0b57cec5SDimitry Andric if (!BT)
435*0b57cec5SDimitry Andric return false;
436*0b57cec5SDimitry Andric
437*0b57cec5SDimitry Andric // Pointer to a character.
438*0b57cec5SDimitry Andric if (PT->isAnyCharacterType()) {
439*0b57cec5SDimitry Andric CS.setKind(ConversionSpecifier::sArg);
440*0b57cec5SDimitry Andric if (PT->isWideCharType())
441*0b57cec5SDimitry Andric LM.setKind(LengthModifier::AsWideChar);
442*0b57cec5SDimitry Andric else
443*0b57cec5SDimitry Andric LM.setKind(LengthModifier::None);
444*0b57cec5SDimitry Andric
445*0b57cec5SDimitry Andric // If we know the target array length, we can use it as a field width.
446*0b57cec5SDimitry Andric if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) {
447*0b57cec5SDimitry Andric if (CAT->getSizeModifier() == ArrayType::Normal)
448*0b57cec5SDimitry Andric FieldWidth = OptionalAmount(OptionalAmount::Constant,
449*0b57cec5SDimitry Andric CAT->getSize().getZExtValue() - 1,
450*0b57cec5SDimitry Andric "", 0, false);
451*0b57cec5SDimitry Andric
452*0b57cec5SDimitry Andric }
453*0b57cec5SDimitry Andric return true;
454*0b57cec5SDimitry Andric }
455*0b57cec5SDimitry Andric
456*0b57cec5SDimitry Andric // Figure out the length modifier.
457*0b57cec5SDimitry Andric switch (BT->getKind()) {
458*0b57cec5SDimitry Andric // no modifier
459*0b57cec5SDimitry Andric case BuiltinType::UInt:
460*0b57cec5SDimitry Andric case BuiltinType::Int:
461*0b57cec5SDimitry Andric case BuiltinType::Float:
462*0b57cec5SDimitry Andric LM.setKind(LengthModifier::None);
463*0b57cec5SDimitry Andric break;
464*0b57cec5SDimitry Andric
465*0b57cec5SDimitry Andric // hh
466*0b57cec5SDimitry Andric case BuiltinType::Char_U:
467*0b57cec5SDimitry Andric case BuiltinType::UChar:
468*0b57cec5SDimitry Andric case BuiltinType::Char_S:
469*0b57cec5SDimitry Andric case BuiltinType::SChar:
470*0b57cec5SDimitry Andric LM.setKind(LengthModifier::AsChar);
471*0b57cec5SDimitry Andric break;
472*0b57cec5SDimitry Andric
473*0b57cec5SDimitry Andric // h
474*0b57cec5SDimitry Andric case BuiltinType::Short:
475*0b57cec5SDimitry Andric case BuiltinType::UShort:
476*0b57cec5SDimitry Andric LM.setKind(LengthModifier::AsShort);
477*0b57cec5SDimitry Andric break;
478*0b57cec5SDimitry Andric
479*0b57cec5SDimitry Andric // l
480*0b57cec5SDimitry Andric case BuiltinType::Long:
481*0b57cec5SDimitry Andric case BuiltinType::ULong:
482*0b57cec5SDimitry Andric case BuiltinType::Double:
483*0b57cec5SDimitry Andric LM.setKind(LengthModifier::AsLong);
484*0b57cec5SDimitry Andric break;
485*0b57cec5SDimitry Andric
486*0b57cec5SDimitry Andric // ll
487*0b57cec5SDimitry Andric case BuiltinType::LongLong:
488*0b57cec5SDimitry Andric case BuiltinType::ULongLong:
489*0b57cec5SDimitry Andric LM.setKind(LengthModifier::AsLongLong);
490*0b57cec5SDimitry Andric break;
491*0b57cec5SDimitry Andric
492*0b57cec5SDimitry Andric // L
493*0b57cec5SDimitry Andric case BuiltinType::LongDouble:
494*0b57cec5SDimitry Andric LM.setKind(LengthModifier::AsLongDouble);
495*0b57cec5SDimitry Andric break;
496*0b57cec5SDimitry Andric
497*0b57cec5SDimitry Andric // Don't know.
498*0b57cec5SDimitry Andric default:
499*0b57cec5SDimitry Andric return false;
500*0b57cec5SDimitry Andric }
501*0b57cec5SDimitry Andric
502*0b57cec5SDimitry Andric // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
503*0b57cec5SDimitry Andric if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11))
504*0b57cec5SDimitry Andric namedTypeToLengthModifier(PT, LM);
505*0b57cec5SDimitry Andric
506*0b57cec5SDimitry Andric // If fixing the length modifier was enough, we are done.
507*0b57cec5SDimitry Andric if (hasValidLengthModifier(Ctx.getTargetInfo(), LangOpt)) {
508*0b57cec5SDimitry Andric const analyze_scanf::ArgType &AT = getArgType(Ctx);
509*0b57cec5SDimitry Andric if (AT.isValid() && AT.matchesType(Ctx, QT))
510*0b57cec5SDimitry Andric return true;
511*0b57cec5SDimitry Andric }
512*0b57cec5SDimitry Andric
513*0b57cec5SDimitry Andric // Figure out the conversion specifier.
514*0b57cec5SDimitry Andric if (PT->isRealFloatingType())
515*0b57cec5SDimitry Andric CS.setKind(ConversionSpecifier::fArg);
516*0b57cec5SDimitry Andric else if (PT->isSignedIntegerType())
517*0b57cec5SDimitry Andric CS.setKind(ConversionSpecifier::dArg);
518*0b57cec5SDimitry Andric else if (PT->isUnsignedIntegerType())
519*0b57cec5SDimitry Andric CS.setKind(ConversionSpecifier::uArg);
520*0b57cec5SDimitry Andric else
521*0b57cec5SDimitry Andric llvm_unreachable("Unexpected type");
522*0b57cec5SDimitry Andric
523*0b57cec5SDimitry Andric return true;
524*0b57cec5SDimitry Andric }
525*0b57cec5SDimitry Andric
toString(raw_ostream & os) const526*0b57cec5SDimitry Andric void ScanfSpecifier::toString(raw_ostream &os) const {
527*0b57cec5SDimitry Andric os << "%";
528*0b57cec5SDimitry Andric
529*0b57cec5SDimitry Andric if (usesPositionalArg())
530*0b57cec5SDimitry Andric os << getPositionalArgIndex() << "$";
531*0b57cec5SDimitry Andric if (SuppressAssignment)
532*0b57cec5SDimitry Andric os << "*";
533*0b57cec5SDimitry Andric
534*0b57cec5SDimitry Andric FieldWidth.toString(os);
535*0b57cec5SDimitry Andric os << LM.toString();
536*0b57cec5SDimitry Andric os << CS.toString();
537*0b57cec5SDimitry Andric }
538*0b57cec5SDimitry Andric
ParseScanfString(FormatStringHandler & H,const char * I,const char * E,const LangOptions & LO,const TargetInfo & Target)539*0b57cec5SDimitry Andric bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
540*0b57cec5SDimitry Andric const char *I,
541*0b57cec5SDimitry Andric const char *E,
542*0b57cec5SDimitry Andric const LangOptions &LO,
543*0b57cec5SDimitry Andric const TargetInfo &Target) {
544*0b57cec5SDimitry Andric
545*0b57cec5SDimitry Andric unsigned argIndex = 0;
546*0b57cec5SDimitry Andric
547*0b57cec5SDimitry Andric // Keep looking for a format specifier until we have exhausted the string.
548*0b57cec5SDimitry Andric while (I != E) {
549*0b57cec5SDimitry Andric const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
550*0b57cec5SDimitry Andric LO, Target);
551*0b57cec5SDimitry Andric // Did a fail-stop error of any kind occur when parsing the specifier?
552*0b57cec5SDimitry Andric // If so, don't do any more processing.
553*0b57cec5SDimitry Andric if (FSR.shouldStop())
554*0b57cec5SDimitry Andric return true;
555*0b57cec5SDimitry Andric // Did we exhaust the string or encounter an error that
556*0b57cec5SDimitry Andric // we can recover from?
557*0b57cec5SDimitry Andric if (!FSR.hasValue())
558*0b57cec5SDimitry Andric continue;
559*0b57cec5SDimitry Andric // We have a format specifier. Pass it to the callback.
560*0b57cec5SDimitry Andric if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
561*0b57cec5SDimitry Andric I - FSR.getStart())) {
562*0b57cec5SDimitry Andric return true;
563*0b57cec5SDimitry Andric }
564*0b57cec5SDimitry Andric }
565*0b57cec5SDimitry Andric assert(I == E && "Format string not exhausted");
566*0b57cec5SDimitry Andric return false;
567*0b57cec5SDimitry Andric }
568