128800da1SDmitri Gribenko //===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=//
228800da1SDmitri Gribenko //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
628800da1SDmitri Gribenko //
728800da1SDmitri Gribenko //===----------------------------------------------------------------------===//
828800da1SDmitri Gribenko //
9e63344e8SStephen Kelly // This tablegen backend emits an efficient function to translate HTML named
1028800da1SDmitri Gribenko // character references to UTF-8 sequences.
1128800da1SDmitri Gribenko //
1228800da1SDmitri Gribenko //===----------------------------------------------------------------------===//
1328800da1SDmitri Gribenko 
14c45f8d49SJohn McCall #include "TableGenBackends.h"
1528800da1SDmitri Gribenko #include "llvm/ADT/SmallString.h"
1628800da1SDmitri Gribenko #include "llvm/Support/ConvertUTF.h"
1728800da1SDmitri Gribenko #include "llvm/TableGen/Error.h"
1828800da1SDmitri Gribenko #include "llvm/TableGen/Record.h"
1928800da1SDmitri Gribenko #include "llvm/TableGen/StringMatcher.h"
206b11fca8SDmitri Gribenko #include "llvm/TableGen/TableGenBackend.h"
2128800da1SDmitri Gribenko #include <vector>
2228800da1SDmitri Gribenko 
2328800da1SDmitri Gribenko using namespace llvm;
2428800da1SDmitri Gribenko 
259fc8faf9SAdrian Prantl /// Convert a code point to the corresponding UTF-8 sequence represented
2628800da1SDmitri Gribenko /// as a C string literal.
2728800da1SDmitri Gribenko ///
2828800da1SDmitri Gribenko /// \returns true on success.
translateCodePointToUTF8(unsigned CodePoint,SmallVectorImpl<char> & CLiteral)2928800da1SDmitri Gribenko static bool translateCodePointToUTF8(unsigned CodePoint,
3028800da1SDmitri Gribenko                                      SmallVectorImpl<char> &CLiteral) {
3128800da1SDmitri Gribenko   char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
3228800da1SDmitri Gribenko   char *TranslatedPtr = Translated;
3328800da1SDmitri Gribenko   if (!ConvertCodePointToUTF8(CodePoint, TranslatedPtr))
3428800da1SDmitri Gribenko     return false;
3528800da1SDmitri Gribenko 
3628800da1SDmitri Gribenko   StringRef UTF8(Translated, TranslatedPtr - Translated);
3728800da1SDmitri Gribenko 
3828800da1SDmitri Gribenko   raw_svector_ostream OS(CLiteral);
3928800da1SDmitri Gribenko   OS << "\"";
4028800da1SDmitri Gribenko   for (size_t i = 0, e = UTF8.size(); i != e; ++i) {
4128800da1SDmitri Gribenko     OS << "\\x";
4228800da1SDmitri Gribenko     OS.write_hex(static_cast<unsigned char>(UTF8[i]));
4328800da1SDmitri Gribenko   }
4428800da1SDmitri Gribenko   OS << "\"";
4528800da1SDmitri Gribenko 
4628800da1SDmitri Gribenko   return true;
4728800da1SDmitri Gribenko }
4828800da1SDmitri Gribenko 
EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper & Records,raw_ostream & OS)49c45f8d49SJohn McCall void clang::EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records,
5028800da1SDmitri Gribenko                                                          raw_ostream &OS) {
5128800da1SDmitri Gribenko   std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR");
5228800da1SDmitri Gribenko   std::vector<StringMatcher::StringPair> NameToUTF8;
5328800da1SDmitri Gribenko   SmallString<32> CLiteral;
5428800da1SDmitri Gribenko   for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end();
5528800da1SDmitri Gribenko        I != E; ++I) {
5628800da1SDmitri Gribenko     Record &Tag = **I;
57adcd0268SBenjamin Kramer     std::string Spelling = std::string(Tag.getValueAsString("Spelling"));
5828800da1SDmitri Gribenko     uint64_t CodePoint = Tag.getValueAsInt("CodePoint");
5928800da1SDmitri Gribenko     CLiteral.clear();
6028800da1SDmitri Gribenko     CLiteral.append("return ");
6128800da1SDmitri Gribenko     if (!translateCodePointToUTF8(CodePoint, CLiteral)) {
6228800da1SDmitri Gribenko       SrcMgr.PrintMessage(Tag.getLoc().front(),
6328800da1SDmitri Gribenko                           SourceMgr::DK_Error,
6428800da1SDmitri Gribenko                           Twine("invalid code point"));
6528800da1SDmitri Gribenko       continue;
6628800da1SDmitri Gribenko     }
6728800da1SDmitri Gribenko     CLiteral.append(";");
6828800da1SDmitri Gribenko 
69*735f90feSBenjamin Kramer     StringMatcher::StringPair Match(Spelling, std::string(CLiteral.str()));
7028800da1SDmitri Gribenko     NameToUTF8.push_back(Match);
7128800da1SDmitri Gribenko   }
7228800da1SDmitri Gribenko 
736b11fca8SDmitri Gribenko   emitSourceFileHeader("HTML named character reference to UTF-8 "
746b11fca8SDmitri Gribenko                        "translation", OS);
7528800da1SDmitri Gribenko 
7628800da1SDmitri Gribenko   OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n"
7728800da1SDmitri Gribenko         "                                             StringRef Name) {\n";
7828800da1SDmitri Gribenko   StringMatcher("Name", NameToUTF8, OS).Emit();
7928800da1SDmitri Gribenko   OS << "  return StringRef();\n"
8028800da1SDmitri Gribenko      << "}\n\n";
8128800da1SDmitri Gribenko }
82