aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Vedant Kumar <vsk@apple.com> 2019-10-21 11:48:38 -0700
committer Vedant Kumar <vsk@apple.com> 2019-12-04 10:10:55 -0800
commit e18531595bba495946aa52c0a16b9f9238cff8bc (patch)
tree 489f93c6cda69f1e7255d0aa0fc596e466f30628 /clang/lib/CodeGen
parent [Gold Tests] Add missing target flag to X86 test (diff)
download llvm-project-e18531595bba495946aa52c0a16b9f9238cff8bc.tar.gz
llvm-project-e18531595bba495946aa52c0a16b9f9238cff8bc.tar.bz2
llvm-project-e18531595bba495946aa52c0a16b9f9238cff8bc.zip
[Coverage] Revise format to reduce binary size
Revise the coverage mapping format to reduce binary size by:

1. Naming function records and marking them `linkonce_odr`, and
2. Compressing filenames.

This shrinks the size of llc's coverage segment by 82% (334MB -> 62MB) and speeds up end-to-end single-threaded report generation by 10%. For reference the compressed name data in llc is 81MB (__llvm_prf_names).

Rationale for changes to the format:

- With the current format, most coverage function records are discarded. E.g., more than 97% of the records in llc are *duplicate* placeholders for functions visible-but-not-used in TUs. Placeholders *are* used to show under-covered functions, but duplicate placeholders waste space.

- We reached general consensus about giving (1) a try at the 2017 code coverage BoF [1]. The thinking was that using `linkonce_odr` to merge duplicates is simpler than alternatives like teaching build systems about a coverage-aware database/module/etc on the side.

- Revising the format is expensive due to the backwards compatibility requirement, so we might as well compress filenames while we're at it. This shrinks the encoded filenames in llc by 86% (12MB -> 1.6MB).

See CoverageMappingFormat.rst for the details on what exactly has changed.

Fixes PR34533 [2], hopefully.

[1] http://lists.llvm.org/pipermail/llvm-dev/2017-October/118428.html
[2] https://bugs.llvm.org/show_bug.cgi?id=34533

Differential Revision: https://reviews.llvm.org/D69471
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r-- clang/lib/CodeGen/CoverageMappingGen.cpp 134
-rw-r--r-- clang/lib/CodeGen/CoverageMappingGen.h 18
2 files changed, 93 insertions, 59 deletions
diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp
index bdecff39c88f..02840bf771cc 100644
--- a/clang/lib/CodeGen/CoverageMappingGen.cpp
+++ b/clang/lib/CodeGen/CoverageMappingGen.cpp
@@ -13,6 +13,8 @@
#include "CoverageMappingGen.h"
#include "CodeGenFunction.h"
#include "clang/AST/StmtVisitor.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Frontend/FrontendDiagnostic.h"
#include "clang/Lex/Lexer.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
@@ -24,6 +26,10 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
+// This selects the coverage mapping format defined when `InstrProfData.inc`
+// is textually included.
+#define COVMAP_V3
+
using namespace clang;
using namespace CodeGen;
using namespace llvm::coverage;
@@ -1272,12 +1278,6 @@ struct CounterCoverageMappingBuilder
}
};
-std::string getCoverageSection(const CodeGenModule &CGM) {
- return llvm::getInstrProfSectionName(
- llvm::IPSK_covmap,
- CGM.getContext().getTargetInfo().getTriple().getObjectFormat());
-}
-
std::string normalizeFilename(StringRef Filename) {
llvm::SmallString<256> Path(Filename);
llvm::sys::fs::make_absolute(Path);
@@ -1317,30 +1317,71 @@ static void dump(llvm::raw_ostream &OS, StringRef FunctionName,
}
}
-void CoverageMappingModuleGen::addFunctionMappingRecord(
- llvm::GlobalVariable *NamePtr, StringRef NameValue, uint64_t FuncHash,
- const std::string &CoverageMapping, bool IsUsed) {
+static std::string getInstrProfSection(const CodeGenModule &CGM,
+ llvm::InstrProfSectKind SK) {
+ return llvm::getInstrProfSectionName(
+ SK, CGM.getContext().getTargetInfo().getTriple().getObjectFormat());
+}
+
+void CoverageMappingModuleGen::emitFunctionMappingRecord(
+ const FunctionInfo &Info, uint64_t FilenamesRef) {
llvm::LLVMContext &Ctx = CGM.getLLVMContext();
- if (!FunctionRecordTy) {
+
+ // Assign a name to the function record. This is used to merge duplicates.
+ std::string FuncRecordName = "__covrec_" + llvm::utohexstr(Info.NameHash);
+
+ // A dummy description for a function included-but-not-used in a TU can be
+ // replaced by full description provided by a different TU. The two kinds of
+ // descriptions play distinct roles: therefore, assign them different names
+ // to prevent `linkonce_odr` merging.
+ if (Info.IsUsed)
+ FuncRecordName += "u";
+
+ // Create the function record type.
+ const uint64_t NameHash = Info.NameHash;
+ const uint64_t FuncHash = Info.FuncHash;
+ const std::string &CoverageMapping = Info.CoverageMapping;
#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) LLVMType,
- llvm::Type *FunctionRecordTypes[] = {
- #include "llvm/ProfileData/InstrProfData.inc"
- };
- FunctionRecordTy =
- llvm::StructType::get(Ctx, makeArrayRef(FunctionRecordTypes),
- /*isPacked=*/true);
- }
+ llvm::Type *FunctionRecordTypes[] = {
+#include "llvm/ProfileData/InstrProfData.inc"
+ };
+ auto *FunctionRecordTy =
+ llvm::StructType::get(Ctx, makeArrayRef(FunctionRecordTypes),
+ /*isPacked=*/true);
- #define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Init,
+ // Create the function record constant.
+#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Init,
llvm::Constant *FunctionRecordVals[] = {
#include "llvm/ProfileData/InstrProfData.inc"
};
- FunctionRecords.push_back(llvm::ConstantStruct::get(
- FunctionRecordTy, makeArrayRef(FunctionRecordVals)));
+ auto *FuncRecordConstant = llvm::ConstantStruct::get(
+ FunctionRecordTy, makeArrayRef(FunctionRecordVals));
+
+ // Create the function record global.
+ auto *FuncRecord = new llvm::GlobalVariable(
+ CGM.getModule(), FunctionRecordTy, /*isConstant=*/true,
+ llvm::GlobalValue::LinkOnceODRLinkage, FuncRecordConstant,
+ FuncRecordName);
+ FuncRecord->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ FuncRecord->setSection(getInstrProfSection(CGM, llvm::IPSK_covfun));
+ FuncRecord->setAlignment(llvm::Align(8));
+ if (CGM.supportsCOMDAT())
+ FuncRecord->setComdat(CGM.getModule().getOrInsertComdat(FuncRecordName));
+
+ // Make sure the data doesn't get deleted.
+ CGM.addUsedGlobal(FuncRecord);
+}
+
+void CoverageMappingModuleGen::addFunctionMappingRecord(
+ llvm::GlobalVariable *NamePtr, StringRef NameValue, uint64_t FuncHash,
+ const std::string &CoverageMapping, bool IsUsed) {
+ llvm::LLVMContext &Ctx = CGM.getLLVMContext();
+ const uint64_t NameHash = llvm::IndexedInstrProf::ComputeHash(NameValue);
+ FunctionRecords.push_back({NameHash, FuncHash, CoverageMapping, IsUsed});
+
if (!IsUsed)
FunctionNames.push_back(
llvm::ConstantExpr::getBitCast(NamePtr, llvm::Type::getInt8PtrTy(Ctx)));
- CoverageMappings.push_back(CoverageMapping);
if (CGM.getCodeGenOpts().DumpCoverageMapping) {
// Dump the coverage mapping data for this function by decoding the
@@ -1385,37 +1426,22 @@ void CoverageMappingModuleGen::emit() {
FilenameRefs[I] = FilenameStrs[I];
}
- std::string FilenamesAndCoverageMappings;
- llvm::raw_string_ostream OS(FilenamesAndCoverageMappings);
- CoverageFilenamesSectionWriter(FilenameRefs).write(OS);
-
- // Stream the content of CoverageMappings to OS while keeping
- // memory consumption under control.
- size_t CoverageMappingSize = 0;
- for (auto &S : CoverageMappings) {
- CoverageMappingSize += S.size();
- OS << S;
- S.clear();
- S.shrink_to_fit();
+ std::string Filenames;
+ {
+ llvm::raw_string_ostream OS(Filenames);
+ CoverageFilenamesSectionWriter(FilenameRefs).write(OS);
}
- CoverageMappings.clear();
- CoverageMappings.shrink_to_fit();
+ auto *FilenamesVal =
+ llvm::ConstantDataArray::getString(Ctx, Filenames, false);
+ const int64_t FilenamesRef = llvm::IndexedInstrProf::ComputeHash(Filenames);
- size_t FilenamesSize = OS.str().size() - CoverageMappingSize;
- // Append extra zeroes if necessary to ensure that the size of the filenames
- // and coverage mappings is a multiple of 8.
- if (size_t Rem = OS.str().size() % 8) {
- CoverageMappingSize += 8 - Rem;
- OS.write_zeros(8 - Rem);
- }
- auto *FilenamesAndMappingsVal =
- llvm::ConstantDataArray::getString(Ctx, OS.str(), false);
-
- // Create the deferred function records array
- auto RecordsTy =
- llvm::ArrayType::get(FunctionRecordTy, FunctionRecords.size());
- auto RecordsVal = llvm::ConstantArray::get(RecordsTy, FunctionRecords);
+ // Emit the function records.
+ for (const FunctionInfo &Info : FunctionRecords)
+ emitFunctionMappingRecord(Info, FilenamesRef);
+ const unsigned NRecords = 0;
+ const size_t FilenamesSize = Filenames.size();
+ const unsigned CoverageMappingSize = 0;
llvm::Type *CovDataHeaderTypes[] = {
#define COVMAP_HEADER(Type, LLVMType, Name, Init) LLVMType,
#include "llvm/ProfileData/InstrProfData.inc"
@@ -1430,18 +1456,16 @@ void CoverageMappingModuleGen::emit() {
CovDataHeaderTy, makeArrayRef(CovDataHeaderVals));
// Create the coverage data record
- llvm::Type *CovDataTypes[] = {CovDataHeaderTy, RecordsTy,
- FilenamesAndMappingsVal->getType()};
+ llvm::Type *CovDataTypes[] = {CovDataHeaderTy, FilenamesVal->getType()};
auto CovDataTy = llvm::StructType::get(Ctx, makeArrayRef(CovDataTypes));
- llvm::Constant *TUDataVals[] = {CovDataHeaderVal, RecordsVal,
- FilenamesAndMappingsVal};
+ llvm::Constant *TUDataVals[] = {CovDataHeaderVal, FilenamesVal};
auto CovDataVal =
llvm::ConstantStruct::get(CovDataTy, makeArrayRef(TUDataVals));
auto CovData = new llvm::GlobalVariable(
- CGM.getModule(), CovDataTy, true, llvm::GlobalValue::InternalLinkage,
+ CGM.getModule(), CovDataTy, true, llvm::GlobalValue::PrivateLinkage,
CovDataVal, llvm::getCoverageMappingVarName());
- CovData->setSection(getCoverageSection(CGM));
+ CovData->setSection(getInstrProfSection(CGM, llvm::IPSK_covmap));
CovData->setAlignment(llvm::Align(8));
// Make sure the data doesn't get deleted.
diff --git a/clang/lib/CodeGen/CoverageMappingGen.h b/clang/lib/CodeGen/CoverageMappingGen.h
index 3bf51f590479..5d79d1e65670 100644
--- a/clang/lib/CodeGen/CoverageMappingGen.h
+++ b/clang/lib/CodeGen/CoverageMappingGen.h
@@ -47,17 +47,27 @@ class CodeGenModule;
/// Organizes the cross-function state that is used while generating
/// code coverage mapping data.
class CoverageMappingModuleGen {
+ /// Information needed to emit a coverage record for a function.
+ struct FunctionInfo {
+ uint64_t NameHash;
+ uint64_t FuncHash;
+ std::string CoverageMapping;
+ bool IsUsed;
+ };
+
CodeGenModule &CGM;
CoverageSourceInfo &SourceInfo;
llvm::SmallDenseMap<const FileEntry *, unsigned, 8> FileEntries;
- std::vector<llvm::Constant *> FunctionRecords;
std::vector<llvm::Constant *> FunctionNames;
- llvm::StructType *FunctionRecordTy;
- std::vector<std::string> CoverageMappings;
+ std::vector<FunctionInfo> FunctionRecords;
+
+ /// Emit a function record.
+ void emitFunctionMappingRecord(const FunctionInfo &Info,
+ uint64_t FilenamesRef);
public:
CoverageMappingModuleGen(CodeGenModule &CGM, CoverageSourceInfo &SourceInfo)
- : CGM(CGM), SourceInfo(SourceInfo), FunctionRecordTy(nullptr) {}
+ : CGM(CGM), SourceInfo(SourceInfo) {}
CoverageSourceInfo &getSourceInfo() const {
return SourceInfo;