BaseTools: Improve the cache hit in the edk2 build cache

BZ: https://bugzilla.tianocore.org/show_bug.cgi?id=1927

Current cache hash algorithm does not parse and generate
the makefile to get the accurate dependency files for a
module. It instead use the platform and package meta files
to get the module depenedency in a quick but over approximate
way. These meta files are monolithic and involve many redundant
dependency for the module, which cause the module build
cache miss easily.
This patch introduces one more cache checkpoint and a new
hash algorithm besides the current quick one. The new hash
algorithm leverages the module makefile to achieve more
accurate and precise dependency info for a module. When
the build cache miss with the first quick hash, the
Basetool will caculate new one after makefile is generated
and then check again.

Cc: Liming Gao <liming.gao@intel.com>
Cc: Bob Feng <bob.c.feng@intel.com>
Signed-off-by: Steven Shi <steven.shi@intel.com>
Reviewed-by: Bob Feng <bob.c.feng@intel.com>
This commit is contained in:
Shi, Steven
2019-08-15 22:26:17 +08:00
committed by Feng, Bob C
parent 8113281728
commit 0e7e7a264c
7 changed files with 869 additions and 200 deletions

227
BaseTools/Source/Python/AutoGen/GenMake.py Normal file → Executable file
View File

@ -906,6 +906,11 @@ cleanlib:
self._AutoGenObject.IncludePathList + self._AutoGenObject.BuildOptionIncPathList
)
self.DependencyHeaderFileSet = set()
if FileDependencyDict:
for Dependency in FileDependencyDict.values():
self.DependencyHeaderFileSet.update(set(Dependency))
# Get a set of unique package includes from MetaFile
parentMetaFileIncludes = set()
for aInclude in self._AutoGenObject.PackageIncludePathList:
@ -1115,7 +1120,7 @@ cleanlib:
## For creating makefile targets for dependent libraries
def ProcessDependentLibrary(self):
for LibraryAutoGen in self._AutoGenObject.LibraryAutoGenList:
if not LibraryAutoGen.IsBinaryModule and not LibraryAutoGen.CanSkipbyHash():
if not LibraryAutoGen.IsBinaryModule:
self.LibraryBuildDirectoryList.append(self.PlaceMacro(LibraryAutoGen.BuildDir, self.Macros))
## Return a list containing source file's dependencies
@ -1129,114 +1134,9 @@ cleanlib:
def GetFileDependency(self, FileList, ForceInculeList, SearchPathList):
Dependency = {}
for F in FileList:
Dependency[F] = self.GetDependencyList(F, ForceInculeList, SearchPathList)
Dependency[F] = GetDependencyList(self._AutoGenObject, self.FileCache, F, ForceInculeList, SearchPathList)
return Dependency
## Find dependencies for one source file
#
# By searching recursively "#include" directive in file, find out all the
# files needed by given source file. The dependencies will be only searched
# in given search path list.
#
# @param File The source file
# @param ForceInculeList The list of files which will be included forcely
# @param SearchPathList The list of search path
#
# @retval list The list of files the given source file depends on
#
def GetDependencyList(self, File, ForceList, SearchPathList):
EdkLogger.debug(EdkLogger.DEBUG_1, "Try to get dependency files for %s" % File)
FileStack = [File] + ForceList
DependencySet = set()
if self._AutoGenObject.Arch not in gDependencyDatabase:
gDependencyDatabase[self._AutoGenObject.Arch] = {}
DepDb = gDependencyDatabase[self._AutoGenObject.Arch]
while len(FileStack) > 0:
F = FileStack.pop()
FullPathDependList = []
if F in self.FileCache:
for CacheFile in self.FileCache[F]:
FullPathDependList.append(CacheFile)
if CacheFile not in DependencySet:
FileStack.append(CacheFile)
DependencySet.update(FullPathDependList)
continue
CurrentFileDependencyList = []
if F in DepDb:
CurrentFileDependencyList = DepDb[F]
else:
try:
Fd = open(F.Path, 'rb')
FileContent = Fd.read()
Fd.close()
except BaseException as X:
EdkLogger.error("build", FILE_OPEN_FAILURE, ExtraData=F.Path + "\n\t" + str(X))
if len(FileContent) == 0:
continue
try:
if FileContent[0] == 0xff or FileContent[0] == 0xfe:
FileContent = FileContent.decode('utf-16')
else:
FileContent = FileContent.decode()
except:
# The file is not txt file. for example .mcb file
continue
IncludedFileList = gIncludePattern.findall(FileContent)
for Inc in IncludedFileList:
Inc = Inc.strip()
# if there's macro used to reference header file, expand it
HeaderList = gMacroPattern.findall(Inc)
if len(HeaderList) == 1 and len(HeaderList[0]) == 2:
HeaderType = HeaderList[0][0]
HeaderKey = HeaderList[0][1]
if HeaderType in gIncludeMacroConversion:
Inc = gIncludeMacroConversion[HeaderType] % {"HeaderKey" : HeaderKey}
else:
# not known macro used in #include, always build the file by
# returning a empty dependency
self.FileCache[File] = []
return []
Inc = os.path.normpath(Inc)
CurrentFileDependencyList.append(Inc)
DepDb[F] = CurrentFileDependencyList
CurrentFilePath = F.Dir
PathList = [CurrentFilePath] + SearchPathList
for Inc in CurrentFileDependencyList:
for SearchPath in PathList:
FilePath = os.path.join(SearchPath, Inc)
if FilePath in gIsFileMap:
if not gIsFileMap[FilePath]:
continue
# If isfile is called too many times, the performance is slow down.
elif not os.path.isfile(FilePath):
gIsFileMap[FilePath] = False
continue
else:
gIsFileMap[FilePath] = True
FilePath = PathClass(FilePath)
FullPathDependList.append(FilePath)
if FilePath not in DependencySet:
FileStack.append(FilePath)
break
else:
EdkLogger.debug(EdkLogger.DEBUG_9, "%s included by %s was not found "\
"in any given path:\n\t%s" % (Inc, F, "\n\t".join(SearchPathList)))
self.FileCache[F] = FullPathDependList
DependencySet.update(FullPathDependList)
DependencySet.update(ForceList)
if File in DependencySet:
DependencySet.remove(File)
DependencyList = list(DependencySet) # remove duplicate ones
return DependencyList
## CustomMakefile class
#
@ -1618,7 +1518,7 @@ cleanlib:
def GetLibraryBuildDirectoryList(self):
DirList = []
for LibraryAutoGen in self._AutoGenObject.LibraryAutoGenList:
if not LibraryAutoGen.IsBinaryModule and not LibraryAutoGen.CanSkipbyHash():
if not LibraryAutoGen.IsBinaryModule:
DirList.append(os.path.join(self._AutoGenObject.BuildDir, LibraryAutoGen.BuildDir))
return DirList
@ -1754,11 +1654,116 @@ class TopLevelMakefile(BuildFile):
def GetLibraryBuildDirectoryList(self):
DirList = []
for LibraryAutoGen in self._AutoGenObject.LibraryAutoGenList:
if not LibraryAutoGen.IsBinaryModule and not LibraryAutoGen.CanSkipbyHash():
if not LibraryAutoGen.IsBinaryModule:
DirList.append(os.path.join(self._AutoGenObject.BuildDir, LibraryAutoGen.BuildDir))
return DirList
## Find dependencies for one source file
#
# By searching recursively "#include" directive in file, find out all the
# files needed by given source file. The dependencies will be only searched
# in given search path list.
#
# @param File The source file
# @param ForceInculeList The list of files which will be included forcely
# @param SearchPathList The list of search path
#
# @retval list The list of files the given source file depends on
#
def GetDependencyList(AutoGenObject, FileCache, File, ForceList, SearchPathList):
EdkLogger.debug(EdkLogger.DEBUG_1, "Try to get dependency files for %s" % File)
FileStack = [File] + ForceList
DependencySet = set()
if AutoGenObject.Arch not in gDependencyDatabase:
gDependencyDatabase[AutoGenObject.Arch] = {}
DepDb = gDependencyDatabase[AutoGenObject.Arch]
while len(FileStack) > 0:
F = FileStack.pop()
FullPathDependList = []
if F in FileCache:
for CacheFile in FileCache[F]:
FullPathDependList.append(CacheFile)
if CacheFile not in DependencySet:
FileStack.append(CacheFile)
DependencySet.update(FullPathDependList)
continue
CurrentFileDependencyList = []
if F in DepDb:
CurrentFileDependencyList = DepDb[F]
else:
try:
Fd = open(F.Path, 'rb')
FileContent = Fd.read()
Fd.close()
except BaseException as X:
EdkLogger.error("build", FILE_OPEN_FAILURE, ExtraData=F.Path + "\n\t" + str(X))
if len(FileContent) == 0:
continue
try:
if FileContent[0] == 0xff or FileContent[0] == 0xfe:
FileContent = FileContent.decode('utf-16')
else:
FileContent = FileContent.decode()
except:
# The file is not txt file. for example .mcb file
continue
IncludedFileList = gIncludePattern.findall(FileContent)
for Inc in IncludedFileList:
Inc = Inc.strip()
# if there's macro used to reference header file, expand it
HeaderList = gMacroPattern.findall(Inc)
if len(HeaderList) == 1 and len(HeaderList[0]) == 2:
HeaderType = HeaderList[0][0]
HeaderKey = HeaderList[0][1]
if HeaderType in gIncludeMacroConversion:
Inc = gIncludeMacroConversion[HeaderType] % {"HeaderKey" : HeaderKey}
else:
# not known macro used in #include, always build the file by
# returning a empty dependency
FileCache[File] = []
return []
Inc = os.path.normpath(Inc)
CurrentFileDependencyList.append(Inc)
DepDb[F] = CurrentFileDependencyList
CurrentFilePath = F.Dir
PathList = [CurrentFilePath] + SearchPathList
for Inc in CurrentFileDependencyList:
for SearchPath in PathList:
FilePath = os.path.join(SearchPath, Inc)
if FilePath in gIsFileMap:
if not gIsFileMap[FilePath]:
continue
# If isfile is called too many times, the performance is slow down.
elif not os.path.isfile(FilePath):
gIsFileMap[FilePath] = False
continue
else:
gIsFileMap[FilePath] = True
FilePath = PathClass(FilePath)
FullPathDependList.append(FilePath)
if FilePath not in DependencySet:
FileStack.append(FilePath)
break
else:
EdkLogger.debug(EdkLogger.DEBUG_9, "%s included by %s was not found "\
"in any given path:\n\t%s" % (Inc, F, "\n\t".join(SearchPathList)))
FileCache[F] = FullPathDependList
DependencySet.update(FullPathDependList)
DependencySet.update(ForceList)
if File in DependencySet:
DependencySet.remove(File)
DependencyList = list(DependencySet) # remove duplicate ones
return DependencyList
# This acts like the main() function for the script, unless it is 'import'ed into another script.
if __name__ == '__main__':
pass
pass