BaseTools: Improve the cache hit in the edk2 build cache

BZ: https://bugzilla.tianocore.org/show_bug.cgi?id=1927

Current cache hash algorithm does not parse and generate
the makefile to get the accurate dependency files for a
module. It instead use the platform and package meta files
to get the module depenedency in a quick but over approximate
way. These meta files are monolithic and involve many redundant
dependency for the module, which cause the module build
cache miss easily.
This patch introduces one more cache checkpoint and a new
hash algorithm besides the current quick one. The new hash
algorithm leverages the module makefile to achieve more
accurate and precise dependency info for a module. When
the build cache miss with the first quick hash, the
Basetool will caculate new one after makefile is generated
and then check again.

Cc: Liming Gao <liming.gao@intel.com>
Cc: Bob Feng <bob.c.feng@intel.com>
Signed-off-by: Steven Shi <steven.shi@intel.com>
Reviewed-by: Bob Feng <bob.c.feng@intel.com>
This commit is contained in:
Shi, Steven
2019-08-15 22:26:17 +08:00
committed by Feng, Bob C
parent 8113281728
commit 0e7e7a264c
7 changed files with 869 additions and 200 deletions

129
BaseTools/Source/Python/build/build.py Normal file → Executable file
View File

@ -595,7 +595,7 @@ class BuildTask:
#
def AddDependency(self, Dependency):
for Dep in Dependency:
if not Dep.BuildObject.IsBinaryModule and not Dep.BuildObject.CanSkipbyHash():
if not Dep.BuildObject.IsBinaryModule and not Dep.BuildObject.CanSkipbyCache(GlobalData.gCacheIR):
self.DependencyList.append(BuildTask.New(Dep)) # BuildTask list
## The thread wrapper of LaunchCommand function
@ -811,7 +811,7 @@ class Build():
self.AutoGenMgr = None
EdkLogger.info("")
os.chdir(self.WorkspaceDir)
self.share_data = Manager().dict()
GlobalData.gCacheIR = Manager().dict()
self.log_q = log_q
def StartAutoGen(self,mqueue, DataPipe,SkipAutoGen,PcdMaList,share_data):
try:
@ -820,6 +820,13 @@ class Build():
feedback_q = mp.Queue()
file_lock = mp.Lock()
error_event = mp.Event()
GlobalData.file_lock = file_lock
FfsCmd = DataPipe.Get("FfsCommand")
if FfsCmd is None:
FfsCmd = {}
GlobalData.FfsCmd = FfsCmd
GlobalData.libConstPcd = DataPipe.Get("LibConstPcd")
GlobalData.Refes = DataPipe.Get("REFS")
auto_workers = [AutoGenWorkerInProcess(mqueue,DataPipe.dump_file,feedback_q,file_lock,share_data,self.log_q,error_event) for _ in range(self.ThreadNumber)]
self.AutoGenMgr = AutoGenManager(auto_workers,feedback_q,error_event)
self.AutoGenMgr.start()
@ -827,14 +834,28 @@ class Build():
w.start()
if PcdMaList is not None:
for PcdMa in PcdMaList:
if GlobalData.gBinCacheSource and self.Target in [None, "", "all"]:
PcdMa.GenModuleFilesHash(share_data)
PcdMa.GenPreMakefileHash(share_data)
if PcdMa.CanSkipbyPreMakefileCache(share_data):
continue
PcdMa.CreateCodeFile(False)
PcdMa.CreateMakeFile(False,GenFfsList = DataPipe.Get("FfsCommand").get((PcdMa.MetaFile.File, PcdMa.Arch),[]))
if GlobalData.gBinCacheSource and self.Target in [None, "", "all"]:
PcdMa.GenMakeHeaderFilesHash(share_data)
PcdMa.GenMakeHash(share_data)
if PcdMa.CanSkipbyMakeCache(share_data):
continue
self.AutoGenMgr.join()
rt = self.AutoGenMgr.Status
return rt, 0
except Exception as e:
return False,e.errcode
except FatalError as e:
return False, e.args[0]
except:
return False, UNKNOWN_ERROR
## Load configuration
#
@ -1199,10 +1220,11 @@ class Build():
mqueue.put(m)
AutoGenObject.DataPipe.DataContainer = {"FfsCommand":FfsCommand}
AutoGenObject.DataPipe.DataContainer = {"CommandTarget": self.Target}
self.Progress.Start("Generating makefile and code")
data_pipe_file = os.path.join(AutoGenObject.BuildDir, "GlobalVar_%s_%s.bin" % (str(AutoGenObject.Guid),AutoGenObject.Arch))
AutoGenObject.DataPipe.dump(data_pipe_file)
autogen_rt, errorcode = self.StartAutoGen(mqueue, AutoGenObject.DataPipe, self.SkipAutoGen, PcdMaList,self.share_data)
autogen_rt,errorcode = self.StartAutoGen(mqueue, AutoGenObject.DataPipe, self.SkipAutoGen, PcdMaList, GlobalData.gCacheIR)
self.Progress.Stop("done!")
if not autogen_rt:
self.AutoGenMgr.TerminateWorkers()
@ -1799,6 +1821,15 @@ class Build():
CmdListDict = None
if GlobalData.gEnableGenfdsMultiThread and self.Fdf:
CmdListDict = self._GenFfsCmd(Wa.ArchList)
# Add Platform and Package level hash in share_data for module hash calculation later
if GlobalData.gBinCacheSource or GlobalData.gBinCacheDest:
GlobalData.gCacheIR[('PlatformHash')] = GlobalData.gPlatformHash
for PkgName in GlobalData.gPackageHash.keys():
GlobalData.gCacheIR[(PkgName, 'PackageHash')] = GlobalData.gPackageHash[PkgName]
GlobalData.file_lock = mp.Lock()
GlobalData.FfsCmd = CmdListDict
self.Progress.Stop("done!")
MaList = []
ExitFlag = threading.Event()
@ -1808,20 +1839,23 @@ class Build():
AutoGenStart = time.time()
GlobalData.gGlobalDefines['ARCH'] = Arch
Pa = PlatformAutoGen(Wa, self.PlatformFile, BuildTarget, ToolChain, Arch)
GlobalData.libConstPcd = Pa.DataPipe.Get("LibConstPcd")
GlobalData.Refes = Pa.DataPipe.Get("REFS")
for Module in Pa.Platform.Modules:
if self.ModuleFile.Dir == Module.Dir and self.ModuleFile.Name == Module.Name:
Ma = ModuleAutoGen(Wa, Module, BuildTarget, ToolChain, Arch, self.PlatformFile,Pa.DataPipe)
if Ma is None:
continue
MaList.append(Ma)
if Ma.CanSkipbyHash():
self.HashSkipModules.append(Ma)
if GlobalData.gBinCacheSource:
EdkLogger.quiet("cache hit: %s[%s]" % (Ma.MetaFile.Path, Ma.Arch))
continue
else:
if GlobalData.gBinCacheSource:
EdkLogger.quiet("cache miss: %s[%s]" % (Ma.MetaFile.Path, Ma.Arch))
if GlobalData.gBinCacheSource and self.Target in [None, "", "all"]:
Ma.GenModuleFilesHash(GlobalData.gCacheIR)
Ma.GenPreMakefileHash(GlobalData.gCacheIR)
if Ma.CanSkipbyPreMakefileCache(GlobalData.gCacheIR):
self.HashSkipModules.append(Ma)
EdkLogger.quiet("cache hit: %s[%s]" % (Ma.MetaFile.Path, Ma.Arch))
continue
# Not to auto-gen for targets 'clean', 'cleanlib', 'cleanall', 'run', 'fds'
if self.Target not in ['clean', 'cleanlib', 'cleanall', 'run', 'fds']:
# for target which must generate AutoGen code and makefile
@ -1841,6 +1875,18 @@ class Build():
self.Progress.Stop("done!")
if self.Target == "genmake":
return True
if GlobalData.gBinCacheSource and self.Target in [None, "", "all"]:
Ma.GenMakeHeaderFilesHash(GlobalData.gCacheIR)
Ma.GenMakeHash(GlobalData.gCacheIR)
if Ma.CanSkipbyMakeCache(GlobalData.gCacheIR):
self.HashSkipModules.append(Ma)
EdkLogger.quiet("cache hit: %s[%s]" % (Ma.MetaFile.Path, Ma.Arch))
continue
else:
EdkLogger.quiet("cache miss: %s[%s]" % (Ma.MetaFile.Path, Ma.Arch))
Ma.PrintFirstMakeCacheMissFile(GlobalData.gCacheIR)
self.BuildModules.append(Ma)
# Initialize all modules in tracking to 'FAIL'
if Ma.Arch not in GlobalData.gModuleBuildTracking:
@ -1985,11 +2031,18 @@ class Build():
if GlobalData.gEnableGenfdsMultiThread and self.Fdf:
CmdListDict = self._GenFfsCmd(Wa.ArchList)
# Add Platform and Package level hash in share_data for module hash calculation later
if GlobalData.gBinCacheSource or GlobalData.gBinCacheDest:
GlobalData.gCacheIR[('PlatformHash')] = GlobalData.gPlatformHash
for PkgName in GlobalData.gPackageHash.keys():
GlobalData.gCacheIR[(PkgName, 'PackageHash')] = GlobalData.gPackageHash[PkgName]
# multi-thread exit flag
ExitFlag = threading.Event()
ExitFlag.clear()
self.AutoGenTime += int(round((time.time() - WorkspaceAutoGenTime)))
self.BuildModules = []
TotalModules = []
for Arch in Wa.ArchList:
PcdMaList = []
AutoGenStart = time.time()
@ -2009,6 +2062,7 @@ class Build():
ModuleList.append(Inf)
Pa.DataPipe.DataContainer = {"FfsCommand":CmdListDict}
Pa.DataPipe.DataContainer = {"Workspace_timestamp": Wa._SrcTimeStamp}
Pa.DataPipe.DataContainer = {"CommandTarget": self.Target}
for Module in ModuleList:
# Get ModuleAutoGen object to generate C code file and makefile
Ma = ModuleAutoGen(Wa, Module, BuildTarget, ToolChain, Arch, self.PlatformFile,Pa.DataPipe)
@ -2019,30 +2073,34 @@ class Build():
Ma.PlatformInfo = Pa
Ma.Workspace = Wa
PcdMaList.append(Ma)
if Ma.CanSkipbyHash():
self.HashSkipModules.append(Ma)
if GlobalData.gBinCacheSource:
EdkLogger.quiet("cache hit: %s[%s]" % (Ma.MetaFile.Path, Ma.Arch))
continue
else:
if GlobalData.gBinCacheSource:
EdkLogger.quiet("cache miss: %s[%s]" % (Ma.MetaFile.Path, Ma.Arch))
# Not to auto-gen for targets 'clean', 'cleanlib', 'cleanall', 'run', 'fds'
# for target which must generate AutoGen code and makefile
self.BuildModules.append(Ma)
TotalModules.append(Ma)
# Initialize all modules in tracking to 'FAIL'
if Ma.Arch not in GlobalData.gModuleBuildTracking:
GlobalData.gModuleBuildTracking[Ma.Arch] = dict()
if Ma not in GlobalData.gModuleBuildTracking[Ma.Arch]:
GlobalData.gModuleBuildTracking[Ma.Arch][Ma] = 'FAIL'
mqueue = mp.Queue()
for m in Pa.GetAllModuleInfo:
mqueue.put(m)
data_pipe_file = os.path.join(Pa.BuildDir, "GlobalVar_%s_%s.bin" % (str(Pa.Guid),Pa.Arch))
Pa.DataPipe.dump(data_pipe_file)
autogen_rt, errorcode = self.StartAutoGen(mqueue, Pa.DataPipe, self.SkipAutoGen, PcdMaList,self.share_data)
autogen_rt, errorcode = self.StartAutoGen(mqueue, Pa.DataPipe, self.SkipAutoGen, PcdMaList, GlobalData.gCacheIR)
# Skip cache hit modules
if GlobalData.gBinCacheSource:
for Ma in TotalModules:
if (Ma.MetaFile.Path, Ma.Arch) in GlobalData.gCacheIR and \
GlobalData.gCacheIR[(Ma.MetaFile.Path, Ma.Arch)].PreMakeCacheHit:
self.HashSkipModules.append(Ma)
continue
if (Ma.MetaFile.Path, Ma.Arch) in GlobalData.gCacheIR and \
GlobalData.gCacheIR[(Ma.MetaFile.Path, Ma.Arch)].MakeCacheHit:
self.HashSkipModules.append(Ma)
continue
self.BuildModules.append(Ma)
else:
self.BuildModules.extend(TotalModules)
if not autogen_rt:
self.AutoGenMgr.TerminateWorkers()
@ -2050,9 +2108,24 @@ class Build():
raise FatalError(errorcode)
self.AutoGenTime += int(round((time.time() - AutoGenStart)))
self.Progress.Stop("done!")
if GlobalData.gBinCacheSource:
EdkLogger.quiet("Total cache hit driver num: %s, cache miss driver num: %s" % (len(set(self.HashSkipModules)), len(set(self.BuildModules))))
CacheHitMa = set()
CacheNotHitMa = set()
for IR in GlobalData.gCacheIR.keys():
if 'PlatformHash' in IR or 'PackageHash' in IR:
continue
if GlobalData.gCacheIR[IR].PreMakeCacheHit or GlobalData.gCacheIR[IR].MakeCacheHit:
CacheHitMa.add(IR)
else:
# There might be binary module or module which has .inc files, not count for cache miss
CacheNotHitMa.add(IR)
EdkLogger.quiet("Total module num: %s, cache hit module num: %s" % (len(CacheHitMa)+len(CacheNotHitMa), len(CacheHitMa)))
for Arch in Wa.ArchList:
MakeStart = time.time()
for Ma in self.BuildModules:
for Ma in set(self.BuildModules):
# Generate build task for the module
if not Ma.IsBinaryModule:
Bt = BuildTask.New(ModuleMakeUnit(Ma, Pa.BuildCommand,self.Target))