From 0475fe3a80ae3d97190189fee98736be37632f7f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 28 Apr 2026 23:15:22 +0000 Subject: [PATCH 01/14] Add FILE_LEVEL fallback in _getFunctionLines for non-code files Agent-Logs-Url: https://github.com/se-sic/codeface/sessions/4b62fa28-3730-42ad-b839-2b9a534b6cb4 Co-authored-by: Thomas Bock --- codeface/VCS.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/codeface/VCS.py b/codeface/VCS.py index bcb4d840..0c1e8d34 100644 --- a/codeface/VCS.py +++ b/codeface/VCS.py @@ -1510,6 +1510,16 @@ def _getFunctionLines(self, file_layout_src, file_commit): func_lines = self._parseSrcFileCtags(srcFile.name) file_commit.artefact_line_range = False + if not func_lines: + # No functions detected by either Doxygen or ctags (e.g., for + # Markdown or other non-code files). Fall back to a single + # synthetic file-level artefact so that commits touching such + # files still populate commit_dependency instead of being dropped + # from the dependency-based analysis pipeline entirely. + # FILE_LEVEL is an established special entityId in this codebase. 
+ func_lines = {0: "FILE_LEVEL"} + file_commit.artefact_line_range = True + # clean up src temp file srcFile.close() From 398b8bc688b871708ea9353d2b04f38672404633 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 28 Apr 2026 23:26:30 +0000 Subject: [PATCH 02/14] feat: make source-file extension filter configurable via --all-files CLI flag Agent-Logs-Url: https://github.com/se-sic/codeface/sessions/728f8c8c-7f13-402c-9f1d-78a97d55d181 Co-authored-by: Thomas Bock --- codeface/VCS.py | 37 +++++++++++++++++++++++-------------- codeface/cli.py | 8 +++++++- codeface/cluster/cluster.py | 6 ++++-- codeface/project.py | 3 ++- 4 files changed, 36 insertions(+), 18 deletions(-) diff --git a/codeface/VCS.py b/codeface/VCS.py index 0c1e8d34..97bc7e1f 100644 --- a/codeface/VCS.py +++ b/codeface/VCS.py @@ -125,6 +125,9 @@ def __init__(self): #file names to include in analysis(non-taged based) self._fileNames = None + #when True, skip the source-file extension filter in addFiles4Analysis + self._all_files = False + self.subsys_description = {} def getCommitDict(self): @@ -145,6 +148,9 @@ def getFileCommitDict(self): def setFileNames(self, fileNames): self._fileNames = fileNames + def setAllFiles(self, value): + self._all_files = value + def getFileNames(self): return self._fileNames @@ -1579,23 +1585,26 @@ def addFiles4Analysis(self, cmt_id_list): cmd_base.append("-r") #get all files touched by all commits - all_files = set() + touched_files = set() for cmt_id in cmt_id_list: cmd = cmd_base + [cmt_id] cmt_files = execute_command(cmd).splitlines() - all_files.update(cmt_files) - - #filter results to only get implementation files - fileExt = (".c", ".cc", ".cpp", ".cxx", ".cs", ".asmx", ".m", ".mm", - ".js", ".coffee", ".java", ".j", ".jav", ".php",".py", ".sh", ".ps1", ".rb", - '.d', '.php4', '.php5', '.inc', '.phtml', '.m', '.mm', ".ada", ".erl", ".bb", - '.f', '.for', '.f90', '.idl', '.ddl', '.odl', '.tcl', 'sql', 
".q", ".exs", ".ex", - ".ru", ".rs", ".ts", ".go", ".dart", ".r", ".rscript", ".vue", # ".hs", - ".pl", ".pm", ".swift", ".lua", ".scala", ".sc", ".lisp", ".lsp", # ".feature", - ".groovy", ".gy", ".gv", ".gvy", ".gsh", ".kt", ".kts", ".ktm", ".es6", ".jsm") - - fileNames = [fileName for fileName in all_files if - fileName.lower().endswith(fileExt)] + touched_files.update(cmt_files) + + if self._all_files: + fileNames = list(touched_files) + else: + #filter results to only get implementation files + fileExt = (".c", ".cc", ".cpp", ".cxx", ".cs", ".asmx", ".m", ".mm", + ".js", ".coffee", ".java", ".j", ".jav", ".php",".py", ".sh", ".ps1", ".rb", + '.d', '.php4', '.php5', '.inc', '.phtml', '.m', '.mm', ".ada", ".erl", ".bb", + '.f', '.for', '.f90', '.idl', '.ddl', '.odl', '.tcl', 'sql', ".q", ".exs", ".ex", + ".ru", ".rs", ".ts", ".go", ".dart", ".r", ".rscript", ".vue", # ".hs", + ".pl", ".pm", ".swift", ".lua", ".scala", ".sc", ".lisp", ".lsp", # ".feature", + ".groovy", ".gy", ".gv", ".gvy", ".gsh", ".kt", ".kts", ".ktm", ".es6", ".jsm") + + fileNames = [fileName for fileName in touched_files if + fileName.lower().endswith(fileExt)] self.setFileNames(fileNames) diff --git a/codeface/cli.py b/codeface/cli.py index 9f5fef16..cc61129b 100644 --- a/codeface/cli.py +++ b/codeface/cli.py @@ -79,6 +79,11 @@ def get_parser(): help="Re-use an already existing vcs-analysis.db file. " "This flag is useful to continue a previously failed analysis" " or for debugging purposes.") + run_parser.add_argument( + '--all-files', action='store_true', dest="all_files", + help="Include all files touched by commits in the analysis, " + "bypassing the built-in source-file extension filter. 
" + "By default only known source-code extensions are included.") ml_parser = sub_parser.add_parser('ml', help='Run mailing list analysis') ml_parser.set_defaults(func=cmd_ml) @@ -126,7 +131,8 @@ def cmd_run(args): logfile = os.path.abspath(logfile) project_analyse(resdir, gitdir, codeface_conf, project_conf, args.no_report, args.loglevel, logfile, args.recreate, - args.profile_r, args.jobs, args.tagging, args.reuse_db) + args.profile_r, args.jobs, args.tagging, args.reuse_db, + args.all_files) return 0 def cmd_ml(args): diff --git a/codeface/cluster/cluster.py b/codeface/cluster/cluster.py index bf7aaf32..ca1268dc 100755 --- a/codeface/cluster/cluster.py +++ b/codeface/cluster/cluster.py @@ -48,7 +48,7 @@ def createDB(filename, git_repo, revrange, subsys_descr, link_type, - range_by_date, rcranges=None): + range_by_date, rcranges=None, all_files=False): #------------------ #configuration #------------------ @@ -57,6 +57,7 @@ def createDB(filename, git_repo, revrange, subsys_descr, link_type, git.setRevisionRange(revrange[0], revrange[1]) git.setSubsysDescription(subsys_descr) git.setRangeByDate(range_by_date) + git.setAllFiles(all_files) if rcranges != None: git.setRCRanges(rcranges) @@ -1854,7 +1855,8 @@ def performAnalysis(conf, dbm, dbfilename, git_repo, revrange, subsys_descr, log.devinfo("Creating data base for {0}..{1}".format(revrange[0], revrange[1])) createDB(dbfilename, git_repo, revrange, subsys_descr, \ - link_type, range_by_date, rcranges) + link_type, range_by_date, rcranges, + all_files=conf.get("all_files", False)) else: log.warning("REUSING data base for {0}..{1} " "(make sure it is up to date)" diff --git a/codeface/project.py b/codeface/project.py index 1dd9382d..920825a9 100644 --- a/codeface/project.py +++ b/codeface/project.py @@ -54,7 +54,7 @@ def project_setup(conf, recreate): def project_analyse(resdir, gitdir, codeface_conf, project_conf, no_report, loglevel, logfile, recreate, profile_r, - n_jobs, tagging_type, reuse_db): + n_jobs, 
tagging_type, reuse_db, all_files=False): pool = BatchJobPool(int(n_jobs)) conf = Configuration.load(codeface_conf, project_conf) tagging = conf["tagging"] @@ -74,6 +74,7 @@ def project_analyse(resdir, gitdir, codeface_conf, project_conf, repo = pathjoin(gitdir, conf["repo"], ".git") project_resdir = pathjoin(resdir, project, tagging) range_by_date = False + conf["all_files"] = all_files # When revisions are not provided by the configuration file # generate the analysis window automatically From 0c8b19827aa91b94e166b9ee2a3337e045a8d8b9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 29 Apr 2026 00:45:19 +0000 Subject: [PATCH 03/14] fix: guard against NULL ts[[i]] in gen.full.ts loop Agent-Logs-Url: https://github.com/se-sic/codeface/sessions/5e83c305-e4d2-4110-9c07-9115f74e779c Co-authored-by: Thomas Bock --- codeface/R/analyse_ts.r | 1 + 1 file changed, 1 insertion(+) diff --git a/codeface/R/analyse_ts.r b/codeface/R/analyse_ts.r index b0ee3cd2..d138319e 100755 --- a/codeface/R/analyse_ts.r +++ b/codeface/R/analyse_ts.r @@ -111,6 +111,7 @@ gen.full.ts <- function(conf) { } for (i in 1:length(ts)) { + if (is.null(ts[[i]])) next ts[[i]]$ChangedLines <- ts[[i]]$AddedLines + ts[[i]]$DeletedLines full.series[[i]] <- na.omit(xts(ts[[i]]$ChangedLines, order.by=ts[[i]]$commitDate)) From 4f569094f38172b40ee03160ef9184226b0a3504 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 29 Apr 2026 01:28:37 +0000 Subject: [PATCH 04/14] fix: guard make.index.unique against empty data frames to fix NULL full.ts Agent-Logs-Url: https://github.com/se-sic/codeface/sessions/8ac8d730-d57e-4a93-8ae2-9913e965f113 Co-authored-by: Thomas Bock --- codeface/R/analyse_ts.r | 1 + 1 file changed, 1 insertion(+) diff --git a/codeface/R/analyse_ts.r b/codeface/R/analyse_ts.r index d138319e..4223e75b 100755 --- a/codeface/R/analyse_ts.r +++ b/codeface/R/analyse_ts.r @@ -83,6 
+83,7 @@ compute.next.timestamp <- function(time, last.time) { ## Take a list of commits and make their date indices unique by ## adding a one second offset to identical ones. make.index.unique <- function(dat, subset) { + if (nrow(dat) == 0) return(dat) dat$commitDate <- ymd_hms(dat$commitDate, quiet=TRUE) last.timestamp <- min(dat$commitDate) - dseconds(1) From 8eb31fe41523fb288703af62d85cf1895580bb13 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 29 Apr 2026 01:38:51 +0000 Subject: [PATCH 05/14] fix: guard do.ts.analysis against NULL full.ts when all release ranges have no commits Agent-Logs-Url: https://github.com/se-sic/codeface/sessions/71ad0181-6804-4401-bd54-7c41a20cee1b Co-authored-by: Thomas Bock --- codeface/R/analyse_ts.r | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/codeface/R/analyse_ts.r b/codeface/R/analyse_ts.r index 4223e75b..7f05b36f 100755 --- a/codeface/R/analyse_ts.r +++ b/codeface/R/analyse_ts.r @@ -523,6 +523,11 @@ do.ts.analysis <- function(resdir, graphdir, conf) { ## Prepare the raw time series as input to the smoothing ## algorithms full.ts <- gen.full.ts(conf) + if (is.null(full.ts)) { + logwarn("No commit data found for any release range; skipping time series analysis", + logger="analyse_ts") + return(invisible(NULL)) + } series.merged <- process.ts(full.ts) ## Prepare y ranges for the different graph types From 75c1fd6d8673134716fdde9bb90f6144b3a1ba1d Mon Sep 17 00:00:00 2001 From: Thomas Bock Date: Wed, 29 Apr 2026 21:21:30 +0200 Subject: [PATCH 06/14] fix: remove dead duration code, guard zero-length ts Signed-off-by: Thomas Bock --- codeface/R/analyse_ts.r | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/codeface/R/analyse_ts.r b/codeface/R/analyse_ts.r index 7f05b36f..2b2b654a 100755 --- a/codeface/R/analyse_ts.r +++ b/codeface/R/analyse_ts.r @@ -142,8 +142,6 @@ gen.rev.list <- function(revisions) { ## data point. 
Using the robust median instead of mean considerably ## reduces the amount of outliers process.ts <- function(series) { - duration <- end(series) - start(series) - ## We compute the window lengths based on natural time units ## to avoid dependencies on the lifetime of the project, or on the ## project's relative activity @@ -523,7 +521,7 @@ do.ts.analysis <- function(resdir, graphdir, conf) { ## Prepare the raw time series as input to the smoothing ## algorithms full.ts <- gen.full.ts(conf) - if (is.null(full.ts)) { + if (is.null(full.ts) || length(full.ts) == 0) { logwarn("No commit data found for any release range; skipping time series analysis", logger="analyse_ts") return(invisible(NULL)) From e80afaf4b4b60186875e06355ef1fdbb8cf771f2 Mon Sep 17 00:00:00 2001 From: Thomas Bock Date: Wed, 29 Apr 2026 21:22:33 +0200 Subject: [PATCH 07/14] fix: use >= start in trim.series Signed-off-by: Thomas Bock --- codeface/R/ts_utils.r | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codeface/R/ts_utils.r b/codeface/R/ts_utils.r index 1c4c7800..e93f4164 100644 --- a/codeface/R/ts_utils.r +++ b/codeface/R/ts_utils.r @@ -20,7 +20,7 @@ suppressPackageStartupMessages(library(dtw)) ## Omit time series elements that exceed the given range trim.series <- function(series, start, end) { series <- series[which(index(series) < end),] - series <- series[which(index(series) > start),] + series <- series[which(index(series) >= start),] return(series) } From 304c26a52be4cb3675bcc807ddf13121de6418aa Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 29 Apr 2026 21:06:40 +0000 Subject: [PATCH 08/14] Fix IndexError in generate_analysis_windows when revs has only one element Agent-Logs-Url: https://github.com/se-sic/codeface/sessions/54ae5ce0-944c-4d66-ad07-b966169cd4a8 Co-authored-by: Thomas Bock --- codeface/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codeface/util.py b/codeface/util.py 
index 43337eef..a28bb915 100644 --- a/codeface/util.py +++ b/codeface/util.py @@ -537,7 +537,7 @@ def get_before_arg(num_months): # first commit does not carry the earliest commit date revs = [rev.split(",") for rev in revs] rev_len = len(revs) - if int(revs[0][1]) > int(revs[1][1]): + if len(revs) >= 2 and int(revs[0][1]) > int(revs[1][1]): del revs[0] # Extract hash values and dates intro seperate lists From 872d303979fb0a0d1fe635e68e0a6c35c089b231 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 4 May 2026 00:36:05 +0000 Subject: [PATCH 09/14] Fix submodule handling: skip gitlinks in ls-tree check before git blame Agent-Logs-Url: https://github.com/se-sic/codeface/sessions/940b6e95-31b6-40b3-a3b5-717dd395cf15 Co-authored-by: Thomas Bock --- codeface/VCS.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/codeface/VCS.py b/codeface/VCS.py index 97bc7e1f..20ba17f8 100644 --- a/codeface/VCS.py +++ b/codeface/VCS.py @@ -1230,13 +1230,17 @@ def _prepareFileCommitList(self, fnameList, link_type, singleBlame=True, #revision range rev = self.rev_end - # Check if file has been deleted + # Check if file has been deleted or is a submodule (gitlink) cmd = "git --git-dir={0} ls-tree".format(self.repo).split() - cmd.append("--name-only") cmd.append("--full-tree") cmd.append("-r") cmd.append(rev) - existing_files = execute_command(cmd).split() + ls_tree_output = execute_command(cmd).splitlines() + # ls-tree output format: "<mode> <type> <object>\t<file>" + # Exclude gitlinks (mode 160000) which represent git submodules; + # git blame cannot be run on a submodule path. 
+ existing_files = [line.split('\t', 1)[1] for line in ls_tree_output + if line and not line.startswith('160000')] if file_commit.filename in existing_files: # retrieve blame data if singleBlame: #only one set of blame data per file From 258b5955f00d8d4b76bad284bbb3209eb7a80287 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 4 May 2026 01:01:11 +0000 Subject: [PATCH 10/14] Fix typos Agent-Logs-Url: https://github.com/se-sic/codeface/sessions/8fa45b90-96b2-4b56-9e28-b373eb50240a Co-authored-by: Thomas Bock --- codeface/util.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/codeface/util.py b/codeface/util.py index a28bb915..afcac4fa 100644 --- a/codeface/util.py +++ b/codeface/util.py @@ -526,7 +526,7 @@ def get_before_arg(num_months): end = start start = end + window_size_months - # Check if any commits occurred since the last analysis window + # Check if any commits occurred since the last analysis window. 
if rev_start[0] != revs[0]: revs = rev_start + revs # else: no commit happened since last window, don't add duplicate @@ -540,13 +540,13 @@ def get_before_arg(num_months): if len(revs) >= 2 and int(revs[0][1]) > int(revs[1][1]): del revs[0] - # Extract hash values and dates intro seperate lists + # Extract hash values and dates into separate lists revs_hash = [rev[0] for rev in revs] revs_date = [rev[2].split(" ")[0] for rev in revs] - # We cannot detect release canndidate tags in this analysis mode, + # We cannot detect release candidate tags in this analysis mode, # so provide a list with None entries - rcs = [None for x in range(len(revs))] + rcs = [None for x in range(len(revs_hash))] return revs_hash, rcs, revs_date From 398a41d2303b4b0355c03dce88728eef439bde02 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 4 May 2026 04:11:49 +0000 Subject: [PATCH 11/14] Raise clear error when repo has only one commit in generate_analysis_windows Agent-Logs-Url: https://github.com/se-sic/codeface/sessions/a5144f38-53d6-465b-9909-f81152ebd84c Co-authored-by: bockthom <5575176+bockthom@users.noreply.github.com> --- codeface/util.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/codeface/util.py b/codeface/util.py index afcac4fa..10091a2c 100644 --- a/codeface/util.py +++ b/codeface/util.py @@ -540,6 +540,12 @@ def get_before_arg(num_months): if len(revs) >= 2 and int(revs[0][1]) > int(revs[1][1]): del revs[0] + if len(revs) < 2: + log.critical("The repository contains only a single commit. 
" + "At least two commits are required for analysis.") + raise ValueError("Repository has only one commit; " + "analysis requires at least two commits.") + # Extract hash values and dates into separate lists revs_hash = [rev[0] for rev in revs] revs_date = [rev[2].split(" ")[0] for rev in revs] From 16679eca2a17cf5305244a2168f99845450f1d1e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 4 May 2026 21:38:06 +0000 Subject: [PATCH 12/14] Replace raise ValueError with sys.exit(1) to prevent hang on single-commit repo Agent-Logs-Url: https://github.com/se-sic/codeface/sessions/2e8961f6-a26f-49dc-b1c5-336bcfa5b48e Co-authored-by: bockthom <5575176+bockthom@users.noreply.github.com> --- codeface/util.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/codeface/util.py b/codeface/util.py index 10091a2c..acdee521 100644 --- a/codeface/util.py +++ b/codeface/util.py @@ -543,8 +543,7 @@ def get_before_arg(num_months): if len(revs) < 2: log.critical("The repository contains only a single commit. 
" "At least two commits are required for analysis.") - raise ValueError("Repository has only one commit; " - "analysis requires at least two commits.") + sys.exit(1) # Extract hash values and dates into separate lists revs_hash = [rev[0] for rev in revs] From 34e9742fb9d661040818c9e4c3b96dd6cc01272e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 4 May 2026 21:56:46 +0000 Subject: [PATCH 13/14] Guard revs[0] access to fix IndexError on single-commit repo Agent-Logs-Url: https://github.com/se-sic/codeface/sessions/9eebcada-84a1-42b8-875a-d4f5cca423b0 Co-authored-by: bockthom <5575176+bockthom@users.noreply.github.com> --- codeface/util.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/codeface/util.py b/codeface/util.py index acdee521..3644443d 100644 --- a/codeface/util.py +++ b/codeface/util.py @@ -527,7 +527,9 @@ def get_before_arg(num_months): start = end + window_size_months # Check if any commits occurred since the last analysis window. - if rev_start[0] != revs[0]: + # revs may be empty if no commit was found by the initial --before query + # (e.g. single-commit repo); treat that the same as a new entry. 
+ if len(revs) == 0 or rev_start[0] != revs[0]: revs = rev_start + revs # else: no commit happened since last window, don't add duplicate # revisions From be69c7f3c4a7ec85f6b5d61e1c43264bffd22c7f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 4 May 2026 22:09:22 +0000 Subject: [PATCH 14/14] Fix non-ASCII filenames by disabling core.quotepath in git diff-tree and ls-tree Prevent Python 2 UnicodeDecodeError in execute_command when filenames contain non-ASCII bytes Agent-Logs-Url: https://github.com/se-sic/codeface/sessions/4586695e-7500-4873-aff0-bca899936fd8 Co-authored-by: Thomas Bock --- codeface/VCS.py | 4 ++-- codeface/util.py | 8 +++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/codeface/VCS.py b/codeface/VCS.py index 20ba17f8..58e781ac 100644 --- a/codeface/VCS.py +++ b/codeface/VCS.py @@ -1231,7 +1231,7 @@ def _prepareFileCommitList(self, fnameList, link_type, singleBlame=True, rev = self.rev_end # Check if file has been deleted or is a submodule (gitlink) - cmd = "git --git-dir={0} ls-tree".format(self.repo).split() + cmd = "git -c core.quotepath=false --git-dir={0} ls-tree".format(self.repo).split() cmd.append("--full-tree") cmd.append("-r") cmd.append(rev) @@ -1582,7 +1582,7 @@ def addFiles4Analysis(self, cmt_id_list): -- Input -- directories - a list of paths to limit the search for filenames ''' - cmd_base = 'git --git-dir={0} diff-tree'.format(self.repo).split() + cmd_base = 'git -c core.quotepath=false --git-dir={0} diff-tree'.format(self.repo).split() cmd_base.append("--diff-filter=ACMRTB") cmd_base.append("--no-commit-id") cmd_base.append("--name-only") diff --git a/codeface/util.py b/codeface/util.py index 3644443d..eac54b58 100644 --- a/codeface/util.py +++ b/codeface/util.py @@ -252,7 +252,13 @@ def execute_command(cmd, ignore_errors=False, direct_io=False, cwd=None, silent_ If direct_io is True, do not capture the stdin and stdout of the command. 
Returns the stdout of the command. ''' - jcmd = " ".join(cmd) + # In Python 2, cmd may contain a mix of unicode (e.g. repo path from + # PyYAML config) and bytes (e.g. filenames from git output). A plain + # " ".join() would then try to decode bytes with non-ASCII content + # (like emoji filenames) as ASCII and raise UnicodeDecodeError. + # Encode any unicode elements to UTF-8 bytes so the join stays in bytes. + jcmd = b" ".join(s.encode('utf-8') if isinstance(s, unicode) else s + for s in cmd) log.debug("Running command: {}".format(jcmd)) try: if direct_io: