Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
237 changes: 237 additions & 0 deletions heuristics/bugginess_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,237 @@
import re
from typing import Optional

from bohr.collection.artifacts import Commit
from bohr.core import Heuristic
from bohr.labeling.labelset import Labels
from labels import CommitLabel


@Heuristic(Commit)
def no_files_have_modified_status(commit: Commit) -> Optional[Labels]:
    """Vote NonBugFix when none of the commit's files has status "modified".

    Abstains (returns None) when the commit lists no files, when its first
    file has status "empty", or when at least one file is "modified".
    """
    files = commit.commit_files
    if len(files) == 0 or files[0].status == "empty":
        return None
    if any(changed.status == "modified" for changed in files):
        return None
    return CommitLabel.NonBugFix


@Heuristic(Commit)
def bug_if_only_changed_lines_in_one_code_file(commit: Commit) -> Optional[Labels]:
    """Vote BugFix for a commit that only changes lines in one code file.

    All of the following must hold, otherwise the heuristic abstains:
    the commit has exactly one file, its status is "modified", its filename
    suffix is listed in ``code_extensions`` or ``passive_code_extensions``,
    it has non-empty ``changes``, and both ``no_added_lines()`` and
    ``no_removed_lines()`` are truthy.
    """
    files = commit.commit_files
    if len(files) != 1:
        return None

    only_file = files[0]
    if only_file.status != "modified":
        return None
    # A float filename is the NaN placeholder for a missing name, so there
    # is no suffix to inspect.
    if isinstance(only_file.filename, float):
        return None

    suffix = only_file.filename.split(".")[-1]
    if suffix not in [*code_extensions, *passive_code_extensions]:
        return None

    if (
        only_file.changes
        and only_file.no_added_lines()
        and only_file.no_removed_lines()
    ):
        return CommitLabel.BugFix
    return None


@Heuristic(Commit)
def bugless_if_at_least_5_added_files(commit: Commit) -> Optional[Labels]:
    """Vote NonBugFix when five or more files have status "added"."""
    added_total = sum(1 for entry in commit.commit_files if entry.status == "added")
    if added_total >= 5:
        return CommitLabel.NonBugFix
    return None


@Heuristic(Commit)
def bugless_if_one_added_file(commit: Commit) -> Optional[Labels]:
    """Vote NonBugFix when the commit's only file has status "added"."""
    files = commit.commit_files
    single_addition = len(files) == 1 and files[0].status == "added"
    return CommitLabel.NonBugFix if single_addition else None


@Heuristic(Commit)
def bugless_if_at_least_2_removed_files(commit: Commit) -> Optional[Labels]:
    """Vote NonBugFix when two or more files have status "removed"."""
    removed_total = sum(
        1 for entry in commit.commit_files if entry.status == "removed"
    )
    if removed_total >= 2:
        return CommitLabel.NonBugFix
    return None


@Heuristic(Commit)
def bugless_if_one_removed_file(commit: Commit) -> Optional[Labels]:
    """Vote NonBugFix when the commit's only file has status "removed"."""
    files = commit.commit_files
    single_removal = len(files) == 1 and files[0].status == "removed"
    return CommitLabel.NonBugFix if single_removal else None


@Heuristic(Commit)
def refactoring_if_at_least_2_renamed(commit: Commit) -> Optional[Labels]:
    """Vote Refactoring when two or more files have status "renamed"."""
    renamed_total = sum(
        1 for entry in commit.commit_files if entry.status == "renamed"
    )
    if renamed_total >= 2:
        return CommitLabel.Refactoring
    return None


# Filename suffixes (the text after the last "." of a filename) that the
# heuristics in this module compare against. Entries are case-sensitive and
# each list keeps its original ordering.

# Suffixes treated as source code.
code_extensions = [
    "js", "java", "py", "php", "cpp", "h", "rb", "ts", "c", "go", "css", "cs",
    "scss", "jsx", "m", "less", "sh", "scala", "cc", "coffee", "F90", "hpp",
    "inc", "sql", "erb", "tsx", "kt", "Makefile", "groovy", "hbs", "swift",
    "hh", "twig", "haml", "hs", "scssc",
]

ignore_extensions = ["gitignore"]

config_extensions = [
    "yaml", "yml", "gradle", "in", "properties", "conf", "csproj", "ini",
    "config",
]

# Ignore files, configuration and markup; checked together with
# `code_extensions` wherever a file is tested for being "code".
passive_code_extensions = (
    ignore_extensions + config_extensions + ["xml", "html", "json", "htm"]
)

binary_extensions = [
    "png", "po", "jpg", "gif", "tgz", "gz", "jar", "mo", "slj", "class",
    "gem", "map", "pdf", "ttf", "aw",
]

generated_text_extensions = ["out"]

doc_text_extensions = ["markdown", "md", "rst", "adoc"]

# Everything explicitly classified as not being code.
non_code_extensions = (
    binary_extensions
    + generated_text_extensions
    + doc_text_extensions
    + ["txt", "svg", "lock", "LICENSE", "csv"]
)


@Heuristic(Commit)
def bugless_if_not_code_files(commit: Commit) -> Optional[Labels]:
    """Vote NonBugFix when the commit touches files but none of them is code.

    A file counts as code when its filename suffix is listed in
    ``code_extensions`` or ``passive_code_extensions``; the first such file
    makes the heuristic abstain. Files without a usable filename are skipped,
    and a commit with no remaining files also abstains.
    """
    # TODO move filetype logic to CommitFile artifact?
    code_like = [*code_extensions, *passive_code_extensions]
    saw_non_code_file = False
    for entry in commit.commit_files:
        name = entry.filename
        if isinstance(name, float):
            # TODO filename is NaN if it's not given <- needs to be fixed
            continue
        if name.split(".")[-1] in code_like:
            return None
        saw_non_code_file = True
    if saw_non_code_file:
        return CommitLabel.NonBugFix
    return None


@Heuristic(Commit)
def buggless_if_many_lines_changed(commit: Commit) -> Optional[Labels]:
    """Vote NonBugFix when the combined size of all file changes exceeds 5000.

    The size of a file's change is ``len(file.changes)``; files whose
    ``changes`` is falsy contribute nothing.
    """
    # NOTE(review): other heuristics here run substring tests on `changes`,
    # so it looks like a string and this may count characters rather than
    # lines -- confirm against the CommitFile artifact.
    total_size = sum(
        len(entry.changes) for entry in commit.commit_files if entry.changes
    )
    if total_size > 5000:
        return CommitLabel.NonBugFix
    return None


@Heuristic(Commit)
def bugless_if_many_files_changes(commit: Commit) -> Optional[Labels]:
    """Vote NonBugFix when the commit touches more than 15 files."""
    touches_many_files = len(commit.commit_files) > 15
    return CommitLabel.NonBugFix if touches_many_files else None


@Heuristic(Commit)
def all_files_test_add(commit: Commit) -> Optional[Labels]:
    """Vote TestAdd when every file is a test file that only gains content.

    A file qualifies when its path contains "test" (case-insensitive) and
    either its status is "added" or its ``changes`` is non-empty and contains
    neither a "<re>" nor a "<del>" marker (presumably replacement and
    deletion markers -- confirm against the CommitFile artifact).

    Returns:
        ``CommitLabel.TestAdd`` if all files qualify; ``None`` if the commit
        has no files or any file fails the check.
    """
    if len(commit.commit_files) == 0:
        return None

    test_file_regex = re.compile(r"test", flags=re.I)
    for file in commit.commit_files:
        # `search`, not `match`: `match` only looks at the very start of the
        # string, so paths such as "src/test/FooTest.java" were never
        # recognised as test files.
        if not test_file_regex.search(str(file.filename)):
            return None
        if file.status == "added":
            continue
        # Only inspect `changes` for files that were not added, as before.
        changes = file.changes
        if not changes or "<re>" in changes or "<del>" in changes:
            return None
    return CommitLabel.TestAdd


@Heuristic(Commit)
def all_files_test_fix(commit: Commit) -> Optional[Labels]:
    """Vote TestFix when every file is a modified test file with a "<re>" change.

    A file qualifies when its path contains "test" (case-insensitive), its
    status is "modified", and its non-empty ``changes`` contains the "<re>"
    marker (presumably a replaced-line marker -- confirm against the
    CommitFile artifact).

    Returns:
        ``CommitLabel.TestFix`` if all files qualify; ``None`` if the commit
        has no files or any file fails the check.
    """
    if len(commit.commit_files) == 0:
        return None

    test_file_regex = re.compile(r"test", flags=re.I)
    for file in commit.commit_files:
        # `search`, not `match`: `match` only looks at the very start of the
        # string, so test files inside nested directories were never matched.
        is_test_file = test_file_regex.search(str(file.filename))
        if not (
            is_test_file
            and file.status == "modified"
            and file.changes
            and "<re>" in file.changes
        ):
            return None
    return CommitLabel.TestFix
67 changes: 67 additions & 0 deletions heuristics/commit_explorer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from typing import Optional

from commitexplorer.client import CommitExplorerClientException, CommitNotFoundException

import labels as l
from bohr.collection.artifacts import Commit
from bohr.core import Heuristic
from bohr.labeling.labelset import Labels


@Heuristic(Commit)
def commit_explorer_output_merge(commit: Commit) -> Optional[Labels]:
    """Vote Merge when the commit-explorer data flags the commit as a merge.

    Reads the "merge" entry of the "special_commit_finder/0_1" section of
    ``commit.commit_explorer_data``. Abstains when the data is missing, the
    section is absent, the flag is falsy, or a commit-explorer client
    exception is raised.
    """
    finder_key = "special_commit_finder/0_1"
    try:
        if commit.commit_explorer_data is None:
            return None
        if finder_key not in commit.commit_explorer_data:
            return None
        if commit.commit_explorer_data[finder_key]["merge"]:
            return l.CommitLabel.Merge
        return None
    except (CommitExplorerClientException, CommitNotFoundException):
        return None


@Heuristic(Commit)
def commit_explorer_output_init(commit: Commit) -> Optional[Labels]:
    """Vote InitialCommit when the commit-explorer data flags it as initial.

    Reads the "initial" entry of the "special_commit_finder/0_1" section of
    ``commit.commit_explorer_data``. Abstains when the data is missing, the
    section is absent, the flag is falsy, or a commit-explorer client
    exception is raised.
    """
    finder_key = "special_commit_finder/0_1"
    try:
        if commit.commit_explorer_data is None:
            return None
        if finder_key not in commit.commit_explorer_data:
            return None
        if commit.commit_explorer_data[finder_key]["initial"]:
            return l.CommitLabel.InitialCommit
        return None
    except (CommitExplorerClientException, CommitNotFoundException):
        return None


# @Heuristic(Commit)
# def commit_explorer_output_refactoring_miner(commit: Commit) -> Optional[Labels]:
# if commit.commit_explorer_data is None:
# return None
#
# data = commit.commit_explorer_data
# if "refactoring_miner/2_1_0" in data:
# if data["refactoring_miner/2_1_0"]:
# if data["refactoring_miner/2_1_0"]['status'] == 'ok':
# if len(data["refactoring_miner/2_1_0"]["refactorings"]) == 1:
# if data["refactoring_miner/2_1_0"]["refactorings"][0]["type"] in ['Move Class']:
# print("Contains refactoring")
# return l.CommitLabel.Refactoring


@Heuristic(Commit)
def commit_explorer_output_sstubs(commit: Commit) -> Optional[Labels]:
    """Vote BugFix when the commit-explorer data has a non-empty SStuBs entry.

    Looks up the "mine_sstubs/head" section of
    ``commit.commit_explorer_data``; a truthy value prints a diagnostic line
    and yields BugFix. Abstains when the data is missing, the section is
    absent or falsy, or a commit-explorer client exception is raised.
    """
    sstubs_key = "mine_sstubs/head"
    try:
        if commit.commit_explorer_data is None:
            return None

        explorer_data = commit.commit_explorer_data
        if sstubs_key not in explorer_data or not explorer_data[sstubs_key]:
            return None
        print("Contains bug")
        return l.CommitLabel.BugFix
    except (CommitExplorerClientException, CommitNotFoundException):
        return None
77 changes: 77 additions & 0 deletions heuristics/conventional_commits.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import re
from typing import Optional

import labels as l
from bohr.collection.artifacts import Commit
from bohr.core import Heuristic
from bohr.labeling.labelset import Labels

# Sample commit messages used by the doctests of `conventional_commit_regex`.

# "feat" header followed by a blank line and a one-line body.
example1 = """feat: allow provided config object to extend other configs

BREAKING CHANGE: `extends` key in config file is now used for extending other config files
"""

# Header only, with the optional "!" marker after the type.
example2 = """refactor!: drop support for Node 6
"""

# "!" marker plus a body paragraph.
example3 = """refactor!: drop support for Node 6

BREAKING CHANGE: refactor to use JavaScript features not available in Node 6.
"""

# "docs" header only.
example4 = """docs: correct spelling of CHANGELOG
"""

# Header with a parenthesised scope after the type.
example5 = """feat(lang): add polish language
"""

# Header followed by several blank-line-separated paragraphs.
example6 = """fix: correct minor typos in code

see the issue for details

on typos fixed.

Reviewed-by: Z
Refs #133
"""

# Matches, anchored at the start of the message (`\A`), one of:
#   * the literal "Initial commit";
#   * "Merge " followed by the rest of the line;
#   * a typed header `<type>[(scope)][!]: <description>`, optionally followed
#     by any number of paragraphs, each being a blank line and then one or
#     more non-empty lines.
# One trailing line break is optional. There is no end-of-string anchor, so
# text after the matched portion does not prevent a match.
#
# Capture-group numbering is relied upon by `conventional_commit_regex`:
# group 6 (index 5 of `Match.groups()`) is the type keyword and is None for
# the "Initial commit" and "Merge ..." alternatives. Adding or removing a
# parenthesis before it shifts that index.
REGEX = re.compile(
    r"\A(((Initial commit)|(Merge [^\r\n]+)|"
    r"((build|chore|ci|docs|feat|fix|perf|refactor|revert|style|test)(\(\w+\))?!?: [^\r\n]+"
    r"((\r|\n|\r\n)((\r|\n|\r\n)[^\r\n]+)+)*"
    r")"
    r")(\r|\n|\r\n)?)"
)


@Heuristic(Commit)
def conventional_commit_regex(commit: Commit) -> Optional[Labels]:
    """Label a commit from the type keyword of its conventional-commit header.

    Maps "fix", "feat", "refactor" and "docs" to a label; abstains for any
    other matched type and for messages that ``REGEX`` does not match.

    >>> conventional_commit_regex(Commit('a', 'a', '1df23', example1))
    CommitLabel.Feature
    >>> conventional_commit_regex(Commit('a', 'a', '1df23', example2))
    CommitLabel.Refactoring
    >>> conventional_commit_regex(Commit('a', 'a', '1df23', example3))
    CommitLabel.Refactoring
    >>> conventional_commit_regex(Commit('a', 'a', '1df23', example4))
    CommitLabel.DocChange
    >>> conventional_commit_regex(Commit('a', 'a', '1df23', example5))
    CommitLabel.Feature
    >>> conventional_commit_regex(Commit('a', 'a', '1df23', example6))
    CommitLabel.BugFix
    """
    matched = REGEX.match(commit.message.raw)
    if matched is None:
        return None

    # Group 6 of REGEX holds the type keyword; it is None when the message
    # matched the "Initial commit" or "Merge ..." alternative instead.
    commit_type = matched.group(6)
    if commit_type == "fix":
        return l.CommitLabel.BugFix
    if commit_type == "feat":
        return l.CommitLabel.Feature
    if commit_type == "refactor":
        return l.CommitLabel.Refactoring
    if commit_type == "docs":
        return l.CommitLabel.DocChange
    # TODO add more types
    return None
Loading