Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
a09695d
add BugsInPy submodule
t-sorger Nov 24, 2024
c9384d5
add initial BugsInPybug.py
t-sorger Nov 27, 2024
ce48490
add initial BugsInPy.py to benchmark
t-sorger Nov 27, 2024
865975b
add BugsInPy to core utils
t-sorger Dec 7, 2024
e8976c5
add initial tests for BugsInPy; fix typo
t-sorger Dec 7, 2024
9a3325d
add BugsInPy submodule
t-sorger Nov 24, 2024
96d79c5
add initial BugsInPybug.py
t-sorger Nov 27, 2024
83b35cd
add initial BugsInPy.py to benchmark
t-sorger Nov 27, 2024
0cf0179
add BugsInPy to core utils
t-sorger Dec 7, 2024
e09839c
add initial tests for BugsInPy; fix typo
t-sorger Dec 7, 2024
f335bdf
add test implementation for BugsInPybug
t-sorger Jan 14, 2025
2bc479a
fix bin path issues
t-sorger Jan 14, 2025
bd08ec1
lint code
t-sorger Jan 14, 2025
11600a3
rework tests for BugsInPy
t-sorger Jan 14, 2025
1cc7bc6
update submodules
t-sorger Jan 14, 2025
0d28f9d
Merge branch 'BugsInPy' of github.com:ASSERT-KTH/repairbench-framewor…
t-sorger Jan 14, 2025
d3de871
add BugsInPy submodule
t-sorger Nov 24, 2024
56f4502
add initial BugsInPybug.py
t-sorger Nov 27, 2024
8274a8d
add initial BugsInPy.py to benchmark
t-sorger Nov 27, 2024
63f5834
add BugsInPy to core utils
t-sorger Dec 7, 2024
8e761a6
add initial tests for BugsInPy; fix typo
t-sorger Dec 7, 2024
41821d4
add test implementation for BugsInPybug
t-sorger Jan 14, 2025
28e4c9a
fix bin path issues
t-sorger Jan 14, 2025
21420fd
lint code
t-sorger Jan 14, 2025
5962796
rework tests for BugsInPy
t-sorger Jan 14, 2025
ea287fa
update submodules
t-sorger Jan 14, 2025
17c438d
Merge branch 'BugsInPy' of github.com:ASSERT-KTH/repairbench-framewor…
t-sorger Jan 22, 2025
7177e86
adds RichBug and fixes process calls
t-sorger Jan 26, 2025
7a195e0
add checks and fix path issues
t-sorger Jan 26, 2025
1c2f662
fix code and first tests
t-sorger Jan 26, 2025
1845b6d
fix error in tests
t-sorger Jan 26, 2025
f0cfa76
lint code
t-sorger Jan 26, 2025
1c1ea5e
start adding instruct test and new python utils
t-sorger Feb 4, 2025
1e0ffd0
update python.py
t-sorger Feb 9, 2025
edd053f
update Python utils and comment other test cases
t-sorger Feb 25, 2025
c74c397
add InfillingPromptingPython
t-sorger Feb 25, 2025
b679250
update utils for Python
t-sorger Feb 25, 2025
994e21e
add test infilling for BugsInPy codellama
t-sorger Feb 25, 2025
4d3561c
lint files
t-sorger Feb 25, 2025
c583a39
uncomment other infilling tests
t-sorger Feb 25, 2025
779340a
add initial files for language_utils
t-sorger Feb 27, 2025
76272cf
add get_language_utils method
t-sorger Feb 27, 2025
b1e684f
add usage of LanguageUtils for infilling
t-sorger Feb 27, 2025
b72565c
add first docker adoptations
t-sorger Jun 27, 2025
5507ee7
update BugsInPy for Docker
t-sorger Jun 27, 2025
029538a
lint files
t-sorger Jun 27, 2025
04a0fc0
update steup
t-sorger Jun 27, 2025
b629e73
add sample/instruct test for BugsInPy
t-sorger Sep 21, 2025
70e7251
add sample/infilling test for BugsInPy
t-sorger Sep 21, 2025
6dd1290
add evaluation tests for BugsInPy
t-sorger Sep 21, 2025
7c21a6d
add missing tests for RichBug implementation of BugsInPy
t-sorger Sep 22, 2025
4963e5b
remove prints
t-sorger Sep 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,6 @@
[submodule "cache"]
path = cache
url = https://github.com/ASSERT-KTH/elle-elle-aime-cache.git
[submodule "benchmarks/BugsInPy"]
path = benchmarks/BugsInPy
url = https://github.com/ASSERT-KTH/BugsInPy
1 change: 1 addition & 0 deletions benchmarks/BugsInPy
Submodule BugsInPy added at b1f184
2 changes: 1 addition & 1 deletion benchmarks/gitbug-java
Submodule gitbug-java updated 5 files
+27 −80 README.md
+1 −1 gitbug-java
+3 −13 gitbug/bug.py
+277 −501 poetry.lock
+3 −3 pyproject.toml
2 changes: 1 addition & 1 deletion cache
Submodule cache updated from 06cd07 to 0d3f97
162 changes: 162 additions & 0 deletions elleelleaime/core/benchmarks/BugsInPy/BugsInPy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
from pathlib import Path
from typing import Optional
from io import StringIO
from elleelleaime.core.benchmarks.benchmark import Benchmark
from elleelleaime.core.benchmarks.BugsInPy.BugsInPybug import BugsInPyBug
import subprocess
import logging
import re
import pandas as pd


class BugsInPy(Benchmark):
    """
    The class for representing the BugsInPy benchmark.

    Projects, bug IDs and ground-truth patches are read by running commands
    inside the Docker container named ``bugsinpy-container``.
    """

    def __init__(self, path: Path = Path("benchmarks/BugsInPy").absolute()) -> None:
        super().__init__("BugsInPy", path)

    def get_bin(self, options: str = "") -> Optional[str]:
        """
        Returns the path of the ``framework/bin`` directory under the benchmark path.

        ``options`` is accepted but not used by this implementation.
        """
        return str(Path(self.path, "framework/bin/"))

    @staticmethod
    def _docker_exec(*args: str) -> bytes:
        """
        Runs ``args`` inside the container and returns the captured stdout.

        The command is passed as an argument list (no shell), so interpolated
        names cannot be re-interpreted by a shell.

        Raises:
            subprocess.CalledProcessError: if the command exits non-zero.
        """
        run = subprocess.run(
            ["docker", "exec", "bugsinpy-container", *args],
            capture_output=True,
            check=True,
        )
        return run.stdout

    @staticmethod
    def _parse_bug_id(raw: bytes) -> Optional[int]:
        """
        Converts one entry of a ``bugs`` directory listing into a bug ID.

        Returns the ID as an int, or None (after logging a warning) when the
        entry is not made up of ASCII digits only.
        """
        # Decode leniently: an undecodable entry must be skipped, not crash
        # the whole initialization while trying to report it.
        text = raw.decode("utf-8", errors="replace").strip()
        if not (text.isascii() and text.isdigit()):
            logging.warning(f"Skipping invalid bug ID: {text}")
            return None
        return int(text)

    def initialize(self) -> None:
        """
        Initializes the BugsInPy benchmark object by collecting the list of all projects and bugs.

        Bugs whose ``bug_patch.txt`` cannot be read or is empty are skipped
        with a warning.

        Raises:
            subprocess.CalledProcessError: if listing the projects or the bugs
                of a project fails inside the container.
        """
        logging.info("Initializing BugsInPy benchmark...")

        # Get all project names (sorted so bugs are registered in a stable order)
        project_names = sorted(
            {
                name.decode("utf-8")
                for name in self._docker_exec("ls", "/bugsinpy/projects").split()
            }
        )
        logging.info("Found %3d projects", len(project_names))

        # Get all bug IDs for every project
        bugs = {}
        for project_name in project_names:
            listing = self._docker_exec(
                "ls", f"/bugsinpy/projects/{project_name}/bugs"
            )
            parsed_ids = (self._parse_bug_id(entry) for entry in listing.split())
            bugs[project_name] = {bid for bid in parsed_ids if bid is not None}
            logging.info(
                "Found %3d bugs for project %s",
                len(bugs[project_name]),
                project_name,
            )

        # Initialize dataset
        for project_name in project_names:
            for bug_id in sorted(bugs[project_name]):
                # Extract ground truth diff
                diff_path = (
                    f"/bugsinpy/projects/{project_name}/bugs/{bug_id}/bug_patch.txt"
                )
                try:
                    diff = self._docker_exec("cat", diff_path).decode("utf-8")
                except subprocess.CalledProcessError:
                    logging.warning(
                        f"Could not read bug_patch.txt for {project_name}-{bug_id}, skipping..."
                    )
                    continue

                # Skip bugs with empty ground truth
                if not diff.strip():
                    logging.warning(
                        f"Empty ground truth for {project_name}-{bug_id}, skipping..."
                    )
                    continue

                # Failing tests are not collected here; BugsInPyBug extracts
                # them on demand by checking out and running the bug.
                self.add_bug(
                    BugsInPyBug(
                        self,
                        project_name=project_name,
                        bug_id=bug_id,
                        version_id="0",  # 0 buggy -- is this always the case?
                        ground_truth=diff,
                        failing_tests={},
                    )
                )
191 changes: 191 additions & 0 deletions elleelleaime/core/benchmarks/BugsInPy/BugsInPybug.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
import subprocess
import shutil
import re
import os

from elleelleaime.core.benchmarks.benchmark import Benchmark
from elleelleaime.core.benchmarks.bug import RichBug
from elleelleaime.core.benchmarks.test_result import TestResult
from elleelleaime.core.benchmarks.compile_result import CompileResult


class BugsInPyBug(RichBug):
"""
The class for representing BugsInPy bugs
"""

def __init__(
self,
benchmark: Benchmark,
project_name: str,
bug_id: str,
version_id: str, # 1 fixed, 0 buggy
ground_truth: str,
failing_tests: dict[str, str],
) -> None:
self.project_name = project_name
self.bug_id = bug_id
self.version_id = version_id
super().__init__(
benchmark,
f"{project_name}-{bug_id}",
ground_truth,
failing_tests,
ground_truth_inverted=False,
)

def checkout(self, path: str, fixed: bool = False) -> bool:
project_name, bug_id = path.rsplit("-", 1)

# Remove the directory if it exists (inside the container)
subprocess.run(
f"docker exec bugsinpy-container rm -rf /bugsinpy/framework/bin/temp/{project_name}",
shell=True,
capture_output=True,
check=False, # Don't fail if directory doesn't exist
)

# Checkout the bug
checkout_run = subprocess.run(
f"docker exec bugsinpy-container /bugsinpy/framework/bin/bugsinpy-checkout -p {project_name} -v {fixed} -i {bug_id}", # 1 fixed, 0 buggy
shell=True,
capture_output=True,
check=True,
)

# Convert line endings to unix
dos2unix_run = subprocess.run(
f"docker exec bugsinpy-container find /bugsinpy/framework/bin/temp/{project_name} -type f -name '*.py' -print0 | xargs -0 -n 1 -P 4 dos2unix",
shell=True,
capture_output=True,
check=False, # Don't fail if dos2unix has issues
)

return checkout_run.returncode == 0

def compile(self, path: str) -> CompileResult:
project_name, bug_id = path.rsplit("-", 1)
run = subprocess.run(
f"docker exec bugsinpy-container /bugsinpy/framework/bin/bugsinpy-compile -w /bugsinpy/framework/bin/temp/{project_name}",
shell=True,
capture_output=True,
check=True,
)

return CompileResult(run.returncode == 0)

def test(self, path: str) -> TestResult:
project_name, bug_id = path.rsplit("-", 1)

run = subprocess.run(
f"docker exec bugsinpy-container /bugsinpy/framework/bin/bugsinpy-test -w /bugsinpy/framework/bin/temp/{project_name}",
shell=True,
capture_output=True,
check=False,
)

# Decode the output and extract the last line
stdout_lines = run.stdout.decode("utf-8").strip().splitlines()
last_line = stdout_lines[-1] if stdout_lines else ""

success = False
# Check for various success indicators in pytest output
if "OK" in last_line or "passed" in last_line or "PASSED" in last_line:
success = True

return TestResult(success)

def get_src_test_dir(self, path: str) -> str:
project_name, bug_id = path.rsplit("-", 1)
path = f"/bugsinpy/framework/bin/temp/{project_name}/test"

return path

def get_failing_tests(self) -> dict[str, str]:
"""
Gets the failing test cases and their error messages for this bug.
For BugsInPy, this requires running the tests to get the actual failure information.
"""
if not hasattr(self, "_failing_tests") or self._failing_tests is None:
self._failing_tests = self._extract_failing_tests()
return self._failing_tests

def _extract_failing_tests(self) -> dict[str, str]:
"""
Extracts failing test cases by running the tests for the buggy version.
"""
try:
# Checkout buggy version
self.checkout(self.get_identifier(), fixed=False)

# Run tests to get failure information
run = subprocess.run(
f"docker exec bugsinpy-container /bugsinpy/framework/bin/bugsinpy-test -w /bugsinpy/framework/bin/temp/{self.project_name}",
shell=True,
capture_output=True,
check=False,
)

# Parse the test output to extract failing tests
stdout = run.stdout.decode("utf-8")
stderr = run.stderr.decode("utf-8")

failing_tests = {}

# Look for pytest-style failures
import re

# Pattern to match pytest failure format
failure_pattern = r"FAILED\s+([^\s]+)::([^\s]+)\s+-\s+(.*?)(?=\n\s*FAILED|\n\s*ERROR|\n\s*===|\Z)"
matches = re.findall(failure_pattern, stdout + stderr, re.DOTALL)

for test_file, test_method, error_msg in matches:
test_name = f"{test_file}::{test_method}"
failing_tests[test_name] = error_msg.strip()

# If no pytest failures found, try to extract from stderr
if not failing_tests and stderr:
# Look for assertion errors or other test failures
assertion_pattern = r"AssertionError:\s*(.*?)(?=\n|\Z)"
assertion_matches = re.findall(assertion_pattern, stderr)
if assertion_matches:
failing_tests["test_assertion"] = assertion_matches[0]

return failing_tests

except Exception as e:
print(f"Failed to extract failing tests for {self.get_identifier()}: {e}")
return {}

def checkout_fixed(self, path: str, fixed: bool = False) -> bool:
"""
Fixed version of checkout that properly handles the version parameter.
"""
project_name, bug_id = path.rsplit("-", 1)

# Remove the directory if it exists (inside the container)
subprocess.run(
f"docker exec bugsinpy-container rm -rf /bugsinpy/framework/bin/temp/{project_name}",
shell=True,
capture_output=True,
check=False, # Don't fail if directory doesn't exist
)

# Checkout the bug with correct version parameter
version = "1" if fixed else "0" # 1 fixed, 0 buggy
checkout_run = subprocess.run(
f"docker exec bugsinpy-container /bugsinpy/framework/bin/bugsinpy-checkout -p {project_name} -v {version} -i {bug_id}",
shell=True,
capture_output=True,
check=True,
)

# Convert line endings to unix
dos2unix_run = subprocess.run(
f"docker exec bugsinpy-container find /bugsinpy/framework/bin/temp/{project_name} -type f -name '*.py' -print0 | xargs -0 -n 1 -P 4 dos2unix",
shell=True,
capture_output=True,
check=False, # Don't fail if dos2unix has issues
)

return checkout_run.returncode == 0
Empty file.
Loading
Loading