Skip to content
Open
Show file tree
Hide file tree
Changes from 34 commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
a09695d
add BugsInPy submodule
t-sorger Nov 24, 2024
c9384d5
add initial BugsInPybug.py
t-sorger Nov 27, 2024
ce48490
add initial BugsInPy.py to benchmark
t-sorger Nov 27, 2024
865975b
add BugsInPy to core utils
t-sorger Dec 7, 2024
e8976c5
add initial tests for BugsInPy; fix typo
t-sorger Dec 7, 2024
9a3325d
add BugsInPy submodule
t-sorger Nov 24, 2024
96d79c5
add initial BugsInPybug.py
t-sorger Nov 27, 2024
83b35cd
add initial BugsInPy.py to benchmark
t-sorger Nov 27, 2024
0cf0179
add BugsInPy to core utils
t-sorger Dec 7, 2024
e09839c
add initial tests for BugsInPy; fix typo
t-sorger Dec 7, 2024
f335bdf
add test implementation for BugsInPybug
t-sorger Jan 14, 2025
2bc479a
fix bin path issues
t-sorger Jan 14, 2025
bd08ec1
lint code
t-sorger Jan 14, 2025
11600a3
rework tests for BugsInPy
t-sorger Jan 14, 2025
1cc7bc6
update submodules
t-sorger Jan 14, 2025
0d28f9d
Merge branch 'BugsInPy' of github.com:ASSERT-KTH/repairbench-framewor…
t-sorger Jan 14, 2025
d3de871
add BugsInPy submodule
t-sorger Nov 24, 2024
56f4502
add initial BugsInPybug.py
t-sorger Nov 27, 2024
8274a8d
add initial BugsInPy.py to benchmark
t-sorger Nov 27, 2024
63f5834
add BugsInPy to core utils
t-sorger Dec 7, 2024
8e761a6
add initial tests for BugsInPy; fix typo
t-sorger Dec 7, 2024
41821d4
add test implementation for BugsInPybug
t-sorger Jan 14, 2025
28e4c9a
fix bin path issues
t-sorger Jan 14, 2025
21420fd
lint code
t-sorger Jan 14, 2025
5962796
rework tests for BugsInPy
t-sorger Jan 14, 2025
ea287fa
update submodules
t-sorger Jan 14, 2025
17c438d
Merge branch 'BugsInPy' of github.com:ASSERT-KTH/repairbench-framewor…
t-sorger Jan 22, 2025
7177e86
adds RichBug and fixes process calls
t-sorger Jan 26, 2025
7a195e0
add checks and fix path issues
t-sorger Jan 26, 2025
1c2f662
fix code and first tests
t-sorger Jan 26, 2025
1845b6d
fix error in tests
t-sorger Jan 26, 2025
f0cfa76
lint code
t-sorger Jan 26, 2025
1c1ea5e
start adding instruct test and new python utils
t-sorger Feb 4, 2025
1e0ffd0
update python.py
t-sorger Feb 9, 2025
edd053f
update Python utils and comment other test cases
t-sorger Feb 25, 2025
c74c397
add InfillingPromptingPython
t-sorger Feb 25, 2025
b679250
update utils for Python
t-sorger Feb 25, 2025
994e21e
add test infilling for BugsInPy codellama
t-sorger Feb 25, 2025
4d3561c
lint files
t-sorger Feb 25, 2025
c583a39
uncomment other infilling tests
t-sorger Feb 25, 2025
779340a
add initial files for language_utils
t-sorger Feb 27, 2025
76272cf
add get_language_utils method
t-sorger Feb 27, 2025
b1e684f
add usage of LanguageUtils for infilling
t-sorger Feb 27, 2025
b72565c
add first docker adoptations
t-sorger Jun 27, 2025
5507ee7
update BugsInPy for Docker
t-sorger Jun 27, 2025
029538a
lint files
t-sorger Jun 27, 2025
04a0fc0
update steup
t-sorger Jun 27, 2025
b629e73
add sample/instruct test for BugsInPy
t-sorger Sep 21, 2025
70e7251
add sample/infilling test for BugsInPy
t-sorger Sep 21, 2025
6dd1290
add evaluation tests for BugsInPy
t-sorger Sep 21, 2025
7c21a6d
add missing tests for RichBug implementation of BugsInPy
t-sorger Sep 22, 2025
4963e5b
remove prints
t-sorger Sep 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,6 @@
[submodule "cache"]
path = cache
url = https://github.com/ASSERT-KTH/elle-elle-aime-cache.git
[submodule "benchmarks/BugsInPy"]
path = benchmarks/BugsInPy
url = https://github.com/ASSERT-KTH/BugsInPy
1 change: 1 addition & 0 deletions benchmarks/BugsInPy
Submodule BugsInPy added at 38afff
2 changes: 1 addition & 1 deletion benchmarks/gitbug-java
Submodule gitbug-java updated 5 files
+27 −80 README.md
+1 −1 gitbug-java
+3 −13 gitbug/bug.py
+277 −501 poetry.lock
+3 −3 pyproject.toml
2 changes: 1 addition & 1 deletion cache
Submodule cache updated from 06cd07 to 0d3f97
136 changes: 136 additions & 0 deletions elleelleaime/core/benchmarks/BugsInPy/BugsInPy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
from pathlib import Path
from typing import Optional
from io import StringIO
from elleelleaime.core.benchmarks.benchmark import Benchmark
from elleelleaime.core.benchmarks.BugsInPy.BugsInPybug import BugsInPyBug

import subprocess
import logging

# import tqdm
import re

# import os
import pandas as pd


class BugsInPy(Benchmark):
    """
    The class for representing the BugsInPy benchmark.

    Bugs are discovered from the on-disk layout
    ``<path>/projects/<project_name>/bugs/<bug_id>/bug_patch.txt``.
    """

    # NOTE: the default value is evaluated once, when this module is imported,
    # so it is resolved against the working directory at import time.
    def __init__(self, path: Path = Path("benchmarks/BugsInPy").absolute()) -> None:
        super().__init__("BugsInPy", path)

    def get_bin(self, options: str = "") -> Optional[str]:
        """
        Returns the directory that contains the BugsInPy command-line scripts.

        Args:
            options (str): Accepted but currently unused.

        Returns:
            Optional[str]: ``<path>/framework/bin`` as a string. The string has
            no trailing slash, so callers append ``/<script-name>`` themselves.
        """
        return f'{Path(self.path, "framework/bin/")}'

    def initialize(self) -> None:
        """
        Initializes the BugsInPy benchmark object by collecting the list of all projects and bugs.

        Every directory below ``<path>/projects`` is treated as a project and
        every numerically named entry below ``<project>/bugs`` as a bug.
        Entries whose name is not an integer are skipped with a warning.
        Each bug is registered with its ``bug_patch.txt`` as ground truth.

        Raises:
            FileNotFoundError: If the ``projects`` directory, a project's
                ``bugs`` directory or a bug's ``bug_patch.txt`` is missing.
        """
        logging.info("Initializing BugsInPy benchmark...")

        # Resolve everything against self.path so the benchmark also works when
        # it is not located at ./benchmarks/BugsInPy or the cwd has changed.
        projects_dir = Path(self.path, "projects")

        # Sorted so that bugs are registered in the same order on every run.
        project_names = sorted(
            entry.name
            for entry in projects_dir.iterdir()
            if entry.is_dir() and not entry.name.startswith(".")
        )
        logging.info("Found %3d projects", len(project_names))

        for project_name in project_names:
            bugs_dir = projects_dir / project_name / "bugs"

            # Collect the numeric bug ids of this project
            bug_ids = set()
            for entry in bugs_dir.iterdir():
                if entry.name.startswith("."):
                    # Hidden entries were never listed by the former `ls` call
                    continue
                try:
                    bug_ids.add(int(entry.name))
                except ValueError as e:
                    logging.warning(
                        f"Skipping invalid bug ID: {entry.name}. Error: {e}"
                    )

            logging.info(
                "Found %3d bugs for project %s", len(bug_ids), project_name
            )

            for bug_id in sorted(bug_ids):
                # Extract ground truth diff
                diff_path = bugs_dir / str(bug_id) / "bug_patch.txt"
                with open(diff_path, "r", encoding="ISO-8859-1") as diff_file:
                    diff = diff_file.read()

                self.add_bug(
                    BugsInPyBug(
                        self,
                        project_name=project_name,
                        bug_id=bug_id,
                        version_id=0,  # 0 buggy -- is this always the case?
                        ground_truth=diff,
                        failing_tests=None,  # needs to be checked out for this?
                    )
                )
100 changes: 100 additions & 0 deletions elleelleaime/core/benchmarks/BugsInPy/BugsInPybug.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import subprocess
import shutil
import re
import os

from elleelleaime.core.benchmarks.benchmark import Benchmark

# BugsInPyBug is implemented as a `RichBug`
from elleelleaime.core.benchmarks.bug import RichBug
from elleelleaime.core.benchmarks.test_result import TestResult
from elleelleaime.core.benchmarks.compile_result import CompileResult


class BugsInPyBug(RichBug):
    """
    The class for representing BugsInPy bugs

    The ``path`` argument of the methods below is parsed as
    ``<project_name>-<bug_id>`` (split on the last ``-``).
    """

    def __init__(
        self,
        benchmark: Benchmark,
        project_name: str,
        bug_id: str,
        version_id: str,  # 1 fixed, 0 buggy
        ground_truth: str,
        failing_tests: dict[str, str],
    ) -> None:
        self.project_name = project_name
        self.bug_id = bug_id
        self.version_id = version_id
        super().__init__(
            benchmark,
            f"{project_name}-{bug_id}",
            ground_truth,
            failing_tests,
            # ground_truth_inverted=True, # TODO: TypeError: Bug.__init__() got multiple values for argument 'ground_truth_inverted'
        )

    def _work_dir(self, project_name: str) -> str:
        """Returns ``<bin>/temp/<project_name>``, the directory compile/test run in."""
        return f"{self.benchmark.get_bin()}/temp/{project_name}"

    def checkout(self, path: str, fixed: bool = False) -> bool:
        """
        Checks out the buggy or the fixed version of the bug.

        Args:
            path (str): ``<project_name>-<bug_id>``.
            fixed (bool): True for the fixed version, False for the buggy one.

        Returns:
            bool: True if both the checkout and the line-ending conversion
            exited with status 0.

        Raises:
            ValueError: If ``path`` contains no ``-``.
            subprocess.CalledProcessError: If one of the commands fails.
        """
        project_name, bug_id = path.rsplit("-", 1)

        # Remove the directory if it exists
        shutil.rmtree(path, ignore_errors=True)

        # Checkout the bug. int() turns the flag into the 1/0 value documented
        # for -v; formatting the bool directly would emit "True"/"False".
        checkout_run = subprocess.run(
            f"{self.benchmark.get_bin()}/bugsinpy-checkout -p {project_name} -v {int(fixed)} -i {bug_id}",  # 1 fixed, 0 buggy
            shell=True,
            capture_output=True,
            check=True,
        )

        # Convert line endings to unix
        # NOTE(review): `path` is not passed to bugsinpy-checkout, while
        # compile() and test() operate on <bin>/temp/<project_name>; confirm
        # that `path` is really the directory to clean and convert here.
        dos2unix_run = subprocess.run(
            f"find {path} -type f -print0 | xargs -0 -n 1 -P 4 dos2unix",
            shell=True,
            capture_output=True,
            check=True,
        )

        return checkout_run.returncode == 0 and dos2unix_run.returncode == 0

    def compile(self, path: str) -> CompileResult:
        """
        Runs ``bugsinpy-compile`` on the checked-out project.

        Args:
            path (str): ``<project_name>-<bug_id>``.

        Returns:
            CompileResult: Built from whether the command exited with status 0.

        Raises:
            subprocess.CalledProcessError: If the command fails.
        """
        project_name, _ = path.rsplit("-", 1)
        run = subprocess.run(
            f"{self.benchmark.get_bin()}/bugsinpy-compile -w {self._work_dir(project_name)}",
            shell=True,
            capture_output=True,
            check=True,
        )

        return CompileResult(run.returncode == 0)

    def test(self, path: str) -> TestResult:
        """
        Runs ``bugsinpy-test`` on the checked-out project.

        Args:
            path (str): ``<project_name>-<bug_id>``.

        Returns:
            TestResult: Success only if the last line of stdout contains
            ``OK``; any other last line, or no output at all, is a failure.
        """
        project_name, _ = path.rsplit("-", 1)

        run = subprocess.run(
            f"{self.benchmark.get_bin()}/bugsinpy-test -w {self._work_dir(project_name)}",
            shell=True,
            capture_output=True,
            check=False,
        )

        # Decode the output and extract the last line
        stdout_lines = (
            run.stdout.decode("utf-8", errors="replace").strip().splitlines()
        )
        last_line = stdout_lines[-1] if stdout_lines else ""

        # A last line with neither "OK" nor "FAILED" used to leave the result
        # unbound and raise UnboundLocalError; it now counts as a failure.
        return TestResult("OK" in last_line)

    def get_src_test_dir(self, path: str) -> str:
        """
        Returns the ``test`` directory inside the checked-out project.

        Args:
            path (str): ``<project_name>-<bug_id>``.
        """
        project_name, _ = path.rsplit("-", 1)
        return f"{self._work_dir(project_name)}/test"
Empty file.
2 changes: 2 additions & 0 deletions elleelleaime/core/utils/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from elleelleaime.core.benchmarks.humanevaljava.humanevaljava import HumanEvalJava
from elleelleaime.core.benchmarks.quixbugs.quixbugs import QuixBugs
from elleelleaime.core.benchmarks.gitbugjava.gitbugjava import GitBugJava
from elleelleaime.core.benchmarks.BugsInPy.BugsInPy import BugsInPy

from typing import Optional

Expand All @@ -11,6 +12,7 @@
"HumanEvalJava": HumanEvalJava,
"QuixBugs": QuixBugs,
"GitBugJava": GitBugJava,
"BugsInPy": BugsInPy,
}


Expand Down
75 changes: 75 additions & 0 deletions elleelleaime/core/utils/python/python.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from typing import Optional, Tuple, List
from unidiff import PatchSet
from uuid import uuid4
import uuid
from pathlib import Path
import logging
import getpass, tempfile, difflib, shutil
import subprocess
import re
import ast

from elleelleaime.core.benchmarks.bug import Bug, RichBug


def extract_functions(source_code):
    """
    Returns the source text of the top-level ``def`` statements of a module.

    Only ``ast.FunctionDef`` nodes found directly in the module body are
    considered; methods, nested functions and ``async def`` are not included.

    Args:
        source_code (str): Python source code of a whole module

    Returns:
        list: One source segment per top-level function, in order of appearance
    """
    module = ast.parse(source_code)

    snippets = []
    for statement in module.body:
        if isinstance(statement, ast.FunctionDef):
            # Slice the exact text of the definition out of the original source
            snippets.append(ast.get_source_segment(source_code, statement))

    return snippets


def extract_single_function(
    bug: Bug, *, target_file: str = "test/test_aes.py"
) -> Optional[Tuple[str, str]]:
    """
    Extracts the buggy and fixed code of single-function bugs.
    Returns None if the bug is not single-function.

    Both versions of the bug are checked out and compiled one after the other,
    and ``target_file`` is read from each checkout. The two versions are
    accepted when they define the same number of top-level functions.

    Args:
        bug (Bug): The bug to extract the code from
        target_file (str): File to read, relative to the checked-out project.
            The default is the file name this function used to hard-code.
            NOTE(review): presumably a placeholder; TODO derive the file from
            the bug's ground-truth patch.

    Returns:
        Optional[Tuple[str, str]]: None if the number of top-level functions
        differs between the two versions, otherwise a tuple of the form
        (buggy_code, fixed_code) holding the full text of ``target_file``
    """
    identifier = bug.get_identifier()
    project_name, _ = identifier.rsplit("-", 1)
    work_dir = Path("./benchmarks/BugsInPy/framework/bin/temp", project_name)
    target_path = work_dir / target_file

    def read_version(fixed: int) -> str:
        # `fixed` stays an int (1 fixed, 0 buggy), exactly as it was passed to
        # checkout before.
        bug.checkout(identifier, fixed=fixed)
        bug.compile(identifier)
        with open(target_path, encoding="utf-8") as f:
            return f.read()

    # Checkout the buggy version of the bug
    buggy_code = read_version(0)
    # Checkout the fixed version of the bug
    fixed_code = read_version(1)

    # The checked-out project is left on disk; cleanup is currently disabled.

    buggy_functions = extract_functions(buggy_code)
    fixed_functions = extract_functions(fixed_code)

    # Explicit check instead of `assert`, which is stripped under `python -O`
    if len(buggy_functions) != len(fixed_functions):
        return None

    return buggy_code, fixed_code
2 changes: 2 additions & 0 deletions elleelleaime/sample/registry.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from .strategy import PromptingStrategy
from .strategies.infilling import InfillingPrompting
from .strategies.instruct import InstructPrompting
from .strategies.instruct_python import InstructPromptingPython


class PromptStrategyRegistry:
Expand All @@ -11,6 +12,7 @@ class PromptStrategyRegistry:
__STRATEGIES: dict[str, type] = {
"infilling": InfillingPrompting,
"instruct": InstructPrompting,
"instruct_python": InstructPromptingPython,
}

@classmethod
Expand Down
Loading