ASSERT-KTH · t-sorger · Nov 24, 2024 · Nov 27, 2024 · Nov 27, 2024 · Dec 7, 2024
diff --git a/.gitmodules b/.gitmodules
@@ -13,3 +13,6 @@
 [submodule "cache"]
 	path = cache
 	url = https://github.com/ASSERT-KTH/elle-elle-aime-cache.git
+[submodule "benchmarks/BugsInPy"]
+	path = benchmarks/BugsInPy
+	url = https://github.com/ASSERT-KTH/BugsInPy
diff --git a/benchmarks/BugsInPy b/benchmarks/BugsInPy
diff --git a/benchmarks/gitbug-java b/benchmarks/gitbug-java
diff --git a/cache b/cache
diff --git a/elleelleaime/core/benchmarks/BugsInPy/BugsInPy.py b/elleelleaime/core/benchmarks/BugsInPy/BugsInPy.py
@@ -0,0 +1,136 @@
+from pathlib import Path
+from typing import Optional
+from io import StringIO
+from elleelleaime.core.benchmarks.benchmark import Benchmark
+from elleelleaime.core.benchmarks.BugsInPy.BugsInPybug import BugsInPyBug
+
+import subprocess
+import logging
+
+# import tqdm
+import re
+
+# import os
+import pandas as pd
+
+
+class BugsInPy(Benchmark):
+    """
+    The class for representing the BugsInPy benchmark.
+
+    Bugs are read from `<path>/projects/<project>/bugs/<bug id>/` and the
+    framework scripts are taken from `<path>/framework/bin/`.
+    """
+
+    def __init__(self, path: Path = Path("benchmarks/BugsInPy").absolute()) -> None:
+        super().__init__("BugsInPy", path)
+
+    def get_bin(self, options: str = "") -> Optional[str]:
+        """
+        Returns the directory that holds the BugsInPy framework scripts.
+
+        Args:
+            options (str): Currently unused.
+
+        Returns:
+            Optional[str]: `<path>/framework/bin` as a string (no trailing slash)
+        """
+        return f'{Path(self.path, "framework/bin/")}'
+
+    def _list_bug_ids(self, project_name: str) -> set:
+        """
+        Collects the numeric bug ids found under `projects/<project_name>/bugs`.
+
+        Hidden entries are ignored; any other entry whose name is not an
+        integer is skipped with a warning.
+        """
+        bug_ids = set()
+        for entry in Path(self.path, "projects", project_name, "bugs").iterdir():
+            if entry.name.startswith("."):
+                continue
+            try:
+                bug_ids.add(int(entry.name))
+            except ValueError as e:
+                logging.warning(
+                    f"Skipping invalid bug ID: {entry.name}. Error: {str(e)}"
+                )
+        return bug_ids
+
+    def initialize(self) -> None:
+        """
+        Initializes the BugsInPy benchmark object by collecting the list of all projects and bugs.
+
+        Every bug is registered with its ground truth diff (`bug_patch.txt`).
+        Failing tests are not collected here because that requires checking
+        the bug out, which is done by `BugsInPyBug`.
+
+        Raises:
+            OSError: If the projects directory, a project's `bugs` directory or
+                a bug's `bug_patch.txt` cannot be read.
+        """
+        logging.info("Initializing BugsInPy benchmark...")
+
+        # Get all project names (sorted so bugs are always registered in the same order)
+        projects_dir = Path(self.path, "projects")
+        project_names = sorted(
+            entry.name
+            for entry in projects_dir.iterdir()
+            if entry.is_dir() and not entry.name.startswith(".")
+        )
+        logging.info("Found %3d projects", len(project_names))
+
+        for project_name in project_names:
+            bug_ids = self._list_bug_ids(project_name)
+            logging.info(
+                "Found %3d bugs for project %s", len(bug_ids), project_name
+            )
+
+            for bug_id in sorted(bug_ids):
+                # Extract ground truth diff. The location is resolved against
+                # the benchmark path, so a non-default `path` or a different
+                # working directory still finds the patch.
+                diff_path = Path(
+                    projects_dir, project_name, "bugs", str(bug_id), "bug_patch.txt"
+                )
+                with open(diff_path, "r", encoding="ISO-8859-1") as diff_file:
+                    diff = diff_file.read()
+
+                self.add_bug(
+                    BugsInPyBug(
+                        self,
+                        project_name=project_name,
+                        bug_id=bug_id,
+                        version_id=0,  # 0 buggy -- is this always the case?
+                        ground_truth=diff,
+                        failing_tests=None,  # needs to be checked out for this?
+                    )
+                )
diff --git a/elleelleaime/core/benchmarks/BugsInPy/BugsInPybug.py b/elleelleaime/core/benchmarks/BugsInPy/BugsInPybug.py
@@ -0,0 +1,100 @@
+import subprocess
+import shutil
+import re
+import os
+
+from elleelleaime.core.benchmarks.benchmark import Benchmark
+
+# TODO: Implement as `RichBug` later on
+from elleelleaime.core.benchmarks.bug import RichBug
+from elleelleaime.core.benchmarks.test_result import TestResult
+from elleelleaime.core.benchmarks.compile_result import CompileResult
+
+
+class BugsInPyBug(RichBug):
+    """
+    The class for representing BugsInPy bugs
+    """
+
+    def __init__(
+        self,
+        benchmark: Benchmark,
+        project_name: str,
+        bug_id: str,
+        version_id: str,  # 1 fixed, 0 buggy
+        ground_truth: str,
+        failing_tests: dict[str, str],
+    ) -> None:
+        """
+        Stores the project name, bug id and version id, and registers the bug
+        under the identifier `<project_name>-<bug_id>`.
+        """
+        self.project_name = project_name
+        self.bug_id = bug_id
+        self.version_id = version_id
+        super().__init__(
+            benchmark,
+            f"{project_name}-{bug_id}",
+            ground_truth,
+            failing_tests,
+            # ground_truth_inverted=True, # TODO: TypeError: Bug.__init__() got multiple values for argument 'ground_truth_inverted'
+        )
+
+    def _work_dir(self) -> str:
+        """
+        Returns the directory handed to the compile/test scripts via `-w`.
+        """
+        return f"{self.benchmark.get_bin()}/temp/{self.project_name}"
+
+    def checkout(self, path: str, fixed: bool = False) -> bool:
+        """
+        Checks out the buggy (default) or fixed version of this bug.
+
+        Args:
+            path (str): Directory that is removed before the checkout and
+                whose files are converted to unix line endings afterwards.
+            fixed (bool): Whether to check out the fixed version.
+
+        Returns:
+            bool: True if both the checkout and the line-ending conversion succeeded.
+
+        Raises:
+            subprocess.CalledProcessError: If one of the commands exits with a non-zero status.
+        """
+        # Remove the directory if it exists
+        shutil.rmtree(path, ignore_errors=True)
+
+        # The script takes 1 (fixed) or 0 (buggy); interpolating the bool
+        # directly would produce "True"/"False" instead.
+        version = int(fixed)
+
+        # Checkout the bug. The project and bug id come from the bug itself
+        # rather than being parsed back out of `path`, which only works when
+        # `path` happens to equal the bug identifier.
+        checkout_run = subprocess.run(
+            f"{self.benchmark.get_bin()}/bugsinpy-checkout -p {self.project_name} -v {version} -i {self.bug_id}",
+            shell=True,
+            capture_output=True,
+            check=True,
+        )
+
+        # Convert line endings to unix
+        # NOTE(review): no `-w` is passed to the checkout while compile/test
+        # use `<bin>/temp/<project>` -- confirm `path` is where the checkout lands.
+        dos2unix_run = subprocess.run(
+            f"find {path} -type f -print0 | xargs -0 -n 1 -P 4 dos2unix",
+            shell=True,
+            capture_output=True,
+            check=True,
+        )
+
+        return checkout_run.returncode == 0 and dos2unix_run.returncode == 0
+
+    def compile(self, path: str) -> CompileResult:
+        """
+        Runs `bugsinpy-compile` on the project's working directory.
+
+        Args:
+            path (str): Unused; the working directory is derived from the project name.
+
+        Raises:
+            subprocess.CalledProcessError: If the compile script exits with a non-zero status.
+        """
+        run = subprocess.run(
+            f"{self.benchmark.get_bin()}/bugsinpy-compile -w {self._work_dir()}",
+            shell=True,
+            capture_output=True,
+            check=True,
+        )
+
+        return CompileResult(run.returncode == 0)
+
+    def test(self, path: str) -> TestResult:
+        """
+        Runs `bugsinpy-test` on the project's working directory.
+
+        The run counts as successful only when the last line of the script's
+        standard output contains "OK". Any other outcome, including empty
+        output, is reported as a failure.
+
+        Args:
+            path (str): Unused; the working directory is derived from the project name.
+        """
+        run = subprocess.run(
+            f"{self.benchmark.get_bin()}/bugsinpy-test -w {self._work_dir()}",
+            shell=True,
+            capture_output=True,
+            check=False,
+        )
+
+        # Decode the output and extract the last line
+        stdout_lines = run.stdout.decode("utf-8").strip().splitlines()
+        last_line = stdout_lines[-1] if stdout_lines else ""
+
+        # Defaulting to failure avoids an unbound `success` when the last
+        # line holds neither "OK" nor "FAILED" (e.g. the script crashed).
+        success = "OK" in last_line
+
+        return TestResult(success)
+
+    def get_src_test_dir(self, path: str) -> str:
+        """
+        Returns the `test` directory inside the project's working directory.
+
+        Args:
+            path (str): Unused; the working directory is derived from the project name.
+        """
+        return f"{self._work_dir()}/test"
diff --git a/elleelleaime/core/benchmarks/BugsInPy/__init__.py b/elleelleaime/core/benchmarks/BugsInPy/__init__.py
diff --git a/elleelleaime/core/utils/benchmarks.py b/elleelleaime/core/utils/benchmarks.py
@@ -3,6 +3,7 @@
 from elleelleaime.core.benchmarks.humanevaljava.humanevaljava import HumanEvalJava
 from elleelleaime.core.benchmarks.quixbugs.quixbugs import QuixBugs
 from elleelleaime.core.benchmarks.gitbugjava.gitbugjava import GitBugJava
+from elleelleaime.core.benchmarks.BugsInPy.BugsInPy import BugsInPy
 
 from typing import Optional
 
@@ -11,6 +12,7 @@
     "HumanEvalJava": HumanEvalJava,
     "QuixBugs": QuixBugs,
     "GitBugJava": GitBugJava,
+    "BugsInPy": BugsInPy,
 }
 
 

diff --git a/elleelleaime/core/utils/python/python.py b/elleelleaime/core/utils/python/python.py
@@ -0,0 +1,75 @@
+from typing import Optional, Tuple, List
+from unidiff import PatchSet
+from uuid import uuid4
+import uuid
+from pathlib import Path
+import logging
+import getpass, tempfile, difflib, shutil
+import subprocess
+import re
+import ast
+
+from elleelleaime.core.benchmarks.bug import Bug, RichBug
+
+
+def extract_functions(source_code):
+    """
+    Returns the source text of every top-level function in `source_code`.
+
+    Only plain `def` statements directly in the module body are collected,
+    in the order in which they appear.
+    """
+    function_sources = []
+    for node in ast.parse(source_code).body:
+        if isinstance(node, ast.FunctionDef):
+            # Slice the original text back out for this definition
+            function_sources.append(ast.get_source_segment(source_code, node))
+
+    return function_sources
+
+
+def extract_single_function(bug: Bug) -> Optional[Tuple[str, str]]:
+    """
+    Extracts the buggy and fixed code of single-function bugs.
+    Returns None if the bug is not single-function.
+
+    The bug is checked out and compiled twice (buggy, then fixed), and the
+    same file is read after each checkout. The bug is treated as not
+    single-function when the two versions do not define the same number of
+    top-level functions.
+
+    Args:
+        bug (Bug): The bug to extract the code from
+
+    Returns:
+        Optional[Tuple[str, str]]: None if the bug is not single-function, otherwise a tuple of the form (buggy_code, fixed_code)
+    """
+    project_name, _ = bug.get_identifier().rsplit("-", 1)
+    path = Path("./benchmarks/BugsInPy/projects", project_name)
+    logging.debug("path=%r", path)
+
+    # TODO: the inspected file is hard-coded; it should be derived from the bug
+    path_bin = f"./benchmarks/BugsInPy/framework/bin/temp/{project_name}"
+    target_file = Path(path_bin, "test", "test_aes.py")
+
+    # Checkout the buggy version of the bug
+    bug.checkout(bug.get_identifier(), fixed=0)
+    bug.compile(bug.get_identifier())
+    with open(target_file) as f:
+        buggy_code = f.read()
+
+    # Checkout the fixed version of the bug
+    bug.checkout(bug.get_identifier(), fixed=1)
+    bug.compile(bug.get_identifier())
+    with open(target_file) as f:
+        fixed_code = f.read()
+
+    buggy_functions = extract_functions(buggy_code)
+    fixed_functions = extract_functions(fixed_code)
+
+    # An explicit check instead of `assert`, which is stripped under `-O`
+    if len(buggy_functions) != len(fixed_functions):
+        logging.warning(
+            "Function count differs between buggy and fixed version of %s",
+            bug.get_identifier(),
+        )
+        return None
+
+    # TODO: remove the checked-out bug (`path_bin`) once extraction is final
+    return buggy_code, fixed_code
diff --git a/elleelleaime/sample/registry.py b/elleelleaime/sample/registry.py
@@ -1,6 +1,7 @@
 from .strategy import PromptingStrategy
 from .strategies.infilling import InfillingPrompting
 from .strategies.instruct import InstructPrompting
+from .strategies.instruct_python import InstructPromptingPython
 
 
 class PromptStrategyRegistry:
@@ -11,6 +12,7 @@ class PromptStrategyRegistry:
     __STRATEGIES: dict[str, type] = {
         "infilling": InfillingPrompting,
         "instruct": InstructPrompting,
+        "instruct_python": InstructPromptingPython,
     }
 
     @classmethod
+27 −80		README.md
+1 −1		gitbug-java
+3 −13		gitbug/bug.py
+277 −501		poetry.lock
+3 −3		pyproject.toml