ASSERT-KTH · t-sorger · Nov 24, 2024 · Nov 27, 2024 · Nov 27, 2024 · Dec 7, 2024
diff --git a/.gitmodules b/.gitmodules
@@ -13,3 +13,6 @@
 [submodule "cache"]
 	path = cache
 	url = https://github.com/ASSERT-KTH/elle-elle-aime-cache.git
+[submodule "benchmarks/BugsInPy"]
+	path = benchmarks/BugsInPy
+	url = https://github.com/ASSERT-KTH/BugsInPy
diff --git a/benchmarks/BugsInPy b/benchmarks/BugsInPy
diff --git a/benchmarks/gitbug-java b/benchmarks/gitbug-java
diff --git a/cache b/cache
diff --git a/elleelleaime/core/benchmarks/BugsInPy/BugsInPy.py b/elleelleaime/core/benchmarks/BugsInPy/BugsInPy.py
@@ -0,0 +1,162 @@
+from pathlib import Path
+from typing import Optional
+from io import StringIO
+from elleelleaime.core.benchmarks.benchmark import Benchmark
+from elleelleaime.core.benchmarks.BugsInPy.BugsInPybug import BugsInPyBug
+import subprocess
+import logging
+import re
+import pandas as pd
+
+
+class BugsInPy(Benchmark):
+    """
+    The class for representing the BugsInPy benchmark.
+
+    The benchmark data is read through ``docker exec`` calls against a
+    container named ``bugsinpy-container``, which must already be running
+    when :meth:`initialize` is called.
+    """
+
+    # Name of the Docker container every command is executed in.
+    _CONTAINER = "bugsinpy-container"
+    # Directory (inside the container) holding one sub-directory per project.
+    _PROJECTS_DIR = "/bugsinpy/projects"
+
+    def __init__(self, path: Path = Path("benchmarks/BugsInPy").absolute()) -> None:
+        super().__init__("BugsInPy", path)
+
+    def get_bin(self, options: str = "") -> Optional[str]:
+        """
+        Returns the ``framework/bin`` directory of the benchmark as a string.
+
+        ``options`` is accepted but not used. The result carries no trailing
+        slash because ``pathlib`` normalizes it away.
+        """
+        return f'{Path(self.path, "framework/bin/")}'
+
+    def initialize(self) -> None:
+        """
+        Initializes the BugsInPy benchmark object by collecting the list of all projects and bugs.
+
+        Bugs whose ``bug_patch.txt`` cannot be read or is empty are skipped
+        with a warning. Projects and bugs are registered in sorted order so
+        that the resulting bug list is deterministic.
+
+        Raises:
+            subprocess.CalledProcessError: if listing the projects, or the
+                bugs of a project, fails inside the container.
+        """
+        logging.info("Initializing BugsInPy benchmark...")
+
+        # Get all project names
+        project_names = sorted(set(self._list_container_dir(self._PROJECTS_DIR)))
+        logging.info("Found %3d projects", len(project_names))
+
+        # Get all bug ids for every project
+        bugs = {}
+        for project_name in project_names:
+            entries = self._list_container_dir(
+                f"{self._PROJECTS_DIR}/{project_name}/bugs"
+            )
+            bugs[project_name] = self._parse_bug_ids(entries)
+            logging.info(
+                "Found %3d bugs for project %s",
+                len(bugs[project_name]),
+                project_name,
+            )
+
+        # Initialize dataset
+        for project_name in project_names:
+            for bug_id in sorted(bugs[project_name]):
+                diff = self._read_ground_truth(project_name, bug_id)
+                if diff is None:
+                    continue
+
+                self.add_bug(
+                    BugsInPyBug(
+                        self,
+                        project_name=project_name,
+                        bug_id=bug_id,
+                        # TODO: confirm that version 0 is always the buggy one.
+                        version_id="0",
+                        ground_truth=diff,
+                        # Failing tests are only known after a checkout and a
+                        # test run, so they are not filled in here.
+                        failing_tests={},
+                    )
+                )
+
+    def _docker_exec(self, *args: str) -> subprocess.CompletedProcess:
+        """
+        Runs ``args`` inside the benchmark container and returns the result.
+
+        The command is passed as an argument list (no shell), so project and
+        bug names are never interpreted by a shell on the host.
+
+        Raises:
+            subprocess.CalledProcessError: if the command exits non-zero.
+        """
+        return subprocess.run(
+            ["docker", "exec", self._CONTAINER, *args],
+            capture_output=True,
+            check=True,
+        )
+
+    def _list_container_dir(self, directory: str) -> list[str]:
+        """
+        Returns the whitespace-separated entries ``ls`` prints for ``directory``.
+        """
+        run = self._docker_exec("ls", directory)
+        # Undecodable bytes are replaced rather than raised so that one odd
+        # file name cannot abort the whole initialization.
+        return [
+            entry.decode("utf-8", errors="replace") for entry in run.stdout.split()
+        ]
+
+    @staticmethod
+    def _parse_bug_ids(entries: list[str]) -> set[int]:
+        """
+        Keeps the entries that are plain decimal numbers and returns them as ints.
+
+        Anything else (files with extensions, backup files, ...) is skipped
+        with a warning.
+        """
+        bug_ids = set()
+        for entry in entries:
+            # isascii() guards against digits such as superscripts, which
+            # satisfy isdigit() but are rejected by int().
+            if not (entry.isascii() and entry.isdigit()):
+                logging.warning("Skipping invalid bug ID: %s", entry)
+                continue
+            bug_ids.add(int(entry))
+        return bug_ids
+
+    def _read_ground_truth(self, project_name: str, bug_id: int) -> Optional[str]:
+        """
+        Returns the content of the bug's ``bug_patch.txt``.
+
+        Returns ``None`` (after logging a warning) when the file cannot be
+        read or contains only whitespace.
+        """
+        diff_path = f"{self._PROJECTS_DIR}/{project_name}/bugs/{bug_id}/bug_patch.txt"
+        try:
+            run = self._docker_exec("cat", diff_path)
+        except subprocess.CalledProcessError:
+            logging.warning(
+                "Could not read bug_patch.txt for %s-%s, skipping...",
+                project_name,
+                bug_id,
+            )
+            return None
+
+        diff = run.stdout.decode("utf-8")
+        if not diff.strip():
+            logging.warning(
+                "Empty ground truth for %s-%s, skipping...", project_name, bug_id
+            )
+            return None
+        return diff
diff --git a/elleelleaime/core/benchmarks/BugsInPy/BugsInPybug.py b/elleelleaime/core/benchmarks/BugsInPy/BugsInPybug.py
@@ -0,0 +1,191 @@
+import logging
+import os
+import re
+import shutil
+import subprocess
+
+from elleelleaime.core.benchmarks.benchmark import Benchmark
+from elleelleaime.core.benchmarks.bug import RichBug
+from elleelleaime.core.benchmarks.test_result import TestResult
+from elleelleaime.core.benchmarks.compile_result import CompileResult
+
+
+class BugsInPyBug(RichBug):
+    """
+    The class for representing BugsInPy bugs
+
+    Every operation runs inside the ``bugsinpy-container`` Docker container.
+    A checked-out project lives in ``/bugsinpy/framework/bin/temp/<project>``
+    inside that container.
+
+    NOTE(review): the ``path`` argument of the methods below is parsed as
+    ``"<project_name>-<bug_id>"`` (the bug identifier), not as a filesystem
+    path -- confirm that all callers pass it in that form.
+    """
+
+    # Name of the Docker container every command is executed in.
+    _CONTAINER = "bugsinpy-container"
+    # Directory (inside the container) holding the BugsInPy scripts.
+    _BIN_DIR = "/bugsinpy/framework/bin"
+
+    # pytest short-summary line: "FAILED <file>::<test> - <message>".
+    _PYTEST_FAILURE_RE = re.compile(
+        r"FAILED\s+([^\s]+)::([^\s]+)\s+-\s+(.*?)(?=\n\s*FAILED|\n\s*ERROR|\n\s*===|\Z)",
+        re.DOTALL,
+    )
+    # First line of an assertion message, used when no pytest summary exists.
+    _ASSERTION_RE = re.compile(r"AssertionError:\s*(.*?)(?=\n|\Z)")
+    # Words in the final summary line that mean the run did not pass.
+    # Word boundaries keep "xfailed" (expected failure) from matching.
+    _FAILURE_WORD_RE = re.compile(r"\b(?:failed|errors?)\b", re.IGNORECASE)
+
+    def __init__(
+        self,
+        benchmark: Benchmark,
+        project_name: str,
+        bug_id: str,
+        version_id: str,  # 1 fixed, 0 buggy
+        ground_truth: str,
+        failing_tests: dict[str, str],
+    ) -> None:
+        self.project_name = project_name
+        self.bug_id = bug_id
+        self.version_id = version_id
+        super().__init__(
+            benchmark,
+            f"{project_name}-{bug_id}",
+            ground_truth,
+            failing_tests,
+            ground_truth_inverted=False,
+        )
+
+    @staticmethod
+    def _split_identifier(path: str) -> tuple[str, str]:
+        """
+        Splits ``"<project_name>-<bug_id>"`` at the last hyphen.
+
+        Raises:
+            ValueError: if ``path`` contains no hyphen.
+        """
+        project_name, bug_id = path.rsplit("-", 1)
+        return project_name, bug_id
+
+    @classmethod
+    def _work_dir(cls, project_name: str) -> str:
+        """Returns the in-container checkout directory of ``project_name``."""
+        return f"{cls._BIN_DIR}/temp/{project_name}"
+
+    @classmethod
+    def _docker_exec(cls, *args: str, check: bool) -> subprocess.CompletedProcess:
+        """
+        Runs ``args`` inside the benchmark container and returns the result.
+
+        The command is passed as an argument list (no shell), so nothing in
+        ``args`` is interpreted by a shell on the host.
+
+        Raises:
+            subprocess.CalledProcessError: if ``check`` is true and the
+                command exits non-zero.
+        """
+        return subprocess.run(
+            ["docker", "exec", cls._CONTAINER, *args],
+            capture_output=True,
+            check=check,
+        )
+
+    def checkout(self, path: str, fixed: bool = False) -> bool:
+        """
+        Checks out the buggy (default) or fixed version of the bug.
+
+        Any previous checkout of the same project is removed first, and the
+        line endings of the checked-out ``*.py`` files are converted to unix
+        style afterwards (best effort).
+
+        Returns:
+            True when the checkout command succeeded.
+
+        Raises:
+            subprocess.CalledProcessError: if the checkout command fails.
+        """
+        project_name, bug_id = self._split_identifier(path)
+        work_dir = self._work_dir(project_name)
+
+        # Remove the directory if it exists (inside the container);
+        # a missing directory is not an error.
+        self._docker_exec("rm", "-rf", work_dir, check=False)
+
+        # bugsinpy-checkout expects the version as 1 (fixed) or 0 (buggy),
+        # not as a Python boolean.
+        version = "1" if fixed else "0"
+        checkout_run = self._docker_exec(
+            f"{self._BIN_DIR}/bugsinpy-checkout",
+            "-p",
+            project_name,
+            "-v",
+            version,
+            "-i",
+            bug_id,
+            check=True,
+        )
+
+        # Convert line endings to unix. The whole command runs inside the
+        # container, where the files are; problems with dos2unix are ignored.
+        self._docker_exec(
+            "find",
+            work_dir,
+            "-type",
+            "f",
+            "-name",
+            "*.py",
+            "-exec",
+            "dos2unix",
+            "{}",
+            "+",
+            check=False,
+        )
+
+        return checkout_run.returncode == 0
+
+    def compile(self, path: str) -> CompileResult:
+        """
+        Runs ``bugsinpy-compile`` on the checked-out project.
+
+        A failing compile is reported through the returned result instead of
+        an exception, in line with :meth:`test`.
+        """
+        project_name, _ = self._split_identifier(path)
+        run = self._docker_exec(
+            f"{self._BIN_DIR}/bugsinpy-compile",
+            "-w",
+            self._work_dir(project_name),
+            check=False,
+        )
+
+        return CompileResult(run.returncode == 0)
+
+    def test(self, path: str) -> TestResult:
+        """
+        Runs ``bugsinpy-test`` on the checked-out project.
+
+        Success is decided from the last line of the test output.
+        """
+        project_name, _ = self._split_identifier(path)
+
+        run = self._docker_exec(
+            f"{self._BIN_DIR}/bugsinpy-test",
+            "-w",
+            self._work_dir(project_name),
+            check=False,
+        )
+
+        # Test output may contain arbitrary bytes; never fail on decoding.
+        stdout_lines = (
+            run.stdout.decode("utf-8", errors="replace").strip().splitlines()
+        )
+        last_line = stdout_lines[-1] if stdout_lines else ""
+
+        return TestResult(self._summary_indicates_success(last_line))
+
+    @classmethod
+    def _summary_indicates_success(cls, summary_line: str) -> bool:
+        """
+        Decides from the final summary line whether a test run passed.
+
+        A line such as "1 failed, 2 passed" mentions passing tests but is
+        still a failed run, so failure words take precedence.
+        """
+        if cls._FAILURE_WORD_RE.search(summary_line):
+            return False
+        return "OK" in summary_line or "passed" in summary_line.lower()
+
+    def get_src_test_dir(self, path: str) -> str:
+        """Returns the in-container ``test`` directory of the checked-out project."""
+        project_name, _ = self._split_identifier(path)
+        return f"{self._work_dir(project_name)}/test"
+
+    def get_failing_tests(self) -> dict[str, str]:
+        """
+        Gets the failing test cases and their error messages for this bug.
+        For BugsInPy, this requires running the tests to get the actual failure information.
+
+        The result is computed on first use and cached on the instance.
+        """
+        if getattr(self, "_failing_tests", None) is None:
+            self._failing_tests = self._extract_failing_tests()
+        return self._failing_tests
+
+    def _extract_failing_tests(self) -> dict[str, str]:
+        """
+        Extracts failing test cases by running the tests for the buggy version.
+
+        Returns a mapping from test name to error message. This is a best
+        effort: on any error a warning is logged and an empty dict returned.
+        """
+        try:
+            # Checkout buggy version
+            self.checkout(self.get_identifier(), fixed=False)
+
+            # Run tests to get failure information
+            run = self._docker_exec(
+                f"{self._BIN_DIR}/bugsinpy-test",
+                "-w",
+                self._work_dir(self.project_name),
+                check=False,
+            )
+
+            stdout = run.stdout.decode("utf-8", errors="replace")
+            stderr = run.stderr.decode("utf-8", errors="replace")
+
+            # Look for pytest-style failures in both streams
+            failing_tests = {
+                f"{test_file}::{test_method}": error_msg.strip()
+                for test_file, test_method, error_msg in self._PYTEST_FAILURE_RE.findall(
+                    stdout + stderr
+                )
+            }
+
+            # If no pytest failures were found, fall back to the first
+            # assertion error reported on stderr.
+            if not failing_tests and stderr:
+                assertion_match = self._ASSERTION_RE.search(stderr)
+                if assertion_match:
+                    failing_tests["test_assertion"] = assertion_match.group(1)
+
+            return failing_tests
+
+        except Exception as e:
+            logging.warning(
+                "Failed to extract failing tests for %s: %s",
+                self.get_identifier(),
+                e,
+            )
+            return {}
+
+    def checkout_fixed(self, path: str, fixed: bool = False) -> bool:
+        """
+        Checks out the bug, same as :meth:`checkout`.
+
+        Kept for backward compatibility: ``checkout`` now passes the version
+        parameter correctly itself, so this simply delegates to it.
+        """
+        return self.checkout(path, fixed=fixed)
diff --git a/elleelleaime/core/benchmarks/BugsInPy/__init__.py b/elleelleaime/core/benchmarks/BugsInPy/__init__.py
+27 −80		README.md
+1 −1		gitbug-java
+3 −13		gitbug/bug.py
+277 −501		poetry.lock
+3 −3		pyproject.toml