diff --git a/README.md b/README.md index ff9a2c9..5e3a4ad 100644 --- a/README.md +++ b/README.md @@ -176,11 +176,20 @@ python enterprise_cred_detections.py -o org_name #Ex: python enterprise_c python enterprise_cred_detections.py -r org_name/repo_name #Ex: python enterprise_cred_detections.py -r test_org/public_docker ``` +##### Command to Run Enterprise Credentials Scanner for a specific branch + +``` +# Run for a specific branch, +python enterprise_cred_detections.py -r org_name/repo_name -b branch_name #Ex: python enterprise_cred_detections.py -r test_org/public_docker -b develop +``` + +> **Note:** If the specified branch does not exist in the repo, the scanner will exit with an error. + ##### Command-Line Arguments for Credential Scanner ``` Run usage: -enterprise_cred_detections.py [-h] [-s Secondary Keywords] [-e Extensions] [-m Ml prediction] [-u Unmask Secret] [-o org_name] [-r repo_name] [-l Logger Level] [-c Console Logging] +enterprise_cred_detections.py [-h] [-s Secondary Keywords] [-e Extensions] [-m Ml prediction] [-u Unmask Secret] [-o org_name] [-r repo_name] [-b Branch] [-l Logger Level] [-c Console Logging] optional arguments: -h, --help show this help message and exit @@ -194,6 +203,8 @@ optional arguments: Pass the flag as Yes or No. Default is No -o pass org name, --org Pass the targeted org list as a comma-separated string -r pass repo name, --repo Pass the targeted repo list as a comma-separated string + -b Branch, --branch Branch + Pass the Branch name to scan. If the branch does not exist, the scanner will exit with an error -l Logger Level, --log_level Logger Level Pass the Logging level as for CRITICAL - 50, ERROR - 40 WARNING - 30 INFO - 20 DEBUG - 10. 
Default is 20 -c Console Logging, --console_logging Console Logging @@ -234,6 +245,15 @@ python enterprise_key_detections.py -o org_name #Ex: python enterprise_ke python enterprise_key_detections.py -r org_name/repo_name #Ex: python enterprise_key_detections.py -r test_org/public_docker ``` +##### Command to Run Enterprise Keys and Tokens Scanner for a specific branch + +``` +# Run for a specific branch, +python enterprise_key_detections.py -r org_name/repo_name -b branch_name #Ex: python enterprise_key_detections.py -r test_org/public_docker -b develop +``` + +> **Note:** If the specified branch does not exist in the repo, the scanner will exit with an error. + ##### Detections With ML Filter xGitGuard also has an additional ML filter where users can collect their organization/targeted data and train their model. Having this ML filter helps in reducing the false positives from the detection. @@ -260,7 +280,7 @@ python enterprise_key_detections.py -m Yes ``` Run usage: -enterprise_key_detections.py [-h] [-s Secondary Keywords] [-e Extensions] [-m Ml prediction] [-u Unmask Secret] [-o org_name] [-r repo_name] [-l Logger Level] [-c Console Logging] +enterprise_key_detections.py [-h] [-s Secondary Keywords] [-e Extensions] [-m Ml prediction] [-u Unmask Secret] [-o org_name] [-r repo_name] [-b Branch] [-l Logger Level] [-c Console Logging] optional arguments: -h, --help show this help message and exit @@ -274,6 +294,8 @@ optional arguments: Pass the flag as Yes or No. Default is No -o pass org name, --org Pass the targeted org list as a comma-separated string -r pass repo name, --repo Pass the targeted repo list as a comma-separated string + -b Branch, --branch Branch + Pass the Branch name to scan. If the branch does not exist, the scanner will exit with an error -l Logger Level, --log_level Logger Level Pass the Logging level as for CRITICAL - 50, ERROR - 40 WARNING - 30 INFO - 20 DEBUG - 10. 
Default is 20 -c Console Logging, --console_logging Console Logging @@ -354,6 +376,15 @@ python public_cred_detections.py -o org_name #Ex: python public_cred_det python public_cred_detections.py -r org_name/repo_name #Ex: python public_cred_detections.py -r test_org/public_docker ``` +##### Command to Run Public Credential Scanner for a specific branch + +``` +# Run for a specific branch, +python public_cred_detections.py -r org_name/repo_name -b branch_name #Ex: python public_cred_detections.py -r test_org/public_docker -b develop +``` + +> **Note:** If the specified branch does not exist in the repo, the scanner will exit with an error. + ##### Detections With ML Filter xGitGuard also has an additional ML filter, where users can collect their organization/targeted data and train their model. Having this ML filter helps in reducing the false positives from the detection. @@ -379,7 +410,7 @@ python public_cred_detections.py -m Yes ``` Run usage: -usage: public_cred_detections.py [-h] [-p Primary Keywords] [-s Secondary Keywords] [-e Extensions] [-m Ml prediction] [-u Unmask Secret] [-o org_name] [-r repo_name] [-l Logger Level] [-c Console Logging] +usage: public_cred_detections.py [-h] [-p Primary Keywords] [-s Secondary Keywords] [-e Extensions] [-m Ml prediction] [-u Unmask Secret] [-o org_name] [-r repo_name] [-b Branch] [-l Logger Level] [-c Console Logging] optional arguments: -h, --help show this help message and exit @@ -395,6 +426,8 @@ Pass the Extensions list as a comma-separated string Pass the flag as Yes or No. Default is No -o pass org name, --org Pass the targeted org list as a comma-separated string -r pass repo name, --repo Pass the targeted repo list as a comma-separated string +-b Branch, --branch Branch + Pass the Branch name to scan. If the branch does not exist, the scanner will exit with an error -l Logger Level, --log_level Logger Level Pass the Logging level as for CRITICAL - 50, ERROR - 40 WARNING - 30 INFO - 20 DEBUG - 10. 
Default is 20 -c Console Logging, --console_logging Console Logging @@ -438,6 +471,15 @@ python public_key_detections.py -o org_name #Ex: python public_key_det python public_key_detections.py -r org_name/repo_name #Ex: python public_key_detections.py -r test_org/public_docker ``` +##### Command to Run Public Keys and Tokens Scanner for a specific branch + +``` +# Run for a specific branch, +python public_key_detections.py -r org_name/repo_name -b branch_name #Ex: python public_key_detections.py -r test_org/public_docker -b develop +``` + +> **Note:** If the specified branch does not exist in the repo, the scanner will exit with an error. + ##### Detections With ML Filter xGitGuard also has an additional ML filter, where users can collect their organization/targeted data and train their model. Having this ML filter helps in reducing the false positives from the detection. @@ -462,7 +504,7 @@ python public_key_detections.py -m Yes ``` usage: -public_key_detections.py [-h] [-s Secondary Keywords] [-e Extensions] [-m Ml prediction][-u Unmask Secret] [-o org_name] [-r repo_name] [-l Logger Level] [-c Console Logging] +public_key_detections.py [-h] [-s Secondary Keywords] [-e Extensions] [-m Ml prediction][-u Unmask Secret] [-o org_name] [-r repo_name] [-b Branch] [-l Logger Level] [-c Console Logging] optional arguments: -h, --help show this help message and exit @@ -476,6 +518,8 @@ Pass the Extensions list as a comma-separated string Pass the flag as Yes or No. Default is No -o pass org name, --org Pass the targeted org list as a comma-separated string -r pass repo name, --repo Pass the targeted repo list as a comma-separated string +-b Branch, --branch Branch + Pass the Branch name to scan. If the branch does not exist, the scanner will exit with an error -l Logger Level, --log_level Logger Level Pass the Logging level as for CRITICAL - 50, ERROR - 40 WARNING - 30 INFO - 20 DEBUG - 10. 
Default is 20 -c Console Logging, --console_logging Console Logging diff --git a/xgitguard/common/github_calls.py b/xgitguard/common/github_calls.py index df79d6f..4318745 100644 --- a/xgitguard/common/github_calls.py +++ b/xgitguard/common/github_calls.py @@ -325,3 +325,61 @@ def get_github_enterprise_commits(self, user_name, repo_name, file_path, header) except Exception as e: logger.error(f"Github API commit content get Error: {e}") return {} + + def check_public_branch_exists(self, user_name, repo_name, branch): + """ + Check if a branch exists in a public GitHub repository + params: user_name - string + params: repo_name - string + params: branch - string + returns: True if branch exists, False otherwise + """ + logger.debug("<<<< 'Current Executing Function' >>>>") + token_var = "GITHUB_TOKEN" + if not os.getenv(token_var): + logger.error( + f"GitHub API Token Environment variable '{token_var}' not set." + ) + return False + try: + time.sleep(self._throttle_time) + base = self._base_url.replace("/search/code", "") + url = f"{base}/repos/{user_name}/{repo_name}/branches/{branch}" + response = requests.get( + url, auth=("token", os.getenv(token_var)), timeout=10 + ) + return response.status_code == 200 + except Exception as e: + logger.error(f"Public branch existence check failed for '{user_name}/{repo_name}' branch '{branch}': {e}") + return False + + def check_enterprise_branch_exists(self, user_name, repo_name, branch, header): + """ + Check if a branch exists in an enterprise GitHub repository + params: user_name - string + params: repo_name - string + params: branch - string + params: header - dict + returns: True if branch exists, False otherwise + """ + logger.debug("<<<< 'Current Executing Function' >>>>") + token_var = "GITHUB_ENTERPRISE_TOKEN" + if not os.getenv(token_var): + logger.error( + f"GitHub API Token Environment variable '{token_var}' not set." 
+ ) + return False + try: + time.sleep(self._throttle_time) + base = self._base_url.replace("/search/code", "") + url = f"{base}/repos/{user_name}/{repo_name}/branches/{branch}" + response = requests.get( + url, + auth=("token", os.getenv(token_var)), + headers=header, + timeout=10, + ) + return response.status_code == 200 + except Exception as e: + logger.error(f"Enterprise branch existence check failed for '{user_name}/{repo_name}' branch '{branch}': {e}") + return False diff --git a/xgitguard/config/xgg_configs.yaml b/xgitguard/config/xgg_configs.yaml index 93afd1e..cce46fb 100644 --- a/xgitguard/config/xgg_configs.yaml +++ b/xgitguard/config/xgg_configs.yaml @@ -7,6 +7,7 @@ github: # GitHub Public public_api_url: "https://api.github.com/search/code" public_commits_url: "https://api.github.com/repos/%s/%s/commits?path=%s" + public_pre_url: "https://api.github.com/repos/" # GitHub Enterprise - For Open Source enterprise_api_url: "https://github.<< Enterprise Name >>.com/api/v3/search/code" diff --git a/xgitguard/github-enterprise/enterprise_cred_detections.py b/xgitguard/github-enterprise/enterprise_cred_detections.py index db8db0d..8f379f3 100644 --- a/xgitguard/github-enterprise/enterprise_cred_detections.py +++ b/xgitguard/github-enterprise/enterprise_cred_detections.py @@ -372,7 +372,7 @@ def check_existing_detections(org_url_list, url_list, search_query): return new_org_url_list, new_urls_list, new_hashed_urls -def process_search_results(search_response_lines, search_query, ml_prediction): +def process_search_results(search_response_lines, search_query, ml_prediction, branch=""): """ For each search response items, process as below Get the html urls from the search response @@ -389,6 +389,7 @@ def process_search_results(search_response_lines, search_query, ml_prediction): params: search_response_lines - list params: search_query - string params: ml_prediction - boolean + params: branch - string - optional returns: detection_writes_per_query - int - Total 
detections written to file returns: new_results_per_query - int - No of new urls per query @@ -415,6 +416,9 @@ def process_search_results(search_response_lines, search_query, ml_prediction): + "/contents/" + line["path"] ) + # If branch is specified, add ref parameter to the contents API URL + if branch: + html_url = html_url + "?ref=" + branch url_list.append(html_url) if url_list: @@ -542,7 +546,7 @@ def format_search_query_list(secondary_keywords): def run_detection( - secondary_keywords=[], extensions=[], ml_prediction=False, org=[], repo=[] + secondary_keywords=[], extensions=[], ml_prediction=False, org=[], repo=[], branch="" ): """ Run GitHub detections @@ -657,7 +661,7 @@ def run_detection( new_results_per_query, detections_per_query, ) = process_search_results( - search_response_lines, search_query, ml_prediction + search_response_lines, search_query, ml_prediction, branch ) logger.info( f"Detection writes in current search query: {detection_writes_per_query}" @@ -806,6 +810,16 @@ def arg_parser(): help="Pass the Console Logging as Yes or No. Default is Yes", ) + argparser.add_argument( + "-b", + "--branch", + metavar="Branch Name", + action="store", + type=str, + default="", + help="Pass the Branch name to scan. 
If the branch does not exist, the scanner will exit with an error", + ) + args = argparser.parse_args() if args.secondary_keywords: @@ -850,6 +864,8 @@ def arg_parser(): else: console_logging = False + branch = args.branch.strip() if args.branch else "" + return ( secondary_keywords, extensions, @@ -859,6 +875,7 @@ def arg_parser(): repo, log_level, console_logging, + branch, ) @@ -873,6 +890,7 @@ def arg_parser(): repo, log_level, console_logging, + branch, ) = arg_parser() # Setting up Logger @@ -898,6 +916,19 @@ def arg_parser(): ) sys.exit(1) - run_detection(secondary_keywords, extensions, ml_prediction, org, repo) + # Validate branch if specified + if branch and repo: + repo_parts = repo[0].split("/") + if len(repo_parts) == 2: + header = configs.xgg_configs["github"]["enterprise_header"] + if githubCalls.check_enterprise_branch_exists(repo_parts[0], repo_parts[1], branch, header): + logger.info(f"Branch '{branch}' exists in repo '{repo[0]}'. Scanning branch '{branch}'.") + else: + logger.error(f"Branch '{branch}' not found in repo '{repo[0]}'. Please provide a valid branch name") + sys.exit(1) + elif branch and not repo: + logger.info(f"Branch '{branch}' specified. 
Will attempt to scan files on this branch.") + + run_detection(secondary_keywords, extensions, ml_prediction, org, repo, branch) logger.info("xGitGuard Credentials Detection Process Completed") diff --git a/xgitguard/github-enterprise/enterprise_key_detections.py b/xgitguard/github-enterprise/enterprise_key_detections.py index cfa109a..e3e4e42 100644 --- a/xgitguard/github-enterprise/enterprise_key_detections.py +++ b/xgitguard/github-enterprise/enterprise_key_detections.py @@ -351,7 +351,7 @@ def check_existing_detections(org_url_list, url_list, search_query): return new_org_url_list, new_urls_list, new_hashed_urls -def process_search_results(search_response_lines, search_query, ml_prediction): +def process_search_results(search_response_lines, search_query, ml_prediction, branch=""): """ For each search response items, process as below Get the html urls from the search response @@ -368,6 +368,7 @@ def process_search_results(search_response_lines, search_query, ml_prediction): params: search_response_lines - list params: search_query - string params: ml_prediction - boolean + params: branch - string - optional returns: detection_writes_per_query - int - Total detections written to file returns: new_results_per_query - int - No of new urls per query @@ -394,6 +395,9 @@ def process_search_results(search_response_lines, search_query, ml_prediction): + "/contents/" + line["path"] ) + # If branch is specified, add ref parameter to the contents API URL + if branch: + html_url = html_url + "?ref=" + branch url_list.append(html_url) if url_list: @@ -521,7 +525,7 @@ def format_search_query_list(secondary_keywords): def run_detection( - secondary_keywords=[], extensions=[], ml_prediction=False, org=[], repo=[] + secondary_keywords=[], extensions=[], ml_prediction=False, org=[], repo=[], branch="" ): """ Run GitHub detections @@ -636,7 +640,7 @@ def run_detection( new_results_per_query, detections_per_query, ) = process_search_results( - search_response_lines, search_query, 
ml_prediction + search_response_lines, search_query, ml_prediction, branch ) logger.info( f"Detection writes in current search query: {detection_writes_per_query}" @@ -786,6 +790,16 @@ def arg_parser(): help="Pass the Console Logging as Yes or No. Default is Yes", ) + argparser.add_argument( + "-b", + "--branch", + metavar="Branch Name", + action="store", + type=str, + default="", + help="Pass the Branch name to scan. If the branch does not exist, the scanner will exit with an error", + ) + args = argparser.parse_args() if args.secondary_keywords: @@ -830,6 +844,8 @@ def arg_parser(): else: console_logging = False + branch = args.branch.strip() if args.branch else "" + return ( secondary_keywords, extensions, @@ -839,6 +855,7 @@ def arg_parser(): repo, log_level, console_logging, + branch, ) @@ -853,6 +870,7 @@ def arg_parser(): repo, log_level, console_logging, + branch, ) = arg_parser() # Setting up Logger @@ -878,6 +896,19 @@ def arg_parser(): ) sys.exit(1) - run_detection(secondary_keywords, extensions, ml_prediction, org, repo) + # Validate branch if specified + if branch and repo: + repo_parts = repo[0].split("/") + if len(repo_parts) == 2: + header = configs.xgg_configs["github"]["enterprise_header"] + if githubCalls.check_enterprise_branch_exists(repo_parts[0], repo_parts[1], branch, header): + logger.info(f"Branch '{branch}' exists in repo '{repo[0]}'. Scanning branch '{branch}'.") + else: + logger.error(f"Branch '{branch}' not found in repo '{repo[0]}'. Please provide a valid branch name") + sys.exit(1) + elif branch and not repo: + logger.info(f"Branch '{branch}' specified. 
Will attempt to scan files on this branch.") + + run_detection(secondary_keywords, extensions, ml_prediction, org, repo, branch) logger.info("xGitGuard Enterprise Keys and Token Detection Process Completed") diff --git a/xgitguard/github-public/public_cred_detections.py b/xgitguard/github-public/public_cred_detections.py index 5aca714..cc50034 100644 --- a/xgitguard/github-public/public_cred_detections.py +++ b/xgitguard/github-public/public_cred_detections.py @@ -371,7 +371,7 @@ def check_existing_detections(url_list, search_query): return new_urls_list, new_hashed_urls -def process_search_results(search_response_lines, search_query, ml_prediction): +def process_search_results(search_response_lines, search_query, ml_prediction, branch=""): """ For each search response items, process as below Get the html urls from the search response @@ -388,6 +388,7 @@ def process_search_results(search_response_lines, search_query, ml_prediction): params: search_response_lines - list params: search_query - string params: ml_prediction - boolean + params: branch - string - optional returns: detection_writes_per_query - int - Total detections written to file returns: new_results_per_query - int - No of new urls per query @@ -411,6 +412,12 @@ def process_search_results(search_response_lines, search_query, ml_prediction): html_url = html_url.replace( "https://github.com", "https://raw.githubusercontent.com" ) + # If branch is specified, replace the branch segment in the raw URL + if branch: + url_parts = html_url.split("/") + if len(url_parts) > 5: + url_parts[5] = branch + html_url = "/".join(url_parts) url_list.append(html_url) if url_list: @@ -546,6 +553,7 @@ def run_detection( ml_prediction=False, org=[], repo=[], + branch="", ): """ Run GitHub detections @@ -676,6 +684,7 @@ def run_detection( search_response_lines, search_query, ml_prediction, + branch, ) logger.info( f"Detection writes in current search query: {detection_writes_per_query}" @@ -707,7 +716,7 @@ def run_detection( 
def run_detections_from_file( - secondary_keywords=[], extensions=[], ml_prediction=False, org=[], repo=[] + secondary_keywords=[], extensions=[], ml_prediction=False, org=[], repo=[], branch="" ): """ Run detection for Primary Keywords present in the default config file @@ -739,6 +748,7 @@ def run_detections_from_file( ml_prediction, org, repo, + branch, ) status = True except Exception as e: @@ -769,6 +779,7 @@ def run_detections_from_list( ml_prediction=False, org=[], repo=[], + branch="", ): """ Run detection for Primary Keywords present in the given input list @@ -817,6 +828,7 @@ def run_detections_from_list( ml_prediction, org, repo, + branch, ) except Exception as e: logger.error(f"Process Error: {e}") @@ -969,6 +981,16 @@ def arg_parser(): help="Pass the Console Logging as Yes or No. Default is Yes", ) + argparser.add_argument( + "-b", + "--branch", + metavar="Branch Name", + action="store", + type=str, + default="", + help="Pass the Branch name to scan. If the branch does not exist, the scanner will exit with an error", + ) + args = argparser.parse_args() if args.primary_keywords: @@ -1017,6 +1039,8 @@ def arg_parser(): else: console_logging = False + branch = args.branch.strip() if args.branch else "" + return ( primary_keywords, secondary_keywords, @@ -1027,6 +1051,7 @@ def arg_parser(): repo, log_level, console_logging, + branch, ) @@ -1042,6 +1067,7 @@ def arg_parser(): repo, log_level, console_logging, + branch, ) = arg_parser() # Setting up Logger @@ -1069,13 +1095,25 @@ def arg_parser(): ) sys.exit(1) + # Validate branch if specified + if branch and repo: + repo_parts = repo[0].split("/") + if len(repo_parts) == 2: + if githubCalls.check_public_branch_exists(repo_parts[0], repo_parts[1], branch): + logger.info(f"Branch '{branch}' exists in repo '{repo[0]}'. Scanning branch '{branch}'.") + else: + logger.error(f"Branch '{branch}' not found in repo '{repo[0]}'. 
Please provide a valid branch name.") + sys.exit(1) + elif branch and not repo: + logger.info(f"Branch '{branch}' specified. Will attempt to scan files on this branch.") + if primary_keywords: run_detections_from_list( - primary_keywords, secondary_keywords, extensions, ml_prediction, org, repo + primary_keywords, secondary_keywords, extensions, ml_prediction, org, repo, branch ) else: run_detections_from_file( - secondary_keywords, extensions, ml_prediction, org, repo + secondary_keywords, extensions, ml_prediction, org, repo, branch ) logger.info("xGitGuard Credentials Detection Process Completed") diff --git a/xgitguard/github-public/public_key_detections.py b/xgitguard/github-public/public_key_detections.py index a887b80..0b1d839 100644 --- a/xgitguard/github-public/public_key_detections.py +++ b/xgitguard/github-public/public_key_detections.py @@ -344,7 +344,7 @@ def check_existing_detections(url_list, search_query): return new_urls_list, new_hashed_urls -def process_search_results(search_response_lines, search_query, ml_prediction): +def process_search_results(search_response_lines, search_query, ml_prediction, branch=""): """ For each search response items, process as below Get the html urls from the search response @@ -361,6 +361,7 @@ def process_search_results(search_response_lines, search_query, ml_prediction): params: search_response_lines - list params: search_query - string params: ml_prediction - boolean + params: branch - string - optional returns: detection_writes_per_query - int - Total detections written to file returns: new_results_per_query - int - No of new urls per query @@ -384,6 +385,12 @@ def process_search_results(search_response_lines, search_query, ml_prediction): html_url = html_url.replace( "https://github.com", "https://raw.githubusercontent.com" ) + # If branch is specified, replace the branch segment in the raw URL + if branch: + url_parts = html_url.split("/") + if len(url_parts) > 5: + url_parts[5] = branch + html_url = 
"/".join(url_parts) url_list.append(html_url) if url_list: @@ -519,6 +526,7 @@ def run_detection( ml_prediction=False, org=[], repo=[], + branch="", ): """ Run GitHub detections @@ -647,7 +655,7 @@ def run_detection( new_results_per_query, detections_per_query, ) = process_search_results( - search_response_lines, search_query, ml_prediction + search_response_lines, search_query, ml_prediction, branch ) logger.info( f"Detection writes in current search query: {detection_writes_per_query}" @@ -679,7 +687,7 @@ def run_detection( def run_detections_from_file( - secondary_keywords=[], extensions=[], ml_prediction=False, org=[], repo=[] + secondary_keywords=[], extensions=[], ml_prediction=False, org=[], repo=[], branch="" ): """ Run detection for Primary Keywords present in the default config file @@ -711,6 +719,7 @@ def run_detections_from_file( ml_prediction, org, repo, + branch, ) status = True except Exception as e: @@ -741,6 +750,7 @@ def run_detections_from_list( ml_prediction=False, org=[], repo=[], + branch="", ): """ Run detection for Primary Keywords present in the given input list @@ -789,6 +799,7 @@ def run_detections_from_list( ml_prediction, org, repo, + branch, ) except Exception as e: logger.error(f"Process Error: {e}") @@ -939,6 +950,16 @@ def arg_parser(): help="Pass the Console Logging as Yes or No. Default is Yes", ) + argparser.add_argument( + "-b", + "--branch", + metavar="Branch Name", + action="store", + type=str, + default="", + help="Pass the Branch name to scan. 
If the branch does not exist, the scanner will exit with an error", + ) + args = argparser.parse_args() if args.primary_keywords: @@ -987,6 +1008,8 @@ def arg_parser(): else: console_logging = False + branch = args.branch.strip() if args.branch else "" + return ( primary_keywords, secondary_keywords, @@ -997,6 +1020,7 @@ def arg_parser(): repo, log_level, console_logging, + branch, ) @@ -1012,6 +1036,7 @@ def arg_parser(): repo, log_level, console_logging, + branch, ) = arg_parser() # Setting up Logger @@ -1039,13 +1064,25 @@ def arg_parser(): ) sys.exit(1) + # Validate branch if specified + if branch and repo: + repo_parts = repo[0].split("/") + if len(repo_parts) == 2: + if githubCalls.check_public_branch_exists(repo_parts[0], repo_parts[1], branch): + logger.info(f"Branch '{branch}' exists in repo '{repo[0]}'. Scanning branch '{branch}'.") + else: + logger.error(f"Branch '{branch}' not found in repo '{repo[0]}'. Please provide a valid branch name.") + sys.exit(1) + elif branch and not repo: + logger.info(f"Branch '{branch}' specified. Will attempt to scan files on this branch.") + if primary_keywords: run_detections_from_list( - primary_keywords, secondary_keywords, extensions, ml_prediction, org, repo + primary_keywords, secondary_keywords, extensions, ml_prediction, org, repo, branch ) else: run_detections_from_file( - secondary_keywords, extensions, ml_prediction, org, repo + secondary_keywords, extensions, ml_prediction, org, repo, branch ) logger.info("xGitGuard Keys and Token Detection Process Completed")