diff --git a/crawlers/lib/crawl.py b/crawlers/lib/crawl.py
index 8d3ab9f..d2e5f9e 100644
--- a/crawlers/lib/crawl.py
+++ b/crawlers/lib/crawl.py
@@ -6,11 +6,14 @@
 import uuid
 from typing import List, Generator
 from flask import current_app
+from requests import ConnectionError, Timeout, TooManyRedirects
 
 from crawlers.constants import BLOCK_KEY_CALLBACK_URL
 
 from crawlers.lib.platforms.i_crawler import ICrawler
 from crawlers.lib.platforms import platforms
+from urllib3.exceptions import MaxRetryError
+from requests.exceptions import RequestException
 
 logger = logging.getLogger(__name__)
 
@@ -54,7 +57,11 @@ def process_block_url(session, block_url) -> None:
             f"skip crawl - no callback_url found! - key: {BLOCK_KEY_CALLBACK_URL}, block_data: {block_data}"
         )
     else:
-        repos = run_block(block_data)
+        try:
+            repos = run_block(block_data)
+        except (MaxRetryError, ConnectionError, Timeout, TooManyRedirects):
+            logger.exception("hosting service not reachable - no indexer callback issued")
+            return
         _hoster_session_request(
             "PUT", session, url=block_data[BLOCK_KEY_CALLBACK_URL], json=repos
         )
@@ -76,8 +83,9 @@ def crawl(platform: ICrawler) -> Generator[List[dict], None, None]:
         else:
             # right now we dont want to emit failures (via yield) because that will send empty results back
             # to the indexer, which can trigger a state reset (i.e. reached end, start over).
-            # TODO deal with failures - what are they?
+            # - complete connection failures and such should be handled via raised exceptions within crawlers!
             pass
+
     logger.debug(f"END block: {platform.type} - final state: {platform.state}")
 
 
diff --git a/crawlers/lib/platforms/__init__.py b/crawlers/lib/platforms/__init__.py
index 9910dac..624d719 100644
--- a/crawlers/lib/platforms/__init__.py
+++ b/crawlers/lib/platforms/__init__.py
@@ -1,15 +1,14 @@
-from typing import Dict, Any, Type, Union
+from typing import Dict
 from crawlers.lib.platforms.i_crawler import ICrawler
 from crawlers.lib.platforms.gitea import GiteaCrawler
 from crawlers.lib.platforms.gitlab import GitLabCrawler
 from crawlers.lib.platforms.bitbucket import BitBucketCrawler
-from crawlers.lib.platforms.github import GitHubV4Crawler, GitHubRESTCrawler
+from crawlers.lib.platforms.github import GitHubV4Crawler
 
 platforms: Dict[str, ICrawler] = {
     GiteaCrawler.type: GiteaCrawler,
     GitLabCrawler.type: GitLabCrawler,
     GitHubV4Crawler.type: GitHubV4Crawler,
-    GitHubRESTCrawler.type: GitHubRESTCrawler,
     BitBucketCrawler.type: BitBucketCrawler,
 }
 
diff --git a/crawlers/lib/platforms/bitbucket.py b/crawlers/lib/platforms/bitbucket.py
index ecd4870..e60a7f7 100644
--- a/crawlers/lib/platforms/bitbucket.py
+++ b/crawlers/lib/platforms/bitbucket.py
@@ -1,7 +1,7 @@
 import logging
 import time
 import requests
-from typing import List, Tuple
+from typing import List, Tuple, Union
 from urllib.parse import urljoin
 
 from crawlers.lib.platforms.i_crawler import ICrawler
@@ -65,7 +65,7 @@ def crawl(self, state: dict = None) -> Tuple[bool, List[dict], dict]:
                 logger.error(e)
                 logger.error(e.response.reason)
                 logger.error(e.response.text)
-                return False, [], {}
+                return False, [], {}, e
 
             response_json = response.json()
             repos = response_json['values']
diff --git a/crawlers/lib/platforms/gitea.py b/crawlers/lib/platforms/gitea.py
index 144d228..73565cd 100644
--- a/crawlers/lib/platforms/gitea.py
+++ b/crawlers/lib/platforms/gitea.py
@@ -1,5 +1,8 @@
 import logging
-from typing import List, Tuple
+from typing import List, Tuple, Union
+
+from requests import ConnectionError, Timeout, TooManyRedirects
+from urllib3.exceptions import MaxRetryError
 
 from crawlers.constants import GITEA_PER_PAGE_MAX, DEFAULT_REQUEST_TIMEOUT
 from crawlers.lib.platforms.i_crawler import ICrawler
@@ -40,6 +43,10 @@ def crawl(self, state: dict = None) -> Tuple[bool, List[dict], dict]:
                                    f"- response not ok, status: {response.status_code}")
                     return False, [], state  # nr.1 - we skip rest of this block, hope we get it next time
                 result = response.json()
+            except (MaxRetryError, ConnectionError, Timeout, TooManyRedirects) as e:
+                logger.exception(f"{self} - crawler cannot reach hoster")
+                # we re-raise these, as we want to avoid returning empty results to the indexer
+                raise e
             except Exception as e:
                 logger.exception(f"(skipping block chunk) gitea crawler crashed")
                 return False, [], state  # nr.2 - we skip rest of this block, hope we get it next time
diff --git a/crawlers/lib/platforms/github/__init__.py b/crawlers/lib/platforms/github/__init__.py
index 0182d72..d2b82b8 100644
--- a/crawlers/lib/platforms/github/__init__.py
+++ b/crawlers/lib/platforms/github/__init__.py
@@ -1,2 +1 @@
 from .github_v4 import GitHubV4Crawler
-from .github_rest import GitHubRESTCrawler
diff --git a/crawlers/lib/platforms/github/github_rest.py b/crawlers/lib/platforms/github/github_rest.py
deleted file mode 100644
index 7745c5a..0000000
--- a/crawlers/lib/platforms/github/github_rest.py
+++ /dev/null
@@ -1,249 +0,0 @@
-"""
-Crawl through GitHub via their REST API.
-Gets repositories connected to users.
-"""
-import logging
-import time
-from typing import List, Tuple
-from urllib.parse import urljoin
-
-from crawlers.lib.platforms.i_crawler import ICrawler
-from crawlers.constants import DEFAULT_REQUEST_TIMEOUT
-
-logger = logging.getLogger(__name__)
-
-
-class GitHubRESTCrawler(ICrawler):
-    """
-    Accept-Ranges: bytes
-    Content-Length: 32867
-    X-GitHub-Request-Id: DE4C:7325:72FE05F:88CCA3F:5F74ECF6
-    X-Ratelimit-Limit: 60
-    X-Ratelimit-Remaining: 46
-    X-Ratelimit-Reset: 1601501700
-    X-Ratelimit-Used: 14
-    access-control-allow-origin: *
-    access-control-expose-headers: ETag, Link, Location, Retry-After, X-GitHub-OTP, X-RateLimit-Limit, X-RateLimit-Remaining, X-RateLimit-Used, X-RateLimit-Reset, X-OAuth-Scopes, X-Accepted-OAuth-Scopes, X-Poll-Interval, X-GitHub-Media-Type, Deprecation, Sunset
-    cache-control: public, max-age=60, s-maxage=60
-    content-encoding: gzip
-    content-security-policy: default-src 'none'
-    content-type: application/json; charset=utf-8
-    date: Wed, 30 Sep 2020 20:39:07 GMT
-    etag: W/"f95a90519ac2d5dbc76753515500268383c3b666f6fdaf187d82444e25ba14a5"
-    link: <https://api.github.com/repositories?since=369>; rel="next", <https://api.github.com/repositories{?since}>; rel="first"
-    referrer-policy: origin-when-cross-origin, strict-origin-when-cross-origin
-    server: GitHub.com
-    status: 200 OK
-    strict-transport-security: max-age=31536000; includeSubdomains; preload
-    vary: Accept, Accept-Encoding, Accept, X-Requested-With, Accept-Encoding
-    x-content-type-options: nosniff
-    x-frame-options: deny
-    x-github-media-type: github.v3; format=json
-    x-xss-protection: 1; mode=block
-    """
-
-    type: str = 'github_rest'
-
-    def __init__(self, base_url, state=None, api_key=None, **kwargs):
-        super().__init__(
-            base_url=base_url,
-            path='',
-            state=state,
-            api_key=api_key,
-            **kwargs
-        )
-        if api_key:
-            self.requests.auth = (
-                api_key['client_id'],
-                api_key['client_secret'])
-
-    def request(self, url, params=None):
-        response = False
-        while not response:
-            try:
-                response = self.requests.get(url, params=params, timeout=DEFAULT_REQUEST_TIMEOUT)
-                response.raise_for_status()
-            except Exception as e:
-                logger.error(e)
-
-                # todo: test this
-                logger.warning(
-                    f'{self} sleeping for 10min...')
-                time.sleep(60 * 10)
-                response = False
-        return response
-
-    def handle_ratelimit(self, response):
-        h = response.headers
-        ratelimit_remaining = int(h.get('X-Ratelimit-Remaining'))
-        ratelimit_reset_timestamp = int(h.get('X-Ratelimit-Reset'))
-        reset_in = ratelimit_reset_timestamp - time.time()
-
-        logger.info(
-            f'{self} {ratelimit_remaining} requests remaining, reset in {reset_in}s')
-        if ratelimit_remaining < 1:
-            logger.warning(
-                f'{self} rate limiting: {ratelimit_remaining} requests remaining, sleeping {reset_in}s')
-            time.sleep(reset_in)
-
-    def get_user_repos(self, user_repos_url):
-        while user_repos_url:
-            response = self.request(user_repos_url, params=dict(per_page=100))
-            results = response.json()
-
-            yield results
-
-            self.handle_ratelimit(response)
-            header_next = response.links.get('next', {})
-            user_repos_url = header_next.get('url', False)
-
-    def crawl(self, state=None) -> Tuple[bool, List[dict], dict]:
-        """ :return: success, repos, state """
-        user_url = False
-        if state:
-            user_url = state.get('user_url', False)
-            if not user_url:
-                logger.warning('{self} broken state, defaulting to start')
-
-        if not user_url:
-            user_url = '/users'
-
-        while user_url:
-            user_response = self.request(urljoin(self.base_url, user_url))
-            self.handle_ratelimit(user_response)
-
-            users_page = user_response.json()
-            for user in users_page:
-                user_repos = []
-                for repo_page in self.get_user_repos(user['repos_url']):
-                    logger.debug(f'{self} {len(repo_page)} repos in page')
-                    user_repos += repo_page
-                state = {'user_url': user_url}
-                yield True, user_repos, state
-
-            # https://stackoverflow.com/questions/32312758/python-requests-link-headers
-            user_header_next = user_response.links.get('next', {})
-            user_url = user_header_next.get('url', False)
-            if not user_url:
-                # not hit rate limit, and we dont have a next url - finished!
-                # reset state
-                yield True, [], None
-            time.sleep(.01)
-
-        """ expected GitHub result
-        {
-            "id": 1296269,
-            "node_id": "MDEwOlJlcG9zaXRvcnkxMjk2MjY5",
-            "name": "Hello-World",
-            "full_name": "octocat/Hello-World",
-            "owner": {
-              "login": "octocat",
-              "id": 1,
-              "node_id": "MDQ6VXNlcjE=",
-              "avatar_url": "https://github.com/images/error/octocat_happy.gif",
-              "gravatar_id": "",
-              "url": "https://api.github.com/users/octocat",
-              "html_url": "https://github.com/octocat",
-              "followers_url": "https://api.github.com/users/octocat/followers",
-              "following_url": "https://api.github.com/users/octocat/following{/other_user}",
-              "gists_url": "https://api.github.com/users/octocat/gists{/gist_id}",
-              "starred_url": "https://api.github.com/users/octocat/starred{/owner}{/repo}",
-              "subscriptions_url": "https://api.github.com/users/octocat/subscriptions",
-              "organizations_url": "https://api.github.com/users/octocat/orgs",
-              "repos_url": "https://api.github.com/users/octocat/repos",
-              "events_url": "https://api.github.com/users/octocat/events{/privacy}",
-              "received_events_url": "https://api.github.com/users/octocat/received_events",
-              "type": "User",
-              "site_admin": false
-            },
-            "private": false,
-            "html_url": "https://github.com/octocat/Hello-World",
-            "description": "This your first repo!",
-            "fork": false,
-            "url": "https://api.github.com/repos/octocat/Hello-World",
-            "archive_url": "https://api.github.com/repos/octocat/Hello-World/{archive_format}{/ref}",
-            "assignees_url": "https://api.github.com/repos/octocat/Hello-World/assignees{/user}",
-            "blobs_url": "https://api.github.com/repos/octocat/Hello-World/git/blobs{/sha}",
-            "branches_url": "https://api.github.com/repos/octocat/Hello-World/branches{/branch}",
-            "collaborators_url": "https://api.github.com/repos/octocat/Hello-World/collaborators{/collaborator}",
-            "comments_url": "https://api.github.com/repos/octocat/Hello-World/comments{/number}",
-            "commits_url": "https://api.github.com/repos/octocat/Hello-World/commits{/sha}",
-            "compare_url": "https://api.github.com/repos/octocat/Hello-World/compare/{base}...{head}",
-            "contents_url": "https://api.github.com/repos/octocat/Hello-World/contents/{+path}",
-            "contributors_url": "https://api.github.com/repos/octocat/Hello-World/contributors",
-            "deployments_url": "https://api.github.com/repos/octocat/Hello-World/deployments",
-            "downloads_url": "https://api.github.com/repos/octocat/Hello-World/downloads",
-            "events_url": "https://api.github.com/repos/octocat/Hello-World/events",
-            "forks_url": "https://api.github.com/repos/octocat/Hello-World/forks",
-            "git_commits_url": "https://api.github.com/repos/octocat/Hello-World/git/commits{/sha}",
-            "git_refs_url": "https://api.github.com/repos/octocat/Hello-World/git/refs{/sha}",
-            "git_tags_url": "https://api.github.com/repos/octocat/Hello-World/git/tags{/sha}",
-            "git_url": "git:github.com/octocat/Hello-World.git",
-            "issue_comment_url": "https://api.github.com/repos/octocat/Hello-World/issues/comments{/number}",
-            "issue_events_url": "https://api.github.com/repos/octocat/Hello-World/issues/events{/number}",
-            "issues_url": "https://api.github.com/repos/octocat/Hello-World/issues{/number}",
-            "keys_url": "https://api.github.com/repos/octocat/Hello-World/keys{/key_id}",
-            "labels_url": "https://api.github.com/repos/octocat/Hello-World/labels{/name}",
-            "languages_url": "https://api.github.com/repos/octocat/Hello-World/languages",
-            "merges_url": "https://api.github.com/repos/octocat/Hello-World/merges",
-            "milestones_url": "https://api.github.com/repos/octocat/Hello-World/milestones{/number}",
-            "notifications_url": "https://api.github.com/repos/octocat/Hello-World/notifications{?since,all,participating}",
-            "pulls_url": "https://api.github.com/repos/octocat/Hello-World/pulls{/number}",
-            "releases_url": "https://api.github.com/repos/octocat/Hello-World/releases{/id}",
-            "ssh_url": "git@github.com:octocat/Hello-World.git",
-            "stargazers_url": "https://api.github.com/repos/octocat/Hello-World/stargazers",
-            "statuses_url": "https://api.github.com/repos/octocat/Hello-World/statuses/{sha}",
-            "subscribers_url": "https://api.github.com/repos/octocat/Hello-World/subscribers",
-            "subscription_url": "https://api.github.com/repos/octocat/Hello-World/subscription",
-            "tags_url": "https://api.github.com/repos/octocat/Hello-World/tags",
-            "teams_url": "https://api.github.com/repos/octocat/Hello-World/teams",
-            "trees_url": "https://api.github.com/repos/octocat/Hello-World/git/trees{/sha}",
-            "clone_url": "https://github.com/octocat/Hello-World.git",
-            "mirror_url": "git:git.example.com/octocat/Hello-World",
-            "hooks_url": "https://api.github.com/repos/octocat/Hello-World/hooks",
-            "svn_url": "https://svn.github.com/octocat/Hello-World",
-            "homepage": "https://github.com",
-            "language": null,
-            "forks_count": 9,
-            "stargazers_count": 80,
-            "watchers_count": 80,
-            "size": 108,
-            "default_branch": "master",
-            "open_issues_count": 0,
-            "is_template": true,
-            "topics": [
-              "octocat",
-              "atom",
-              "electron",
-              "api"
-            ],
-            "has_issues": true,
-            "has_projects": true,
-            "has_wiki": true,
-            "has_pages": false,
-            "has_downloads": true,
-            "archived": false,
-            "disabled": false,
-            "visibility": "public",
-            "pushed_at": "2011-01-26T19:06:43Z",
-            "created_at": "2011-01-26T19:01:12Z",
-            "updated_at": "2011-01-26T19:14:43Z",
-            "permissions": {
-              "admin": false,
-              "push": false,
-              "pull": true
-            },
-            "template_repository": "octocat/template",
-            "temp_clone_token": "ABTLWHOULUVAXGTRYU7OC2876QJ2O",
-            "delete_branch_on_merge": true,
-            "subscribers_count": 42,
-            "network_count": 0,
-            "license": {
-              "key": "mit",
-              "name": "MIT License",
-              "spdx_id": "MIT",
-              "url": "https://api.github.com/licenses/mit",
-              "node_id": "MDc6TGljZW5zZW1pdA=="
-            }
-        }
-        """
diff --git a/crawlers/lib/platforms/github/github_rest_legacy.py b/crawlers/lib/platforms/github/github_rest_legacy.py
deleted file mode 100644
index 73ef24f..0000000
--- a/crawlers/lib/platforms/github/github_rest_legacy.py
+++ /dev/null
@@ -1,246 +0,0 @@
-"""
-Legacy note:
-
-This crawler is not finished and wont be used.
-It needs to run extra requests per repository (+1 per data-point)
-to get the real data, which we dont want to do.
-"""
-import logging
-import time
-import math
-from typing import List, Tuple
-from urllib.parse import urljoin
-
-from crawlers.constants import DEFAULT_REQUEST_TIMEOUT
-
-from crawlers.lib.platforms.i_crawler import ICrawler
-
-logger = logging.getLogger(__name__)
-
-
-class GitHubRESTCrawler(ICrawler):
-    """
-    Accept-Ranges: bytes
-    Content-Length: 32867
-    X-GitHub-Request-Id: DE4C:7325:72FE05F:88CCA3F:5F74ECF6
-    X-Ratelimit-Limit: 60
-    X-Ratelimit-Remaining: 46
-    X-Ratelimit-Reset: 1601501700
-    X-Ratelimit-Used: 14
-    access-control-allow-origin: *
-    access-control-expose-headers: ETag, Link, Location, Retry-After, X-GitHub-OTP, X-RateLimit-Limit, X-RateLimit-Remaining, X-RateLimit-Used, X-RateLimit-Reset, X-OAuth-Scopes, X-Accepted-OAuth-Scopes, X-Poll-Interval, X-GitHub-Media-Type, Deprecation, Sunset
-    cache-control: public, max-age=60, s-maxage=60
-    content-encoding: gzip
-    content-security-policy: default-src 'none'
-    content-type: application/json; charset=utf-8
-    date: Wed, 30 Sep 2020 20:39:07 GMT
-    etag: W/"f95a90519ac2d5dbc76753515500268383c3b666f6fdaf187d82444e25ba14a5"
-    link: <https://api.github.com/repositories?since=369>; rel="next", <https://api.github.com/repositories{?since}>; rel="first"
-    referrer-policy: origin-when-cross-origin, strict-origin-when-cross-origin
-    server: GitHub.com
-    status: 200 OK
-    strict-transport-security: max-age=31536000; includeSubdomains; preload
-    vary: Accept, Accept-Encoding, Accept, X-Requested-With, Accept-Encoding
-    x-content-type-options: nosniff
-    x-frame-options: deny
-    x-github-media-type: github.v3; format=json
-    x-xss-protection: 1; mode=block
-    """
-
-    type: str = 'github_rest_legacy'
-
-    def __init__(self, base_url, state=None, api_key=None, **kwargs):
-        super().__init__(
-            base_url=base_url,
-            path='',
-            state=state,
-            api_key=api_key,
-            **kwargs
-        )
-        if api_key:
-            self.requests.headers.update(
-                {"Authorization": f"Bearer {api_key['access_token']}"})
-
-    def request(self, url, params=None):
-        response = False
-        while not response:
-            try:
-                response = self.requests.get(url, params=params, timeout=DEFAULT_REQUEST_TIMEOUT)
-                response.raise_for_status()
-            except Exception as e:
-                logger.error(e)
-
-                # todo: test this
-                logger.warning(
-                    f'{self} sleeping for 10min...')
-                time.sleep(60 * 10)
-                response = False
-        return response
-
-    def handle_ratelimit(self, response):
-        h = response.headers
-        ratelimit_remaining = int(h.get('X-Ratelimit-Remaining'))
-        ratelimit_reset_timestamp = int(h.get('X-Ratelimit-Reset'))
-        reset_in = ratelimit_reset_timestamp - time.time()
-
-        logger.info(
-            f'{self} {ratelimit_remaining} requests remaining, reset in {reset_in}s')
-        if ratelimit_remaining < 1:
-            logger.warning(
-                f'{self} rate limiting: {ratelimit_remaining} requests remaining, sleeping {reset_in}s')
-            time.sleep(reset_in)
-
-    @staticmethod
-    def get_next_link(response) -> (str, int):
-        link = None
-        index = None
-        pagination = response.headers.get('link', '')
-        if 'next' in pagination:
-            # should contain the following:
-            # <https://api.github.com/repositories?since=1531>; rel="next", <https://api.github.com/repositories{?since}>; rel="first"
-            link = pagination.split(">")[0][1:]
-            index = int(link.split("since=")[1].split("&")[0])
-        return link, index
-
-    def init_state(self, state: dict = None):
-        if not state:
-            state = {}
-        state['start_at'] = state.get('start_at', 0)
-        state['end_at'] = state.get('end_at', math.inf)
-        state['current'] = state.get('current', 0)
-        state['next_link'] = state.get('next_link', urljoin(self.base_url, f'/repositories?since={state["start_at"]}'))
-        return state
-
-    def crawl(self, state: dict = None) -> Tuple[bool, List[dict], dict]:
-        """ :return: success, repos, state """
-        state = self.init_state(state)
-        while state["next_link"]:
-            time.sleep(.01)  # default self-throttling
-            repo_response = self.request(state["next_link"])
-            self.handle_ratelimit(repo_response)  # sleep when needed
-
-            results = repo_response.json()
-            logger.debug(f'{self} {len(results)} repos in page')
-            state["next_link"], state["current"] = self.get_next_link(repo_response)
-            if len(results) == 0 or state["current"] >= state["end_at"]:
-                state["next_link"] = None  # finished
-
-            yield True, results, state
-
-        """ expected GitHub response
-        {
-           "id": 1296269,
-           "node_id": "MDEwOlJlcG9zaXRvcnkxMjk2MjY5",
-           "name": "Hello-World",
-           "full_name": "octocat/Hello-World",
-           "owner": {
-             "login": "octocat",
-             "id": 1,
-             "node_id": "MDQ6VXNlcjE=",
-             "avatar_url": "https://github.com/images/error/octocat_happy.gif",
-             "gravatar_id": "",
-             "url": "https://api.github.com/users/octocat",
-             "html_url": "https://github.com/octocat",
-             "followers_url": "https://api.github.com/users/octocat/followers",
-             "following_url": "https://api.github.com/users/octocat/following{/other_user}",
-             "gists_url": "https://api.github.com/users/octocat/gists{/gist_id}",
-             "starred_url": "https://api.github.com/users/octocat/starred{/owner}{/repo}",
-             "subscriptions_url": "https://api.github.com/users/octocat/subscriptions",
-             "organizations_url": "https://api.github.com/users/octocat/orgs",
-             "repos_url": "https://api.github.com/users/octocat/repos",
-             "events_url": "https://api.github.com/users/octocat/events{/privacy}",
-             "received_events_url": "https://api.github.com/users/octocat/received_events",
-             "type": "User",
-             "site_admin": false
-           },
-           "private": false,
-           "html_url": "https://github.com/octocat/Hello-World",
-           "description": "This your first repo!",
-           "fork": false,
-           "url": "https://api.github.com/repos/octocat/Hello-World",
-           "archive_url": "https://api.github.com/repos/octocat/Hello-World/{archive_format}{/ref}",
-           "assignees_url": "https://api.github.com/repos/octocat/Hello-World/assignees{/user}",
-           "blobs_url": "https://api.github.com/repos/octocat/Hello-World/git/blobs{/sha}",
-           "branches_url": "https://api.github.com/repos/octocat/Hello-World/branches{/branch}",
-           "collaborators_url": "https://api.github.com/repos/octocat/Hello-World/collaborators{/collaborator}",
-           "comments_url": "https://api.github.com/repos/octocat/Hello-World/comments{/number}",
-           "commits_url": "https://api.github.com/repos/octocat/Hello-World/commits{/sha}",
-           "compare_url": "https://api.github.com/repos/octocat/Hello-World/compare/{base}...{head}",
-           "contents_url": "https://api.github.com/repos/octocat/Hello-World/contents/{+path}",
-           "contributors_url": "https://api.github.com/repos/octocat/Hello-World/contributors",
-           "deployments_url": "https://api.github.com/repos/octocat/Hello-World/deployments",
-           "downloads_url": "https://api.github.com/repos/octocat/Hello-World/downloads",
-           "events_url": "https://api.github.com/repos/octocat/Hello-World/events",
-           "forks_url": "https://api.github.com/repos/octocat/Hello-World/forks",
-           "git_commits_url": "https://api.github.com/repos/octocat/Hello-World/git/commits{/sha}",
-           "git_refs_url": "https://api.github.com/repos/octocat/Hello-World/git/refs{/sha}",
-           "git_tags_url": "https://api.github.com/repos/octocat/Hello-World/git/tags{/sha}",
-           "git_url": "git:github.com/octocat/Hello-World.git",
-           "issue_comment_url": "https://api.github.com/repos/octocat/Hello-World/issues/comments{/number}",
-           "issue_events_url": "https://api.github.com/repos/octocat/Hello-World/issues/events{/number}",
-           "issues_url": "https://api.github.com/repos/octocat/Hello-World/issues{/number}",
-           "keys_url": "https://api.github.com/repos/octocat/Hello-World/keys{/key_id}",
-           "labels_url": "https://api.github.com/repos/octocat/Hello-World/labels{/name}",
-           "languages_url": "https://api.github.com/repos/octocat/Hello-World/languages",
-           "merges_url": "https://api.github.com/repos/octocat/Hello-World/merges",
-           "milestones_url": "https://api.github.com/repos/octocat/Hello-World/milestones{/number}",
-           "notifications_url": "https://api.github.com/repos/octocat/Hello-World/notifications{?since,all,participating}",
-           "pulls_url": "https://api.github.com/repos/octocat/Hello-World/pulls{/number}",
-           "releases_url": "https://api.github.com/repos/octocat/Hello-World/releases{/id}",
-           "ssh_url": "git@github.com:octocat/Hello-World.git",
-           "stargazers_url": "https://api.github.com/repos/octocat/Hello-World/stargazers",
-           "statuses_url": "https://api.github.com/repos/octocat/Hello-World/statuses/{sha}",
-           "subscribers_url": "https://api.github.com/repos/octocat/Hello-World/subscribers",
-           "subscription_url": "https://api.github.com/repos/octocat/Hello-World/subscription",
-           "tags_url": "https://api.github.com/repos/octocat/Hello-World/tags",
-           "teams_url": "https://api.github.com/repos/octocat/Hello-World/teams",
-           "trees_url": "https://api.github.com/repos/octocat/Hello-World/git/trees{/sha}",
-           "clone_url": "https://github.com/octocat/Hello-World.git",
-           "mirror_url": "git:git.example.com/octocat/Hello-World",
-           "hooks_url": "https://api.github.com/repos/octocat/Hello-World/hooks",
-           "svn_url": "https://svn.github.com/octocat/Hello-World",
-           "homepage": "https://github.com",
-           "language": null,
-           "forks_count": 9,
-           "stargazers_count": 80,
-           "watchers_count": 80,
-           "size": 108,
-           "default_branch": "master",
-           "open_issues_count": 0,
-           "is_template": true,
-           "topics": [
-             "octocat",
-             "atom",
-             "electron",
-             "api"
-           ],
-           "has_issues": true,
-           "has_projects": true,
-           "has_wiki": true,
-           "has_pages": false,
-           "has_downloads": true,
-           "archived": false,
-           "disabled": false,
-           "visibility": "public",
-           "pushed_at": "2011-01-26T19:06:43Z",
-           "created_at": "2011-01-26T19:01:12Z",
-           "updated_at": "2011-01-26T19:14:43Z",
-           "permissions": {
-             "admin": false,
-             "push": false,
-             "pull": true
-           },
-           "template_repository": "octocat/template",
-           "temp_clone_token": "ABTLWHOULUVAXGTRYU7OC2876QJ2O",
-           "delete_branch_on_merge": true,
-           "subscribers_count": 42,
-           "network_count": 0,
-           "license": {
-             "key": "mit",
-             "name": "MIT License",
-             "spdx_id": "MIT",
-             "url": "https://api.github.com/licenses/mit",
-             "node_id": "MDc6TGljZW5zZW1pdA=="
-           }
-         }
-        """
diff --git a/crawlers/lib/platforms/github/github_v4.py b/crawlers/lib/platforms/github/github_v4.py
index 47a349e..34d9bda 100644
--- a/crawlers/lib/platforms/github/github_v4.py
+++ b/crawlers/lib/platforms/github/github_v4.py
@@ -7,9 +7,11 @@
 import logging
 import time
 import base64
-from typing import List, Tuple
+from typing import List, Tuple, Union
 from iso8601 import iso8601
 from requests import Response
+from requests.exceptions import Timeout, TooManyRedirects, ConnectionError
+from urllib3.exceptions import MaxRetryError
 
 from crawlers.lib.platforms.i_crawler import ICrawler
 from crawlers.constants import (
@@ -195,10 +197,9 @@ def send_query() -> Response:
                 failed_count = 0
                 while response.status_code == 403 and failed_count < GITHUB_ABUSE_RETRY_MAX:
                     # we sometimes run in to some "hidden" abuse detection on multiple crawlers
-                    # it tells use to wait a few minutes, but a few seconds is enough to be allowed again
+                    # it may tell us to wait a few minutes, but a few seconds is enough to be allowed again
                     # thus, we repeatedly try again to avoid having holes in our data (skipped block chunks)
-                    # TODO don't see a way to avoid triggering this right now
-                    # TODO it triggers even though we have plenty of ratelimit to spare
+                    # however, there might be other reasons and other severities - hence we limit the retries
                     failed_count += 1
                     logger.warning(f"status 403 - retry block chunk in {GITHUB_API_ABUSE_SLEEP}s"
                                    f"- probably triggered abuse flag? json:\n{response.json()}")
@@ -235,6 +236,10 @@ def send_query() -> Response:
                     yield False, [], state
                 self.handle_ratelimit(response)
 
+            except (MaxRetryError, ConnectionError, Timeout, TooManyRedirects) as e:
+                logger.exception(f"{self} - crawler cannot reach hoster")
+                # we re-raise these, as we want to avoid returning empty results to the indexer
+                raise e
             except Exception as e:
                 logger.exception(f"(skipping block chunk) github crawler crashed")
                 yield False, [], state
diff --git a/crawlers/lib/platforms/github/github_v4_legacy.py b/crawlers/lib/platforms/github/github_v4_legacy.py
deleted file mode 100644
index af29517..0000000
--- a/crawlers/lib/platforms/github/github_v4_legacy.py
+++ /dev/null
@@ -1,120 +0,0 @@
-"""
-Legacy note:
-
-There is a limit at 1000 results for the search api (both rest and graphql)
-https://stackoverflow.com/questions/48371313/github-api-pagination-limit
-https://developer.github.com/v3/search/#about-the-search-api
-
-This version is not usable for us as we cannot get around this limit!
-"""
-import pathlib
-import logging
-import time
-from typing import List, Tuple
-from iso8601 import iso8601
-from urllib.parse import urljoin
-
-from crawlers.lib.platforms.i_crawler import ICrawler
-from crawlers.constants import DEFAULT_REQUEST_TIMEOUT
-
-logger = logging.getLogger(__name__)
-
-
-def get_query():
-    current_folder_path = pathlib.Path(__file__).parent.absolute()
-    with open(current_folder_path.joinpath('query_repos_search.graphql')) as f:
-        query = f.read()
-    return query
-
-
-query = get_query()
-
-
-class GitHubV4Crawler(ICrawler):
-    """
-    """
-    type: str = 'github_v4_legacy'
-
-    def __init__(self, base_url, state=None, api_key=None, **kwargs):
-        super().__init__(
-            base_url=base_url,
-            path='graphql',
-            state=state,
-            api_key=api_key,
-            **kwargs
-        )
-        if api_key:
-            self.requests.headers.update(
-                {"Authorization": f"Bearer {api_key}"})
-
-    def handle_ratelimit(self, response):
-        """
-        {
-          "data": {
-            "rateLimit": {
-              "cost": 1,
-              "remaining": 4984,
-              "resetAt": "2020-11-29T14:26:15Z"
-            },
-        """
-        rate_limit = response.json().get('data').get('rateLimit')
-        ratelimit_remaining = rate_limit['remaining']
-
-        reset_at = iso8601.parse_date(rate_limit['resetAt'])
-        ratelimit_reset_timestamp = reset_at.timestamp()
-
-        reset_in = ratelimit_reset_timestamp - time.time()
-
-        logger.info(
-            f'{self} {ratelimit_remaining} requests remaining, reset in {reset_in}s')
-        if ratelimit_remaining < 1:
-            logger.warning(
-                f'{self} rate limiting: {ratelimit_remaining} requests remaining, sleeping {reset_in}s')
-            time.sleep(reset_in)
-
-    def get_variables(self, cursor):
-        # todo: there is no way to order by created_at?
-        # -> https://github.community/t/graphql-sorting-search-results/14088/2
-        variables = {
-            #"queryString": "is:public archived:false created:2020-11-28T13:00:00Z..2020-11-28T14:00:00Z"
-
-            "queryString": "is:public",
-            "cursor": cursor,
-        }
-        return variables
-
-    def crawl(self, state: dict = None) -> Tuple[bool, List[dict], dict]:
-        """ :return: success, repos, state """
-        cursor = None
-        if state:
-            cursor = state.get('cursor', None)
-
-        hasNextPage = True
-        while hasNextPage:
-            variables = self.get_variables(cursor)
-            response = self.requests.post(
-                urljoin(self.base_url, self.path),
-                json=dict(query=query, variables=variables),
-                timeout=DEFAULT_REQUEST_TIMEOUT
-            )
-            try:
-                data = response.json()
-                edges = data['data']['search']['edges']
-
-                page_info = data['search']['pageInfo']
-                cursor = page_info['endCursor']
-                hasNextPage = page_info['hasNextPage']
-
-                repos = [result['node'] for result in edges]
-
-                print(len(repos))
-                print(hasNextPage)
-
-                state = dict(cursor=cursor)
-                yield True, repos, state
-
-                self.handle_ratelimit(response)
-            except Exception as e:
-                logger.error(f'failed. response was: {response.json()}')
-                raise e
-            time.sleep(.01)
diff --git a/crawlers/lib/platforms/github/query_repos_search.graphql b/crawlers/lib/platforms/github/query_repos_search.graphql
deleted file mode 100644
index e5ab97f..0000000
--- a/crawlers/lib/platforms/github/query_repos_search.graphql
+++ /dev/null
@@ -1,37 +0,0 @@
-query listRepos($queryString: String!, $cursor: String) {
-  rateLimit {
-    cost
-    remaining
-    resetAt
-  }
-  search(query: $queryString, type: REPOSITORY, first: 100, after: $cursor) {
-    repositoryCount
-    pageInfo {
-      startCursor
-      endCursor
-      hasNextPage
-    }
-    edges {
-      node {
-        ... on Repository {
-          id
-          name
-          createdAt
-          updatedAt
-          pushedAt
-          description
-          isArchived
-          isPrivate
-          url
-          owner {
-            login
-            id
-            __typename
-            url
-          }
-        }
-      }
-    }
-  }
-}
-
diff --git a/crawlers/lib/platforms/gitlab.py b/crawlers/lib/platforms/gitlab.py
index 7318e73..a99ae62 100644
--- a/crawlers/lib/platforms/gitlab.py
+++ b/crawlers/lib/platforms/gitlab.py
@@ -2,11 +2,15 @@
 import time
 from typing import List, Tuple
 
+from requests import ConnectionError, Timeout, TooManyRedirects
+from urllib3.exceptions import MaxRetryError
+
 from crawlers.constants import GITLAB_PER_PAGE_MAX, DEFAULT_REQUEST_TIMEOUT
 from crawlers.lib.platforms.i_crawler import ICrawler
 
 logger = logging.getLogger(__name__)
 
+
 class GitLabCrawler(ICrawler):
     type: str = 'gitlab'
 
@@ -29,7 +33,7 @@ def set_state(cls, state: dict = None) -> dict:
         state = super().set_state(state)
         return state
 
-    def handle_ratelimit(self, response = None):
+    def handle_ratelimit(self, response=None):
         if response:
             remaining = int(response.headers.get("RateLimit-Remaining", -1))
             reset_ts = int(response.headers.get("RateLimit-Reset", -1))
@@ -62,6 +66,10 @@ def crawl(self, state: dict = None) -> Tuple[bool, List[dict], dict]:
                     logger.warning(response.headers.__dict__)
                     return False, [], state  # nr.1 - we skip rest of this block, hope we get it next time
                 repos = response.json()
+            except (MaxRetryError, ConnectionError, Timeout, TooManyRedirects) as e:
+                logger.exception(f"{self} - crawler cannot reach hoster")
+                # we re-raise these, as we want to avoid returning empty results to the indexer
+                raise e
             except Exception as e:
                 logger.exception(f"(skipping block chunk) gitlab crawler crashed")
                 return False, [], state  # nr.2 - we skip rest of this block, hope we get it next time
diff --git a/crawlers/lib/platforms/i_crawler.py b/crawlers/lib/platforms/i_crawler.py
index 87a91f4..794dd81 100644
--- a/crawlers/lib/platforms/i_crawler.py
+++ b/crawlers/lib/platforms/i_crawler.py
@@ -4,7 +4,7 @@
 import requests
 import time
 from urllib.parse import urljoin
-from typing import List, Tuple
+from typing import List, Tuple, Union
 from requests.adapters import HTTPAdapter
 from requests.packages.urllib3.util.retry import Retry
 
@@ -42,7 +42,7 @@ def handle_ratelimit(self, response=None):
         time.sleep(CRAWLER_DEFAULT_THROTTLE)
 
     def crawl(self, state: dict = None) -> Tuple[bool, List[dict], dict]:
-        """ :return: success, repos, state """
+        """ :return: success, repos, state - connection failures are raised, not returned """
         raise NotImplementedError
 
     @staticmethod