Skip to content
64 changes: 52 additions & 12 deletions git_hg_sync/repo_synchronizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,10 @@ def sync(
logger.info(f"Syncing {operations} to {destination_url} ...")
try:
repo = self.get_clone_repo()
except PermissionError as exc:
except PermissionError as e:
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have a `from git import Repo, exc` above, which I'd rather not shadow.

raise PermissionError(
f"Failed to create local clone from {destination_url}"
) from exc
) from e

destination_remote = f"hg::{destination_url}"

Expand All @@ -83,21 +83,57 @@ def sync(
op for op in operations if isinstance(op, SyncBranchOperation)
]
for branch_operation in branch_ops:
try:
push_args.append(
f"{branch_operation.source_commit}:refs/heads/branches/{branch_operation.destination_branch}/tip"
)
except Exception as e:
raise RepoSyncError(branch_operation, e) from e
# Here, we use `<BRANCH>/<SHA1>` rather than `tip` to work around inherent
# limitations in the mapping between Git and Hg references.
#
# We could use `<BRANCH>/tip`, which would work in most cases. However, when
# reprocessing old messages (as is sometimes necessary to recover from
# issues), we may find ourselves processing a push for a commit which is now
# an ancestor of the current `tip`. In this situation, git would refuse to
# push, claiming it's not a fast-forward.
#
# To handle this case, we push each commit to a separate reference matching
# their own SHA1. Those references only exist on the git side, so their name
# doesn't impact what gets created on the Mercurial side. [The name of the
# branch matters with `cinnabar.experiments=branch`, but not the name of the
# final reference.]
#
# Mercurial maintains `tip` automatically to be the latest new commit (and
# we only allow single heads on pushable repositories, which guarantees it's
# the furthest from the root).
#
destination_ref = f"refs/heads/branches/{branch_operation.destination_branch}/${branch_operation.source_commit}"
# We only push the commit if it's not already present, because Mercurial
# refuses pushes which don't change anything.
if self._commit_has_mercurial_metadata(
repo, branch_operation.source_commit
):
# Resolving the HG SHA is not sufficient, because we may know it from
# another repository, so we need to make sure it's not already present here.
hg_sha = self._git2hg(repo, branch_operation.source_commit)
if hg_sha not in repo.git.execute(
[
"git",
"ls-remote",
destination_remote,
f"refs/heads/branches/{branch_operation.destination_branch}/{hg_sha}",
],
stdout_as_string=True,
):
logger.info(
f"Commit {branch_operation.source_commit} is already present on {destination_remote}, skipping ..."
)
continue
push_args.append(f"{branch_operation.source_commit}:{destination_ref}")

os.environ[REQUEST_USER_ENV_VAR] = request_user
logger.debug(f"{REQUEST_USER_ENV_VAR} set to {request_user}")

# Add mercurial metadata to new commits from synced branches
# Some of these commits could be tagged in the same synchronization and
# tagging can only be done on a commit that already has mercurial
# metadata
if branch_ops:
# metadata.
if len(push_args) > 1:
retry(
"adding mercurial metadata to git commits",
lambda: repo.git.execute(
Expand All @@ -117,7 +153,7 @@ def sync(
for tag_operation in tag_ops:
tag_branch = tag_operation.tags_destination_branch
remote_tag_ref = f"refs/heads/branches/{tag_branch}/tip"
if repo.git.execute(
if remote_tag_ref in repo.git.execute(
["git", "ls-remote", destination_remote, remote_tag_ref],
stdout_as_string=True,
):
Expand Down Expand Up @@ -203,7 +239,11 @@ def _ensure_cinnabar_metadata(self, repo: Repo, destination_remote: str) -> None

# This is needed only on first initialisation of the repository, as subsequent
# pushes update the metadata locally.

#
# WARNING: While we make a direct reference to `refs/cinnabar` here, it MUST NOT
# be used explicitly in subsequent git operations. This set of references gets
Comment thread
shtrom marked this conversation as resolved.
Outdated
# updated on every `fetch`, and is therefore not stable enough to be trusted.
#
# Repo.git_dir is a PathLike union which is either a str, or a smarter thing. We
# assume the less smart one.
cinnabar_metadata_dir = Path(repo.git_dir) / "refs/cinnabar/metadata"
Expand Down
41 changes: 41 additions & 0 deletions tests/test_repo_synchronizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,47 @@ def test_sync_process_(
assert hg_rev(hg_destination, branch) in tag_log


def test_sync_process_ancestor(
    git_source: Repo,
    hg_destination: Path,
    tmp_path: Path,
) -> None:
    """Sync a branch tip, then sync one of its ancestors to the same branch.

    Two commits are created on the git side. The later one is synced first,
    then the earlier one is synced to the same destination branch. The second
    `sync` call must complete without raising, and the content of both files
    must be retrievable from the branch through `hg_cat`.
    """
    branch = "bar"

    repo = Repo(git_source)

    # Create two consecutive commits in the git repository; the first one ends
    # up being the parent of the second.
    bar_path = git_source / "bar.txt"
    bar_path.write_text("BAR CONTENT")
    repo.index.add([bar_path])
    ancestor_sha = repo.index.commit("add bar.txt").hexsha

    baz_path = git_source / "baz.txt"
    baz_path.write_text("BAZ CONTENT")
    repo.index.add([baz_path])
    tip_sha = repo.index.commit("add baz.txt").hexsha

    git_local_repo_path = tmp_path / "clones" / "myrepo"
    syncrepos = RepoSynchronizer(git_local_repo_path, str(git_source))
    request_user = "request_user@example.com"

    # First sync the most recent commit with the mercurial repository.
    # Each operation list gets its own name: annotating the same variable
    # twice in one scope is rejected by type checkers.
    tip_operations: list[SyncBranchOperation | SyncTagOperation] = [
        SyncBranchOperation(source_commit=tip_sha, destination_branch=branch),
    ]
    syncrepos.sync(str(hg_destination), tip_operations, request_user)

    # Then sync the earlier commit to the same branch.
    ancestor_operations: list[SyncBranchOperation | SyncTagOperation] = [
        SyncBranchOperation(source_commit=ancestor_sha, destination_branch=branch),
    ]
    syncrepos.sync(str(hg_destination), ancestor_operations, request_user)

    # Both files must be readable from the destination branch.
    assert "BAR CONTENT" in hg_cat(hg_destination, "bar.txt", branch)
    assert "BAZ CONTENT" in hg_cat(hg_destination, "baz.txt", branch)


def test_sync_process_duplicate_tags(
git_source: Repo,
hg_destination: Path,
Expand Down