Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions node-src/git/git.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import {
findFilesFromRepositoryRoot,
getBranch,
getChangedFiles,
getChangedFilesWithStatus,
getCommit,
getCommittedFileCount,
getNumberOfCommitters,
Expand Down Expand Up @@ -203,6 +204,63 @@ describe('getChangedFiles', () => {
});
});

describe('getChangedFilesWithStatus', () => {
it('parses single-path rows (added, modified, deleted) into status objects', async () => {
execGitCommand.mockResolvedValue(
['A\tpackage.json', 'M\tsrc/index.ts', 'D\told.ts'].join('\n')
);

const result = await getChangedFilesWithStatus(ctx, 'abc123', 'HEAD');

expect(result).toEqual([
{ status: 'added', path: 'package.json' },
{ status: 'modified', path: 'src/index.ts' },
{ status: 'deleted', path: 'old.ts' },
]);
expect(execGitCommand).toHaveBeenCalledWith(
ctx,
'git diff --name-status --find-renames=20% abc123 HEAD'
);
});

it('parses a rename row into a renamed status carrying both paths', async () => {
execGitCommand.mockResolvedValue('R097\tlibs/old/package.json\tlibs/app/package.json\n');

const result = await getChangedFilesWithStatus(ctx, 'abc123', 'HEAD');

expect(result).toEqual([
{ status: 'renamed', fromPath: 'libs/old/package.json', path: 'libs/app/package.json' },
]);
});

it('maps an unrecognized status code to unknown', async () => {
execGitCommand.mockResolvedValue('X\tweird.txt\n');

const result = await getChangedFilesWithStatus(ctx, 'abc123', 'HEAD');

expect(result).toEqual([{ status: 'unknown', path: 'weird.txt' }]);
});

it('returns an empty array when there are no changes', async () => {
execGitCommand.mockResolvedValue('');

const result = await getChangedFilesWithStatus(ctx, 'abc123', 'HEAD');

expect(result).toEqual([]);
});

it('appends a quoted pathspec when one is provided', async () => {
execGitCommand.mockResolvedValue('');

await getChangedFilesWithStatus(ctx, 'abc123', 'HEAD', ':(glob,top)**/pnpm-lock.yaml');

expect(execGitCommand).toHaveBeenCalledWith(
ctx,
'git diff --name-status --find-renames=20% abc123 HEAD -- ":(glob,top)**/pnpm-lock.yaml"'
);
});
});

describe('findFilesFromRepositoryRoot', () => {
it('finds files relative to the repository root', async () => {
const filesFound = ['package.json', 'another/package/package.json'];
Expand Down
67 changes: 67 additions & 0 deletions node-src/git/git.ts
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,73 @@ export async function getChangedFiles(deps: GitDeps, baseCommit: string, headCom
}
}

/** The kind of change Git reported for a file in a `--name-status` diff. */
export type ChangedFileStatus =
| 'added'
| 'deleted'
| 'modified'
| 'renamed'
| 'typeChanged'
| 'unknown';

/** A single row of a `--name-status` diff, parsed into a usable shape. */
export interface ChangedFileWithStatus {
/** The kind of change Git reported. */
status: ChangedFileStatus;
/** The file's current path (the new path for renames). */
path: string;
/** The original path for renames; otherwise undefined. */
fromPath?: string;
}

// Maps the leading character of a `--name-status` row to a readable status. Renames carry a
// similarity score suffix (e.g. `R097`), so we only key off the first character.
const NAME_STATUS_CODES: Record<string, ChangedFileStatus> = {
A: 'added',
D: 'deleted',
M: 'modified',
R: 'renamed',
T: 'typeChanged',
};

/**
* Get the name-status diff of a single commit or between two, with rename detection, parsed into
* one object per changed file.
*
* @param deps Function dependencies.
* @param baseCommit The base commit to diff from.
* @param headCommit The head commit to diff to (empty includes uncommitted changes).
* @param pathspec Optional pathspec limiting the diff (e.g. a `:(glob)` magic pathspec).
* See: https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-pathspec for details on
* magic pathspecs.
*
* @returns One {@link ChangedFileWithStatus} per changed file.
*/
export async function getChangedFilesWithStatus(
deps: GitDeps,
baseCommit: string,
headCommit = '',
pathspec = ''
): Promise<ChangedFileWithStatus[]> {
const pathspecArgument = pathspec ? ` -- "${pathspec}"` : '';
const output = await execGitCommand(
deps,
`git diff --name-status --find-renames=20% ${baseCommit} ${headCommit}${pathspecArgument}`

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

getChangedFiles uses --no-relative in its git diff command. I think we need that here or running from a subdirectory would have bizarre behavior. Probably use --no-pager too, for consistency.

);

return output
.split(newline)
.filter(Boolean)
.map((line) => {
const [code, ...paths] = line.split('\t');
const status = NAME_STATUS_CODES[code[0]] ?? 'unknown';
// Rename rows carry the old path first, then the new path.
return status === 'renamed'
? { status, fromPath: paths[0], path: paths.at(-1) as string }
: { status, path: paths[0] };
});
}

/**
* Returns a boolean indicating whether the workspace is up-to-date (neither ahead nor behind) with
* the remote. Returns true on error, assuming the workspace is up-to-date.
Expand Down
17 changes: 15 additions & 2 deletions node-src/lib/turbosnap/captureBailException.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,28 @@ import * as Sentry from '@sentry/node';
* @param options Bail context.
* @param options.bailSubreason The classified subreason, or `undefined` to use default Sentry grouping.
* @param options.bailPath A stable identifier for the bail site (used as the `bail_path` tag).
* @param options.additionalTags Extra tags to attach, e.g. a refined root-cause classification.
*
* @returns The Sentry event ID for the captured exception.
*/
export function captureBailException(
error: unknown,
{ bailSubreason, bailPath }: { bailSubreason: string | undefined; bailPath: string }
{
bailSubreason,
bailPath,
additionalTags,
}: {
bailSubreason: string | undefined;
bailPath: string;
additionalTags?: Record<string, string>;
}
): string {
return Sentry.captureException(error, {
tags: { bail_path: bailPath, ...(bailSubreason && { bail_detail: bailSubreason }) },
tags: {
bail_path: bailPath,
...(bailSubreason && { bail_detail: bailSubreason }),
...additionalTags,
},
...(bailSubreason && { fingerprint: [bailSubreason] }),
});
}
102 changes: 102 additions & 0 deletions node-src/lib/turbosnap/classifyBailRootCause.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import { describe, expect, it, vi } from 'vitest';

import { getChangedFilesWithStatus as getChangedFilesWithStatusDep } from '../../git/git';
import TestLogger from '../testLogger';
import { classifyTagsFromError } from './classifyBailRootCause';
import { BaselineCheckoutFailedError } from './errors';

vi.mock('../../git/git');
const getChangedFilesWithStatus = vi.mocked(getChangedFilesWithStatusDep);

const deps = { log: new TestLogger(), options: {} } as any;

describe('classifyTagsFromError', () => {
it('returns undefined for an error that is not classified yet', async () => {
const result = await classifyTagsFromError(
deps,
new Error("some random error which won't have special handling")
);

expect(result).toBeUndefined();
expect(getChangedFilesWithStatus).not.toHaveBeenCalled();
});

describe('BaselineCheckoutFailedError', () => {
it('classifies a file rename as "baselineManifestMoved"', async () => {
getChangedFilesWithStatus.mockResolvedValue([
{ status: 'renamed', fromPath: 'libs/old/package.json', path: 'libs/app/package.json' },
]);

const result = await classifyTagsFromError(
deps,
new BaselineCheckoutFailedError('abc123:libs/app/package.json')
);

expect(result).toEqual({ baseline_failure_kind: 'baselineManifestMoved' });
});

it('classifies a file add as "baselineManifestAdded"', async () => {
getChangedFilesWithStatus.mockResolvedValue([
{ status: 'added', path: 'libs/app/package.json' },
]);

const result = await classifyTagsFromError(
deps,
new BaselineCheckoutFailedError('abc123:libs/app/package.json')
);

expect(result).toEqual({ baseline_failure_kind: 'baselineManifestAdded' });
});

it('classifies a file rename as "baselineManifestMoved" over "baselineManifestAdded" when the diff has both rows', async () => {
getChangedFilesWithStatus.mockResolvedValue([
{ status: 'added', path: 'libs/app/package.json' },
{ status: 'renamed', fromPath: 'libs/old/package.json', path: 'libs/app/package.json' },
]);

const result = await classifyTagsFromError(
deps,
new BaselineCheckoutFailedError('abc123:libs/app/package.json')
);

expect(result).toEqual({ baseline_failure_kind: 'baselineManifestMoved' });
});

it('returns "unknownBaselineCheckoutFailure" when we cannot classify the change', async () => {
getChangedFilesWithStatus.mockResolvedValue([
{ status: 'modified', path: 'some/other/package.json' },
]);

const result = await classifyTagsFromError(
deps,
new BaselineCheckoutFailedError('abc123:libs/app/package.json')
);

expect(result).toEqual({ baseline_failure_kind: 'unknownBaselineCheckoutFailure' });
});

it('returns "unknownBaselineCheckoutFailure" when a git command throws unexpectedly', async () => {
getChangedFilesWithStatus.mockRejectedValue(new Error('git diff blew up'));

const result = await classifyTagsFromError(
deps,
new BaselineCheckoutFailedError('abc123:libs/app/package.json')
);

expect(result).toEqual({ baseline_failure_kind: 'unknownBaselineCheckoutFailure' });
});

it('splits the pathspec on the first colon so a filename containing colons still matches', async () => {
getChangedFilesWithStatus.mockResolvedValue([
{ status: 'renamed', fromPath: 'libs/old/a:b.json', path: 'libs/app/a:b.json' },
]);

const result = await classifyTagsFromError(
deps,
new BaselineCheckoutFailedError('abc123:libs/app/a:b.json')
);

expect(result).toEqual({ baseline_failure_kind: 'baselineManifestMoved' });
});
});
});
119 changes: 119 additions & 0 deletions node-src/lib/turbosnap/classifyBailRootCause.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
import * as Sentry from '@sentry/node';
import path from 'path';

import { GitDeps } from '../../git/execGit';
import { ChangedFileWithStatus, getChangedFilesWithStatus } from '../../git/git';
import { BaselineCheckoutFailedError } from './errors';

/**
* Classify an error captured at the TurboSnap bail site into additional Sentry tags describing its
* root cause.
*
* @param deps Function dependencies.
* @param error The error captured at the bail site.
*
* @returns A map of Sentry tags, or `undefined` when the error type is not recognized.
*/
export async function classifyTagsFromError(
deps: GitDeps,
error: unknown
): Promise<Record<string, string> | undefined> {
if (error instanceof BaselineCheckoutFailedError) {
return classifyBaselineCheckoutFailureTags(deps, error);
}

return undefined;
}

/**
* Refined root-cause kinds for a BaselineCheckoutFailedError.
*
* These exist for Sentry ONLY and are not sent to the backend.
*/
export type BaselineCheckoutFailureKind =
| 'baselineManifestMoved'
| 'baselineManifestAdded'
| 'unknownBaselineCheckoutFailure';

/**
* Check git to determine the root cause of a BaselineCheckoutFailedError.
*
* @param deps Function dependencies.
* @param error The error captured at the bail site.
*
* @returns The refined failure kind.
*/
async function classifyBaselineCheckoutFailureTags(
deps: GitDeps,
error: BaselineCheckoutFailedError
): Promise<Record<'baseline_failure_kind', BaselineCheckoutFailureKind>> {
const { reference, fileName } = parsePathspec(error.pathspec);

try {
// Determine how the file changed between the baseline and HEAD (such as if it was added, moved,
// renamed, etc).
const basename = path.basename(fileName);
const changes = await getChangedFilesWithStatus(

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this will fail if we have a shallow clone because the git diff call inside getChangedFilesWithStatus will fail. We already have a commitExists function. Should we use that at the start of classifyBaselineCheckoutFailureTags to see if the commit doesn't exist, and if it doesn't, throw a new bail reason?

I know there's been talk around shallow clones lately, so they might be handled elsewhere, making this a moot point. But I can't recall for sure, so figured I'd raise it.

deps,
reference,
'HEAD',
// See https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-pathspec for details
//
// `glob` allows us to pass in a glob pattern
// `top` anchors the response to the root of the repository (so we can catch cross-directory changes)
`:(glob,top)**/${basename}`
);
return { baseline_failure_kind: classifyManifestChange(changes, fileName) };
} catch (err) {
// We capture the exception higher in the stack so we can simply attach the error context here.
Sentry.addBreadcrumb({
level: 'error',
category: 'classifyBaselineCheckoutFailure',
message: 'Error classifying baseline checkout failure',
data: {
error: err,

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any concern about serialization of the error here? Would String(err) or message + stack be better?

},
});

return { baseline_failure_kind: 'unknownBaselineCheckoutFailure' };
}
}

/**
* Determine if the file was moved or added or something else.
*
* @param changes The parsed changed files from the diff.
* @param fileName The failed file path to match against.
*
* @returns The matching manifest failure kind.
*/
function classifyManifestChange(
changes: ChangedFileWithStatus[],
fileName: string
): BaselineCheckoutFailureKind {
// Check renames first so a moved manifest is never mislabeled as added.
if (changes.some((change) => change.status === 'renamed' && change.path === fileName)) {
return 'baselineManifestMoved';
}
if (changes.some((change) => change.status === 'added' && change.path === fileName)) {
return 'baselineManifestAdded';
}
return 'unknownBaselineCheckoutFailure';
}

/**
* Split a pathspec of the form `<reference>:<fileName>` into its parts.
*
* @param pathspec The pathspec carried by a `BaselineCheckoutFailedError`.
*
* @returns The baseline reference (SHA) and the file path.
*/
function parsePathspec(pathspec: string): { reference: string; fileName: string } {
// Find the first colon which separates the reference from the file path. We can't just split on
// colon and return the first and second parts since files can contain colons.
const separatorIndex = pathspec.indexOf(':');
return {
reference: pathspec.slice(0, separatorIndex),
fileName: pathspec.slice(separatorIndex + 1),
};
}
Loading
Loading