diff --git a/src/main/java/com/google/devtools/build/lib/remote/AbstractActionInputPrefetcher.java b/src/main/java/com/google/devtools/build/lib/remote/AbstractActionInputPrefetcher.java index 31be5d9d24ba97..118043297edd02 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/AbstractActionInputPrefetcher.java +++ b/src/main/java/com/google/devtools/build/lib/remote/AbstractActionInputPrefetcher.java @@ -24,11 +24,14 @@ import static com.google.devtools.build.lib.remote.util.Utils.mergeBulkTransfer; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.HashMultimap; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Sets; +import com.google.common.collect.SetMultimap; import com.google.common.flogger.GoogleLogger; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; +import com.google.common.util.concurrent.Striped; import com.google.devtools.build.lib.actions.Action; import com.google.devtools.build.lib.actions.ActionExecutionMetadata; import com.google.devtools.build.lib.actions.ActionInput; @@ -60,10 +63,11 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReadWriteLock; import java.util.function.Supplier; import javax.annotation.Nullable; @@ -206,6 +210,12 @@ void setOutputPermissions(Path dir) throws IOException { private final DirectoryTracker directoryTracker = new DirectoryTracker(); + // TreeArtifact downloads use temporary paths. Coordinate finalization and permission restoration + // by resolved tree root. Weak locks over the full hash space allow unrelated trees to finalize + // concurrently without retaining every lock for the lifetime of the prefetcher. + private final Striped treeArtifactLocks = + Striped.lazyWeakReadWriteLock(Integer.MAX_VALUE); + /** A symlink in the output tree that points to another artifact's absolute path. */ record Symlink(Path linkPath, Path targetPath) { Symlink { @@ -346,21 +356,18 @@ public ListenableFuture prefetchFilesInterruptibly( return immediateVoidFuture(); } - // Collect the set of directories whose output permissions must be set at the end of this call. - // This responsibility cannot lie with the downloading of an individual file, because multiple - // files may be concurrently downloaded into the same directory within a single call to - // prefetchFiles, and two concurrent calls to prefetchFiles may prefetch the same file. In the - // latter case, the second call will have its downloads deduplicated against the first call, but - // it must still synchronize on the output permissions having been set. - Set dirsWithOutputPermissions = Sets.newConcurrentHashSet(); + // Collect directories whose output permissions must be restored at the end of this call, + // grouped by tree root for synchronization. Keeping restoration call-scoped avoids toggling + // shared ancestors between child finalizations. A concurrent call for the same child may join + // the cached download but must still participate in permission restoration before returning. + SetMultimap directoriesByTreeRoot = HashMultimap.create(); // Using plain futures to avoid RxJava overheads. List> transfers = new ArrayList<>(files.size()); try (var s = Profiler.instance().profile("compose prefetches")) { for (var file : files) { transfers.add( - prefetchFile( - action, dirsWithOutputPermissions, metadataSupplier, file, priority, reason)); + prefetchFile(action, directoriesByTreeRoot, metadataSupplier, file, priority, reason)); } } @@ -373,10 +380,19 @@ public ListenableFuture prefetchFilesInterruptibly( mergedTransfer, unused -> { try { - // Set output permissions on tree artifact subdirectories, matching the behavior of - // SkyframeActionExecutor#checkOutputs for artifacts produced by local actions. - for (Path dir : dirsWithOutputPermissions) { - directoryTracker.setOutputPermissions(dir); + // Match the directory output permissions set by SkyframeActionExecutor#checkOutputs. + // Take the root write lock so restoration cannot make an ancestor read-only while any + // call holds a read lock to move a child into the tree. + for (var entry : directoriesByTreeRoot.asMap().entrySet()) { + Lock lock = treeArtifactLocks.get(entry.getKey().asFragment()).writeLock(); + lock.lock(); + try { + for (Path dir : entry.getValue()) { + directoryTracker.setOutputPermissions(dir); + } + } finally { + lock.unlock(); + } } } catch (IOException e) { return immediateFailedFuture(e); @@ -388,7 +404,7 @@ public ListenableFuture prefetchFilesInterruptibly( private ListenableFuture prefetchFile( @Nullable ActionExecutionMetadata action, - Set dirsWithOutputPermissions, + SetMultimap directoriesByTreeRoot, MetadataSupplier metadataSupplier, ActionInput input, Priority priority, @@ -436,7 +452,7 @@ private ListenableFuture prefetchFile( input, inputPath, treeRootPath, - dirsWithOutputPermissions, + directoriesByTreeRoot, input, metadata, priority, @@ -560,7 +576,7 @@ private Completable downloadFileNoCheckRx( ActionInput input, Path path, @Nullable Path treeRoot, - Set dirsWithOutputPermissions, + SetMultimap directoriesByTreeRoot, ActionInput actionInput, FileArtifactValue metadata, Priority priority, @@ -581,24 +597,27 @@ private Completable downloadFileNoCheckRx( return Completable.error(e); } + // Downloads are written to the actual host file system, not any overlays. + Path finalPath = path.forHostFileSystem(); + + ImmutableList.Builder dirsWithOutputPermissions = ImmutableList.builder(); if (treeRoot != null && actionInput instanceof Artifact artifact && artifact.isChildOfDeclaredDirectory()) { - // Arrange for the output permissions to be set on every directory inside the tree artifact. - // This must be done at assembly time to ensure that the permissions are set before the - // prefetchFiles call returns, even when the actual downloads are deduplicated against a - // concurrent call. See finalizeDownload for why we don't do so in other cases. - for (Path dir = path.getParentDirectory(); - dir.startsWith(treeRoot); + Path finalTreeRoot = treeRoot.forHostFileSystem(); + // Record directories child-to-root. Every ancestor must be explicitly reopened because + // ActionOutputDirectoryHelper caches existing directories and may otherwise leave a + // previously restored ancestor read-only. + for (Path dir = finalPath.getParentDirectory(); + dir.startsWith(finalTreeRoot); dir = dir.getParentDirectory()) { - if (!dirsWithOutputPermissions.add(dir)) { - break; - } + dirsWithOutputPermissions.add(dir); } } - - // Downloads should always be written to the "actual" host file system, not any overlays. - Path finalPath = path.forHostFileSystem(); + ImmutableList directories = dirsWithOutputPermissions.build(); + if (!directories.isEmpty()) { + directoriesByTreeRoot.putAll(directories.getLast(), directories); + } Completable download = usingTempPath( @@ -617,10 +636,7 @@ private Completable downloadFileNoCheckRx( .doOnComplete( () -> { finalizeDownload( - metadata, - tempPath.forHostFileSystem(), - finalPath, - dirsWithOutputPermissions); + metadata, tempPath.forHostFileSystem(), finalPath, directories); alreadyDeleted.set(true); })); @@ -637,40 +653,61 @@ private Completable downloadFileNoCheckRx( } private void finalizeDownload( - FileArtifactValue metadata, Path tmpPath, Path finalPath, Set dirsWithOutputPermissions) + FileArtifactValue metadata, + Path tmpPath, + Path finalPath, + ImmutableList dirsWithOutputPermissions) throws IOException { + // Set file output permissions, matching SkyframeActionExecutor#checkOutputs for artifacts + // produced by local actions. The temporary path is outside the output tree, so this can happen + // before entering the TreeArtifact root's critical section. + tmpPath.chmod(outputPermissions.getPermissionsMode()); + Path parentDir = checkNotNull(finalPath.getParentDirectory()); + // A read lock allows concurrent child moves, while preventing a prefetch call from restoring + // the tree's output permissions until every move in progress has completed. + @Nullable Lock treeArtifactLock = + dirsWithOutputPermissions.isEmpty() + ? null + : treeArtifactLocks + .get(dirsWithOutputPermissions.getLast().asFragment()) + .readLock(); + if (treeArtifactLock != null) { + treeArtifactLock.lock(); + } - // Compare as fragments since execRoot may be located on a file system overlaying the host - // file system where the download is written to. - if (finalPath.asFragment().startsWith(execRoot.asFragment())) { - // Ensure the parent directory exists and is writable. We cannot rely on this precondition to - // have been established by the execution of the owning action in a previous invocation, since - // the output tree may have been externally modified in between invocations. - if (dirsWithOutputPermissions.contains(parentDir)) { - // The file belongs to a tree artifact created by an action that declared an output - // directory (as opposed to an action template expansion). The output permissions should be - // set on the parent directory after prefetching. - directoryTracker.setTemporarilyWritable(parentDir); + try { + // Compare as fragments since execRoot may be located on a file system overlaying the host + // file system where the download is written to. + if (finalPath.asFragment().startsWith(execRoot.asFragment())) { + // Ensure the parent directory exists and is writable. We cannot rely on this precondition + // to have been established by the execution of the owning action in a previous invocation, + // since the output tree may have been externally modified in between invocations. + if (!dirsWithOutputPermissions.isEmpty()) { + for (Path dir : dirsWithOutputPermissions.reverse()) { + directoryTracker.setTemporarilyWritable(dir); + } + } else { + // One of the following must apply: + // (1) The file does not belong to a tree artifact. + // (2) The file belongs to a tree artifact created by an action template expansion. + // In case (1), the parent directory is a package or a subdirectory of a package, and + // should remain writable. In case (2), even though we arguably ought to set the output + // permissions on the parent directory to match local execution, we choose not to do it + // and avoid the additional implementation complexity required to detect a race condition + // between concurrent calls touching the same directory. + directoryTracker.setPermanentlyWritable(parentDir); + } } else { - // One of the following must apply: - // (1) The file does not belong to a tree artifact. - // (2) The file belongs to a tree artifact created by an action template expansion. - // In case (1), the parent directory is a package or a subdirectory of a package, and should - // remain writable. In case (2), even though we arguably ought to set the output permissions - // on the parent directory to match local execution, we choose not to do it and avoid the - // additional implementation complexity required to detect a race condition between - // concurrent calls touching the same directory. - directoryTracker.setPermanentlyWritable(parentDir); + parentDir.createDirectoryAndParents(); } - } else { - parentDir.createDirectoryAndParents(); - } - // Set output permissions on files, matching the behavior of SkyframeActionExecutor#checkOutputs - // for artifacts produced by local actions. - tmpPath.chmod(outputPermissions.getPermissionsMode()); - FileSystemUtils.moveFile(tmpPath, finalPath); + FileSystemUtils.moveFile(tmpPath, finalPath); + } finally { + if (treeArtifactLock != null) { + treeArtifactLock.unlock(); + } + } // Set the contents proxy when supported, to make future modification checks cheaper. metadata.setContentsProxy(FileContentsProxy.create(finalPath.stat())); diff --git a/src/test/java/com/google/devtools/build/lib/remote/ActionInputPrefetcherTestBase.java b/src/test/java/com/google/devtools/build/lib/remote/ActionInputPrefetcherTestBase.java index 5834322c1a8e46..5ecccc42b4e142 100644 --- a/src/test/java/com/google/devtools/build/lib/remote/ActionInputPrefetcherTestBase.java +++ b/src/test/java/com/google/devtools/build/lib/remote/ActionInputPrefetcherTestBase.java @@ -76,6 +76,7 @@ import java.util.HashMap; import java.util.Map; import java.util.concurrent.Callable; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import java.util.concurrent.LinkedBlockingQueue; @@ -620,6 +621,46 @@ public void prefetchFiles_treeFiles_minimizeFilesystemOperations() throws Except verify(fs, never()).chmod(subdir, 0555); } + @Test + public void prefetchFiles_treeFiles_sequentialCalls_reopenAncestors() throws Exception { + Map metadata = new HashMap<>(); + Map cas = new HashMap<>(); + Pair> treeAndChildren = + createRemoteTreeArtifact( + "dir", + /* localContentMap= */ ImmutableMap.of(), + /* remoteContentMap= */ ImmutableMap.of( + "subdir1/file1", "content1", "subdir2/file2", "content2"), + metadata, + cas); + SpecialArtifact tree = treeAndChildren.getFirst(); + ImmutableList children = treeAndChildren.getSecond(); + AbstractActionInputPrefetcher prefetcher = createPrefetcher(cas); + + // The first call leaves the directory helper's cache populated but the tree read-only. The + // second call must explicitly reopen the cached root before creating a sibling directory, then + // restore its output permissions. + wait( + prefetcher.prefetchFilesInterruptibly( + action, + ImmutableList.of(children.get(0)), + metadata::get, + Priority.MEDIUM, + Reason.INPUTS)); + assertTreeReadableNonWritableAndExecutable(tree.getPath()); + + wait( + prefetcher.prefetchFilesInterruptibly( + action, + ImmutableList.of(children.get(1)), + metadata::get, + Priority.MEDIUM, + Reason.INPUTS)); + assertThat(FileSystemUtils.readContent(children.get(1).getPath(), UTF_8)) + .isEqualTo("content2"); + assertTreeReadableNonWritableAndExecutable(tree.getPath()); + } + @Test public void prefetchFiles_treeFiles_multipleThreads_waitForPermissionsToBeSet() throws Exception { Map metadata = new HashMap<>(); @@ -666,6 +707,168 @@ public void prefetchFiles_treeFiles_multipleThreads_waitForPermissionsToBeSet() f2.get(); } + @Test + public void prefetchFiles_treeFiles_concurrentFinalizations_doNotRacePermissions() + throws Exception { + Map metadata = new HashMap<>(); + Map cas = new HashMap<>(); + Pair> treeAndChildren = + createRemoteTreeArtifact( + "dir", + /* localContentMap= */ ImmutableMap.of(), + /* remoteContentMap= */ ImmutableMap.of( + "subdir1/file1", "content1", "subdir2/file2", "content2"), + metadata, + cas); + SpecialArtifact tree = treeAndChildren.getFirst(); + ImmutableList children = treeAndChildren.getSecond(); + + SettableFuture firstDownload = SettableFuture.create(); + SettableFuture secondDownload = SettableFuture.create(); + LinkedBlockingQueue> downloads = new LinkedBlockingQueue<>(); + downloads.add(firstDownload); + downloads.add(secondDownload); + AbstractActionInputPrefetcher prefetcher = spy(createPrefetcher(cas)); + mockDownload(prefetcher, cas, downloads::remove); + + ListenableFuture firstPrefetch = + prefetcher.prefetchFilesInterruptibly( + action, + ImmutableList.of(children.get(0)), + metadata::get, + Priority.MEDIUM, + Reason.INPUTS); + ListenableFuture secondPrefetch = + prefetcher.prefetchFilesInterruptibly( + action, + ImmutableList.of(children.get(1)), + metadata::get, + Priority.MEDIUM, + Reason.INPUTS); + + Semaphore secondFinalizationStarted = new Semaphore(0); + Semaphore secondFinalizationMayContinue = new Semaphore(0); + AtomicBoolean blockNextTempFileMove = new AtomicBoolean(true); + // Pause the second finalization after it has made the tree writable, but before it moves its + // temporary file into the tree. + doAnswer( + invocation -> { + PathFragment source = invocation.getArgument(0); + PathFragment target = invocation.getArgument(1); + if (source.startsWith(tempPathGenerator.getTempDir().asFragment()) + && target.startsWith(tree.getPath().asFragment()) + && blockNextTempFileMove.getAndSet(false)) { + secondFinalizationStarted.release(); + secondFinalizationMayContinue.acquireUninterruptibly(); + } + return invocation.callRealMethod(); + }) + .when(fs) + .renameTo(any(), any()); + Thread secondCompletion = new Thread(() -> secondDownload.set(null)); + // Completing the first download runs its direct-executor finalization on this thread. Its move + // may proceed concurrently with the paused move, but its permission restoration must wait for + // the paused finalization to release its read lock. + Thread firstCompletion = new Thread(() -> firstDownload.set(null)); + secondCompletion.start(); + try { + assertThat(secondFinalizationStarted.tryAcquire(10, SECONDS)).isTrue(); + firstCompletion.start(); + + long deadline = System.nanoTime() + SECONDS.toNanos(10); + while (!children.get(0).getPath().exists() && System.nanoTime() < deadline) { + Thread.yield(); + } + assertThat(children.get(0).getPath().exists()).isTrue(); + assertThat(firstPrefetch.isDone()).isFalse(); + assertThat(tree.getPath().isWritable()).isTrue(); + } finally { + secondFinalizationMayContinue.release(); + firstCompletion.join(10_000); + secondCompletion.join(10_000); + } + + wait(firstPrefetch); + wait(secondPrefetch); + assertTreeReadableNonWritableAndExecutable(tree.getPath()); + } + + @Test + public void prefetchFiles_treeFiles_manyConcurrentOverlappingCalls() throws Exception { + int treeCount = 4; + int childrenPerTree = 16; + Map metadata = new HashMap<>(); + Map cas = new HashMap<>(); + ImmutableList.Builder trees = ImmutableList.builder(); + ImmutableList.Builder> childrenByTree = + ImmutableList.builder(); + for (int treeIndex = 0; treeIndex < treeCount; treeIndex++) { + ImmutableMap.Builder remoteContent = ImmutableMap.builder(); + for (int childIndex = 0; childIndex < childrenPerTree; childIndex++) { + remoteContent.put("subdir-" + childIndex + "/file", "content"); + } + Pair> treeAndChildren = + createRemoteTreeArtifact( + "dir-" + treeIndex, + /* localContentMap= */ ImmutableMap.of(), + remoteContent.buildOrThrow(), + metadata, + cas); + trees.add(treeAndChildren.getFirst()); + childrenByTree.add(treeAndChildren.getSecond()); + } + ImmutableList treeList = trees.build(); + ImmutableList> childLists = childrenByTree.build(); + AbstractActionInputPrefetcher prefetcher = createPrefetcher(cas); + + int concurrency = 32; + int requestCount = 128; + CountDownLatch requestsReady = new CountDownLatch(concurrency); + CountDownLatch startRequests = new CountDownLatch(1); + ThreadPoolExecutor pool = + new ThreadPoolExecutor( + concurrency, concurrency, 0, SECONDS, new LinkedBlockingQueue<>()); + ImmutableList.Builder> requests = ImmutableList.builder(); + try { + for (int requestIndex = 0; requestIndex < requestCount; requestIndex++) { + int treeIndex = requestIndex % treeCount; + int childIndex = (requestIndex / treeCount) % childrenPerTree; + ImmutableList children = childLists.get(treeIndex); + ImmutableList inputs = + ImmutableList.of( + children.get(childIndex), children.get((childIndex + 1) % childrenPerTree)); + requests.add( + pool.submit( + () -> { + requestsReady.countDown(); + startRequests.await(); + wait( + prefetcher.prefetchFilesInterruptibly( + action, inputs, metadata::get, Priority.MEDIUM, Reason.INPUTS)); + return null; + })); + } + + boolean allRequestsReady = requestsReady.await(10, SECONDS); + startRequests.countDown(); + assertThat(allRequestsReady).isTrue(); + for (Future request : requests.build()) { + request.get(30, SECONDS); + } + } finally { + startRequests.countDown(); + pool.shutdownNow(); + pool.awaitTermination(10, SECONDS); + } + + for (int treeIndex = 0; treeIndex < treeCount; treeIndex++) { + for (TreeFileArtifact child : childLists.get(treeIndex)) { + assertThat(FileSystemUtils.readContent(child.getPath(), UTF_8)).isEqualTo("content"); + } + assertTreeReadableNonWritableAndExecutable(treeList.get(treeIndex).getPath()); + } + } + @Test public void prefetchFiles_multipleThreads_downloadIsCancelled() throws Exception { // Test shared downloads are cancelled if all threads/callers are interrupted