Skip to content
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 82 additions & 22 deletions crates/networking/p2p/sync/full.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@ use std::cmp::min;
use std::sync::Arc;
use std::time::Duration;

use ethrex_blockchain::{BatchBlockProcessingFailure, Blockchain, error::ChainError};
use ethrex_blockchain::{
BatchBlockProcessingFailure, Blockchain,
error::{ChainError, InvalidBlockError},
};
use ethrex_common::{H256, types::Block};
use ethrex_storage::Store;
use tokio::time::Instant;
Expand Down Expand Up @@ -268,7 +271,9 @@ async fn add_blocks_in_batch(

/// Executes the given blocks and stores them
/// If sync_head_found is true, they will be executed one by one
/// If sync_head_found is false, they will be executed in a single batch
/// If sync_head_found is false, they will be executed in a single batch,
/// falling back to one-by-one pipeline execution if the batch fails with
/// a post-execution error (works around batch-mode state corruption bugs).
async fn add_blocks(
blockchain: Arc<Blockchain>,
blocks: Vec<Block>,
Expand All @@ -277,26 +282,81 @@ async fn add_blocks(
) -> Result<(), (ChainError, Option<BatchBlockProcessingFailure>)> {
// If we found the sync head, run the blocks sequentially to store all the blocks's state
if sync_head_found {
tokio::task::spawn_blocking(move || {
let mut last_valid_hash = H256::default();
for block in blocks {
let block_hash = block.hash();
blockchain.add_block_pipeline(block, None).map_err(|e| {
(
e,
Some(BatchBlockProcessingFailure {
last_valid_hash,
failed_block_hash: block_hash,
}),
)
})?;
last_valid_hash = block_hash;
}
Ok(())
})
return run_blocks_pipeline(blockchain, blocks).await;
}

// Try batch execution first (faster).
// We clone blocks because add_blocks_in_batch takes ownership but we need
// them for the fallback. The clone cost is negligible (~1-5ms) vs batch
// execution time (median ~29s on hoodi).
match blockchain
.add_blocks_in_batch(blocks.clone(), cancel_token)
.await
.map_err(|e| (ChainError::Custom(e.to_string()), None))?
} else {
blockchain.add_blocks_in_batch(blocks, cancel_token).await
{
Ok(()) => Ok(()),
Err((ChainError::InvalidBlock(ref err), ref batch_failure))
if is_post_execution_error(err) =>
{
// Batch execution can produce incorrect results due to cross-block
// state cache pollution (e.g. `mark_modified` setting `exists = true`
// leaking across block boundaries). Fall back to single-block pipeline
// execution which uses fresh state per block.
let failed_block_info = batch_failure
.as_ref()
.and_then(|f| {
blocks
.iter()
.find(|b| b.hash() == f.failed_block_hash)
.map(|b| format!("block {} ({})", b.header.number, f.failed_block_hash))
})
.unwrap_or_else(|| "unknown block".to_string());
warn!(
"Batch execution failed at {failed_block_info} with: {err}. \
Retrying batch with per-block pipeline execution."
);
run_blocks_pipeline(blockchain, blocks).await
}
Err(e) => Err(e),
}
}

/// Returns true for errors that arise from EVM execution and could differ
/// between batch mode (shared VM state) and single-block pipeline mode.
/// Pre-execution validation errors (header, body, structural) would fail
/// identically in both modes, so retrying them is pointless.
fn is_post_execution_error(err: &InvalidBlockError) -> bool {
matches!(
err,
InvalidBlockError::GasUsedMismatch(_, _)
| InvalidBlockError::StateRootMismatch
| InvalidBlockError::ReceiptsRootMismatch
| InvalidBlockError::RequestsHashMismatch
| InvalidBlockError::BlockAccessListHashMismatch
| InvalidBlockError::BlobGasUsedMismatch
)
Comment on lines +330 to +338
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the worry is invalid intermediate state, InvalidTransaction (which includes 'insufficient funds') could also be caused by bad state.

}
Comment on lines +329 to +339
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 InvalidTransaction missing from post-execution error filter

InvalidTransaction can originate from EVM execution (via EvmError::Transaction → ChainError::InvalidBlock(InvalidBlockError::InvalidTransaction(...))). If batch-mode state-cache corruption causes an account's nonce or balance to appear wrong mid-batch, the block's transaction will fail with InvalidTransaction, but is_post_execution_error returns false for it — so the pipeline fallback never fires. The same root cause (cross-block shared state) that produces StateRootMismatch can also produce InvalidTransaction, yet only the former triggers the retry.

Suggested change
fn is_post_execution_error(err: &InvalidBlockError) -> bool {
matches!(
err,
InvalidBlockError::GasUsedMismatch(_, _)
| InvalidBlockError::StateRootMismatch
| InvalidBlockError::ReceiptsRootMismatch
| InvalidBlockError::RequestsHashMismatch
| InvalidBlockError::BlockAccessListHashMismatch
| InvalidBlockError::BlobGasUsedMismatch
)
}
fn is_post_execution_error(err: &InvalidBlockError) -> bool {
matches!(
err,
InvalidBlockError::GasUsedMismatch(_, _)
| InvalidBlockError::StateRootMismatch
| InvalidBlockError::ReceiptsRootMismatch
| InvalidBlockError::RequestsHashMismatch
| InvalidBlockError::BlockAccessListHashMismatch
| InvalidBlockError::BlobGasUsedMismatch
| InvalidBlockError::InvalidTransaction(_)
)
}
Prompt To Fix With AI
This is a comment left during a code review.
Path: crates/networking/p2p/sync/full.rs
Line: 327-337

Comment:
**`InvalidTransaction` missing from post-execution error filter**

`InvalidTransaction` can originate from EVM execution (via `EvmError::Transaction → ChainError::InvalidBlock(InvalidBlockError::InvalidTransaction(...))`). If batch-mode state-cache corruption causes an account's nonce or balance to appear wrong mid-batch, the block's transaction will fail with `InvalidTransaction`, but `is_post_execution_error` returns `false` for it — so the pipeline fallback never fires. The same root cause (cross-block shared state) that produces `StateRootMismatch` can also produce `InvalidTransaction`, yet only the former triggers the retry.

```suggestion
fn is_post_execution_error(err: &InvalidBlockError) -> bool {
    matches!(
        err,
        InvalidBlockError::GasUsedMismatch(_, _)
            | InvalidBlockError::StateRootMismatch
            | InvalidBlockError::ReceiptsRootMismatch
            | InvalidBlockError::RequestsHashMismatch
            | InvalidBlockError::BlockAccessListHashMismatch
            | InvalidBlockError::BlobGasUsedMismatch
            | InvalidBlockError::InvalidTransaction(_)
    )
}
```

How can I resolve this? If you propose a fix, please make it concise.


async fn run_blocks_pipeline(
blockchain: Arc<Blockchain>,
blocks: Vec<Block>,
) -> Result<(), (ChainError, Option<BatchBlockProcessingFailure>)> {
Comment on lines +341 to +344
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should also use the cancel_token

tokio::task::spawn_blocking(move || {
let mut last_valid_hash = H256::default();
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the first block fails, the 'last good' value is zero which is unintuitive.

for block in blocks {
let block_hash = block.hash();
blockchain.add_block_pipeline(block, None).map_err(|e| {
(
e,
Some(BatchBlockProcessingFailure {
last_valid_hash,
failed_block_hash: block_hash,
}),
)
})?;
last_valid_hash = block_hash;
}
Ok(())
})
.await
.map_err(|e| (ChainError::Custom(e.to_string()), None))?
}
Loading