Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion src/Document/CrossReference/CrossReferenceSourceParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@

namespace PrinsFrank\PdfParser\Document\CrossReference;

use PrinsFrank\PdfParser\Document\CrossReference\RawStream\ObjectPositionsFromRawStreamParser;
use PrinsFrank\PdfParser\Document\CrossReference\Source\CrossReferenceSource;
use PrinsFrank\PdfParser\Document\CrossReference\Source\RecoveredCrossReferenceSource;
use PrinsFrank\PdfParser\Document\CrossReference\Stream\CrossReferenceStreamParser;
use PrinsFrank\PdfParser\Document\CrossReference\Table\CrossReferenceTableParser;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryKey\DictionaryKey;
Expand Down Expand Up @@ -80,7 +82,15 @@ public static function parse(Stream $stream): CrossReferenceSource {
$crossReferenceSections[] = $currentCrossReferenceSection;
}

return new CrossReferenceSource(... $crossReferenceSections);
$crossReferenceSource = new CrossReferenceSource(... $crossReferenceSections);
if ($crossReferenceSource->hasInvalidByteOffset($stream)) {
return new RecoveredCrossReferenceSource(
ObjectPositionsFromRawStreamParser::parse($stream),
...$crossReferenceSections,
);
}

return $crossReferenceSource;
}

private static function getCrossReferenceType(Stream $stream, int $byteOffsetLastCrossReferenceSection, int $byteOffsetEndOfCurrentLine): ?CrossReferenceType {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
<?php declare(strict_types=1);

namespace PrinsFrank\PdfParser\Document\CrossReference\RawStream;

use PrinsFrank\PdfParser\Stream\Stream;

/**
 * Recovers the positions of indirect objects by scanning the raw stream for "<objNr> <generationNr> obj"
 * markers, without relying on any cross reference table or stream.
 *
 * The scanner is a small state machine that is fed one character at a time:
 *
 *   IDLE -> IN_OBJ_NR -> (single space) -> IN_GENERATION_NR -> (single space) -> IN_OBJ_MARKER -> match
 *
 * Only a single space is accepted as separator between the three tokens. The scanner is intentionally
 * lenient about what precedes the object number and what follows the "obj" keyword.
 */
class ObjectPositionsFromRawStreamParser {
    private const STATE_IDLE = 0;
    private const STATE_IN_OBJ_NR = 1;
    private const STATE_IN_GENERATION_NR = 2;
    private const STATE_IN_OBJ_MARKER = 3;

    private const OBJ_MARKER = 'obj';
    private const DIGITS = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];

    /**
     * @return array<int, int> where the key is the byte offset of the first digit of the object number
     *                         and the value is the object number found at that offset
     */
    public static function parse(Stream $stream): array {
        $state = self::STATE_IDLE;
        $objNrOffset = $generationNrOffset = 0;
        $objNrDigits = $generationNrDigits = $markerBuffer = '';
        $discoveredObjects = [];
        foreach ($stream->chars(0, $stream->getSizeInBytes()) as $byteOffset => $char) {
            if ($char === ' ') {
                if ($state === self::STATE_IN_OBJ_NR) {
                    $state = self::STATE_IN_GENERATION_NR;
                    $generationNrDigits = '';
                } elseif ($state === self::STATE_IN_GENERATION_NR && $generationNrDigits !== '') {
                    // Require at least one generation digit so "1  obj" is not mistaken for an object header
                    $state = self::STATE_IN_OBJ_MARKER;
                    $markerBuffer = '';
                } else {
                    $state = self::STATE_IDLE;
                }

                continue;
            }

            if (in_array($char, self::DIGITS, true)) {
                if ($state === self::STATE_IN_OBJ_NR) {
                    $objNrDigits .= $char;
                } elseif ($state === self::STATE_IN_GENERATION_NR) {
                    if ($generationNrDigits === '') {
                        $generationNrOffset = $byteOffset;
                    }

                    $generationNrDigits .= $char;
                } elseif ($state === self::STATE_IN_OBJ_MARKER && $markerBuffer === '') {
                    // A third number follows "A B ": slide the window so "B <this number>" becomes the new
                    // candidate. This is what makes "1 0 12 0 obj" resolve to object 12 at its real offset.
                    $objNrOffset = $generationNrOffset;
                    $objNrDigits = $generationNrDigits;
                    $generationNrOffset = $byteOffset;
                    $generationNrDigits = $char;
                    $state = self::STATE_IN_GENERATION_NR;
                } else {
                    // Idle, or a digit interrupted a partially matched marker: start over at this digit
                    $state = self::STATE_IN_OBJ_NR;
                    $objNrOffset = $byteOffset;
                    $objNrDigits = $char;
                }

                continue;
            }

            // The marker buffer is always shorter than the marker here, as a full match resets to idle
            if ($state === self::STATE_IN_OBJ_MARKER && $char === self::OBJ_MARKER[strlen($markerBuffer)]) {
                $markerBuffer .= $char;
                if ($markerBuffer === self::OBJ_MARKER) {
                    $discoveredObjects[$objNrOffset] = (int) $objNrDigits;
                    $state = self::STATE_IDLE;
                }

                continue;
            }

            $state = self::STATE_IDLE;
        }

        return $discoveredObjects;
    }
}
11 changes: 11 additions & 0 deletions src/Document/CrossReference/Source/CrossReferenceSource.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Name\NameValue;
use PrinsFrank\PdfParser\Document\Dictionary\DictionaryValue\Reference\ReferenceValue;
use PrinsFrank\PdfParser\Exception\ParseFailureException;
use PrinsFrank\PdfParser\Stream\Stream;

/** Can be both from a crossReferenceTable or a crossReferenceStream */
class CrossReferenceSource {
Expand Down Expand Up @@ -75,4 +76,14 @@ public function getFirstId(): string {

return $firstId;
}

/**
 * Reports whether at least one of the cross reference sections of this source contains a byte offset
 * that is considered invalid for the given stream. Stops at the first section that reports one.
 */
public function hasInvalidByteOffset(Stream $stream): bool {
    foreach ($this->crossReferenceSections as $section) {
        if ($section->hasInvalidByteOffset($stream) === false) {
            continue;
        }

        return true;
    }

    return false;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<?php declare(strict_types=1);

namespace PrinsFrank\PdfParser\Document\CrossReference\Source;

use PrinsFrank\PdfParser\Document\CrossReference\Source\Section\CrossReferenceSection;
use PrinsFrank\PdfParser\Document\CrossReference\Source\Section\SubSection\Entry\CrossReferenceEntryCompressed;
use PrinsFrank\PdfParser\Document\CrossReference\Source\Section\SubSection\Entry\CrossReferenceEntryInUseObject;

/**
 * Cross reference source that prefers object positions recovered by scanning the raw stream over the
 * positions declared in the (damaged) cross reference sections. Objects that were not recovered fall
 * back to the regular lookup of the parent class.
 */
class RecoveredCrossReferenceSource extends CrossReferenceSource {
    /** @var array<int, int> highest recovered byte offset, keyed by object number */
    private array $byteOffsetsByObjNumber = [];

    /**
     * @param array<int, int> $recoveredByteOffsetMap where the key is the byte offset and the value the object nr
     * @no-named-arguments
     */
    public function __construct(
        array $recoveredByteOffsetMap,
        CrossReferenceSection... $crossReferenceSections,
    ) {
        parent::__construct(...$crossReferenceSections);

        // Index once by object number so lookups do not have to scan the whole map. When an object number
        // occurs more than once, keep the highest offset: incremental updates append newer definitions
        // at the end of the file, so the last occurrence is the current one.
        foreach ($recoveredByteOffsetMap as $byteOffset => $recoveredObjNr) {
            if (($this->byteOffsetsByObjNumber[$recoveredObjNr] ?? -1) < $byteOffset) {
                $this->byteOffsetsByObjNumber[$recoveredObjNr] = $byteOffset;
            }
        }
    }

    /**
     * Returns an in-use entry pointing at the recovered position when the object was found in the raw
     * stream, and defers to the parent lookup otherwise.
     *
     * The generation number of a recovered entry is always reported as 0, as the recovered map only
     * carries object numbers.
     */
    #[\Override]
    public function getCrossReferenceEntry(int $objNumber): CrossReferenceEntryInUseObject|CrossReferenceEntryCompressed|null {
        if (isset($this->byteOffsetsByObjNumber[$objNumber])) {
            return new CrossReferenceEntryInUseObject($this->byteOffsetsByObjNumber[$objNumber], 0);
        }

        return parent::getCrossReferenceEntry($objNumber);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
use PrinsFrank\PdfParser\Document\CrossReference\Source\Section\SubSection\Entry\CrossReferenceEntryCompressed;
use PrinsFrank\PdfParser\Document\CrossReference\Source\Section\SubSection\Entry\CrossReferenceEntryInUseObject;
use PrinsFrank\PdfParser\Document\Dictionary\Dictionary;
use PrinsFrank\PdfParser\Stream\Stream;

/** There are multiple crossReference sections if there are incremental updates. See 7.5.6 */
readonly class CrossReferenceSection {
Expand All @@ -29,4 +30,14 @@ public function getCrossReferenceEntry(int $objNumber): CrossReferenceEntryInUse

return null;
}

/**
 * Reports whether at least one subsection of this section contains a byte offset that is considered
 * invalid for the given stream. Stops at the first subsection that reports one.
 */
public function hasInvalidByteOffset(Stream $stream): bool {
    foreach ($this->crossReferenceSubSections as $subSection) {
        if ($subSection->hasInvalidByteOffset($stream) === false) {
            continue;
        }

        return true;
    }

    return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
use PrinsFrank\PdfParser\Document\CrossReference\Source\Section\SubSection\Entry\CrossReferenceEntryInUseObject;
use PrinsFrank\PdfParser\Exception\InvalidArgumentException;
use PrinsFrank\PdfParser\Exception\RuntimeException;
use PrinsFrank\PdfParser\Stream\Stream;

readonly class CrossReferenceSubSection {
/** @var array<CrossReferenceEntryInUseObject|CrossReferenceEntryFreeObject|CrossReferenceEntryCompressed> */
Expand Down Expand Up @@ -51,4 +52,24 @@ public function getCrossReferenceEntry(int $objNumber): CrossReferenceEntryInUse

return $object;
}

/**
 * Checks whether any in-use entry of this subsection points at a position in the stream that does not
 * start with the expected "<objNr> <generationNr> obj" marker.
 *
 * Entries that are not in-use objects are skipped. An entry is also reported as invalid when the marker
 * would not fit between its byte offset and the end of the stream, so the stream is never read out of
 * bounds.
 */
public function hasInvalidByteOffset(Stream $stream): bool {
    // The stream size does not change while validating, so resolve it once instead of once per entry
    $streamSizeInBytes = $stream->getSizeInBytes();
    foreach ($this->crossReferenceEntries as $index => $crossReferenceEntry) {
        if ($crossReferenceEntry instanceof CrossReferenceEntryInUseObject === false) {
            continue;
        }

        $objNumber = $this->firstObjectNumber + $index;
        $expectedObjMarker = $objNumber . ' ' . $crossReferenceEntry->generationNumber . ' obj';
        $expectedObjMarkerLength = strlen($expectedObjMarker);
        $byteOffset = $crossReferenceEntry->byteOffsetInDecodedStream;

        // The complete marker has to fit inside the stream; checking only the start offset would still
        // allow a read that runs past the end of the stream
        if ($byteOffset < 0 || $byteOffset + $expectedObjMarkerLength > $streamSizeInBytes) {
            return true;
        }

        if ($stream->read($byteOffset, $expectedObjMarkerLength) !== $expectedObjMarker) {
            return true;
        }
    }

    return false;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
<?php declare(strict_types=1);

namespace PrinsFrank\PdfParser\Tests\Unit\Document\CrossReference\RawStream;

use PHPUnit\Framework\Attributes\CoversClass;
use PHPUnit\Framework\TestCase;
use PrinsFrank\PdfParser\Document\CrossReference\RawStream\ObjectPositionsFromRawStreamParser;
use PrinsFrank\PdfParser\Stream\InMemoryStream;

#[CoversClass(ObjectPositionsFromRawStreamParser::class)]
class ObjectPositionsFromRawStreamParserTest extends TestCase {
    /**
     * Two object headers are expected to be discovered, keyed by the byte offset of their object number.
     * The "4 0 4 0 testobj" line must not be picked up, as "obj" does not directly follow the numbers.
     */
    public function testParse(): void {
        // The fixture is kept at column 0 so the byte offsets asserted below stay exact
        $rawPdf = <<<PDF
%%PDF-1.7
1 0 obj
4 0 4 0 testobj
endobj

1232131 0 obj
endobj

PDF;

        $discoveredObjects = ObjectPositionsFromRawStreamParser::parse(new InMemoryStream($rawPdf));

        self::assertSame([10 => 1, 42 => 1232131], $discoveredObjects);
    }
}
Loading