-
Notifications
You must be signed in to change notification settings - Fork 129
APP-11676: Change data sync uploading binary files to not read in the entire binary data before uploading #5949
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 9 commits
12432a7
91a0115
1fb0513
f9ac49f
84feb00
a1fd462
c90762f
af8ba4d
f4ee902
790453f
406ba42
742bffc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,6 +2,8 @@ package data | |
|
|
||
| import ( | ||
| "bufio" | ||
| "encoding/binary" | ||
| "fmt" | ||
| "io" | ||
| "os" | ||
| "path/filepath" | ||
|
|
@@ -12,6 +14,8 @@ import ( | |
| "github.com/matttproud/golang_protobuf_extensions/pbutil" | ||
| "github.com/pkg/errors" | ||
| v1 "go.viam.com/api/app/datasync/v1" | ||
| "google.golang.org/protobuf/encoding/protowire" | ||
| "google.golang.org/protobuf/proto" | ||
| "google.golang.org/protobuf/types/known/anypb" | ||
|
|
||
| "go.viam.com/rdk/resource" | ||
|
|
@@ -274,6 +278,126 @@ func SensorDataFromCaptureFile(f *CaptureFile) ([]*v1.SensorData, error) { | |
| return ret, nil | ||
| } | ||
|
|
||
| // BinaryPayloadReader reads the next SensorData message from f without loading | ||
| // the binary payload into memory. It returns the SensorMetadata, payload size, | ||
| // and an io.Reader for streaming the payload. | ||
| // | ||
| // Successive calls advance through the file; call f.Reset() to restart. | ||
| // Returns io.EOF when no messages remain. | ||
| // | ||
| // Assumes SensorMetadata (field 1) precedes the binary payload (field 3). | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what do
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Generally, there's a lot of proto magic here, so I think a paragraph explaining the internals of what this is doing would be helpful for future readers. Also, please comment on magic numbers like the ones at L327-328.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Field 1 = SensorMetadata and field 3 = binary payload.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. much better, thank you 👍 |
||
| func (f *CaptureFile) BinaryPayloadReader() (*v1.SensorMetadata, int64, io.Reader, error) { | ||
| f.lock.Lock() | ||
| defer f.lock.Unlock() | ||
|
|
||
| if err := f.writer.Flush(); err != nil { | ||
|
n0nick marked this conversation as resolved.
Outdated
|
||
| return nil, 0, nil, err | ||
| } | ||
|
|
||
| seekOffset := f.readOffset | ||
| if _, err := f.file.Seek(seekOffset, io.SeekStart); err != nil { | ||
| return nil, 0, nil, err | ||
| } | ||
|
|
||
| varintCR := &countingByteReader{r: f.file} | ||
|
|
||
| // Read the outer length-prefix varint to advance readOffset past the full message. | ||
| outerLen, err := binary.ReadUvarint(varintCR) | ||
| if err != nil { | ||
| return nil, 0, nil, err // io.EOF means no more messages | ||
| } | ||
| // msgStart is the absolute file offset of the first field in this SensorData message. | ||
| msgStart := seekOffset + varintCR.count | ||
| f.readOffset = msgStart + int64(outerLen) | ||
|
|
||
| // Bound field parsing to exactly outerLen bytes so we can't read into the next message. | ||
| inner := &countingByteReader{r: io.LimitReader(f.file, int64(outerLen))} | ||
|
|
||
| var sensorMeta *v1.SensorMetadata | ||
|
|
||
| for { | ||
| tagVal, err := binary.ReadUvarint(inner) | ||
| if err != nil { | ||
| if errors.Is(err, io.EOF) { | ||
| break | ||
| } | ||
| return nil, 0, nil, fmt.Errorf("reading SensorData field tag: %w", err) | ||
| } | ||
|
|
||
| fieldNum := protowire.Number(tagVal >> 3) | ||
| wireType := protowire.Type(tagVal & 0x7) | ||
|
|
||
| // Skip non-bytes wire type fields to remain forward-compatible with fields | ||
| // added by future server versions. | ||
| if wireType != protowire.BytesType { | ||
| var skipErr error | ||
| switch wireType { //nolint:exhaustive | ||
| case protowire.VarintType: | ||
| _, skipErr = binary.ReadUvarint(inner) | ||
| case protowire.Fixed32Type: | ||
| _, skipErr = io.CopyN(io.Discard, inner, 4) | ||
| case protowire.Fixed64Type: | ||
| _, skipErr = io.CopyN(io.Discard, inner, 8) | ||
| default: | ||
| return nil, 0, nil, fmt.Errorf("unsupported wire type %d for field %d in SensorData", wireType, fieldNum) | ||
| } | ||
| if skipErr != nil { | ||
| return nil, 0, nil, fmt.Errorf("skipping field %d (wire type %d): %w", fieldNum, wireType, skipErr) | ||
| } | ||
| continue | ||
| } | ||
|
|
||
| fieldLen, err := binary.ReadUvarint(inner) | ||
| if err != nil { | ||
| return nil, 0, nil, fmt.Errorf("reading field length for SensorData field %d: %w", fieldNum, err) | ||
| } | ||
|
|
||
| switch fieldNum { //nolint:exhaustive | ||
| case 1: // SensorMetadata | ||
| metaBytes := make([]byte, fieldLen) | ||
|
n0nick marked this conversation as resolved.
|
||
| if _, err := io.ReadFull(inner, metaBytes); err != nil { | ||
| return nil, 0, nil, fmt.Errorf("reading SensorMetadata bytes: %w", err) | ||
| } | ||
| sensorMeta = &v1.SensorMetadata{} | ||
| if err := proto.Unmarshal(metaBytes, sensorMeta); err != nil { | ||
| return nil, 0, nil, fmt.Errorf("unmarshaling SensorMetadata: %w", err) | ||
| } | ||
| case 3: // binary payload (SensorData.binary oneof field) | ||
| // inner.count bytes consumed since msgStart; binary data starts here. | ||
| return sensorMeta, int64(fieldLen), io.NewSectionReader(f.file, msgStart+inner.count, int64(fieldLen)), nil | ||
|
n0nick marked this conversation as resolved.
|
||
| default: | ||
| if _, err := io.CopyN(io.Discard, inner, int64(fieldLen)); err != nil { | ||
| return nil, 0, nil, fmt.Errorf("skipping SensorData field %d: %w", fieldNum, err) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| return nil, 0, nil, errors.New("binary payload field not found in capture file") | ||
| } | ||
|
|
||
// countingByteReader wraps an io.Reader and tracks the total number of bytes read.
// It implements io.ByteReader so it can be passed to binary.ReadUvarint.
type countingByteReader struct {
	r     io.Reader // underlying source
	count int64     // total bytes delivered by r through this wrapper
}

// countingByteReaderMaxEmptyReads bounds how many consecutive (0, nil) results
// ReadByte tolerates from the wrapped reader before giving up.
const countingByteReaderMaxEmptyReads = 100

// ReadByte returns the next byte from the wrapped reader and adds it to count.
//
// io.Reader implementations may return (0, nil); such empty reads are retried
// rather than reported as a zero byte with a nil error, which would corrupt any
// varint being decoded. If the reader keeps making no progress,
// io.ErrNoProgress is returned. When the wrapped reader returns a byte together
// with an error, the byte is returned and the error is left for the next call.
func (c *countingByteReader) ReadByte() (byte, error) {
	var b [1]byte
	for attempt := 0; attempt < countingByteReaderMaxEmptyReads; attempt++ {
		n, err := c.r.Read(b[:])
		c.count += int64(n)
		if n == 1 {
			return b[0], nil
		}
		if err != nil {
			return 0, err
		}
	}
	return 0, io.ErrNoProgress
}

// Read forwards to the wrapped reader and adds the number of bytes read to count.
func (c *countingByteReader) Read(p []byte) (int, error) {
	n, err := c.r.Read(p)
	c.count += int64(n)
	return n, err
}
|
|
||
| // CaptureFilePathWithReplacedReservedChars returns the filepath with substitutions | ||
| // for reserved characters. | ||
| func CaptureFilePathWithReplacedReservedChars(filepath string) string { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.