Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 127 additions & 0 deletions src/attrs.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
// SPDX-License-Identifier: MIT
//
// Copyright 2016-2025, Johann Tuffe.

//! Zero-allocation XML attribute extraction utilities.
//!
//! These replace quick_xml's own `Attributes` iterator,
//! avoiding per-item overhead from `Result` wrapping,
//! `Cow`/`QName` newtypes, quote-type tracking, etc.

use quick_xml::escape::unescape;
use quick_xml::events::BytesStart;
use quick_xml::Decoder;

/// Zero-allocation iterator over raw XML attribute
/// bytes, yielding `(key, value)` byte-slice pairs.
///
/// Both slices borrow directly from the input; values are returned verbatim
/// (no entity unescaping, no decoding).
///
/// The parser is deliberately lenient:
/// - whitespace around `=` is accepted and never included in the key,
/// - a value whose closing quote is missing runs to the end of the input,
/// - anything else that is malformed (no `=`, missing or unquoted value) ends
///   the iteration, and every later call keeps returning `None`.
pub(crate) struct RawAttrIter<'a> {
    /// Raw attribute bytes being scanned.
    raw: &'a [u8],
    /// Index into `raw` of the next byte to examine.
    pos: usize,
}

impl<'a> RawAttrIter<'a> {
    /// Create an iterator positioned at the start of `raw`.
    #[inline]
    fn new(raw: &'a [u8]) -> Self {
        Self { raw, pos: 0 }
    }

    /// Index of the first non-whitespace byte at or after `from`,
    /// or `raw.len()` when only whitespace (or nothing) remains.
    ///
    /// `from` must not exceed `raw.len()`.
    #[inline]
    fn skip_whitespace(raw: &[u8], from: usize) -> usize {
        raw[from..]
            .iter()
            .position(|b| !b.is_ascii_whitespace())
            .map_or(raw.len(), |off| from + off)
    }
}

impl<'a> Iterator for RawAttrIter<'a> {
    type Item = (&'a [u8], &'a [u8]);

    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let raw = self.raw;
        let len = raw.len();

        // skip whitespace separating attributes
        let key_start = Self::skip_whitespace(raw, self.pos);
        if key_start >= len {
            self.pos = len;
            return None;
        }

        // key: everything up to the next '='
        let eq = match raw[key_start..].iter().position(|&b| b == b'=') {
            Some(off) => key_start + off,
            None => {
                self.pos = len;
                return None;
            }
        };
        // XML allows whitespace before '=' (`Eq ::= S? '=' S?`);
        // it must not end up inside the key.
        let mut key_end = eq;
        while key_end > key_start && raw[key_end - 1].is_ascii_whitespace() {
            key_end -= 1;
        }
        let key = &raw[key_start..key_end];

        // ... and after '=' as well, before the opening quote
        let quote_pos = Self::skip_whitespace(raw, eq + 1);
        let quote = match raw.get(quote_pos) {
            Some(&q) if q == b'"' || q == b'\'' => q,
            _ => {
                // Missing or unquoted value: give up, and stay exhausted so a
                // later call cannot resume in the middle of malformed input.
                self.pos = len;
                return None;
            }
        };

        // quoted value: up to the matching quote, or to the end if unterminated
        let val_start = quote_pos + 1;
        let val_end = raw[val_start..]
            .iter()
            .position(|&b| b == quote)
            .map_or(len, |off| val_start + off);
        let val = &raw[val_start..val_end];

        // step over the closing quote when there is one
        self.pos = (val_end + 1).min(len);
        Some((key, val))
    }
}

/// Extension trait giving XML elements cheap, byte-level attribute access.
pub(crate) trait RawAttributes {
    /// Walk every attribute of the element, yielding `(key, value)` byte slices.
    fn iter_raw_attrs(&self) -> RawAttrIter<'_>;

    /// Look up one attribute by its exact name.
    ///
    /// Returns the value of the first attribute whose key equals `name`
    /// byte-for-byte, or `None` when no attribute matches.
    #[inline]
    fn raw_attr(&self, name: &[u8]) -> Option<&[u8]> {
        for (key, value) in self.iter_raw_attrs() {
            if key == name {
                return Some(value);
            }
        }
        None
    }
}

impl RawAttributes for BytesStart<'_> {
    /// Iterate over the bytes returned by `attributes_raw()` for this tag.
    #[inline]
    fn iter_raw_attrs(&self) -> RawAttrIter<'_> {
        let attr_bytes = self.attributes_raw();
        RawAttrIter::new(attr_bytes)
    }
}

/// Get a set of named attributes from an element in a single
/// pass, with early exit as soon as all items are found.
///
/// Usage: `get_attrs!(elem, b"r" => r, b"s" => s)` evaluates to a tuple of
/// `Option`s, one per requested key and in the order the keys were listed.
/// `elem` must provide `iter_raw_attrs()` yielding `(key, value)` pairs, and
/// each key must be a literal usable as a `match` pattern against the
/// yielded key.
///
/// If an attribute name is repeated (which well-formed XML forbids), the
/// first occurrence wins. Repeats are not counted towards the early exit, so
/// they can never cause another requested attribute to be skipped.
macro_rules! get_attrs {
    ($e:expr, $($key:expr => $var:ident),+ $(,)?) => {{
        $(let mut $var = None;)+
        // number of distinct requested keys seen so far
        let mut found = 0u8;
        let total = get_attrs!(@count $($key),+);
        for (k, v) in $e.iter_raw_attrs() {
            match k {
                $($key => {
                    // Count each key once only: counting a duplicate would
                    // reach `total` too early and end the scan before the
                    // remaining keys had a chance to be seen.
                    if $var.is_none() {
                        $var = Some(v);
                        found += 1;
                    }
                })+
                _ => {}
            }
            if found == total {
                break;
            }
        }
        ($($var),+)
    }};
    // internal: number of requested keys, as a `u8` expression
    (@count $first:expr $(, $rest:expr)*) => {
        1u8 $(+ get_attrs!(@count_one $rest))*
    };
    // internal: maps any single key to `1u8`
    (@count_one $e:expr) => { 1u8 };
}

/// Decode raw attribute bytes into a `String`, with XML entity unescaping.
/// Only needed for values that can contain entities (eg: sheet names, table names, etc).
pub(crate) fn decode_attr(decoder: &Decoder, val: &[u8]) -> Result<String, quick_xml::Error> {
let decoded = decoder.decode(val)?;
let unescaped = unescape(&decoded).map_err(quick_xml::Error::from)?;
Ok(unescaped.into_owned())
}
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd suggest adding some lib tests here like:


// Unit tests driving `RawAttrIter` directly over hand-written attribute bytes.
#[cfg(test)]
mod tests {
    use super::*;

    /// Two attributes, one double-quoted and one single-quoted, are yielded
    /// in order, after which the iterator reports exhaustion.
    #[test]
    fn test_basic_attrs() {
        let bytes = b"key1=\"val1\" key2='val2'";
        let mut iter = RawAttrIter::new(bytes);
        assert_eq!(iter.next(), Some((&b"key1"[..], &b"val1"[..])));
        assert_eq!(iter.next(), Some((&b"key2"[..], &b"val2"[..])));
        assert_eq!(iter.next(), None);
    }

    /// Spaces on either side of `=` are expected to be skipped: the key must
    /// come back without the trailing space and the value must still be found.
    #[test]
    fn test_whitespace_around_equals() {
        let bytes = b"key = \"value\"";
        let mut iter = RawAttrIter::new(bytes);
        assert_eq!(iter.next(), Some((&b"key"[..], &b"value"[..])));
        assert_eq!(iter.next(), None);
    }

    /// An empty quoted value is yielded as an empty slice, not dropped.
    #[test]
    fn test_empty_value() {
        let bytes = b"key=\"\"";
        let mut iter = RawAttrIter::new(bytes);
        assert_eq!(iter.next(), Some((&b"key"[..], &b""[..])));
    }

    /// Input that ends right at the closing quote is handled, and the
    /// following call returns `None`.
    #[test]
    fn test_no_trailing_space() {
        let bytes = b"key=\"value\"";
        let mut iter = RawAttrIter::new(bytes);
        assert_eq!(iter.next(), Some((&b"key"[..], &b"value"[..])));
        assert_eq!(iter.next(), None);
    }
}

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Definitely a good idea - will get back to this shortly 👌

2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@
#[macro_use]
mod utils;

#[macro_use]
mod attrs;
mod auto;
mod cfb;
mod datatype;
Expand Down
30 changes: 4 additions & 26 deletions src/xlsb/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ use std::io::{BufReader, Read, Seek};
use log::debug;

use encoding_rs::UTF_16LE;
use quick_xml::events::attributes::Attribute;
use quick_xml::events::Event;
use quick_xml::name::QName;
use quick_xml::Reader as XmlReader;
use zip::read::{ZipArchive, ZipFile};
use zip::result::ZipError;

use crate::attrs::{decode_attr, RawAttributes};
use crate::datatype::DataRef;
use crate::formats::{builtin_format_by_code, detect_custom_number_format, CellFormat};
use crate::utils::{
Expand Down Expand Up @@ -183,32 +183,10 @@ impl<RS: Read + Seek> Xlsb<RS> {
loop {
match xml.read_event_into(&mut buf) {
Ok(Event::Start(e)) if e.name() == QName(b"Relationship") => {
let mut id = None;
let mut target = None;
for a in e.attributes() {
match a? {
Attribute {
key: QName(b"Id"),
value: v,
} => {
id = Some(v.to_vec());
}
Attribute {
key: QName(b"Target"),
value: v,
} => {
target = Some(
xml.decoder()
.decode(&v)
.map_err(XlsbError::Encoding)?
.into_owned(),
);
}
_ => (),
}
}
let (id, target) = get_attrs!(e, b"Id" => id, b"Target" => target);
if let (Some(id), Some(target)) = (id, target) {
relationships.insert(id, target);
relationships
.insert(id.to_vec(), decode_attr(&xml.decoder(), target)?);
}
}
Ok(Event::Eof) => break,
Expand Down
93 changes: 32 additions & 61 deletions src/xlsx/cells_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,18 @@
//
// Copyright 2016-2025, Johann Tuffe.

use quick_xml::{
events::{attributes::Attribute, BytesStart, Event},
name::QName,
};
use quick_xml::events::{BytesStart, Event};
use std::{
borrow::{Borrow, Cow},
borrow::Borrow,
collections::HashMap,
io::{Read, Seek},
};

use super::{
get_attribute, get_dimension, get_row, get_row_column, read_string_with_bufs,
replace_cell_names, Dimensions, XlReader,
get_dimension, get_row, get_row_column, read_string_with_bufs, replace_cell_names, Dimensions,
XlReader,
};
use crate::attrs::RawAttributes;
use crate::{
datatype::DataRef,
formats::{format_excel_f64_ref, CellFormat},
Expand Down Expand Up @@ -85,15 +83,9 @@ where
match xml.read_event_into(&mut buf).map_err(XlsxError::Xml)? {
Event::Start(e) => match e.local_name().as_ref() {
b"dimension" => {
for a in e.attributes() {
if let Attribute {
key: QName(b"ref"),
value: rdim,
} = a?
{
dimensions = get_dimension(&rdim)?;
continue 'xml;
}
if let Some(rdim) = e.raw_attr(b"ref") {
dimensions = get_dimension(rdim)?;
continue 'xml;
}
return Err(XlsxError::UnexpectedNode("dimension"));
}
Expand Down Expand Up @@ -138,34 +130,17 @@ where
self.buf.clear();
match self.xml.read_event_into(&mut self.buf) {
Ok(Event::Start(row_element)) if row_element.local_name().as_ref() == b"row" => {
let attribute = get_attribute(row_element.attributes(), QName(b"r"))?;
if let Some(range) = attribute {
let row = get_row(range)?;
self.row_index = row;
if let Some(r) = row_element.raw_attr(b"r") {
self.row_index = get_row(r)?;
}
}
Ok(Event::End(row_element)) if row_element.local_name().as_ref() == b"row" => {
self.row_index += 1;
self.col_index = 0;
}
Ok(Event::Start(c_element)) if c_element.local_name().as_ref() == b"c" => {
// Extract all needed attributes in one pass (avoids calling
// `get_attribute` multiple times as each re-iterates).
let mut pos_attr = None;
let mut style_attr = None;
let mut type_attr = None;
for a in c_element.attributes() {
let a = a.map_err(XlsxError::XmlAttr)?;
let Cow::Borrowed(val) = a.value else {
continue;
};
match a.key {
QName(b"r") => pos_attr = Some(val),
QName(b"s") => style_attr = Some(val),
QName(b"t") => type_attr = Some(val),
_ => {}
}
}
let (pos_attr, style_attr, type_attr) =
get_attrs!(c_element, b"r" => r, b"s" => s, b"t" => t);
let pos = if let Some(range) = pos_attr {
let (row, col) = get_row_column(range)?;
self.col_index = col;
Expand Down Expand Up @@ -216,20 +191,17 @@ where
self.buf.clear();
match self.xml.read_event_into(&mut self.buf) {
Ok(Event::Start(row_element)) if row_element.local_name().as_ref() == b"row" => {
let attribute = get_attribute(row_element.attributes(), QName(b"r"))?;
if let Some(range) = attribute {
let row = get_row(range)?;
self.row_index = row;
if let Some(r) = row_element.raw_attr(b"r") {
self.row_index = get_row(r)?;
}
}
Ok(Event::End(row_element)) if row_element.local_name().as_ref() == b"row" => {
self.row_index += 1;
self.col_index = 0;
}
Ok(Event::Start(c_element)) if c_element.local_name().as_ref() == b"c" => {
let attribute = get_attribute(c_element.attributes(), QName(b"r"))?;
let pos = if let Some(range) = attribute {
let (row, col) = get_row_column(range)?;
let pos = if let Some(r) = c_element.raw_attr(b"r") {
let (row, col) = get_row_column(r)?;
self.col_index = col;
(row, col)
} else {
Expand All @@ -244,31 +216,30 @@ where
if let Some(f) = formula.borrow() {
value = Some(f.clone());
}
if let Ok(Some(b"shared")) =
get_attribute(e.attributes(), QName(b"t"))
{
let (t_attr, si_attr, ref_attr) =
get_attrs!(e, b"t" => t, b"si" => si, b"ref" => ref_);
if t_attr == Some(b"shared".as_slice()) {
// shared formula
let mut offset_map: HashMap<(u32, u32), (i64, i64)> =
HashMap::new();
// shared index
let shared_index =
match get_attribute(e.attributes(), QName(b"si"))? {
Some(res) => match atoi_simd::parse::<usize>(res) {
Ok(res) => res,
Err(_) => {
return Err(XlsxError::Unexpected(
"si attribute must be a number",
));
}
},
None => {
let shared_index = match si_attr {
Some(res) => match atoi_simd::parse::<usize>(res) {
Ok(res) => res,
Err(_) => {
return Err(XlsxError::Unexpected(
"si attribute is mandatory if it is shared",
"si attribute must be a number",
));
}
};
},
None => {
return Err(XlsxError::Unexpected(
"si attribute is mandatory if it is shared",
));
}
};
// shared reference
match get_attribute(e.attributes(), QName(b"ref"))? {
match ref_attr {
Some(res) => {
// original reference formula
let reference = get_dimension(res)?;
Expand Down
Loading