From 12499aa9eab23965bc9ebf4b3170acd59d420676 Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Thu, 26 Feb 2026 16:07:06 +0400 Subject: [PATCH] perf: custom zero-overhead attribute extraction --- src/attrs.rs | 127 +++++++++++++ src/lib.rs | 2 + src/xlsb/mod.rs | 30 +-- src/xlsx/cells_reader.rs | 93 ++++----- src/xlsx/mod.rs | 395 +++++++++++---------------------------- 5 files changed, 279 insertions(+), 368 deletions(-) create mode 100644 src/attrs.rs diff --git a/src/attrs.rs b/src/attrs.rs new file mode 100644 index 00000000..3b5cdae9 --- /dev/null +++ b/src/attrs.rs @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2016-2025, Johann Tuffe. + +//! Zero-allocation XML attribute extraction utilities. +//! +//! These replace quick_xml's own `Attributes` iterator, +//! avoiding per-item overhead from `Result` wrapping, +//! `Cow`/`QName` newtypes, quote-type tracking, etc. + +use quick_xml::escape::unescape; +use quick_xml::events::BytesStart; +use quick_xml::Decoder; + +/// Zero-allocation iterator over raw XML attribute +/// bytes, yielding `(key, value)` byte-slice pairs. +pub(crate) struct RawAttrIter<'a> { + raw: &'a [u8], + pos: usize, +} + +impl<'a> RawAttrIter<'a> { + #[inline] + fn new(raw: &'a [u8]) -> Self { + Self { raw, pos: 0 } + } +} + +impl<'a> Iterator for RawAttrIter<'a> { + type Item = (&'a [u8], &'a [u8]); + + #[inline] + fn next(&mut self) -> Option { + let raw = self.raw; + let len = raw.len(); + + // skip whitespace + while self.pos < len && raw[self.pos].is_ascii_whitespace() { + self.pos += 1; + } + if self.pos >= len { + return None; + } + + // key + let key_start = self.pos; + while self.pos < len && raw[self.pos] != b'=' { + self.pos += 1; + } + if self.pos >= len { + return None; + } + let key = &raw[key_start..self.pos]; + self.pos += 1; // skip '=' + if self.pos >= len { + return None; + } + + // quoted value + let quote = raw[self.pos]; + if quote != b'"' && quote != b'\'' { + return None; + } + self.pos += 1; // skip opening quote + let val_start = self.pos; + while self.pos < len && raw[self.pos] != quote { + self.pos += 1; + } + let val = &raw[val_start..self.pos]; + if self.pos < len { + self.pos += 1; // skip closing quote + } + Some((key, val)) + } +} + +/// Extension trait for fast/raw attribute access on XML elements. +pub(crate) trait RawAttributes { + /// Iterate over all attributes as `(key, value)` byte-slice pairs. + fn iter_raw_attrs(&self) -> RawAttrIter<'_>; + + /// Get a single attribute by name. + #[inline] + fn raw_attr(&self, name: &[u8]) -> Option<&[u8]> { + self.iter_raw_attrs() + .find_map(|(k, v)| (k == name).then_some(v)) + } +} + +impl RawAttributes for BytesStart<'_> { + #[inline] + fn iter_raw_attrs(&self) -> RawAttrIter<'_> { + RawAttrIter::new(self.attributes_raw()) + } +} + +/// Get a set of named attributes from an element in a single +/// pass, with early exit as soon as all items are found. +macro_rules! get_attrs { + ($e:expr, $($key:expr => $var:ident),+ $(,)?) => {{ + $(let mut $var = None;)+ + let mut found = 0u8; + let total = get_attrs!(@count $($key),+); + for (k, v) in $e.iter_raw_attrs() { + match k { + $($key => { $var = Some(v); found += 1; })+ + _ => {} + } + if found == total { + break; + } + } + ($($var),+) + }}; + (@count $first:expr $(, $rest:expr)*) => { + 1u8 $(+ get_attrs!(@count_one $rest))* + }; + (@count_one $e:expr) => { 1u8 }; +} + +/// Decode raw attribute bytes into a `String`, with XML entity unescaping. +/// Only needed for values that can contain entities (eg: sheet names, table names, etc). +pub(crate) fn decode_attr(decoder: &Decoder, val: &[u8]) -> Result { + let decoded = decoder.decode(val)?; + let unescaped = unescape(&decoded).map_err(quick_xml::Error::from)?; + Ok(unescaped.into_owned()) +} diff --git a/src/lib.rs b/src/lib.rs index bfcec578..646ff5b0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -79,6 +79,8 @@ #[macro_use] mod utils; +#[macro_use] +mod attrs; mod auto; mod cfb; mod datatype; diff --git a/src/xlsb/mod.rs b/src/xlsb/mod.rs index 875d5c93..0cbe5da8 100644 --- a/src/xlsb/mod.rs +++ b/src/xlsb/mod.rs @@ -13,13 +13,13 @@ use std::io::{BufReader, Read, Seek}; use log::debug; use encoding_rs::UTF_16LE; -use quick_xml::events::attributes::Attribute; use quick_xml::events::Event; use quick_xml::name::QName; use quick_xml::Reader as XmlReader; use zip::read::{ZipArchive, ZipFile}; use zip::result::ZipError; +use crate::attrs::{decode_attr, RawAttributes}; use crate::datatype::DataRef; use crate::formats::{builtin_format_by_code, detect_custom_number_format, CellFormat}; use crate::utils::{ @@ -183,32 +183,10 @@ impl Xlsb { loop { match xml.read_event_into(&mut buf) { Ok(Event::Start(e)) if e.name() == QName(b"Relationship") => { - let mut id = None; - let mut target = None; - for a in e.attributes() { - match a? { - Attribute { - key: QName(b"Id"), - value: v, - } => { - id = Some(v.to_vec()); - } - Attribute { - key: QName(b"Target"), - value: v, - } => { - target = Some( - xml.decoder() - .decode(&v) - .map_err(XlsbError::Encoding)? - .into_owned(), - ); - } - _ => (), - } - } + let (id, target) = get_attrs!(e, b"Id" => id, b"Target" => target); if let (Some(id), Some(target)) = (id, target) { - relationships.insert(id, target); + relationships + .insert(id.to_vec(), decode_attr(&xml.decoder(), target)?); } } Ok(Event::Eof) => break, diff --git a/src/xlsx/cells_reader.rs b/src/xlsx/cells_reader.rs index 0563671c..bb594b7c 100644 --- a/src/xlsx/cells_reader.rs +++ b/src/xlsx/cells_reader.rs @@ -2,20 +2,18 @@ // // Copyright 2016-2025, Johann Tuffe. -use quick_xml::{ - events::{attributes::Attribute, BytesStart, Event}, - name::QName, -}; +use quick_xml::events::{BytesStart, Event}; use std::{ - borrow::{Borrow, Cow}, + borrow::Borrow, collections::HashMap, io::{Read, Seek}, }; use super::{ - get_attribute, get_dimension, get_row, get_row_column, read_string_with_bufs, - replace_cell_names, Dimensions, XlReader, + get_dimension, get_row, get_row_column, read_string_with_bufs, replace_cell_names, Dimensions, + XlReader, }; +use crate::attrs::RawAttributes; use crate::{ datatype::DataRef, formats::{format_excel_f64_ref, CellFormat}, @@ -85,15 +83,9 @@ where match xml.read_event_into(&mut buf).map_err(XlsxError::Xml)? { Event::Start(e) => match e.local_name().as_ref() { b"dimension" => { - for a in e.attributes() { - if let Attribute { - key: QName(b"ref"), - value: rdim, - } = a? - { - dimensions = get_dimension(&rdim)?; - continue 'xml; - } + if let Some(rdim) = e.raw_attr(b"ref") { + dimensions = get_dimension(rdim)?; + continue 'xml; } return Err(XlsxError::UnexpectedNode("dimension")); } @@ -138,10 +130,8 @@ where self.buf.clear(); match self.xml.read_event_into(&mut self.buf) { Ok(Event::Start(row_element)) if row_element.local_name().as_ref() == b"row" => { - let attribute = get_attribute(row_element.attributes(), QName(b"r"))?; - if let Some(range) = attribute { - let row = get_row(range)?; - self.row_index = row; + if let Some(r) = row_element.raw_attr(b"r") { + self.row_index = get_row(r)?; } } Ok(Event::End(row_element)) if row_element.local_name().as_ref() == b"row" => { @@ -149,23 +139,8 @@ where self.col_index = 0; } Ok(Event::Start(c_element)) if c_element.local_name().as_ref() == b"c" => { - // Extract all needed attributes in one pass (avoids calling - // `get_attribute` multiple times as each re-iterates). - let mut pos_attr = None; - let mut style_attr = None; - let mut type_attr = None; - for a in c_element.attributes() { - let a = a.map_err(XlsxError::XmlAttr)?; - let Cow::Borrowed(val) = a.value else { - continue; - }; - match a.key { - QName(b"r") => pos_attr = Some(val), - QName(b"s") => style_attr = Some(val), - QName(b"t") => type_attr = Some(val), - _ => {} - } - } + let (pos_attr, style_attr, type_attr) = + get_attrs!(c_element, b"r" => r, b"s" => s, b"t" => t); let pos = if let Some(range) = pos_attr { let (row, col) = get_row_column(range)?; self.col_index = col; @@ -216,10 +191,8 @@ where self.buf.clear(); match self.xml.read_event_into(&mut self.buf) { Ok(Event::Start(row_element)) if row_element.local_name().as_ref() == b"row" => { - let attribute = get_attribute(row_element.attributes(), QName(b"r"))?; - if let Some(range) = attribute { - let row = get_row(range)?; - self.row_index = row; + if let Some(r) = row_element.raw_attr(b"r") { + self.row_index = get_row(r)?; } } Ok(Event::End(row_element)) if row_element.local_name().as_ref() == b"row" => { @@ -227,9 +200,8 @@ where self.col_index = 0; } Ok(Event::Start(c_element)) if c_element.local_name().as_ref() == b"c" => { - let attribute = get_attribute(c_element.attributes(), QName(b"r"))?; - let pos = if let Some(range) = attribute { - let (row, col) = get_row_column(range)?; + let pos = if let Some(r) = c_element.raw_attr(b"r") { + let (row, col) = get_row_column(r)?; self.col_index = col; (row, col) } else { @@ -244,31 +216,30 @@ where if let Some(f) = formula.borrow() { value = Some(f.clone()); } - if let Ok(Some(b"shared")) = - get_attribute(e.attributes(), QName(b"t")) - { + let (t_attr, si_attr, ref_attr) = + get_attrs!(e, b"t" => t, b"si" => si, b"ref" => ref_); + if t_attr == Some(b"shared".as_slice()) { // shared formula let mut offset_map: HashMap<(u32, u32), (i64, i64)> = HashMap::new(); // shared index - let shared_index = - match get_attribute(e.attributes(), QName(b"si"))? { - Some(res) => match atoi_simd::parse::(res) { - Ok(res) => res, - Err(_) => { - return Err(XlsxError::Unexpected( - "si attribute must be a number", - )); - } - }, - None => { + let shared_index = match si_attr { + Some(res) => match atoi_simd::parse::(res) { + Ok(res) => res, + Err(_) => { return Err(XlsxError::Unexpected( - "si attribute is mandatory if it is shared", + "si attribute must be a number", )); } - }; + }, + None => { + return Err(XlsxError::Unexpected( + "si attribute is mandatory if it is shared", + )); + } + }; // shared reference - match get_attribute(e.attributes(), QName(b"ref"))? { + match ref_attr { Some(res) => { // original reference formula let reference = get_dimension(res)?; diff --git a/src/xlsx/mod.rs b/src/xlsx/mod.rs index 3205494b..de016ffc 100644 --- a/src/xlsx/mod.rs +++ b/src/xlsx/mod.rs @@ -6,22 +6,20 @@ mod cells_reader; -use std::borrow::Cow; use std::collections::HashMap; use std::io::BufReader; use std::io::{Read, Seek}; use std::str::FromStr; use log::warn; -use quick_xml::events::attributes::{AttrError, Attribute, Attributes}; -use quick_xml::events::BytesStart; -use quick_xml::events::Event; +use quick_xml::events::{BytesStart, Event}; use quick_xml::name::QName; use quick_xml::Decoder; use quick_xml::Reader as XmlReader; use zip::read::{ZipArchive, ZipFile}; use zip::result::ZipError; +use crate::attrs::{decode_attr, RawAttributes}; use crate::datatype::DataRef; use crate::formats::{builtin_format_by_id, detect_custom_number_format, CellFormat}; use crate::utils::{ @@ -290,7 +288,7 @@ impl Xlsx { buf.clear(); match xml.read_event_into(&mut buf) { Ok(Event::Start(e)) if e.local_name().as_ref() == b"sst" => { - if let Ok(Some(count)) = get_attribute(e.attributes(), QName(b"uniqueCount")) { + if let Some(count) = e.raw_attr(b"uniqueCount") { if let Ok(n) = atoi_simd::parse::(count) { self.strings.reserve(n); } @@ -341,23 +339,11 @@ impl Xlsx { inner_buf.clear(); match xml.read_event_into(&mut inner_buf) { Ok(Event::Start(e)) if e.local_name().as_ref() == b"numFmt" => { - let mut id = Vec::new(); - let mut format = String::new(); - for a in e.attributes() { - match a? { - Attribute { - key: QName(b"numFmtId"), - value: v, - } => id.extend_from_slice(&v), - Attribute { - key: QName(b"formatCode"), - value: v, - } => format = xml.decoder().decode(&v)?.into_owned(), - _ => (), - } - } - if !format.is_empty() { - number_formats.insert(id, format); + let (id, format_code) = + get_attrs!(e, b"numFmtId" => id, b"formatCode" => fmt); + if let (Some(id), Some(fc)) = (id, format_code) { + let format = decode_attr(&xml.decoder(), fc)?; + number_formats.insert(id.to_vec(), format); } } Ok(Event::End(e)) if e.local_name().as_ref() == b"numFmts" => break, @@ -370,17 +356,13 @@ impl Xlsx { inner_buf.clear(); match xml.read_event_into(&mut inner_buf) { Ok(Event::Start(e)) if e.local_name().as_ref() == b"xf" => { - self.formats.push( - e.attributes() - .filter_map(|a| a.ok()) - .find(|a| a.key == QName(b"numFmtId")) - .map_or(CellFormat::Other, |a| { - match number_formats.get(&*a.value) { - Some(fmt) => detect_custom_number_format(fmt), - None => builtin_format_by_id(&a.value), - } - }), - ); + self.formats.push(e.raw_attr(b"numFmtId").map_or( + CellFormat::Other, + |val| match number_formats.get(val) { + Some(fmt) => detect_custom_number_format(fmt), + None => builtin_format_by_id(val), + }, + )); } Ok(Event::End(e)) if e.local_name().as_ref() == b"cellXfs" => break, Ok(Event::Eof) => return Err(XlsxError::XmlEof("cellXfs")), @@ -412,38 +394,28 @@ impl Xlsx { let mut name = String::new(); let mut path = String::new(); let mut visible = SheetVisible::Visible; - for a in e.attributes() { - let a = a?; - match a { - Attribute { - key: QName(b"name"), - .. - } => { - name = a.decode_and_unescape_value(xml.decoder())?.to_string(); + for (key, val) in e.iter_raw_attrs() { + match key { + b"name" => { + name = decode_attr(&xml.decoder(), val)?; } - Attribute { - key: QName(b"state"), - .. - } => { - visible = match a.decode_and_unescape_value(xml.decoder())?.as_ref() - { - "visible" => SheetVisible::Visible, - "hidden" => SheetVisible::Hidden, - "veryHidden" => SheetVisible::VeryHidden, + b"state" => { + visible = match val { + b"visible" => SheetVisible::Visible, + b"hidden" => SheetVisible::Hidden, + b"veryHidden" => SheetVisible::VeryHidden, v => { + let v = xml.decoder().decode(v)?; return Err(XlsxError::Unrecognized { typ: "sheet:state", - val: v.to_string(), - }) + val: v.into_owned(), + }); } } } - Attribute { - key: QName(b"r:id" | b"relationships:id"), - value: v, - } => { + b"r:id" | b"relationships:id" => { let r = &relationships - .get(&*v) + .get(val) .ok_or(XlsxError::RelationshipNotFound)?[..]; // target may have prepended "/xl/" or "xl/" path; // strip if present @@ -455,7 +427,7 @@ impl Xlsx { format!("xl/{r}") }; } - _ => (), + _ => {} } } let typ = match path.split('/').nth(1) { @@ -476,23 +448,15 @@ impl Xlsx { }); self.sheets.push((name, path)); } - Ok(Event::Start(e)) if e.name().as_ref() == b"workbookPr" => { - self.is_1904 = match e.try_get_attribute("date1904")? { - Some(c) => ["1", "true"].contains( - &c.decode_and_unescape_value(xml.decoder()) - .map_err(XlsxError::Xml)? - .as_ref(), - ), + Ok(Event::Start(e)) if e.local_name().as_ref() == b"workbookPr" => { + self.is_1904 = match e.raw_attr(b"date1904") { + Some(v) => v == b"1" || v == b"true", None => false, }; } Ok(Event::Start(e)) if e.local_name().as_ref() == b"definedName" => { - if let Some(a) = e - .attributes() - .filter_map(std::result::Result::ok) - .find(|a| a.key == QName(b"name")) - { - let name = a.decode_and_unescape_value(xml.decoder())?.to_string(); + if let Some(val) = e.raw_attr(b"name") { + let name = decode_attr(&xml.decoder(), val)?; val_buf.clear(); let mut value = String::new(); loop { @@ -536,22 +500,10 @@ impl Xlsx { buf.clear(); match xml.read_event_into(&mut buf) { Ok(Event::Start(e)) if e.local_name().as_ref() == b"Relationship" => { - let mut id = Vec::new(); - let mut target = String::new(); - for a in e.attributes() { - match a? { - Attribute { - key: QName(b"Id"), - value: v, - } => id.extend_from_slice(&v), - Attribute { - key: QName(b"Target"), - value: v, - } => target = xml.decoder().decode(&v)?.into_owned(), - _ => (), - } + let (id, target) = get_attrs!(e, b"Id" => id, b"Target" => target); + if let (Some(id), Some(target)) = (id, target) { + relationships.insert(id.to_vec(), decode_attr(&xml.decoder(), target)?); } - relationships.insert(id, target); } Ok(Event::End(e)) if e.local_name().as_ref() == b"Relationships" => break, Ok(Event::Eof) => return Err(XlsxError::XmlEof("Relationships")), @@ -582,27 +534,14 @@ impl Xlsx { buf.clear(); match xml.read_event_into(&mut buf) { Ok(Event::Start(e)) if e.local_name().as_ref() == b"Relationship" => { - let mut id = Vec::new(); - let mut target = String::new(); - let mut table_type = false; - for a in e.attributes() { - match a? { - Attribute { - key: QName(b"Id"), - value: v, - } => id.extend_from_slice(&v), - Attribute { - key: QName(b"Target"), - value: v, - } => target = xml.decoder().decode(&v)?.into_owned(), - Attribute { - key: QName(b"Type"), - value: v, - } => table_type = *v == b"http://schemas.openxmlformats.org/officeDocument/2006/relationships/table"[..], - _ => (), - } - } + let (_, target, typ) = + get_attrs!(e, b"Id" => id, b"Target" => target, b"Type" => typ); + let table_type = typ == Some(b"http://schemas.openxmlformats.org/officeDocument/2006/relationships/table" as &[u8]); if table_type { + let target = match target { + Some(t) => decode_attr(&xml.decoder(), t)?, + None => String::new(), + }; if target.starts_with("../") { // Relative path. let new_index = @@ -637,49 +576,30 @@ impl Xlsx { buf.clear(); match xml.read_event_into(&mut buf) { Ok(Event::Start(e)) if e.local_name().as_ref() == b"table" => { - for a in e.attributes() { - match a? { - Attribute { - key: QName(b"displayName"), - value: v, - } => { - table_meta.display_name = - xml.decoder().decode(&v)?.into_owned(); + for (key, val) in e.iter_raw_attrs() { + match key { + b"displayName" => { + table_meta.display_name = decode_attr(&xml.decoder(), val)?; } - Attribute { - key: QName(b"ref"), - value: v, - } => { + b"ref" => { table_meta.ref_cells = - xml.decoder().decode(&v)?.into_owned(); + xml.decoder().decode(val)?.into_owned(); } - Attribute { - key: QName(b"headerRowCount"), - value: v, - } => { + b"headerRowCount" => { table_meta.header_row_count = - xml.decoder().decode(&v)?.parse()?; + xml.decoder().decode(val)?.parse()?; } - Attribute { - key: QName(b"totalsRowCount"), - value: v, - } => { + b"totalsRowCount" => { table_meta.totals_row_count = - xml.decoder().decode(&v)?.parse()?; + xml.decoder().decode(val)?.parse()?; } - _ => (), + _ => {} } } } Ok(Event::Start(e)) if e.local_name().as_ref() == b"tableColumn" => { - for a in e.attributes().flatten() { - if let Attribute { - key: QName(b"name"), - value: v, - } = a - { - column_names.push(xml.decoder().decode(&v)?.into_owned()); - } + if let Some(val) = e.raw_attr(b"name") { + column_names.push(decode_attr(&xml.decoder(), val)?); } } Ok(Event::End(e)) if e.local_name().as_ref() == b"table" => break, @@ -838,8 +758,8 @@ impl Xlsx { buf.clear(); match xml.read_event_into(&mut buf) { Ok(Event::Start(e)) if e.local_name() == QName(b"mergeCell").into() => { - if let Some(attr) = get_attribute(e.attributes(), QName(b"ref"))? { - let dimension = get_dimension(attr)?; + if let Some(val) = e.raw_attr(b"ref") { + let dimension = get_dimension(val)?; regions.push(( sheet_name.to_string(), sheet_path.to_string(), @@ -1779,24 +1699,6 @@ fn xml_reader<'a, RS: Read + Seek>( } } -/// search through an Element's attributes for the named one -pub(crate) fn get_attribute<'a>( - atts: Attributes<'a>, - n: QName, -) -> Result, XlsxError> { - for a in atts { - match a { - Ok(Attribute { - key, - value: Cow::Borrowed(value), - }) if key == n => return Ok(Some(value)), - Err(e) => return Err(XlsxError::XmlAttr(e)), - _ => {} // ignore other attributes - } - } - Ok(None) -} - /// converts a text representation (e.g. "A6:G67") of a dimension into integers /// - top left (row, column), /// - bottom right (row, column) @@ -1980,15 +1882,8 @@ where match xml.read_event_into(&mut buffer) { Ok(Event::Start(event)) if event.local_name().as_ref() == b"mergeCell" => { - for attribute in event.attributes() { - let attribute = attribute?; - - if attribute.key == QName(b"ref") { - let dimensions = get_dimension(&attribute.value)?; - merge_cells.push(dimensions); - - break; - } + if let Some(val) = event.raw_attr(b"ref") { + merge_cells.push(get_dimension(val)?); } } Ok(Event::End(event)) if event.local_name().as_ref() == b"mergeCells" => { @@ -2335,17 +2230,9 @@ fn item_tag(e: &BytesStart) -> Option { _ => None, } } -fn item_value(e: &BytesStart) -> Result { - for a in e.attributes() { - if let Attribute { - key: QName(b"v"), - value, - } = a? - { - return Ok(Some(Box::from(value))); - } - } - Ok(None) + +fn item_value(e: &BytesStart) -> Value { + e.raw_attr(b"v").map(Box::from) } // Get the target location of the pivot table's pivot cache definitions. @@ -2369,21 +2256,12 @@ where buf.clear(); match xml.read_event_into(&mut buf) { Ok(Event::Start(e)) if e.local_name().as_ref() == b"Relationship" => { - let mut target = String::new(); - let mut is_pivot_cache_definitions_type = false; - for a in e.attributes() { - match a? { - Attribute { - key: QName(b"Target"), - value: v, - } => target = xml.decoder().decode(&v)?.into_owned(), - Attribute { - key: QName(b"Type"), - value: v, - } => is_pivot_cache_definitions_type = *v == b"http://schemas.openxmlformats.org/officeDocument/2006/relationships/pivotCacheDefinition"[..], - _ => (), - } - } + let (target, typ) = get_attrs!(e, b"Target" => target, b"Type" => typ); + let is_pivot_cache_definitions_type = typ == Some(b"http://schemas.openxmlformats.org/officeDocument/2006/relationships/pivotCacheDefinition" as &[u8]); + let target = match target { + Some(t) => decode_attr(&xml.decoder(), t)?, + None => String::new(), + }; match (is_pivot_cache_definitions_type, definitions_path.is_some()) { (true, false) => { if let Some(target) = target.strip_prefix("../") { @@ -2437,21 +2315,12 @@ where buf.clear(); match xml.read_event_into(&mut buf) { Ok(Event::Start(e)) if e.local_name().as_ref() == b"Relationship" => { - let mut target = String::new(); - let mut is_pivot_cache_record_type = false; - for a in e.attributes() { - match a? { - Attribute { - key: QName(b"Target"), - value: v, - } => target = xml.decoder().decode(&v)?.into_owned(), - Attribute { - key: QName(b"Type"), - value: v, - } => is_pivot_cache_record_type = *v == b"http://schemas.openxmlformats.org/officeDocument/2006/relationships/pivotCacheRecords"[..], - _ => (), - } - } + let (target, typ) = get_attrs!(e, b"Target" => target, b"Type" => typ); + let is_pivot_cache_record_type = typ == Some(b"http://schemas.openxmlformats.org/officeDocument/2006/relationships/pivotCacheRecords" as &[u8]); + let target = match target { + Some(t) => decode_attr(&xml.decoder(), t)?, + None => String::new(), + }; match (is_pivot_cache_record_type, record_path.is_some()) { (true, false) => { if target.starts_with("xl/pivotCache") { @@ -2507,22 +2376,13 @@ where buf.clear(); match xml.read_event_into(&mut buf) { Ok(Event::Start(e)) if e.local_name().as_ref() == b"Relationship" => { - let mut target = String::new(); - let mut is_pivot_table_type = false; - for a in e.attributes() { - match a? { - Attribute { - key: QName(b"Target"), - value: v, - } => target = xml.decoder().decode(&v)?.into_owned(), - Attribute { - key: QName(b"Type"), - value: v, - } => is_pivot_table_type = *v == b"http://schemas.openxmlformats.org/officeDocument/2006/relationships/pivotTable"[..], - _ => (), - } - } + let (target, typ) = get_attrs!(e, b"Target" => target, b"Type" => typ); + let is_pivot_table_type = typ == Some(b"http://schemas.openxmlformats.org/officeDocument/2006/relationships/pivotTable" as &[u8]); if is_pivot_table_type { + let target = match target { + Some(t) => decode_attr(&xml.decoder(), t)?, + None => String::new(), + }; if let Some(target) = target.strip_prefix("../") { // this is an incomplete implementation, but should be good enough for excel let (parent, _) = base_folder @@ -2563,20 +2423,13 @@ where buf.clear(); match xml.read_event_into(&mut buf) { Ok(Event::Start(e)) if e.local_name().as_ref() == b"pivotTableDefinition" => { - for a in e.attributes() { - if let Attribute { - key: QName(b"name"), - value: v, - } = a? - { - if name.is_some() { - return Err(XlsxError::Unexpected( - "multiple name entries for one pivot table path", - )); - } else { - name.replace(xml.decoder().decode(&v)?.into_owned()); - } + if let Some(val) = e.raw_attr(b"name") { + if name.is_some() { + return Err(XlsxError::Unexpected( + "multiple name entries for one pivot table path", + )); } + name.replace(decode_attr(&xml.decoder(), val)?); } } Ok(Event::End(e)) if e.local_name().as_ref() == b"pivotTableDefinition" => break, @@ -2835,26 +2688,14 @@ fn get_pivot_cache_iter<'a, RS: Read + Seek + 'a>( match xml.read_event_into(&mut buf) { Ok(Event::Start(e)) if e.local_name().as_ref() == b"cacheField" => { - for a in e.attributes() { - match a? { - Attribute { - key: QName(b"name"), - value, - } => { - field_names.push(xml.decoder().decode(value.as_ref())?.to_string()); - fields.push(vec![]); - } - Attribute { - key: QName(b"formula"), - value: _value, - } => { - field_names.pop(); - fields.pop(); - } - _ => { - // do nothing - } - } + let (name, formula) = get_attrs!(e, b"name" => name, b"formula" => formula); + if let Some(name) = name { + field_names.push(decode_attr(&xml.decoder(), name)?); + fields.push(vec![]); + } + if formula.is_some() { + field_names.pop(); + fields.pop(); } } // Exclude grouped fields from results. @@ -2870,7 +2711,7 @@ fn get_pivot_cache_iter<'a, RS: Read + Seek + 'a>( Ok(Event::Start(e)) => { if let Some(tag) = item_tag(&e) { if let Some(field) = fields.last_mut() { - field.push((tag, item_value(&e)?)); + field.push((tag, item_value(&e))); } } } @@ -2929,30 +2770,21 @@ impl<'a, RS: Read + Seek + 'a> Iterator for PivotCacheIter<'a, RS> { buf.clear(); match self.reader.read_event_into(&mut buf) { Ok(Event::Start(e)) if e.local_name().as_ref() == b"x" => { - for a in e.attributes() { - if let Ok(Attribute { - key: QName(b"v"), - value, - }) = a - { - let value_position = match self.reader.decoder().decode(value.as_ref()) - { - Ok(val) => match val.parse::() { - Ok(val) => val, - Err(e) => { - return Some(Err(XlsxError::ParseInt(e))); - } - }, - Err(e) => return Some(Err(XlsxError::Encoding(e))), - }; + if let Some(val) = e.raw_attr(b"v") { + let value_position = match atoi_simd::parse::(val) { + Ok(val) => val, + Err(_) => { + return Some(Err(XlsxError::Unexpected( + "pivot cache x:v attribute must be a number", + ))); + } + }; - let column_name = &self.field_names[col_number]; - row.push(parse_item( - &self.definitions[column_name][value_position], - &self.reader.decoder(), - )); - break; - } + let column_name = &self.field_names[col_number]; + row.push(parse_item( + &self.definitions[column_name][value_position], + &self.reader.decoder(), + )); } col_number += 1; @@ -2969,7 +2801,8 @@ impl<'a, RS: Read + Seek + 'a> Iterator for PivotCacheIter<'a, RS> { Err(e) => return Some(Err(XlsxError::Xml(e))), Ok(Event::Start(e)) => { if let Some(tag) = item_tag(&e) { - if let Ok(value) = item_value(&e) { + { + let value = item_value(&e); row.push(parse_item(&(tag, value), &self.reader.decoder())); col_number += 1; }