From e630050bdd99b29e4945c2e249ff8126f4cc8503 Mon Sep 17 00:00:00 2001 From: olabusayoT <50379531+olabusayoT@users.noreply.github.com> Date: Thu, 2 Apr 2026 15:54:20 -0400 Subject: [PATCH 1/7] Change TDMLRunner to use XMLTextInfosetInputter/Outputter as default - it still uses scala results for certain things so we expose getScalaResult in the TDML Inputters/Outputters - Update TDML Schema to add support for custom validation name/type and use in stringAsXML tests - Drop whitespace between elements to keep expected matching actual, but keep all others like mixed whitespace, attributes, comments unchanged - Introduced tests for `stringAsXML` validation and namespace handling. - Added a `noNormalizations` flag to control whether comments/processing instructions are normalized. - Updated associated XML parsing methods and test cases to support the new option. - Revised whitespace removal to handle specific scenarios for improved XML processing. - Verify prefixes resolve to the same namespaces when checking prefixes - update TDMLException with more information on why getSimpleText isn't matching - NullInfosetInputter should receive UTF-8 bytes for its events Deprecation/Compatibility Instead of ScalaXMLInfosetInputter/Outputter being the default inputter/outputter for TDML Runner, it is now XMLTextInfosetInputter/Outputter which supports the stringAsXml feature DAFFODIL-2909 --- .../org/apache/daffodil/xsd/tdml.xsd | 20 ++- .../lib/xml/DaffodilConstructingLoader.scala | 33 +++-- .../daffodil/lib/xml/DaffodilXMLLoader.scala | 21 ++- .../apache/daffodil/lib/xml/XMLUtils.scala | 126 ++++++++++++++---- .../lib/xml/test/unit/TestXMLLoader.scala | 16 ++- .../org/apache/daffodil/tdml/TDMLRunner.scala | 2 +- .../tdml/DaffodilTDMLDFDLProcessor.scala | 15 ++- .../processor/tdml/TDMLInfosetInputter.scala | 24 ++-- .../processor/tdml/TDMLInfosetOutputter.scala | 66 ++++++--- ...e.daffodil.api.validation.ValidatorFactory | 17 +++ .../apache/daffodil/infoset/stringAsXML.tdml | 89 
+++++++++++++ .../infoset/TestStringAsXmlTDML.scala | 37 +++++ .../infoset/TestStringAsXmlValidator.scala | 61 +++++++++ .../TestStringAsXmlValidatorFactory.scala | 33 +++++ 14 files changed, 485 insertions(+), 75 deletions(-) create mode 100644 daffodil-test/src/test/resources/META-INF/services/org.apache.daffodil.api.validation.ValidatorFactory create mode 100644 daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml create mode 100644 daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlTDML.scala create mode 100644 daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidator.scala create mode 100644 daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidatorFactory.scala diff --git a/daffodil-core/src/main/resources/org/apache/daffodil/xsd/tdml.xsd b/daffodil-core/src/main/resources/org/apache/daffodil/xsd/tdml.xsd index 8a8441a1e2..0b6397666f 100644 --- a/daffodil-core/src/main/resources/org/apache/daffodil/xsd/tdml.xsd +++ b/daffodil-core/src/main/resources/org/apache/daffodil/xsd/tdml.xsd @@ -224,11 +224,21 @@ - - - - - + + + + + + + + + + + + + + + diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala index 8c1af3f61d..d0be20682b 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala @@ -94,13 +94,15 @@ object Position { * behavior of normalizing CRLF to LF, and solitary CR to LF. * Defaults to true. Should only be changed in special circumstances * as not normalizing CRLFs is non-standard for XML. - * + * @param noNormalizations True to not remove comments and processing instructions and to not normalize + * CRLF/CR to LF. 
This is used to keep the XML as close to the original as possible */ class DaffodilConstructingLoader private[xml] ( uri: URI, errorHandler: org.xml.sax.ErrorHandler, addPositionAttributes: Boolean, - normalizeCRLFtoLF: Boolean + normalizeCRLFtoLF: Boolean, + noNormalizations: Boolean ) extends ConstructingParser( { // Note: we must open the XML carefully since it might be in some non @@ -122,7 +124,13 @@ class DaffodilConstructingLoader private[xml] ( errorHandler: org.xml.sax.ErrorHandler, addPositionAttributes: Boolean = false ) = - this(uri, errorHandler, addPositionAttributes, normalizeCRLFtoLF = true) + this( + uri, + errorHandler, + addPositionAttributes, + normalizeCRLFtoLF = true, + noNormalizations = false + ) /** * Ensures that DOCTYPES aka DTDs, if encountered, are rejected. @@ -316,19 +324,26 @@ class DaffodilConstructingLoader private[xml] ( } /** - * Drops comments + * Drops comments if noNormalizations is false */ override def comment(pos: Int, s: String): Comment = { - // returning null drops comments - null + if (noNormalizations) { + super.comment(pos, s) + } else { + // returning null drops comments + null + } } /** - * Drops processing instructions + * Drops processing instructions if noNormalizations is false */ override def procInstr(pos: Int, target: String, txt: String) = { - // returning null drops processing instructions - null + if (noNormalizations) { + super.procInstr(pos, target, txt) + } else { // returning null drops processing instructions + null + } } private def parseXMLPrologAttributes( diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala index 0b32d1accc..7994465b71 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala @@ -702,14 +702,23 @@ class DaffodilXMLLoader(val errorHandler: 
org.xml.sax.ErrorHandler) * @param optSchemaURI Optional URI for XML schema for the XML source document. * @param addPositionAttributes True to add dafint:file dafint:line attributes to all elements. * Defaults to false. + * @param noNormalizations True to not remove comments and processing instructions and to not normalize + * CRLF/CR to LF. This is used to keep the XML as close to the original as possible * @return an scala.xml.Node (Element actually) which is the document element of the source. */ def load( source: DaffodilSchemaSource, optSchemaURI: Option[URI], - addPositionAttributes: Boolean = false + addPositionAttributes: Boolean = false, + noNormalizations: Boolean = false ): scala.xml.Node = - load(source, optSchemaURI, addPositionAttributes, normalizeCRLFtoLF = true) + load( + source, + optSchemaURI, + addPositionAttributes, + normalizeCRLFtoLF = true, + noNormalizations + ) /** * package private constructor gives access to normalizeCRLFtoLF feature. @@ -720,13 +729,16 @@ class DaffodilXMLLoader(val errorHandler: org.xml.sax.ErrorHandler) * Defaults to false. * @param normalizeCRLFtoLF True to normalize CRLF and isolated CR to LF. This should usually be true, * but some special case situations may require preservation of CRLF/CR. + * @param noNormalizations True to not remove comments and processing instructions and to not normalize + * CRLF/CR to LF. This is used to keep the XML as close to the original as possible * @return an scala.xml.Node (Element actually) which is the document element of the source. 
*/ private[xml] def load( source: DaffodilSchemaSource, optSchemaURI: Option[URI], addPositionAttributes: Boolean, - normalizeCRLFtoLF: Boolean + normalizeCRLFtoLF: Boolean, + noNormalizations: Boolean ): scala.xml.Node = { // // First we invoke the validator to explicitly validate the XML against @@ -819,7 +831,8 @@ class DaffodilXMLLoader(val errorHandler: org.xml.sax.ErrorHandler) source.uriForLoading, errorHandler, addPositionAttributes, - normalizeCRLFtoLF + normalizeCRLFtoLF, + noNormalizations ) val res = try { diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala index da67ac01bb..de6144050a 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala @@ -42,6 +42,7 @@ import org.apache.daffodil.lib.iapi.URISchemaSource import org.apache.daffodil.lib.schema.annotation.props.LookupLocation import org.apache.daffodil.lib.util.Maybe import org.apache.daffodil.lib.util.Misc +import org.apache.daffodil.runtime1.infoset.XMLTextInfoset import org.apache.commons.io.IOUtils import org.xml.sax.XMLReader @@ -644,34 +645,70 @@ object XMLUtils { private def removeMixedWhitespace(ns: Node): Node = { if (!ns.isInstanceOf[Elem]) return ns - val e = ns.asInstanceOf[Elem] - val children = e.child - val noMixedChildren = - if (children.exists(_.isInstanceOf[Elem])) { - children - .filter { - case Text(data) if data.matches("""\s*""") => false - case Text(data) => - throw new Exception("Element %s contains mixed data: %s".format(e.label, data)) - case _ => true - } - .map(removeMixedWhitespace) - } else { - children.filter { + + def dropWhitespace(e: Node): Node = { + val children = e.child + val noWhitespace = children + .filter { // // So this is a bit strange, but we're dropping nodes that are Empty String. 
// // In XML we cannot tell where there is a Text("") child, from with Nil children // case Text("") => false // drop empty strings + case Text(data) if data.matches("""\s*""") => false case _ => true } + .map(dropWhitespace) + e match { + case elem: Elem => elem.copy(child = noWhitespace) + case _ => e } + } + + ns match { + case e @ Elem( + null, + XMLTextInfoset.stringAsXml, + Null, + NamespaceBinding(null, null | "", _), + _* + ) => + dropWhitespace(e) + case _ => { + val e = ns.asInstanceOf[Elem] + val children = e.child + val noMixedChildren = + if (children.exists(_.isInstanceOf[Elem])) { + children + .filter { + case Text(data) if data.matches("""\s*""") => false + case Text(data) => + throw new Exception( + "Element %s contains mixed data: %s".format(e.label, data) + ) + case _ => true + } + .map(removeMixedWhitespace) + } else { + children.filter { + // + // So this is a bit strange, but we're dropping nodes that are Empty String. + // + // In XML we cannot tell where there is a Text("") child, from with Nil children + // + case Text("") => false // drop empty strings + case _ => true + } + } + + val res = + if (noMixedChildren eq children) e + else e.copy(child = noMixedChildren) + res + } + } - val res = - if (noMixedChildren eq children) e - else e.copy(child = noMixedChildren) - res } /** @@ -700,6 +737,15 @@ object XMLUtils { ): NodeSeq = { val res = n match { + case e @ Elem( + null, + XMLTextInfoset.stringAsXml, + Null, + NamespaceBinding(null, null | "", _), + _* + ) => + e + case e @ Elem(prefix, label, attributes, scope, children*) => { val filteredScope = if (ns.length > 0) filterScope(scope, ns) else xml.TopScope @@ -804,11 +850,23 @@ object XMLUtils { * - Removes unnecessary whitespace */ def normalize(n: Node): Node = { - val noComments = removeComments(n) - val noPCData = convertPCDataToText(noComments) - val combinedText = coalesceAllAdjacentTextNodes(noPCData) - val noMixedWS = removeMixedWhitespace(combinedText) - noMixedWS + n match { + 
case x @ Elem( + null, + XMLTextInfoset.stringAsXml, + Null, + NamespaceBinding(null, null | "", _), + _* + ) => + x + case _ => { + val noComments = removeComments(n) + val noPCData = convertPCDataToText(noComments) + val combinedText = coalesceAllAdjacentTextNodes(noPCData) + val noMixedWS = removeMixedWhitespace(combinedText) + noMixedWS + } + } } class XMLDifferenceException(message: String) extends Exception(message) @@ -973,6 +1031,28 @@ Differences were (path, expected, actual): } else if (checkPrefixes && prefixA != prefixB) { // different prefix List((zPath + "/" + labelA + "@prefix", prefixA, prefixB)) + } else if ( + checkPrefixes && prefixA != null && a.getNamespace(prefixA) != nsbA.getURI(prefixA) + ) { + // prefix doesn't resolve to namespace + List( + ( + zPath + "/" + labelA + "@prefix-uri", + nsbA.getURI(prefixA), + a.getNamespace(prefixA) + ) + ) + } else if ( + checkPrefixes && prefixB != null && b.getNamespace(prefixB) != nsbB.getURI(prefixB) + ) { + // prefix doesn't resolve to namespace + List( + ( + zPath + "/" + labelA + "@prefix-uri", + nsbB.getURI(prefixB), + b.getNamespace(prefixB) + ) + ) } else if (checkNamespaces && mappingsA != mappingsB) { // different namespace bindings List((zPath + "/" + labelA + "@xmlns", mappingsA, mappingsB)) diff --git a/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLLoader.scala b/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLLoader.scala index 3cb5caeb3e..2f4f9a827b 100644 --- a/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLLoader.scala +++ b/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLLoader.scala @@ -172,9 +172,21 @@ class TestXMLLoader { // and toString will print them out into the text with the preserved. 
// val xmlFromDafLoaderNonNormalized = - loader.load(ss, None, addPositionAttributes = false, normalizeCRLFtoLF = false) + loader.load( + ss, + None, + addPositionAttributes = false, + normalizeCRLFtoLF = false, + noNormalizations = false + ) val xmlFromDafLoaderNormalized = - loader.load(ss, None, addPositionAttributes = false, normalizeCRLFtoLF = true) + loader.load( + ss, + None, + addPositionAttributes = false, + normalizeCRLFtoLF = true, + noNormalizations = false + ) { // compare to the regular scala XML loader diff --git a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala index 0549c61ac1..cbf96e2d9a 100644 --- a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala +++ b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala @@ -2800,7 +2800,7 @@ case class DFDLInfoset(di: Node, parent: Infoset) { val testSuite = testCase.parent val before = testSuite.loadingExceptions.clone() - val elem = loader.load(infosetSrc, None) // no schema + val elem = loader.load(infosetSrc, None, noNormalizations = true) // no schema // // TODO: DAFFODIL-288 validate the infoset also // You can pass the optDataSchema, which appears to be the correct thing diff --git a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala index 696a1ab8e8..9d043092d0 100644 --- a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala +++ b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala @@ -173,7 +173,7 @@ class DaffodilTDMLDFDLProcessor private[tdml] ( private def blobPrefix = "" private def blobSuffix = ".bin" - private lazy val tdmlApiInfosetsEnv = 
sys.env.getOrElse("DAFFODIL_TDML_API_INFOSETS", "scala") + private lazy val tdmlApiInfosetsEnv = sys.env.getOrElse("DAFFODIL_TDML_API_INFOSETS", "xml") override def withTracing(bool: Boolean): DaffodilTDMLDFDLProcessor = { copy(dp = newTracing(bool)) @@ -238,7 +238,7 @@ class DaffodilTDMLDFDLProcessor private[tdml] ( ): TDMLUnparseResult = { val dafpr = parseResult.asInstanceOf[DaffodilTDMLParseResult] val inputter = dafpr.inputter - val resNode = dafpr.getResult + val resNode = dafpr.getScalaResult unparse(inputter, resNode, outStream) } @@ -268,8 +268,10 @@ class DaffodilTDMLDFDLProcessor private[tdml] ( ): TDMLParseResult = { val outputter = if (tdmlApiInfosetsEnv == "all") { TDMLInfosetOutputterAll() - } else { + } else if (tdmlApiInfosetsEnv == "scala") { TDMLInfosetOutputterScala() + } else { + TDMLInfosetOutputterXML() } outputter.setBlobAttributes(blobDir, blobPrefix, blobSuffix) @@ -308,7 +310,10 @@ class DaffodilTDMLDFDLProcessor private[tdml] ( xri.parse(sis) if (!actual.isError && !errorHandler.isError) { - verifySameParseOutput(outputter.xmlStream, saxOutputStream) + val actualOutputArray = outputter.getScalaResult.toString.getBytes("UTF-8") + val baos = new ByteArrayOutputStream(actualOutputArray.length) + baos.write(actualOutputArray) + verifySameParseOutput(baos, saxOutputStream) } val dpParseDiag = actual.getDiagnostics.asScala.map(_.toString()).toSeq val saxParseDiag = errorHandler.getDiagnostics.asScala.map(_.toString()).toSeq @@ -392,7 +397,6 @@ class DaffodilTDMLDFDLProcessor private[tdml] ( XMLUtils.compareAndReport( dpParseXMLNodeOutput, saxParseXMLNodeOutput, - checkNamespaces = true, checkPrefixes = true ) } catch { @@ -433,6 +437,7 @@ final class DaffodilTDMLParseResult(actual: ParseResult, outputter: TDMLInfosetO extends TDMLParseResult { override def getResult: Node = outputter.getResult + def getScalaResult: Node = outputter.getScalaResult override def getBlobPaths: java.util.List[Path] = outputter.getBlobPaths diff --git 
a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetInputter.scala b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetInputter.scala index 518f38961b..9ea2a7d9d9 100644 --- a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetInputter.scala +++ b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetInputter.scala @@ -27,32 +27,31 @@ import org.apache.daffodil.lib.util.Misc import org.apache.daffodil.lib.xml.XMLUtils import org.apache.daffodil.runtime1.dpath.NodeInfo import org.apache.daffodil.runtime1.infoset.JsonInfosetInputter -import org.apache.daffodil.runtime1.infoset.ScalaXMLInfosetInputter import org.apache.daffodil.tdml.TDMLException class TDMLInfosetInputter( - val scalaInputter: ScalaXMLInfosetInputter, + val inputter: api.infoset.InfosetInputter, others: Seq[api.infoset.InfosetInputter] ) extends api.infoset.InfosetInputter { private def implString: String = "daffodil" override def getEventType(): InfosetInputterEventType = { - val res = scalaInputter.getEventType() + val res = inputter.getEventType() if (!others.forall(_.getEventType() == res)) throw TDMLException("getEventType does not match", Some(implString)) res } override def getLocalName(): String = { - val res = scalaInputter.getLocalName() + val res = inputter.getLocalName() if (!others.forall(_.getLocalName() == res)) throw TDMLException("getLocalName does not match", Some(implString)) res } override def getNamespaceURI(): String = { - val res = scalaInputter.getNamespaceURI() + val res = inputter.getNamespaceURI() val resIsEmpty = res == null || res == "" val othersMatch = others.forall { i => if (!i.getSupportsNamespaces) { @@ -74,7 +73,7 @@ class TDMLInfosetInputter( primType: NodeInfo.Kind, runtimeProperties: java.util.Map[String, String] ): String = { - val res = scalaInputter.getSimpleText(primType, runtimeProperties) + val res = 
inputter.getSimpleText(primType, runtimeProperties) val resIsEmpty = res == null || res == "" val otherStrings = others.map { i => // Note in an unparserTestCase, there are no others (infoset inputters), because the input infoset is @@ -100,7 +99,10 @@ class TDMLInfosetInputter( } if (!othersmatch) - throw TDMLException("getSimpleText does not match", Some(implString)) + throw TDMLException( + s"getSimpleText does not match for $res ${others.zip(otherStrings).mkString("\n")}", + Some(implString) + ) if (primType.isInstanceOf[NodeInfo.AnyURI.Kind]) { try { @@ -126,26 +128,26 @@ class TDMLInfosetInputter( } override def isNilled(): JBoolean = { - val res = scalaInputter.isNilled() + val res = inputter.isNilled() if (!others.forall(_.isNilled() == res)) throw TDMLException("isNilled does not match", Some(implString)) res } override def hasNext(): Boolean = { - val res = scalaInputter.hasNext() + val res = inputter.hasNext() if (!others.forall(_.hasNext() == res)) throw TDMLException("hasNext does not match", Some(implString)) res } override def next(): Unit = { - scalaInputter.next() + inputter.next() others.foreach(_.next()) } override def fini(): Unit = { - scalaInputter.fini() + inputter.fini() others.foreach(_.fini()) } diff --git a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala index cf913d6877..6f185c7b01 100644 --- a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala +++ b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala @@ -19,7 +19,6 @@ package org.apache.daffodil.processor.tdml import java.io.ByteArrayInputStream import java.io.ByteArrayOutputStream -import java.nio.charset.Charset import scala.xml.Node import org.apache.daffodil.api @@ -36,32 +35,62 @@ import 
org.apache.daffodil.runtime1.infoset.W3CDOMInfosetOutputter import org.apache.daffodil.runtime1.infoset.XMLTextInfosetInputter import org.apache.daffodil.runtime1.infoset.XMLTextInfosetOutputter -class TDMLInfosetOutputterScala(scalaOut: ScalaXMLInfosetOutputter) - extends TeeInfosetOutputter(Seq(scalaOut)*) +class TDMLInfosetOutputterScala( + scalaOut: ScalaXMLInfosetOutputter, + override val xmlStream: ByteArrayOutputStream, + xmlOut: XMLTextInfosetOutputter +) extends TeeInfosetOutputter(Seq(scalaOut, xmlOut)*) with TDMLInfosetOutputter { - override def getResult: Node = scalaOut.getResult() - - override lazy val xmlStream: ByteArrayOutputStream = { - val bos = new ByteArrayOutputStream() - bos.write(getResult.toString().getBytes(Charset.defaultCharset())) - bos - } + override def getResult: Node = + scala.xml.XML.load(new ByteArrayInputStream(xmlStream.toByteArray)) override def toInfosetInputter: TDMLInfosetInputter = { val scalaIn = new ScalaXMLInfosetInputter(scalaOut.getResult()) new TDMLInfosetInputter(scalaIn, Seq()) } + + override def getScalaResult: Node = scalaOut.getResult() } object TDMLInfosetOutputterScala { def apply(): TDMLInfosetOutputterScala = { val scalaOut = new ScalaXMLInfosetOutputter() scalaOut.setIncludeDataType(true) - new TDMLInfosetOutputterScala(scalaOut) + val baos = new ByteArrayOutputStream() + val xmlOut = new XMLTextInfosetOutputter(baos, true) + new TDMLInfosetOutputterScala(scalaOut, baos, xmlOut) + } +} + +object TDMLInfosetOutputterXML { + def apply(): TDMLInfosetOutputterXML = { + val baos = new ByteArrayOutputStream() + val xmlOut = new XMLTextInfosetOutputter(baos, true) + xmlOut.setIncludeDataType(true) + val scalaOut = new ScalaXMLInfosetOutputter() + new TDMLInfosetOutputterXML(baos, xmlOut, scalaOut) } } +class TDMLInfosetOutputterXML( + override val xmlStream: ByteArrayOutputStream, + xmlOut: XMLTextInfosetOutputter, + scalaOut: ScalaXMLInfosetOutputter +) extends TeeInfosetOutputter(Seq(xmlOut, scalaOut)*) + with 
TDMLInfosetOutputter { + + override def getResult: Node = + scala.xml.XML.load(new ByteArrayInputStream(xmlStream.toByteArray)) + + override def toInfosetInputter: TDMLInfosetInputter = { + val xmlIn = new XMLTextInfosetInputter(new ByteArrayInputStream(xmlStream.toByteArray)) + new TDMLInfosetInputter(xmlIn, Seq()) + } + + override def getScalaResult: Node = scalaOut.getResult() +} + class TDMLInfosetOutputterAll( jsonStream: ByteArrayOutputStream, override val xmlStream: ByteArrayOutputStream, @@ -73,7 +102,9 @@ class TDMLInfosetOutputterAll( ) extends TeeInfosetOutputter(Seq(xmlOut, scalaOut, jdomOut, w3cdomOut, jsonOut)*) with TDMLInfosetOutputter { - override def getResult: Node = scalaOut.getResult() + def getScalaResult: Node = scalaOut.getResult() + override def getResult: Node = + scala.xml.XML.load(new ByteArrayInputStream(xmlStream.toByteArray)) override def toInfosetInputter: TDMLInfosetInputter = { val scalaIn = new ScalaXMLInfosetInputter(scalaOut.getResult()) @@ -82,10 +113,14 @@ class TDMLInfosetOutputterAll( val jsonIn = new JsonInfosetInputter(new ByteArrayInputStream(jsonStream.toByteArray)) val xmlIn = new XMLTextInfosetInputter(new ByteArrayInputStream(xmlStream.toByteArray)) val nullIn = { - val events = NullInfosetInputter.toEvents(new ByteArrayInputStream(xmlStream.toByteArray)) + val events = NullInfosetInputter.toEvents( + new ByteArrayInputStream( + scalaOut.getResult().toString().getBytes("UTF-8") + ) + ) new NullInfosetInputter(events) } - new TDMLInfosetInputter(scalaIn, Seq(jdomIn, w3cdomIn, jsonIn, xmlIn, nullIn)) + new TDMLInfosetInputter(xmlIn, Seq(jdomIn, w3cdomIn, jsonIn, scalaIn, nullIn)) } } @@ -98,7 +133,7 @@ object TDMLInfosetOutputterAll { val jdomOut = new JDOMInfosetOutputter() val w3cdomOut = new W3CDOMInfosetOutputter() val jsonOut = new JsonInfosetOutputter(jsonStream, false) - val xmlOut = new XMLTextInfosetOutputter(xmlStream, false) + val xmlOut = new XMLTextInfosetOutputter(xmlStream, true) Seq(scalaOut, jdomOut, 
w3cdomOut, jsonOut, xmlOut).foreach { out => out.setIncludeDataType(true) @@ -121,6 +156,7 @@ trait TDMLInfosetOutputter extends api.infoset.InfosetOutputter { def xmlStream: ByteArrayOutputStream def getResult: Node + def getScalaResult: Node def toInfosetInputter: TDMLInfosetInputter } diff --git a/daffodil-test/src/test/resources/META-INF/services/org.apache.daffodil.api.validation.ValidatorFactory b/daffodil-test/src/test/resources/META-INF/services/org.apache.daffodil.api.validation.ValidatorFactory new file mode 100644 index 0000000000..047377250c --- /dev/null +++ b/daffodil-test/src/test/resources/META-INF/services/org.apache.daffodil.api.validation.ValidatorFactory @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +org.apache.daffodil.infoset.TestStringAsXmlNamespacedValidatorFactory +org.apache.daffodil.infoset.TestStringAsXmlNoNamespaceValidatorFactory diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml new file mode 100644 index 0000000000..04a2269769 --- /dev/null +++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml @@ -0,0 +1,89 @@ + + + + + + + + stringAsXml/namespaced/binMessage_01.dat + + + stringAsXml/namespaced/binMessage_01.dat.xml + + + + + + stringAsXml/namespaced/binMessage_01.dat + + + stringAsXml/namespaced/binMessage_01.dat.xml + + + Element 'xmlStr' is a simple type + + + + + + stringAsXml/namespaced/binMessage_03.dat + + + Unexpected character + + + + + + stringAsXml/namespaced/binMessage_08.dat + + + Undeclared general entity "name" + + + + + + + stringAsXml/nonamespace/binMessage_01.dat + + + stringAsXml/nonamespace/binMessage_01.dat.xml + + + Value '=invalid field' is not facet-valid + + + diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlTDML.scala b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlTDML.scala new file mode 100644 index 0000000000..62f9392338 --- /dev/null +++ b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlTDML.scala @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.daffodil.infoset + +import org.apache.daffodil.junit.tdml.TdmlSuite +import org.apache.daffodil.junit.tdml.TdmlTests + +import org.junit.Test + +object TestStringAsXmlTDML extends TdmlSuite { + val tdmlResource = "/org/apache/daffodil/infoset/stringAsXML.tdml" +} + +class TestStringAsXmlTDML extends TdmlTests { + val tdmlSuite = TestStringAsXmlTDML + + @Test def stringAsXml_01_a = test + @Test def stringAsXml_01_b = test + @Test def stringAsXml_04 = test + @Test def stringAsXml_09 = test + @Test def stringAsXml_10 = test +} diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidator.scala b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidator.scala new file mode 100644 index 0000000000..658baf0ea7 --- /dev/null +++ b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidator.scala @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.daffodil.infoset + +import java.io.InputStream +import java.net.URL + +import org.apache.daffodil.api.validation.ValidationHandler +import org.apache.daffodil.api.validation.Validator +import org.apache.daffodil.lib.util.Misc +import org.apache.daffodil.validation.XercesValidator + +object TestStringAsXmlNamespacedValidator { + val name = "TestStringAsXmlNamespacedValidator" +} + +class TestStringAsXmlNamespacedValidator extends Validator { + + val schemaURL: URL = Misc + .getRequiredResource( + "/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessageWithXmlPayload.xsd" + ) + .toURL + + override def validateXML(document: InputStream, vh: ValidationHandler): Unit = { + val v = XercesValidator.fromURL(schemaURL) + v.validateXML(document, vh) + } +} + +object TestStringAsXmlNoNamespaceValidator { + val name = "TestStringAsXmlNoNamespaceValidator" +} + +class TestStringAsXmlNoNamespaceValidator extends Validator { + + val schemaURL: URL = Misc + .getRequiredResource( + "/org/apache/daffodil/infoset/stringAsXml/nonamespace/xsd/binMessageWithXmlPayload.xsd" + ) + .toURL + + override def validateXML(document: InputStream, vh: ValidationHandler): Unit = { + val v = XercesValidator.fromURL(schemaURL) + v.validateXML(document, vh) + } +} diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidatorFactory.scala b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidatorFactory.scala new file mode 100644 index 0000000000..e4fb252c94 --- /dev/null +++ 
b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidatorFactory.scala @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.daffodil.infoset + +import java.util.Properties + +import org.apache.daffodil.api.validation.ValidatorFactory + +class TestStringAsXmlNamespacedValidatorFactory extends ValidatorFactory { + override def name: String = TestStringAsXmlNamespacedValidator.name + + override def make(config: Properties) = new TestStringAsXmlNamespacedValidator +} + +class TestStringAsXmlNoNamespaceValidatorFactory extends ValidatorFactory { + override def name: String = TestStringAsXmlNoNamespaceValidator.name + + override def make(config: Properties) = new TestStringAsXmlNoNamespaceValidator +} From 0ac13c9c4f4dd5a775d725f08b21522b6b506d24 Mon Sep 17 00:00:00 2001 From: olabusayoT <50379531+olabusayoT@users.noreply.github.com> Date: Tue, 14 Apr 2026 14:59:21 -0400 Subject: [PATCH 2/7] fixup! - Removed unneeded `TDMLInfosetOutputterScala`. - Enhanced `DaffodilXMLLoader` and related classes with `removeComments` and `removeProcInstr` flags, replacing too-broad `noNormalizations`. 
- Updated `TestStringAsXmlValidator` with a unified validator for namespace and non-namespace cases. - add comments for clarity - remove getScalaResult from all but TDMLInfosetOutputterAll - add clarifying info to TDMLInfosetInputter TDML Exceptions in case of non-matches - remove intermingling of ScalaInfosetOutputter with TDMLInfosetOutterXML - Turn off pretty printing from XMLTextInfosetOutputter in TDMLInfosetOutputterAll - undo type aware changes for ScalaXMLInfosetOutputter (the way it was adding namespace bindings for scalaXML was not exactly correct as it wasn't part of the child element's minimized scope. We found it would be too much trouble to implement correctly for scalaXML, so we decided to remove the functionality) DAFFODIL-2909 --- .../lib/xml/DaffodilConstructingLoader.scala | 30 +++-- .../daffodil/lib/xml/DaffodilXMLLoader.scala | 42 ++----- .../apache/daffodil/lib/xml/XMLUtils.scala | 115 +++++++++--------- .../infoset/ScalaXMLInfosetOutputter.scala | 25 +--- .../lib/xml/test/unit/TestXMLLoader.scala | 6 +- .../lib/xml/test/unit/TestXMLUtils.scala | 11 ++ .../org/apache/daffodil/tdml/TDMLRunner.scala | 8 +- .../tdml/DaffodilTDMLDFDLProcessor.scala | 21 +++- .../processor/tdml/TDMLInfosetInputter.scala | 27 +++- .../processor/tdml/TDMLInfosetOutputter.scala | 45 +------ .../apache/daffodil/cliTest/TestCLITdml.scala | 2 +- ...e.daffodil.api.validation.ValidatorFactory | 3 +- .../apache/daffodil/infoset/stringAsXML.tdml | 15 +-- .../section07/variables/variables_01.tdml | 2 +- .../infoset/TestStringAsXmlValidator.scala | 39 +++--- .../TestStringAsXmlValidatorFactory.scala | 33 ----- 16 files changed, 175 insertions(+), 249 deletions(-) delete mode 100644 daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidatorFactory.scala diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala index 
d0be20682b..a8c97963cd 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala @@ -94,15 +94,16 @@ object Position { * behavior of normalizing CRLF to LF, and solitary CR to LF. * Defaults to true. Should only be changed in special circumstances * as not normalizing CRLFs is non-standard for XML. - * @param noNormalizations True to not remove comments and processing instructions and to not normalize - * CRLF/CR to LF. This is used to keep the XML as close to the original as possible + * @param removeComments True to remove comments. Set to false to keep the XML as close to the original as possible + * @param removeProcInstr True to remove processing instructions. Set to false to keep the XML as close to the original as possible */ class DaffodilConstructingLoader private[xml] ( uri: URI, errorHandler: org.xml.sax.ErrorHandler, addPositionAttributes: Boolean, normalizeCRLFtoLF: Boolean, - noNormalizations: Boolean + removeComments: Boolean, + removeProcInstr: Boolean ) extends ConstructingParser( { // Note: we must open the XML carefully since it might be in some non @@ -129,7 +130,8 @@ class DaffodilConstructingLoader private[xml] ( errorHandler, addPositionAttributes, normalizeCRLFtoLF = true, - noNormalizations = false + removeComments = true, + removeProcInstr = true ) /** @@ -324,25 +326,29 @@ class DaffodilConstructingLoader private[xml] ( } /** - * Drops comments if noNormalizations is false + * Drops comments if removeComments is true + * + * This is optional, controlled by a constructor parameter.
*/ override def comment(pos: Int, s: String): Comment = { - if (noNormalizations) { - super.comment(pos, s) - } else { + if (removeComments) { // returning null drops comments null + } else { + super.comment(pos, s) } } /** - * Drops processing instructions if noNormalizations is false + * Drops processing instructions if removeProcInstr is true + * + * This is optional, controlled by a constructor parameter. */ override def procInstr(pos: Int, target: String, txt: String) = { - if (noNormalizations) { - super.procInstr(pos, target, txt) - } else { // returning null drops processing instructions + if (removeProcInstr) { // returning null drops processing instructions null + } else { + super.procInstr(pos, target, txt) } } diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala index 7994465b71..c250dfcd03 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala @@ -702,43 +702,20 @@ class DaffodilXMLLoader(val errorHandler: org.xml.sax.ErrorHandler) * @param optSchemaURI Optional URI for XML schema for the XML source document. * @param addPositionAttributes True to add dafint:file dafint:line attributes to all elements. * Defaults to false. - * @param noNormalizations True to not remove comments and processing instructions and to not normalize - * CRLF/CR to LF. This is used to keep the XML as close to the original as possible - * @return an scala.xml.Node (Element actually) which is the document element of the source.
- */ - def load( - source: DaffodilSchemaSource, - optSchemaURI: Option[URI], - addPositionAttributes: Boolean = false, - noNormalizations: Boolean = false - ): scala.xml.Node = - load( - source, - optSchemaURI, - addPositionAttributes, - normalizeCRLFtoLF = true, - noNormalizations - ) - - /** - * package private constructor gives access to normalizeCRLFtoLF feature. - * - * @param source The URI for the XML document which may be a XML or DFDL schema, or just XML data. - * @param optSchemaURI Optional URI for XML schema for the XML source document. - * @param addPositionAttributes True to add dafint:file dafint:line attributes to all elements. - * Defaults to false. * @param normalizeCRLFtoLF True to normalize CRLF and isolated CR to LF. This should usually be true, * but some special case situations may require preservation of CRLF/CR. - * @param noNormalizations True to not remove comments and processing instructions and to not normalize - * CRLF/CR to LF. This is used to keep the XML as close to the original as possible + * @param removeComments True to remove comments. Set to false to keep the XML as close to the original as possible + * @param removeProcInstr True to remove processing instructions. Set to false to keep the XML as close to the original as possible + * + * @return an scala.xml.Node (Element actually) which is the document element of the source.
*/ - private[xml] def load( + def load( source: DaffodilSchemaSource, optSchemaURI: Option[URI], - addPositionAttributes: Boolean, - normalizeCRLFtoLF: Boolean, - noNormalizations: Boolean + addPositionAttributes: Boolean = false, + normalizeCRLFtoLF: Boolean = true, + removeComments: Boolean = true, + removeProcInstr: Boolean = true ): scala.xml.Node = { // // First we invoke the validator to explicitly validate the XML against @@ -832,7 +809,8 @@ class DaffodilXMLLoader(val errorHandler: org.xml.sax.ErrorHandler) errorHandler, addPositionAttributes, normalizeCRLFtoLF, - noNormalizations + removeComments, + removeProcInstr ) val res = try { diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala index de6144050a..b08a594010 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala @@ -600,6 +600,14 @@ object XMLUtils { def removeComments(e: Node): Node = { e match { + case x @ Elem( + null, + XMLTextInfoset.stringAsXml, + Null, + NamespaceBinding(null, null | "", _), + _* + ) => + x case Elem(prefix, label, attribs, scope, child*) => { val newChildren = child.filterNot { _.isInstanceOf[Comment] }.map { removeComments(_) } Elem(prefix, label, attribs, scope, true, newChildren*) @@ -646,35 +654,31 @@ object XMLUtils { private def removeMixedWhitespace(ns: Node): Node = { if (!ns.isInstanceOf[Elem]) return ns - def dropWhitespace(e: Node): Node = { - val children = e.child - val noWhitespace = children - .filter { - // - // So this is a bit strange, but we're dropping nodes that are Empty String. 
- // - // In XML we cannot tell where there is a Text("") child, from with Nil children - // - case Text("") => false // drop empty strings - case Text(data) if data.matches("""\s*""") => false - case _ => true - } - .map(dropWhitespace) - e match { - case elem: Elem => elem.copy(child = noWhitespace) - case _ => e - } - } - ns match { + // NOTE: this is specifically for the stringAsXml feature as we avoid + // making changes to any of its children except removing any surrounding + // whitespace, requiring that stringAsXml in the infoset match results exactly. case e @ Elem( null, XMLTextInfoset.stringAsXml, Null, NamespaceBinding(null, null | "", _), _* - ) => - dropWhitespace(e) + ) => { + val (elemChildren, nonElemChildren) = e.child.partition { + _.isInstanceOf[Elem] + } + if (elemChildren.length != 1) + throw new Exception("stringAsXml must contain a single child element.") + nonElemChildren.foreach { + case Text(data) if data.matches("""\s*""") => // no-op, empty text siblings are fine + case x => + throw new Exception( + "%s is some kind of mixed content not allowed as a stringAsXml child".format(x) + ) + } + e.asInstanceOf[Elem].copy(child = elemChildren) + } case _ => { val e = ns.asInstanceOf[Elem] val children = e.child @@ -850,23 +854,11 @@ object XMLUtils { * - Removes unnecessary whitespace */ def normalize(n: Node): Node = { - n match { - case x @ Elem( - null, - XMLTextInfoset.stringAsXml, - Null, - NamespaceBinding(null, null | "", _), - _* - ) => - x - case _ => { - val noComments = removeComments(n) - val noPCData = convertPCDataToText(noComments) - val combinedText = coalesceAllAdjacentTextNodes(noPCData) - val noMixedWS = removeMixedWhitespace(combinedText) - noMixedWS - } - } + val noComments = removeComments(n) + val noPCData = convertPCDataToText(noComments) + val combinedText = coalesceAllAdjacentTextNodes(noPCData) + val noMixedWS = removeMixedWhitespace(combinedText) + noMixedWS } class XMLDifferenceException(message: String) extends 
Exception(message) @@ -1031,26 +1023,13 @@ Differences were (path, expected, actual): } else if (checkPrefixes && prefixA != prefixB) { // different prefix List((zPath + "/" + labelA + "@prefix", prefixA, prefixB)) - } else if ( - checkPrefixes && prefixA != null && a.getNamespace(prefixA) != nsbA.getURI(prefixA) - ) { - // prefix doesn't resolve to namespace + } else if (checkPrefixes && a.scope.getURI(prefixA) != b.scope.getURI(prefixB)) { + // prefixes doesn't resolve to same namespace List( ( - zPath + "/" + labelA + "@prefix-uri", - nsbA.getURI(prefixA), - a.getNamespace(prefixA) - ) - ) - } else if ( - checkPrefixes && prefixB != null && b.getNamespace(prefixB) != nsbB.getURI(prefixB) - ) { - // prefix doesn't resolve to namespace - List( - ( - zPath + "/" + labelA + "@prefix-uri", - nsbB.getURI(prefixB), - b.getNamespace(prefixB) + zPath + "/" + labelA + "@prefix-namespace", + a.scope.getURI(prefixA), + b.scope.getURI(prefixB) ) ) } else if (checkNamespaces && mappingsA != mappingsB) { @@ -1135,6 +1114,28 @@ Differences were (path, expected, actual): computeTextDiff(zPath, tA, tB, maybeType, maybeFloatEpsilon, maybeDoubleEpsilon) thisDiff } + case (cA: Comment, cB: Comment) => { + val thisDiff = computeTextDiff( + zPath, + cA.toString, + cB.toString, + maybeType, + maybeFloatEpsilon, + maybeDoubleEpsilon + ) + thisDiff + } + case (pcA: PCData, pcB: PCData) => { + val thisDiff = computeTextDiff( + zPath, + pcA.toString, + pcB.toString, + maybeType, + maybeFloatEpsilon, + maybeDoubleEpsilon + ) + thisDiff + } case (pA: ProcInstr, pB: ProcInstr) => { val ProcInstr(tA1label, tA1content) = pA val ProcInstr(tB1label, tB1content) = pB diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/ScalaXMLInfosetOutputter.scala b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/ScalaXMLInfosetOutputter.scala index 4abe32f378..e30534fcc7 100644 --- 
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/ScalaXMLInfosetOutputter.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/ScalaXMLInfosetOutputter.scala @@ -19,9 +19,7 @@ package org.apache.daffodil.runtime1.infoset import scala.collection.mutable.ListBuffer import scala.xml.MetaData -import scala.xml.NamespaceBinding import scala.xml.Null -import scala.xml.PrefixedAttribute import scala.xml.UnprefixedAttribute import org.apache.daffodil.api.DFDLPrimType @@ -56,16 +54,6 @@ class ScalaXMLInfosetOutputter(showFreedInfo: Boolean = false) resultNode = Maybe(root(0)) } - private def getScope(diElem: DIElement): NamespaceBinding = { - val minScope = diElem.metadata.minimizedScope - // if including xsi:type is enabled, ensure the xsi namespace is defined on the root element - if (getIncludeDataType() && stack.length == 1 && minScope.getURI("xsi") == null) { - NamespaceBinding("xsi", XMLUtils.XSI_NAMESPACE, minScope) - } else { - minScope - } - } - private def getAttributes(diElem: DIElement): MetaData = { val nilAttr = if (diElem.isNilled) XMLUtils.xmlNilAttribute else Null val freedAttr = @@ -92,14 +80,7 @@ class ScalaXMLInfosetOutputter(showFreedInfo: Boolean = false) } else { nilAttr } - val typedAttr = - if (getIncludeDataType() && diElem.isSimple) { - val primName = diElem.erd.optPrimType.get.name - new PrefixedAttribute("xsi", "type", "xs:" + primName, freedAttr) - } else { - freedAttr - } - typedAttr + freedAttr } override def startSimple(se: InfosetSimpleElement): Unit = { @@ -124,7 +105,7 @@ class ScalaXMLInfosetOutputter(showFreedInfo: Boolean = false) diSimple.metadata.prefix, diSimple.metadata.name, attributes, - getScope(diSimple), + diSimple.metadata.minimizedScope, minimizeEmpty = true, children* ) @@ -149,7 +130,7 @@ class ScalaXMLInfosetOutputter(showFreedInfo: Boolean = false) diComplex.metadata.prefix, diComplex.metadata.name, attributes, - getScope(diComplex), + diComplex.metadata.minimizedScope, 
minimizeEmpty = true, children* ) diff --git a/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLLoader.scala b/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLLoader.scala index 2f4f9a827b..1471da3a3d 100644 --- a/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLLoader.scala +++ b/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLLoader.scala @@ -176,16 +176,14 @@ class TestXMLLoader { ss, None, addPositionAttributes = false, - normalizeCRLFtoLF = false, - noNormalizations = false + normalizeCRLFtoLF = false ) val xmlFromDafLoaderNormalized = loader.load( ss, None, addPositionAttributes = false, - normalizeCRLFtoLF = true, - noNormalizations = false + normalizeCRLFtoLF = true ) { diff --git a/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLUtils.scala b/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLUtils.scala index 3e70d7129d..2c9690f4b1 100644 --- a/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLUtils.scala +++ b/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLUtils.scala @@ -98,6 +98,17 @@ class TestXMLUtils { assertEquals("ns2", b) } + @Test def testPrefixNSDiff(): Unit = { + // different prefix should error, even though the namespace is the same + val d1 = a + val d2 = a + val diffs = XMLUtils.computeDiff(d1, d2, checkPrefixes = true) + val Seq((path, a, b)) = diffs + assertEquals("/a@prefix-namespace", path) + assertEquals("someprefix", a) + assertEquals("someotherprefix", b) + } + @Test def testNamespaceDiff(): Unit = { // different namespace mappings should error val d1 = a diff --git a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala index cbf96e2d9a..f36a2a4909 100644 --- a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala +++ 
b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala @@ -2800,7 +2800,13 @@ case class DFDLInfoset(di: Node, parent: Infoset) { val testSuite = testCase.parent val before = testSuite.loadingExceptions.clone() - val elem = loader.load(infosetSrc, None, noNormalizations = true) // no schema + val elem = loader.load( + infosetSrc, + None, + normalizeCRLFtoLF = false, + removeComments = false, + removeProcInstr = false + ) // no schema // // TODO: DAFFODIL-288 validate the infoset also // You can pass the optDataSchema, which appears to be the correct thing diff --git a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala index 9d043092d0..da0d134a67 100644 --- a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala +++ b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala @@ -41,6 +41,7 @@ import org.apache.daffodil.lib.util.MaybeULong import org.apache.daffodil.lib.xml.DaffodilSAXParserFactory import org.apache.daffodil.lib.xml.XMLUtils import org.apache.daffodil.lib.xml.XMLUtils.XMLDifferenceException +import org.apache.daffodil.processor.tdml import org.apache.daffodil.runtime1.iapi.* import org.apache.daffodil.runtime1.iapi.DFDL.DaffodilUnhandledSAXException import org.apache.daffodil.runtime1.iapi.DFDL.DaffodilUnparseContentHandler @@ -238,7 +239,7 @@ class DaffodilTDMLDFDLProcessor private[tdml] ( ): TDMLUnparseResult = { val dafpr = parseResult.asInstanceOf[DaffodilTDMLParseResult] val inputter = dafpr.inputter - val resNode = dafpr.getScalaResult + val resNode = dafpr.getResult unparse(inputter, resNode, outStream) } @@ -268,8 +269,6 @@ class DaffodilTDMLDFDLProcessor private[tdml] ( ): TDMLParseResult = { val outputter = if (tdmlApiInfosetsEnv == "all") { 
TDMLInfosetOutputterAll() - } else if (tdmlApiInfosetsEnv == "scala") { - TDMLInfosetOutputterScala() } else { TDMLInfosetOutputterXML() } @@ -310,7 +309,14 @@ class DaffodilTDMLDFDLProcessor private[tdml] ( xri.parse(sis) if (!actual.isError && !errorHandler.isError) { - val actualOutputArray = outputter.getScalaResult.toString.getBytes("UTF-8") + // we use the scala result because both the ScalaInfosetOutputter and + // the SAXInfosetOutputter do not implement stringAsXml, + // which helps to avoid any differences cause by the stringAsXml conversions. + val actualOutputArray = outputter + .asInstanceOf[tdml.TDMLInfosetOutputterAll] + .getScalaResult + .toString + .getBytes("UTF-8") val baos = new ByteArrayOutputStream(actualOutputArray.length) baos.write(actualOutputArray) verifySameParseOutput(baos, saxOutputStream) @@ -397,6 +403,12 @@ class DaffodilTDMLDFDLProcessor private[tdml] ( XMLUtils.compareAndReport( dpParseXMLNodeOutput, saxParseXMLNodeOutput, + // we no longer checkNamespaces because SAX outputs the same namespaces as + // the XMLTextInfosetOutputter but not the scalaXMLInfosetOutputter, so checking + // namespaces fails in the DAFFODIL_TDML_API_INFOSETS='all' case due to differences + // in the scalaXMLInfosetOutputter namespaces, probably having to do with + // minimizeScope issues + // checkNamespaces = true, checkPrefixes = true ) } catch { @@ -437,7 +449,6 @@ final class DaffodilTDMLParseResult(actual: ParseResult, outputter: TDMLInfosetO extends TDMLParseResult { override def getResult: Node = outputter.getResult - def getScalaResult: Node = outputter.getScalaResult override def getBlobPaths: java.util.List[Path] = outputter.getBlobPaths diff --git a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetInputter.scala b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetInputter.scala index 9ea2a7d9d9..e9097b4413 100644 --- 
a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetInputter.scala +++ b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetInputter.scala @@ -39,14 +39,20 @@ class TDMLInfosetInputter( override def getEventType(): InfosetInputterEventType = { val res = inputter.getEventType() if (!others.forall(_.getEventType() == res)) - throw TDMLException("getEventType does not match", Some(implString)) + throw TDMLException( + s"getEventType does not match\n${others.zip(others.map(_.getEventType)).mkString("\n")}", + Some(implString) + ) res } override def getLocalName(): String = { val res = inputter.getLocalName() if (!others.forall(_.getLocalName() == res)) - throw TDMLException("getLocalName does not match", Some(implString)) + throw TDMLException( + s"getLocalName does not match\n${others.zip(others.map(_.getLocalName)).mkString("\n")}", + Some(implString) + ) res } @@ -65,7 +71,10 @@ class TDMLInfosetInputter( } } if (!othersMatch) - throw TDMLException("getNamespaceURI does not match", Some(implString)) + throw TDMLException( + s"getNamespaceURI does not match\n${others.filter(_.getSupportsNamespaces).map(o => (o, o.getNamespaceURI)).mkString("\n")}", + Some(implString) + ) res } @@ -100,7 +109,7 @@ class TDMLInfosetInputter( if (!othersmatch) throw TDMLException( - s"getSimpleText does not match for $res ${others.zip(otherStrings).mkString("\n")}", + s"getSimpleText does not match for $res\n${others.zip(otherStrings).mkString("\n")}", Some(implString) ) @@ -130,14 +139,20 @@ class TDMLInfosetInputter( override def isNilled(): JBoolean = { val res = inputter.isNilled() if (!others.forall(_.isNilled() == res)) - throw TDMLException("isNilled does not match", Some(implString)) + throw TDMLException( + s"isNilled does not match\n${others.zip(others.map(_.isNilled)).mkString("\n")}", + Some(implString) + ) res } override def hasNext(): Boolean = { val res = inputter.hasNext() if (!others.forall(_.hasNext() 
== res)) - throw TDMLException("hasNext does not match", Some(implString)) + throw TDMLException( + s"hasNext does not match\n${others.zip(others.map(_.hasNext)).mkString("\n")}", + Some(implString) + ) res } diff --git a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala index 6f185c7b01..a04262955f 100644 --- a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala +++ b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala @@ -35,49 +35,19 @@ import org.apache.daffodil.runtime1.infoset.W3CDOMInfosetOutputter import org.apache.daffodil.runtime1.infoset.XMLTextInfosetInputter import org.apache.daffodil.runtime1.infoset.XMLTextInfosetOutputter -class TDMLInfosetOutputterScala( - scalaOut: ScalaXMLInfosetOutputter, - override val xmlStream: ByteArrayOutputStream, - xmlOut: XMLTextInfosetOutputter -) extends TeeInfosetOutputter(Seq(scalaOut, xmlOut)*) - with TDMLInfosetOutputter { - - override def getResult: Node = - scala.xml.XML.load(new ByteArrayInputStream(xmlStream.toByteArray)) - - override def toInfosetInputter: TDMLInfosetInputter = { - val scalaIn = new ScalaXMLInfosetInputter(scalaOut.getResult()) - new TDMLInfosetInputter(scalaIn, Seq()) - } - - override def getScalaResult: Node = scalaOut.getResult() -} - -object TDMLInfosetOutputterScala { - def apply(): TDMLInfosetOutputterScala = { - val scalaOut = new ScalaXMLInfosetOutputter() - scalaOut.setIncludeDataType(true) - val baos = new ByteArrayOutputStream() - val xmlOut = new XMLTextInfosetOutputter(baos, true) - new TDMLInfosetOutputterScala(scalaOut, baos, xmlOut) - } -} - object TDMLInfosetOutputterXML { def apply(): TDMLInfosetOutputterXML = { val baos = new ByteArrayOutputStream() - val xmlOut = new XMLTextInfosetOutputter(baos, true) + val xmlOut = new 
XMLTextInfosetOutputter(baos, false) xmlOut.setIncludeDataType(true) - val scalaOut = new ScalaXMLInfosetOutputter() - new TDMLInfosetOutputterXML(baos, xmlOut, scalaOut) + new TDMLInfosetOutputterXML(baos, xmlOut) } } class TDMLInfosetOutputterXML( override val xmlStream: ByteArrayOutputStream, - xmlOut: XMLTextInfosetOutputter, - scalaOut: ScalaXMLInfosetOutputter -) extends TeeInfosetOutputter(Seq(xmlOut, scalaOut)*) + xmlOut: XMLTextInfosetOutputter +) extends TeeInfosetOutputter(Seq(xmlOut)*) with TDMLInfosetOutputter { override def getResult: Node = @@ -87,8 +57,6 @@ class TDMLInfosetOutputterXML( val xmlIn = new XMLTextInfosetInputter(new ByteArrayInputStream(xmlStream.toByteArray)) new TDMLInfosetInputter(xmlIn, Seq()) } - - override def getScalaResult: Node = scalaOut.getResult() } class TDMLInfosetOutputterAll( @@ -133,9 +101,9 @@ object TDMLInfosetOutputterAll { val jdomOut = new JDOMInfosetOutputter() val w3cdomOut = new W3CDOMInfosetOutputter() val jsonOut = new JsonInfosetOutputter(jsonStream, false) - val xmlOut = new XMLTextInfosetOutputter(xmlStream, true) + val xmlOut = new XMLTextInfosetOutputter(xmlStream, false) - Seq(scalaOut, jdomOut, w3cdomOut, jsonOut, xmlOut).foreach { out => + Seq(jdomOut, w3cdomOut, jsonOut, xmlOut).foreach { out => out.setIncludeDataType(true) } @@ -156,7 +124,6 @@ trait TDMLInfosetOutputter extends api.infoset.InfosetOutputter { def xmlStream: ByteArrayOutputStream def getResult: Node - def getScalaResult: Node def toInfosetInputter: TDMLInfosetInputter } diff --git a/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLITdml.scala b/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLITdml.scala index 1f0e062f32..046fbe6fb2 100644 --- a/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLITdml.scala +++ b/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLITdml.scala @@ -45,7 +45,7 @@ class TestCLITdml { 
"daffodil-test/src/test/resources/org/apache/daffodil/section06/entities/Entities.tdml" ) - val envs = Map("DAFFODIL_TDML_API_INFOSETS" -> "scala") + val envs = Map("DAFFODIL_TDML_API_INFOSETS" -> "xml") runCLI(args"test -i -t $tdml byte_entities_6_08", envs = envs) { cli => // parse diff --git a/daffodil-test/src/test/resources/META-INF/services/org.apache.daffodil.api.validation.ValidatorFactory b/daffodil-test/src/test/resources/META-INF/services/org.apache.daffodil.api.validation.ValidatorFactory index 047377250c..a12f49a78a 100644 --- a/daffodil-test/src/test/resources/META-INF/services/org.apache.daffodil.api.validation.ValidatorFactory +++ b/daffodil-test/src/test/resources/META-INF/services/org.apache.daffodil.api.validation.ValidatorFactory @@ -13,5 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -org.apache.daffodil.infoset.TestStringAsXmlNamespacedValidatorFactory -org.apache.daffodil.infoset.TestStringAsXmlNoNamespaceValidatorFactory +org.apache.daffodil.infoset.TestStringAsXmlValidatorFactory diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml index 04a2269769..7d71f77ef9 100644 --- a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml +++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml @@ -23,11 +23,11 @@ xmlns:dfdl="http://www.ogf.org/dfdl/dfdl-1.0/" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:ex="http://example.com" - defaultValidation="off"> + defaultValidation="on"> + validation="TestStringAsXmlValidator"> stringAsXml/namespaced/binMessage_01.dat @@ -37,8 +37,7 @@ + model="/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessage.dfdl.xsd"> stringAsXml/namespaced/binMessage_01.dat @@ -51,8 +50,7 @@ + model="/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessage.dfdl.xsd"> 
stringAsXml/namespaced/binMessage_03.dat @@ -62,8 +60,7 @@ + model="/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessage.dfdl.xsd"> stringAsXml/namespaced/binMessage_08.dat @@ -75,7 +72,7 @@ + validation="TestStringAsXmlValidator"> stringAsXml/nonamespace/binMessage_01.dat diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/section07/variables/variables_01.tdml b/daffodil-test/src/test/resources/org/apache/daffodil/section07/variables/variables_01.tdml index 6d636c6af1..545f47029f 100644 --- a/daffodil-test/src/test/resources/org/apache/daffodil/section07/variables/variables_01.tdml +++ b/daffodil-test/src/test/resources/org/apache/daffodil/section07/variables/variables_01.tdml @@ -48,7 +48,7 @@ - 42 + 42 diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidator.scala b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidator.scala index 658baf0ea7..560329ad87 100644 --- a/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidator.scala +++ b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidator.scala @@ -18,44 +18,33 @@ package org.apache.daffodil.infoset import java.io.InputStream import java.net.URL +import java.util.Properties import org.apache.daffodil.api.validation.ValidationHandler import org.apache.daffodil.api.validation.Validator -import org.apache.daffodil.lib.util.Misc +import org.apache.daffodil.api.validation.ValidatorFactory import org.apache.daffodil.validation.XercesValidator -object TestStringAsXmlNamespacedValidator { - val name = "TestStringAsXmlNamespacedValidator" +object TestStringAsXmlValidator { + val name = "TestStringAsXmlValidator" } -class TestStringAsXmlNamespacedValidator extends Validator { - - val schemaURL: URL = Misc - .getRequiredResource( - "/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessageWithXmlPayload.xsd" - ) - .toURL +class TestStringAsXmlValidator(schemaURL: String) 
extends Validator { + private lazy val xercesValidator = XercesValidator.fromURL(new URL(schemaURL)) override def validateXML(document: InputStream, vh: ValidationHandler): Unit = { - val v = XercesValidator.fromURL(schemaURL) - v.validateXML(document, vh) + xercesValidator.validateXML(document, vh) } } -object TestStringAsXmlNoNamespaceValidator { - val name = "TestStringAsXmlNoNamespaceValidator" -} - -class TestStringAsXmlNoNamespaceValidator extends Validator { +class TestStringAsXmlValidatorFactory extends ValidatorFactory { - val schemaURL: URL = Misc - .getRequiredResource( - "/org/apache/daffodil/infoset/stringAsXml/nonamespace/xsd/binMessageWithXmlPayload.xsd" - ) - .toURL + override def name: String = TestStringAsXmlValidator.name - override def validateXML(document: InputStream, vh: ValidationHandler): Unit = { - val v = XercesValidator.fromURL(schemaURL) - v.validateXML(document, vh) + override def make(config: Properties) = { + val dfdlSchema = config.getProperty(name) + // assumes the validation XSD path is the same as the DFDL schema but with a different suffix + val xsdSchema = dfdlSchema.replace(".dfdl.xsd", "WithXmlPayload.xsd") + new TestStringAsXmlValidator(xsdSchema) } } diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidatorFactory.scala b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidatorFactory.scala deleted file mode 100644 index e4fb252c94..0000000000 --- a/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidatorFactory.scala +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License.
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.daffodil.infoset - -import java.util.Properties - -import org.apache.daffodil.api.validation.ValidatorFactory - -class TestStringAsXmlNamespacedValidatorFactory extends ValidatorFactory { - override def name: String = TestStringAsXmlNamespacedValidator.name - - override def make(config: Properties) = new TestStringAsXmlNamespacedValidator -} - -class TestStringAsXmlNoNamespaceValidatorFactory extends ValidatorFactory { - override def name: String = TestStringAsXmlNoNamespaceValidator.name - - override def make(config: Properties) = new TestStringAsXmlNoNamespaceValidator -} From 47912e48bc1b445aca61454a41c8b3a700f67e39 Mon Sep 17 00:00:00 2001 From: olabusayoT <50379531+olabusayoT@users.noreply.github.com> Date: Sat, 16 May 2026 15:54:43 -0400 Subject: [PATCH 3/7] fixup! fixup! - ensure stringAsXml file line endings are not normalized in windows - change generic exception to InvalidInfosetException --- .gitattributes | 4 +++- .../src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.gitattributes b/.gitattributes index b49c2777e5..4538f0d789 100644 --- a/.gitattributes +++ b/.gitattributes @@ -14,4 +14,6 @@ # limitations under the License. 
# Do not include KEYS in archived source releases -/KEYS export-ignore +/KEYS export-ignore +# ensure stringAsXml file line endings are not normalized in windows +/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/** binary \ No newline at end of file diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala index b08a594010..b161ff862b 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala @@ -42,6 +42,7 @@ import org.apache.daffodil.lib.iapi.URISchemaSource import org.apache.daffodil.lib.schema.annotation.props.LookupLocation import org.apache.daffodil.lib.util.Maybe import org.apache.daffodil.lib.util.Misc +import org.apache.daffodil.runtime1.infoset.InvalidInfosetException import org.apache.daffodil.runtime1.infoset.XMLTextInfoset import org.apache.commons.io.IOUtils @@ -669,7 +670,7 @@ object XMLUtils { _.isInstanceOf[Elem] } if (elemChildren.length != 1) - throw new Exception("stringAsXml must contain a single child element.") + throw new InvalidInfosetException("stringAsXml must contain a single child element.") nonElemChildren.foreach { case Text(data) if data.matches("""\s*""") => // no-op, empty text siblings are fine case x => From 6ddad8d791a3f73ddad82dc79a671476760b99f4 Mon Sep 17 00:00:00 2001 From: olabusayoT <50379531+olabusayoT@users.noreply.github.com> Date: Mon, 18 May 2026 17:12:43 -0400 Subject: [PATCH 4/7] fixup! fixup! fixup! 
- make XMLUtils normalize CRLFs on expected/actual normalization during comparison if it is not StringAsXML DEPRECATION/COMPATIBILITY This code checks the actual infoset for the presence of XMLTextInfoset.stringAsXML(currently stringAsXML) and won't normalize CRLF to LF in infosets that contain that element --- .gitattributes | 2 +- .../apache/daffodil/lib/xml/XMLUtils.scala | 46 +++++++++++++++++-- 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/.gitattributes b/.gitattributes index 4538f0d789..51ddbe2634 100644 --- a/.gitattributes +++ b/.gitattributes @@ -16,4 +16,4 @@ # Do not include KEYS in archived source releases /KEYS export-ignore # ensure stringAsXml file line endings are not normalized in windows -/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/** binary \ No newline at end of file +/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/** -text \ No newline at end of file diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala index b161ff862b..9800fb4fb4 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala @@ -648,6 +648,43 @@ object XMLUtils { res } + /** + * normalizes CRLF to LF within text nodes in non-stringAsXML elements + */ + private def normalizeCRLFtoLF(ns:Node): Node = { + if (!ns.isInstanceOf[Elem]) return ns + + ns match { + // NOTE: this is specifically for the stringAsXml feature as we avoid + // making changes to any of its children requiring that stringAsXml in + // the infoset match results exactly. 
+ case e @ Elem( + null, + XMLTextInfoset.stringAsXml, + Null, + NamespaceBinding(null, null | "", _), + _* + ) => { + e + } + case _ => { + val e = ns.asInstanceOf[Elem] + val children = e.child + val normalized = children.map { + case Text(data) if data.contains("\r") => { + val replaced = data.replaceAll("\r\n", "\n").replaceAll("\r", "\n") + Text(replaced) + } + case c => c + }.map(normalizeCRLFtoLF) + val res = + if (normalized eq children) e + else e.copy(child = normalized) + res + } + } + } + /** * removes insignificant whitespace from between elements */ @@ -859,7 +896,8 @@ object XMLUtils { val noPCData = convertPCDataToText(noComments) val combinedText = coalesceAllAdjacentTextNodes(noPCData) val noMixedWS = removeMixedWhitespace(combinedText) - noMixedWS + val noCRLFs = normalizeCRLFtoLF(noMixedWS) + noCRLFs } class XMLDifferenceException(message: String) extends Exception(message) @@ -900,11 +938,11 @@ Actual (attributes %s for diff) Differences were (path, expected, actual): %s""".format( (if (checkPrefixes || checkNamespaces) "compared for diff" - else "stripped"), + else "stripped"), (if (checkPrefixes || checkNamespaces) expected - else removeAttributes(expected).toString), + else removeAttributes(expected).toString), (if (checkPrefixes || checkNamespaces) "compared" - else "ignored"), + else "ignored"), actual, diffs.map { _.toString }.mkString("- ", "\n- ", "\n") ) From 1e8ba75e4eb376b7f29e9f58e5f9c80f98e37d0c Mon Sep 17 00:00:00 2001 From: olabusayoT <50379531+olabusayoT@users.noreply.github.com> Date: Mon, 18 May 2026 17:35:23 -0400 Subject: [PATCH 5/7] fixup! fixup! fixup! fixup! 
- reformat code --- .../apache/daffodil/lib/xml/XMLUtils.scala | 34 ++++++++++--------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala index 9800fb4fb4..8c761b543a 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala @@ -651,7 +651,7 @@ object XMLUtils { /** * normalizes CRLF to LF within text nodes in non-stringAsXML elements */ - private def normalizeCRLFtoLF(ns:Node): Node = { + private def normalizeCRLFtoLF(ns: Node): Node = { if (!ns.isInstanceOf[Elem]) return ns ns match { @@ -659,24 +659,26 @@ object XMLUtils { // making changes to any of its children requiring that stringAsXml in // the infoset match results exactly. case e @ Elem( - null, - XMLTextInfoset.stringAsXml, - Null, - NamespaceBinding(null, null | "", _), - _* - ) => { + null, + XMLTextInfoset.stringAsXml, + Null, + NamespaceBinding(null, null | "", _), + _* + ) => { e } case _ => { val e = ns.asInstanceOf[Elem] val children = e.child - val normalized = children.map { - case Text(data) if data.contains("\r") => { - val replaced = data.replaceAll("\r\n", "\n").replaceAll("\r", "\n") - Text(replaced) + val normalized = children + .map { + case Text(data) if data.contains("\r") => { + val replaced = data.replaceAll("\r\n", "\n").replaceAll("\r", "\n") + Text(replaced) + } + case c => c } - case c => c - }.map(normalizeCRLFtoLF) + .map(normalizeCRLFtoLF) val res = if (normalized eq children) e else e.copy(child = normalized) @@ -938,11 +940,11 @@ Actual (attributes %s for diff) Differences were (path, expected, actual): %s""".format( (if (checkPrefixes || checkNamespaces) "compared for diff" - else "stripped"), + else "stripped"), (if (checkPrefixes || checkNamespaces) expected - else removeAttributes(expected).toString), + else 
removeAttributes(expected).toString), (if (checkPrefixes || checkNamespaces) "compared" - else "ignored"), + else "ignored"), actual, diffs.map { _.toString }.mkString("- ", "\n- ", "\n") ) From bd27e1583053d05a08c97b89dd06fd1780488457 Mon Sep 17 00:00:00 2001 From: olabusayoT <50379531+olabusayoT@users.noreply.github.com> Date: Wed, 20 May 2026 11:34:57 -0400 Subject: [PATCH 6/7] fixup! fixup! fixup! fixup! fixup! fixup! - Introduced `binMessageA` element and relevant schema changes, including `stringAsXmlGroupA` and updated XML payload references to test inline comment in stringAsXml. - Enhanced `XMLUtils` to streamline `CRLF` normalization and `stringAsXml` element identification. - Adjusted `.gitattributes` to prevent line ending normalization for specific test files. --- .gitattributes | 2 +- .../apache/daffodil/lib/xml/XMLUtils.scala | 91 ++++++++---------- .../apache/daffodil/infoset/stringAsXML.tdml | 12 +++ .../namespaced/binMessage_01.dat.xml | 3 + .../stringAsXml/namespaced/binMessage_01a.dat | Bin 0 -> 916 bytes .../namespaced/binMessage_01a.dat.xml | 41 ++++++++ .../namespaced/xsd/binMessage.dfdl.xsd | 28 +++++- .../xsd/binMessageWithXmlPayload.xsd | 27 ++++++ .../namespaced/xsd/stringAsXmlWrapper.xsd | 12 +++ .../stringAsXml/namespaced/xsd/xmlPayload.xsd | 8 ++ .../infoset/TestStringAsXmlTDML.scala | 1 + 11 files changed, 172 insertions(+), 53 deletions(-) create mode 100644 daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat create mode 100644 daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat.xml diff --git a/.gitattributes b/.gitattributes index 51ddbe2634..cf1911d4fc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -16,4 +16,4 @@ # Do not include KEYS in archived source releases /KEYS export-ignore # ensure stringAsXml file line endings are not normalized in windows -/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/** 
-text \ No newline at end of file +/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml -text \ No newline at end of file diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala index 8c761b543a..fb0b706fbe 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala @@ -601,14 +601,7 @@ object XMLUtils { def removeComments(e: Node): Node = { e match { - case x @ Elem( - null, - XMLTextInfoset.stringAsXml, - Null, - NamespaceBinding(null, null | "", _), - _* - ) => - x + case x : Elem if isStringAsXmlElem(x) => x case Elem(prefix, label, attribs, scope, child*) => { val newChildren = child.filterNot { _.isInstanceOf[Comment] }.map { removeComments(_) } Elem(prefix, label, attribs, scope, true, newChildren*) @@ -648,42 +641,47 @@ object XMLUtils { res } + private def isStringAsXmlElem(ns: Node): Boolean = { + ns match { + case e @ Elem( + null, + XMLTextInfoset.stringAsXml, + Null, + NamespaceBinding(null, null | "", _), + _* + ) => true + case _ => false + } + } + /** * normalizes CRLF to LF within text nodes in non-stringAsXML elements + * + * Some fields in infosets could contain LFs, but could be changed to CRLF + * on Windows due to git's autocrlf feature. And since infoset outputters + * always output LF we need to undo what git might do and normalize those CRLFs + * to LF. */ private def normalizeCRLFtoLF(ns: Node): Node = { - if (!ns.isInstanceOf[Elem]) return ns - ns match { // NOTE: this is specifically for the stringAsXml feature as we avoid // making changes to any of its children requiring that stringAsXml in // the infoset match results exactly.
- case e @ Elem( - null, - XMLTextInfoset.stringAsXml, - Null, - NamespaceBinding(null, null | "", _), - _* - ) => { - e - } - case _ => { - val e = ns.asInstanceOf[Elem] + case e: Elem if isStringAsXmlElem(e) => e + case e: Elem => { val children = e.child - val normalized = children - .map { - case Text(data) if data.contains("\r") => { - val replaced = data.replaceAll("\r\n", "\n").replaceAll("\r", "\n") - Text(replaced) - } - case c => c - } - .map(normalizeCRLFtoLF) - val res = + val normalized = children.map(normalizeCRLFtoLF) + val res = { if (normalized eq children) e else e.copy(child = normalized) + } res } + case Text(data) if data.contains("\r") => { + val replaced = data.replaceAll("\r\n", "\n").replaceAll("\r", "\n") + Text(replaced) + } + case _ => ns } } @@ -692,19 +690,11 @@ object XMLUtils { */ private def removeMixedWhitespace(ns: Node): Node = { - if (!ns.isInstanceOf[Elem]) return ns - ns match { // NOTE: this is specifically for the stringAsXml feature as we avoid // making changes to any of its children except removing any surrounding // whitespace, requiring that stringAsXml in the infoset match results exactly. 
- case e @ Elem( - null, - XMLTextInfoset.stringAsXml, - Null, - NamespaceBinding(null, null | "", _), - _* - ) => { + case e: Elem if isStringAsXmlElem(e) => { val (elemChildren, nonElemChildren) = e.child.partition { _.isInstanceOf[Elem] } @@ -719,8 +709,7 @@ object XMLUtils { } e.asInstanceOf[Elem].copy(child = elemChildren) } - case _ => { - val e = ns.asInstanceOf[Elem] + case e: Elem => { val children = e.child val noMixedChildren = if (children.exists(_.isInstanceOf[Elem])) { @@ -751,8 +740,8 @@ object XMLUtils { else e.copy(child = noMixedChildren) res } + case _ => ns } - } /** @@ -1157,23 +1146,23 @@ Differences were (path, expected, actual): } case (cA: Comment, cB: Comment) => { val thisDiff = computeTextDiff( - zPath, + zPath + "/@comment", cA.toString, cB.toString, - maybeType, - maybeFloatEpsilon, - maybeDoubleEpsilon + None, + None, + None ) thisDiff } case (pcA: PCData, pcB: PCData) => { val thisDiff = computeTextDiff( - zPath, + zPath + "/@PCDATA", pcA.toString, pcB.toString, - maybeType, - maybeFloatEpsilon, - maybeDoubleEpsilon + None, + None, + None ) thisDiff } diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml index 7d71f77ef9..9e88bc3126 100644 --- a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml +++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml @@ -83,4 +83,16 @@ Value '=invalid field' is not facet-valid + + + + stringAsXml/namespaced/binMessage_01a.dat + + + stringAsXml/namespaced/binMessage_01a.dat.xml + + + diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml index c54b830fc8..921df82034 100644 --- 
a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml +++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml @@ -1,4 +1,7 @@ + 1 diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat new file mode 100644 index 0000000000000000000000000000000000000000..1a31a12c3c5425b271dba091229a0c8adc10815c GIT binary patch literal 916 zcma)4L2lbH5bT++*sy?gl4ZGZP{K@O}$#{?u&CFw|hQoqrcbtOxp zllBmusNwF+aCc&IbA3b7Ol4D2ef{Ja_7u4qt_I=Q=CzF;pI^PWdtC-3PZ?sRaHVc2*V!R)JEsR zTGR%qtTV^lp)J(MhP@PJ!P2Y8?U&n!+s9q^3r|lB8tD$mH{f}}9q-^YI(NW97y;5k zC0kLTrgA9h8Ewr`bE<|(3Qy;|4jRe0DxlIx_HEB?t1Wut)ig(jst zff0N3uh3!wv(l}B!Hie+8Yl$t=l8#T+xroleQb;MFt@wih>`)>-g~UC94~|_rrK#3 zp4B__{T{ny^=Kv)?p_wquVi9RqjRhnS&=1Kt`_f&BW|SsQgazg^E!ZCYQ`_cjSMku zt<31bxjKoWrfKHQr@1!I(Q>hfqaR=IzU2o|@vw^@$EjnutxVjh78z}AB9-goJM!Xu gx*$tcN+tR{`#AfW)uodNt$1r`bbbOOG5J9L0<7~FZ~y=R literal 0 HcmV?d00001 diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat.xml b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat.xml new file mode 100644 index 0000000000..c9b89ad5ad --- /dev/null +++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat.xml @@ -0,0 +1,41 @@ + + + + 1 + + + + + + with here is mixed content + spaces spaces and more mixed content + and more mixed content + + entity references: < > & " ' © + CR LF +CRLF +end + CR + LF + CRLF + =invalid field + + + + 5 + + + 2 + + + + + + 5 + + + + + 1 + + diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessage.dfdl.xsd 
b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessage.dfdl.xsd index a7d456c16f..1f27c8dcdb 100644 --- a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessage.dfdl.xsd +++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessage.dfdl.xsd @@ -72,5 +72,31 @@ - + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessageWithXmlPayload.xsd b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessageWithXmlPayload.xsd index 3b46c663ca..3118b06280 100644 --- a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessageWithXmlPayload.xsd +++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessageWithXmlPayload.xsd @@ -50,6 +50,33 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/stringAsXmlWrapper.xsd b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/stringAsXmlWrapper.xsd index 3c7cd0bffe..87ad24db76 100644 --- a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/stringAsXmlWrapper.xsd +++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/stringAsXmlWrapper.xsd @@ -77,6 +77,18 @@ validate XML embedded in data and subsequently embedded into the infoset. 
+ + + + + + + + + + + + diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/xmlPayload.xsd b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/xmlPayload.xsd index 2f92d344c5..870d89b890 100644 --- a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/xmlPayload.xsd +++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/xmlPayload.xsd @@ -42,5 +42,13 @@ + + + + + + + + diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlTDML.scala b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlTDML.scala index 62f9392338..0495d584c6 100644 --- a/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlTDML.scala +++ b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlTDML.scala @@ -34,4 +34,5 @@ class TestStringAsXmlTDML extends TdmlTests { @Test def stringAsXml_04 = test @Test def stringAsXml_09 = test @Test def stringAsXml_10 = test + @Test def stringAsXml_11 = test } From ee366fbe64b50f86346ccd82c2f5fb6eebbc8d82 Mon Sep 17 00:00:00 2001 From: olabusayoT <50379531+olabusayoT@users.noreply.github.com> Date: Thu, 21 May 2026 13:11:26 -0400 Subject: [PATCH 7/7] fixup! fixup! fixup! fixup! fixup! fixup! fixup! 
- add new test files to rat - reformat code - add missing comment to test file --- .../org/apache/daffodil/lib/xml/XMLUtils.scala | 15 ++++++++------- .../stringAsXml/namespaced/binMessage_01.dat | Bin 821 -> 790 bytes .../namespaced/binMessage_01.dat.xml | 5 +---- .../namespaced/binMessage_01.dat.xml.dat | Bin 776 -> 790 bytes .../stringAsXml/namespaced/binMessage_01a.dat | Bin 916 -> 930 bytes .../namespaced/binMessage_01a.dat.xml | 2 +- project/Rat.scala | 6 ++++++ 7 files changed, 16 insertions(+), 12 deletions(-) diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala index fb0b706fbe..d2a8179126 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala @@ -601,7 +601,7 @@ object XMLUtils { def removeComments(e: Node): Node = { e match { - case x : Elem if isStringAsXmlElem(x) => x + case x: Elem if isStringAsXmlElem(x) => x case Elem(prefix, label, attribs, scope, child*) => { val newChildren = child.filterNot { _.isInstanceOf[Comment] }.map { removeComments(_) } Elem(prefix, label, attribs, scope, true, newChildren*) @@ -644,12 +644,13 @@ object XMLUtils { private def isStringAsXmlElem(ns: Node): Boolean = { ns match { case e @ Elem( - null, - XMLTextInfoset.stringAsXml, - Null, - NamespaceBinding(null, null | "", _), - _* - ) => true + null, + XMLTextInfoset.stringAsXml, + Null, + NamespaceBinding(null, null | "", _), + _* + ) => + true case _ => false } } diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat index bdb4f0b65c4fc391ab49ae2e5840a76ea33fcb9e..c5b1045188d2b25cdf5e23046c69b2601944dc20 100644 GIT binary patch delta 280 
zcmdnWHjT~8kju=>fXl|dA~#2&EVZaOGe6H(-B8a!T_H6uIX@*cFWpu>G{jBULfzhu z%cj7pC_leM0jM~y*s9Q0skA81svxm4CqFSoX=0DGiIJ_6iIPHMNlB5Rt&&1nVoqtQ zf|4DVf`W~ruC79IeqKppW?r#^bC8c)u|i^AiUN?{cvzQ_&rU%tF}J{4K}kVf;n2#- z{7iBd6)Dy>X_=`xDRw??Hu?}27gPjf08C6x8K@E_k(!qR)GWiuHMxsPbMh7@Z9P*i PBQ8TjV!b%|1(PoTsl-&7 delta 305 zcmbQnww0~kkjvcIkjuutA~#2&EVZaOGe6H($xzQgNg*{aIX@*cFWpu>G{jBULfzhu zOF@ClM$yIJIV3W`RiP+9zeFJ#C}yMR>KEb}5~+}vn47AgF0a_C(3V%dv?$N2Ah9whKQTppqL;Lcp{=??Sz=CUs)9O5 zzm1}04C}jgd%Qi4VIj&z@)~=JK3E{6G#>?X>$U7Y|3TCWo|h6 HB$F=yunJTH diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml index 921df82034..669b020959 100644 --- a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml +++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml @@ -1,14 +1,11 @@ - 1 - + with here is mixed content spaces spaces and more mixed content diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml.dat b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml.dat index 92703251fc6ec21860a3cbd5b8c6c14a95052eb7..c5b1045188d2b25cdf5e23046c69b2601944dc20 100644 GIT binary patch delta 38 scmeBRo5sd!$Yo|`Fp;%EB(EefGp|^|ImpMYSRpYlMFB`}?5<@30MC;Ps{jB1 delta 24 fcmbQn*1^VV$Yo|?Hj%Y~EjKqeHLqmj%vvS@P_YMo diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat index 1a31a12c3c5425b271dba091229a0c8adc10815c..9ea0e13f4b93a72dd20924a47cda359a016d5730 100644 GIT binary patch delta 38 scmbQjzKEUGkju=>U?OXSNM1=|W?r#^bC8c)u|i^AiUN?{*v-id0Mcv=QUCw| delta 24 
fcmZ3)K82mtkju=(Y$9s|TW)S{YF^34nVif3QXU5- diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat.xml b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat.xml index c9b89ad5ad..875d7f4ceb 100644 --- a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat.xml +++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat.xml @@ -5,7 +5,7 @@ - + with here is mixed content spaces spaces and more mixed content diff --git a/project/Rat.scala b/project/Rat.scala index 825a661523..e39a695743 100644 --- a/project/Rat.scala +++ b/project/Rat.scala @@ -123,9 +123,15 @@ object Rat { file( "daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat" ), + file( + "daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat" + ), file( "daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml" ), + file( + "daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat.xml" + ), file( "daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml.dat" ),