From e630050bdd99b29e4945c2e249ff8126f4cc8503 Mon Sep 17 00:00:00 2001
From: olabusayoT <50379531+olabusayoT@users.noreply.github.com>
Date: Thu, 2 Apr 2026 15:54:20 -0400
Subject: [PATCH 1/7] Change TDMLRunner to use XMLTextInfosetInputter/Outputter
as default
- The runner still uses Scala XML results for certain things, so we expose getScalaResult on the TDML outputters and on DaffodilTDMLParseResult
- Update TDML Schema to add support for custom validation name/type and use in stringAsXML tests
- Drop whitespace between elements to keep expected matching actual, but keep all others like mixed whitespace, attributes, comments unchanged
- Introduced tests for `stringAsXML` validation and namespace handling.
- Added a `noNormalizations` flag to control whether comments/processing instructions are kept or dropped when loading XML.
- Updated associated XML parsing methods and test cases to support the new option.
- Revised whitespace removal to handle specific scenarios for improved XML processing.
- Verify prefixes resolve to the same namespaces when checking prefixes
- update TDMLException with more information on why getSimpleText isn't matching
- NullInfosetInputter should receive UTF-8 bytes for its events
Deprecation/Compatibility
Instead of ScalaXMLInfosetInputter/Outputter being the default inputter/outputter for TDML Runner, it is now XMLTextInfosetInputter/Outputter, which supports the stringAsXml feature
DAFFODIL-2909
---
.../org/apache/daffodil/xsd/tdml.xsd | 20 ++-
.../lib/xml/DaffodilConstructingLoader.scala | 33 +++--
.../daffodil/lib/xml/DaffodilXMLLoader.scala | 21 ++-
.../apache/daffodil/lib/xml/XMLUtils.scala | 126 ++++++++++++++----
.../lib/xml/test/unit/TestXMLLoader.scala | 16 ++-
.../org/apache/daffodil/tdml/TDMLRunner.scala | 2 +-
.../tdml/DaffodilTDMLDFDLProcessor.scala | 15 ++-
.../processor/tdml/TDMLInfosetInputter.scala | 24 ++--
.../processor/tdml/TDMLInfosetOutputter.scala | 66 ++++++---
...e.daffodil.api.validation.ValidatorFactory | 17 +++
.../apache/daffodil/infoset/stringAsXML.tdml | 89 +++++++++++++
.../infoset/TestStringAsXmlTDML.scala | 37 +++++
.../infoset/TestStringAsXmlValidator.scala | 61 +++++++++
.../TestStringAsXmlValidatorFactory.scala | 33 +++++
14 files changed, 485 insertions(+), 75 deletions(-)
create mode 100644 daffodil-test/src/test/resources/META-INF/services/org.apache.daffodil.api.validation.ValidatorFactory
create mode 100644 daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml
create mode 100644 daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlTDML.scala
create mode 100644 daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidator.scala
create mode 100644 daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidatorFactory.scala
diff --git a/daffodil-core/src/main/resources/org/apache/daffodil/xsd/tdml.xsd b/daffodil-core/src/main/resources/org/apache/daffodil/xsd/tdml.xsd
index 8a8441a1e2..0b6397666f 100644
--- a/daffodil-core/src/main/resources/org/apache/daffodil/xsd/tdml.xsd
+++ b/daffodil-core/src/main/resources/org/apache/daffodil/xsd/tdml.xsd
@@ -224,11 +224,21 @@
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala
index 8c1af3f61d..d0be20682b 100644
--- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala
+++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala
@@ -94,13 +94,15 @@ object Position {
* behavior of normalizing CRLF to LF, and solitary CR to LF.
* Defaults to true. Should only be changed in special circumstances
* as not normalizing CRLFs is non-standard for XML.
- *
+ * @param noNormalizations True to not remove comments and processing instructions and to not normalize
+ * CRLF/CR to LF. This is used to keep the XML as close to the original as possible
*/
class DaffodilConstructingLoader private[xml] (
uri: URI,
errorHandler: org.xml.sax.ErrorHandler,
addPositionAttributes: Boolean,
- normalizeCRLFtoLF: Boolean
+ normalizeCRLFtoLF: Boolean,
+ noNormalizations: Boolean
) extends ConstructingParser(
{
// Note: we must open the XML carefully since it might be in some non
@@ -122,7 +124,13 @@ class DaffodilConstructingLoader private[xml] (
errorHandler: org.xml.sax.ErrorHandler,
addPositionAttributes: Boolean = false
) =
- this(uri, errorHandler, addPositionAttributes, normalizeCRLFtoLF = true)
+ this(
+ uri,
+ errorHandler,
+ addPositionAttributes,
+ normalizeCRLFtoLF = true,
+ noNormalizations = false
+ )
/**
* Ensures that DOCTYPES aka DTDs, if encountered, are rejected.
@@ -316,19 +324,26 @@ class DaffodilConstructingLoader private[xml] (
}
/**
- * Drops comments
+ * Drops comments if noNormalizations is false
*/
override def comment(pos: Int, s: String): Comment = {
- // returning null drops comments
- null
+ if (noNormalizations) {
+ super.comment(pos, s)
+ } else {
+ // returning null drops comments
+ null
+ }
}
/**
- * Drops processing instructions
+ * Drops processing instructions if noNormalizations is false
*/
override def procInstr(pos: Int, target: String, txt: String) = {
- // returning null drops processing instructions
- null
+ if (noNormalizations) {
+ super.procInstr(pos, target, txt)
+ } else { // returning null drops processing instructions
+ null
+ }
}
private def parseXMLPrologAttributes(
diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala
index 0b32d1accc..7994465b71 100644
--- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala
+++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala
@@ -702,14 +702,23 @@ class DaffodilXMLLoader(val errorHandler: org.xml.sax.ErrorHandler)
* @param optSchemaURI Optional URI for XML schema for the XML source document.
* @param addPositionAttributes True to add dafint:file dafint:line attributes to all elements.
* Defaults to false.
+ * @param noNormalizations True to not remove comments and processing instructions and to not normalize
+ * CRLF/CR to LF. This is used to keep the XML as close to the original as possible
* @return an scala.xml.Node (Element actually) which is the document element of the source.
*/
def load(
source: DaffodilSchemaSource,
optSchemaURI: Option[URI],
- addPositionAttributes: Boolean = false
+ addPositionAttributes: Boolean = false,
+ noNormalizations: Boolean = false
): scala.xml.Node =
- load(source, optSchemaURI, addPositionAttributes, normalizeCRLFtoLF = true)
+ load(
+ source,
+ optSchemaURI,
+ addPositionAttributes,
+ normalizeCRLFtoLF = true,
+ noNormalizations
+ )
/**
* package private constructor gives access to normalizeCRLFtoLF feature.
@@ -720,13 +729,16 @@ class DaffodilXMLLoader(val errorHandler: org.xml.sax.ErrorHandler)
* Defaults to false.
* @param normalizeCRLFtoLF True to normalize CRLF and isolated CR to LF. This should usually be true,
* but some special case situations may require preservation of CRLF/CR.
+ * @param noNormalizations True to not remove comments and processing instructions and to not normalize
+ * CRLF/CR to LF. This is used to keep the XML as close to the original as possible
* @return an scala.xml.Node (Element actually) which is the document element of the source.
*/
private[xml] def load(
source: DaffodilSchemaSource,
optSchemaURI: Option[URI],
addPositionAttributes: Boolean,
- normalizeCRLFtoLF: Boolean
+ normalizeCRLFtoLF: Boolean,
+ noNormalizations: Boolean
): scala.xml.Node = {
//
// First we invoke the validator to explicitly validate the XML against
@@ -819,7 +831,8 @@ class DaffodilXMLLoader(val errorHandler: org.xml.sax.ErrorHandler)
source.uriForLoading,
errorHandler,
addPositionAttributes,
- normalizeCRLFtoLF
+ normalizeCRLFtoLF,
+ noNormalizations
)
val res =
try {
diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
index da67ac01bb..de6144050a 100644
--- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
+++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
@@ -42,6 +42,7 @@ import org.apache.daffodil.lib.iapi.URISchemaSource
import org.apache.daffodil.lib.schema.annotation.props.LookupLocation
import org.apache.daffodil.lib.util.Maybe
import org.apache.daffodil.lib.util.Misc
+import org.apache.daffodil.runtime1.infoset.XMLTextInfoset
import org.apache.commons.io.IOUtils
import org.xml.sax.XMLReader
@@ -644,34 +645,70 @@ object XMLUtils {
private def removeMixedWhitespace(ns: Node): Node = {
if (!ns.isInstanceOf[Elem]) return ns
- val e = ns.asInstanceOf[Elem]
- val children = e.child
- val noMixedChildren =
- if (children.exists(_.isInstanceOf[Elem])) {
- children
- .filter {
- case Text(data) if data.matches("""\s*""") => false
- case Text(data) =>
- throw new Exception("Element %s contains mixed data: %s".format(e.label, data))
- case _ => true
- }
- .map(removeMixedWhitespace)
- } else {
- children.filter {
+
+ def dropWhitespace(e: Node): Node = {
+ val children = e.child
+ val noWhitespace = children
+ .filter {
//
// So this is a bit strange, but we're dropping nodes that are Empty String.
//
// In XML we cannot tell where there is a Text("") child, from with Nil children
//
case Text("") => false // drop empty strings
+ case Text(data) if data.matches("""\s*""") => false
case _ => true
}
+ .map(dropWhitespace)
+ e match {
+ case elem: Elem => elem.copy(child = noWhitespace)
+ case _ => e
}
+ }
+
+ ns match {
+ case e @ Elem(
+ null,
+ XMLTextInfoset.stringAsXml,
+ Null,
+ NamespaceBinding(null, null | "", _),
+ _*
+ ) =>
+ dropWhitespace(e)
+ case _ => {
+ val e = ns.asInstanceOf[Elem]
+ val children = e.child
+ val noMixedChildren =
+ if (children.exists(_.isInstanceOf[Elem])) {
+ children
+ .filter {
+ case Text(data) if data.matches("""\s*""") => false
+ case Text(data) =>
+ throw new Exception(
+ "Element %s contains mixed data: %s".format(e.label, data)
+ )
+ case _ => true
+ }
+ .map(removeMixedWhitespace)
+ } else {
+ children.filter {
+ //
+ // So this is a bit strange, but we're dropping nodes that are Empty String.
+ //
+ // In XML we cannot tell where there is a Text("") child, from with Nil children
+ //
+ case Text("") => false // drop empty strings
+ case _ => true
+ }
+ }
+
+ val res =
+ if (noMixedChildren eq children) e
+ else e.copy(child = noMixedChildren)
+ res
+ }
+ }
- val res =
- if (noMixedChildren eq children) e
- else e.copy(child = noMixedChildren)
- res
}
/**
@@ -700,6 +737,15 @@ object XMLUtils {
): NodeSeq = {
val res = n match {
+ case e @ Elem(
+ null,
+ XMLTextInfoset.stringAsXml,
+ Null,
+ NamespaceBinding(null, null | "", _),
+ _*
+ ) =>
+ e
+
case e @ Elem(prefix, label, attributes, scope, children*) => {
val filteredScope = if (ns.length > 0) filterScope(scope, ns) else xml.TopScope
@@ -804,11 +850,23 @@ object XMLUtils {
* - Removes unnecessary whitespace
*/
def normalize(n: Node): Node = {
- val noComments = removeComments(n)
- val noPCData = convertPCDataToText(noComments)
- val combinedText = coalesceAllAdjacentTextNodes(noPCData)
- val noMixedWS = removeMixedWhitespace(combinedText)
- noMixedWS
+ n match {
+ case x @ Elem(
+ null,
+ XMLTextInfoset.stringAsXml,
+ Null,
+ NamespaceBinding(null, null | "", _),
+ _*
+ ) =>
+ x
+ case _ => {
+ val noComments = removeComments(n)
+ val noPCData = convertPCDataToText(noComments)
+ val combinedText = coalesceAllAdjacentTextNodes(noPCData)
+ val noMixedWS = removeMixedWhitespace(combinedText)
+ noMixedWS
+ }
+ }
}
class XMLDifferenceException(message: String) extends Exception(message)
@@ -973,6 +1031,28 @@ Differences were (path, expected, actual):
} else if (checkPrefixes && prefixA != prefixB) {
// different prefix
List((zPath + "/" + labelA + "@prefix", prefixA, prefixB))
+ } else if (
+ checkPrefixes && prefixA != null && a.getNamespace(prefixA) != nsbA.getURI(prefixA)
+ ) {
+ // prefix doesn't resolve to namespace
+ List(
+ (
+ zPath + "/" + labelA + "@prefix-uri",
+ nsbA.getURI(prefixA),
+ a.getNamespace(prefixA)
+ )
+ )
+ } else if (
+ checkPrefixes && prefixB != null && b.getNamespace(prefixB) != nsbB.getURI(prefixB)
+ ) {
+ // prefix doesn't resolve to namespace
+ List(
+ (
+ zPath + "/" + labelA + "@prefix-uri",
+ nsbB.getURI(prefixB),
+ b.getNamespace(prefixB)
+ )
+ )
} else if (checkNamespaces && mappingsA != mappingsB) {
// different namespace bindings
List((zPath + "/" + labelA + "@xmlns", mappingsA, mappingsB))
diff --git a/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLLoader.scala b/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLLoader.scala
index 3cb5caeb3e..2f4f9a827b 100644
--- a/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLLoader.scala
+++ b/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLLoader.scala
@@ -172,9 +172,21 @@ class TestXMLLoader {
// and toString will print them out into the text with the preserved.
//
val xmlFromDafLoaderNonNormalized =
- loader.load(ss, None, addPositionAttributes = false, normalizeCRLFtoLF = false)
+ loader.load(
+ ss,
+ None,
+ addPositionAttributes = false,
+ normalizeCRLFtoLF = false,
+ noNormalizations = false
+ )
val xmlFromDafLoaderNormalized =
- loader.load(ss, None, addPositionAttributes = false, normalizeCRLFtoLF = true)
+ loader.load(
+ ss,
+ None,
+ addPositionAttributes = false,
+ normalizeCRLFtoLF = true,
+ noNormalizations = false
+ )
{
// compare to the regular scala XML loader
diff --git a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
index 0549c61ac1..cbf96e2d9a 100644
--- a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
+++ b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
@@ -2800,7 +2800,7 @@ case class DFDLInfoset(di: Node, parent: Infoset) {
val testSuite = testCase.parent
val before = testSuite.loadingExceptions.clone()
- val elem = loader.load(infosetSrc, None) // no schema
+ val elem = loader.load(infosetSrc, None, noNormalizations = true) // no schema
//
// TODO: DAFFODIL-288 validate the infoset also
// You can pass the optDataSchema, which appears to be the correct thing
diff --git a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala
index 696a1ab8e8..9d043092d0 100644
--- a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala
+++ b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala
@@ -173,7 +173,7 @@ class DaffodilTDMLDFDLProcessor private[tdml] (
private def blobPrefix = ""
private def blobSuffix = ".bin"
- private lazy val tdmlApiInfosetsEnv = sys.env.getOrElse("DAFFODIL_TDML_API_INFOSETS", "scala")
+ private lazy val tdmlApiInfosetsEnv = sys.env.getOrElse("DAFFODIL_TDML_API_INFOSETS", "xml")
override def withTracing(bool: Boolean): DaffodilTDMLDFDLProcessor = {
copy(dp = newTracing(bool))
@@ -238,7 +238,7 @@ class DaffodilTDMLDFDLProcessor private[tdml] (
): TDMLUnparseResult = {
val dafpr = parseResult.asInstanceOf[DaffodilTDMLParseResult]
val inputter = dafpr.inputter
- val resNode = dafpr.getResult
+ val resNode = dafpr.getScalaResult
unparse(inputter, resNode, outStream)
}
@@ -268,8 +268,10 @@ class DaffodilTDMLDFDLProcessor private[tdml] (
): TDMLParseResult = {
val outputter = if (tdmlApiInfosetsEnv == "all") {
TDMLInfosetOutputterAll()
- } else {
+ } else if (tdmlApiInfosetsEnv == "scala") {
TDMLInfosetOutputterScala()
+ } else {
+ TDMLInfosetOutputterXML()
}
outputter.setBlobAttributes(blobDir, blobPrefix, blobSuffix)
@@ -308,7 +310,10 @@ class DaffodilTDMLDFDLProcessor private[tdml] (
xri.parse(sis)
if (!actual.isError && !errorHandler.isError) {
- verifySameParseOutput(outputter.xmlStream, saxOutputStream)
+ val actualOutputArray = outputter.getScalaResult.toString.getBytes("UTF-8")
+ val baos = new ByteArrayOutputStream(actualOutputArray.length)
+ baos.write(actualOutputArray)
+ verifySameParseOutput(baos, saxOutputStream)
}
val dpParseDiag = actual.getDiagnostics.asScala.map(_.toString()).toSeq
val saxParseDiag = errorHandler.getDiagnostics.asScala.map(_.toString()).toSeq
@@ -392,7 +397,6 @@ class DaffodilTDMLDFDLProcessor private[tdml] (
XMLUtils.compareAndReport(
dpParseXMLNodeOutput,
saxParseXMLNodeOutput,
- checkNamespaces = true,
checkPrefixes = true
)
} catch {
@@ -433,6 +437,7 @@ final class DaffodilTDMLParseResult(actual: ParseResult, outputter: TDMLInfosetO
extends TDMLParseResult {
override def getResult: Node = outputter.getResult
+ def getScalaResult: Node = outputter.getScalaResult
override def getBlobPaths: java.util.List[Path] = outputter.getBlobPaths
diff --git a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetInputter.scala b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetInputter.scala
index 518f38961b..9ea2a7d9d9 100644
--- a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetInputter.scala
+++ b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetInputter.scala
@@ -27,32 +27,31 @@ import org.apache.daffodil.lib.util.Misc
import org.apache.daffodil.lib.xml.XMLUtils
import org.apache.daffodil.runtime1.dpath.NodeInfo
import org.apache.daffodil.runtime1.infoset.JsonInfosetInputter
-import org.apache.daffodil.runtime1.infoset.ScalaXMLInfosetInputter
import org.apache.daffodil.tdml.TDMLException
class TDMLInfosetInputter(
- val scalaInputter: ScalaXMLInfosetInputter,
+ val inputter: api.infoset.InfosetInputter,
others: Seq[api.infoset.InfosetInputter]
) extends api.infoset.InfosetInputter {
private def implString: String = "daffodil"
override def getEventType(): InfosetInputterEventType = {
- val res = scalaInputter.getEventType()
+ val res = inputter.getEventType()
if (!others.forall(_.getEventType() == res))
throw TDMLException("getEventType does not match", Some(implString))
res
}
override def getLocalName(): String = {
- val res = scalaInputter.getLocalName()
+ val res = inputter.getLocalName()
if (!others.forall(_.getLocalName() == res))
throw TDMLException("getLocalName does not match", Some(implString))
res
}
override def getNamespaceURI(): String = {
- val res = scalaInputter.getNamespaceURI()
+ val res = inputter.getNamespaceURI()
val resIsEmpty = res == null || res == ""
val othersMatch = others.forall { i =>
if (!i.getSupportsNamespaces) {
@@ -74,7 +73,7 @@ class TDMLInfosetInputter(
primType: NodeInfo.Kind,
runtimeProperties: java.util.Map[String, String]
): String = {
- val res = scalaInputter.getSimpleText(primType, runtimeProperties)
+ val res = inputter.getSimpleText(primType, runtimeProperties)
val resIsEmpty = res == null || res == ""
val otherStrings = others.map { i =>
// Note in an unparserTestCase, there are no others (infoset inputters), because the input infoset is
@@ -100,7 +99,10 @@ class TDMLInfosetInputter(
}
if (!othersmatch)
- throw TDMLException("getSimpleText does not match", Some(implString))
+ throw TDMLException(
+ s"getSimpleText does not match for $res ${others.zip(otherStrings).mkString("\n")}",
+ Some(implString)
+ )
if (primType.isInstanceOf[NodeInfo.AnyURI.Kind]) {
try {
@@ -126,26 +128,26 @@ class TDMLInfosetInputter(
}
override def isNilled(): JBoolean = {
- val res = scalaInputter.isNilled()
+ val res = inputter.isNilled()
if (!others.forall(_.isNilled() == res))
throw TDMLException("isNilled does not match", Some(implString))
res
}
override def hasNext(): Boolean = {
- val res = scalaInputter.hasNext()
+ val res = inputter.hasNext()
if (!others.forall(_.hasNext() == res))
throw TDMLException("hasNext does not match", Some(implString))
res
}
override def next(): Unit = {
- scalaInputter.next()
+ inputter.next()
others.foreach(_.next())
}
override def fini(): Unit = {
- scalaInputter.fini()
+ inputter.fini()
others.foreach(_.fini())
}
diff --git a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala
index cf913d6877..6f185c7b01 100644
--- a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala
+++ b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala
@@ -19,7 +19,6 @@ package org.apache.daffodil.processor.tdml
import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream
-import java.nio.charset.Charset
import scala.xml.Node
import org.apache.daffodil.api
@@ -36,32 +35,62 @@ import org.apache.daffodil.runtime1.infoset.W3CDOMInfosetOutputter
import org.apache.daffodil.runtime1.infoset.XMLTextInfosetInputter
import org.apache.daffodil.runtime1.infoset.XMLTextInfosetOutputter
-class TDMLInfosetOutputterScala(scalaOut: ScalaXMLInfosetOutputter)
- extends TeeInfosetOutputter(Seq(scalaOut)*)
+class TDMLInfosetOutputterScala(
+ scalaOut: ScalaXMLInfosetOutputter,
+ override val xmlStream: ByteArrayOutputStream,
+ xmlOut: XMLTextInfosetOutputter
+) extends TeeInfosetOutputter(Seq(scalaOut, xmlOut)*)
with TDMLInfosetOutputter {
- override def getResult: Node = scalaOut.getResult()
-
- override lazy val xmlStream: ByteArrayOutputStream = {
- val bos = new ByteArrayOutputStream()
- bos.write(getResult.toString().getBytes(Charset.defaultCharset()))
- bos
- }
+ override def getResult: Node =
+ scala.xml.XML.load(new ByteArrayInputStream(xmlStream.toByteArray))
override def toInfosetInputter: TDMLInfosetInputter = {
val scalaIn = new ScalaXMLInfosetInputter(scalaOut.getResult())
new TDMLInfosetInputter(scalaIn, Seq())
}
+
+ override def getScalaResult: Node = scalaOut.getResult()
}
object TDMLInfosetOutputterScala {
def apply(): TDMLInfosetOutputterScala = {
val scalaOut = new ScalaXMLInfosetOutputter()
scalaOut.setIncludeDataType(true)
- new TDMLInfosetOutputterScala(scalaOut)
+ val baos = new ByteArrayOutputStream()
+ val xmlOut = new XMLTextInfosetOutputter(baos, true)
+ new TDMLInfosetOutputterScala(scalaOut, baos, xmlOut)
+ }
+}
+
+object TDMLInfosetOutputterXML {
+ def apply(): TDMLInfosetOutputterXML = {
+ val baos = new ByteArrayOutputStream()
+ val xmlOut = new XMLTextInfosetOutputter(baos, true)
+ xmlOut.setIncludeDataType(true)
+ val scalaOut = new ScalaXMLInfosetOutputter()
+ new TDMLInfosetOutputterXML(baos, xmlOut, scalaOut)
}
}
+class TDMLInfosetOutputterXML(
+ override val xmlStream: ByteArrayOutputStream,
+ xmlOut: XMLTextInfosetOutputter,
+ scalaOut: ScalaXMLInfosetOutputter
+) extends TeeInfosetOutputter(Seq(xmlOut, scalaOut)*)
+ with TDMLInfosetOutputter {
+
+ override def getResult: Node =
+ scala.xml.XML.load(new ByteArrayInputStream(xmlStream.toByteArray))
+
+ override def toInfosetInputter: TDMLInfosetInputter = {
+ val xmlIn = new XMLTextInfosetInputter(new ByteArrayInputStream(xmlStream.toByteArray))
+ new TDMLInfosetInputter(xmlIn, Seq())
+ }
+
+ override def getScalaResult: Node = scalaOut.getResult()
+}
+
class TDMLInfosetOutputterAll(
jsonStream: ByteArrayOutputStream,
override val xmlStream: ByteArrayOutputStream,
@@ -73,7 +102,9 @@ class TDMLInfosetOutputterAll(
) extends TeeInfosetOutputter(Seq(xmlOut, scalaOut, jdomOut, w3cdomOut, jsonOut)*)
with TDMLInfosetOutputter {
- override def getResult: Node = scalaOut.getResult()
+ def getScalaResult: Node = scalaOut.getResult()
+ override def getResult: Node =
+ scala.xml.XML.load(new ByteArrayInputStream(xmlStream.toByteArray))
override def toInfosetInputter: TDMLInfosetInputter = {
val scalaIn = new ScalaXMLInfosetInputter(scalaOut.getResult())
@@ -82,10 +113,14 @@ class TDMLInfosetOutputterAll(
val jsonIn = new JsonInfosetInputter(new ByteArrayInputStream(jsonStream.toByteArray))
val xmlIn = new XMLTextInfosetInputter(new ByteArrayInputStream(xmlStream.toByteArray))
val nullIn = {
- val events = NullInfosetInputter.toEvents(new ByteArrayInputStream(xmlStream.toByteArray))
+ val events = NullInfosetInputter.toEvents(
+ new ByteArrayInputStream(
+ scalaOut.getResult().toString().getBytes("UTF-8")
+ )
+ )
new NullInfosetInputter(events)
}
- new TDMLInfosetInputter(scalaIn, Seq(jdomIn, w3cdomIn, jsonIn, xmlIn, nullIn))
+ new TDMLInfosetInputter(xmlIn, Seq(jdomIn, w3cdomIn, jsonIn, scalaIn, nullIn))
}
}
@@ -98,7 +133,7 @@ object TDMLInfosetOutputterAll {
val jdomOut = new JDOMInfosetOutputter()
val w3cdomOut = new W3CDOMInfosetOutputter()
val jsonOut = new JsonInfosetOutputter(jsonStream, false)
- val xmlOut = new XMLTextInfosetOutputter(xmlStream, false)
+ val xmlOut = new XMLTextInfosetOutputter(xmlStream, true)
Seq(scalaOut, jdomOut, w3cdomOut, jsonOut, xmlOut).foreach { out =>
out.setIncludeDataType(true)
@@ -121,6 +156,7 @@ trait TDMLInfosetOutputter extends api.infoset.InfosetOutputter {
def xmlStream: ByteArrayOutputStream
def getResult: Node
+ def getScalaResult: Node
def toInfosetInputter: TDMLInfosetInputter
}
diff --git a/daffodil-test/src/test/resources/META-INF/services/org.apache.daffodil.api.validation.ValidatorFactory b/daffodil-test/src/test/resources/META-INF/services/org.apache.daffodil.api.validation.ValidatorFactory
new file mode 100644
index 0000000000..047377250c
--- /dev/null
+++ b/daffodil-test/src/test/resources/META-INF/services/org.apache.daffodil.api.validation.ValidatorFactory
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+org.apache.daffodil.infoset.TestStringAsXmlNamespacedValidatorFactory
+org.apache.daffodil.infoset.TestStringAsXmlNoNamespaceValidatorFactory
diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml
new file mode 100644
index 0000000000..04a2269769
--- /dev/null
+++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml
@@ -0,0 +1,89 @@
+
+
+
+
+
+
+
+ stringAsXml/namespaced/binMessage_01.dat
+
+
+ stringAsXml/namespaced/binMessage_01.dat.xml
+
+
+
+
+
+ stringAsXml/namespaced/binMessage_01.dat
+
+
+ stringAsXml/namespaced/binMessage_01.dat.xml
+
+
+ Element 'xmlStr' is a simple type
+
+
+
+
+
+ stringAsXml/namespaced/binMessage_03.dat
+
+
+ Unexpected character
+
+
+
+
+
+ stringAsXml/namespaced/binMessage_08.dat
+
+
+ Undeclared general entity "name"
+
+
+
+
+
+
+ stringAsXml/nonamespace/binMessage_01.dat
+
+
+ stringAsXml/nonamespace/binMessage_01.dat.xml
+
+
+ Value '=invalid field' is not facet-valid
+
+
+
diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlTDML.scala b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlTDML.scala
new file mode 100644
index 0000000000..62f9392338
--- /dev/null
+++ b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlTDML.scala
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.infoset
+
+import org.apache.daffodil.junit.tdml.TdmlSuite
+import org.apache.daffodil.junit.tdml.TdmlTests
+
+import org.junit.Test
+
+object TestStringAsXmlTDML extends TdmlSuite {
+ val tdmlResource = "/org/apache/daffodil/infoset/stringAsXML.tdml"
+}
+
+class TestStringAsXmlTDML extends TdmlTests {
+ val tdmlSuite = TestStringAsXmlTDML
+
+ @Test def stringAsXml_01_a = test
+ @Test def stringAsXml_01_b = test
+ @Test def stringAsXml_04 = test
+ @Test def stringAsXml_09 = test
+ @Test def stringAsXml_10 = test
+}
diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidator.scala b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidator.scala
new file mode 100644
index 0000000000..658baf0ea7
--- /dev/null
+++ b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidator.scala
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.daffodil.infoset
+
+import java.io.InputStream
+import java.net.URL
+
+import org.apache.daffodil.api.validation.ValidationHandler
+import org.apache.daffodil.api.validation.Validator
+import org.apache.daffodil.lib.util.Misc
+import org.apache.daffodil.validation.XercesValidator
+
+object TestStringAsXmlNamespacedValidator {
+ val name = "TestStringAsXmlNamespacedValidator"
+}
+
+class TestStringAsXmlNamespacedValidator extends Validator {
+
+ val schemaURL: URL = Misc
+ .getRequiredResource(
+ "/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessageWithXmlPayload.xsd"
+ )
+ .toURL
+
+ override def validateXML(document: InputStream, vh: ValidationHandler): Unit = {
+ val v = XercesValidator.fromURL(schemaURL)
+ v.validateXML(document, vh)
+ }
+}
+
+object TestStringAsXmlNoNamespaceValidator {
+ val name = "TestStringAsXmlNoNamespaceValidator"
+}
+
+class TestStringAsXmlNoNamespaceValidator extends Validator {
+
+ val schemaURL: URL = Misc
+ .getRequiredResource(
+ "/org/apache/daffodil/infoset/stringAsXml/nonamespace/xsd/binMessageWithXmlPayload.xsd"
+ )
+ .toURL
+
+ override def validateXML(document: InputStream, vh: ValidationHandler): Unit = {
+ val v = XercesValidator.fromURL(schemaURL)
+ v.validateXML(document, vh)
+ }
+}
diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidatorFactory.scala b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidatorFactory.scala
new file mode 100644
index 0000000000..e4fb252c94
--- /dev/null
+++ b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidatorFactory.scala
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.daffodil.infoset
+
+import java.util.Properties
+
+import org.apache.daffodil.api.validation.ValidatorFactory
+
+class TestStringAsXmlNamespacedValidatorFactory extends ValidatorFactory {
+ override def name: String = TestStringAsXmlNamespacedValidator.name
+
+ override def make(config: Properties) = new TestStringAsXmlNamespacedValidator
+}
+
+class TestStringAsXmlNoNamespaceValidatorFactory extends ValidatorFactory {
+ override def name: String = TestStringAsXmlNoNamespaceValidator.name
+
+ override def make(config: Properties) = new TestStringAsXmlNoNamespaceValidator
+}
From 0ac13c9c4f4dd5a775d725f08b21522b6b506d24 Mon Sep 17 00:00:00 2001
From: olabusayoT <50379531+olabusayoT@users.noreply.github.com>
Date: Tue, 14 Apr 2026 14:59:21 -0400
Subject: [PATCH 2/7] fixup!
- Removed unneeded `TDMLInfosetOutputterScala`.
- Enhanced `DaffodilXMLLoader` and related classes with `removeComments` and `removeProcInstr` flags, replacing too-broad `noNormalizations`.
- Updated `TestStringAsXmlValidator` with a unified validator for namespace and non-namespace cases.
- add comments for clarity
- remove getScalaResult from all but TDMLInfosetOutputterAll
- add clarifying info to TDMLInfosetInputter TDML Exceptions in case of non-matches
- remove intermingling of ScalaInfosetOutputter with TDMLInfosetOutputterXML
- Turn off pretty printing from XMLTextInfosetOutputter in TDMLInfosetOutputterAll
- undo type aware changes for ScalaXMLInfosetOutputter (the way it was adding namespace bindings for scalaXML was not exactly correct as it wasn't part of the child element's minimized scope. We found it would be too much trouble to implement correctly for scalaXML, so we decided to remove the functionality)
DAFFODIL-2909
---
.../lib/xml/DaffodilConstructingLoader.scala | 30 +++--
.../daffodil/lib/xml/DaffodilXMLLoader.scala | 42 ++-----
.../apache/daffodil/lib/xml/XMLUtils.scala | 115 +++++++++---------
.../infoset/ScalaXMLInfosetOutputter.scala | 25 +---
.../lib/xml/test/unit/TestXMLLoader.scala | 6 +-
.../lib/xml/test/unit/TestXMLUtils.scala | 11 ++
.../org/apache/daffodil/tdml/TDMLRunner.scala | 8 +-
.../tdml/DaffodilTDMLDFDLProcessor.scala | 21 +++-
.../processor/tdml/TDMLInfosetInputter.scala | 27 +++-
.../processor/tdml/TDMLInfosetOutputter.scala | 45 +------
.../apache/daffodil/cliTest/TestCLITdml.scala | 2 +-
...e.daffodil.api.validation.ValidatorFactory | 3 +-
.../apache/daffodil/infoset/stringAsXML.tdml | 15 +--
.../section07/variables/variables_01.tdml | 2 +-
.../infoset/TestStringAsXmlValidator.scala | 39 +++---
.../TestStringAsXmlValidatorFactory.scala | 33 -----
16 files changed, 175 insertions(+), 249 deletions(-)
delete mode 100644 daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidatorFactory.scala
diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala
index d0be20682b..a8c97963cd 100644
--- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala
+++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala
@@ -94,15 +94,16 @@ object Position {
* behavior of normalizing CRLF to LF, and solitary CR to LF.
* Defaults to true. Should only be changed in special circumstances
* as not normalizing CRLFs is non-standard for XML.
- * @param noNormalizations True to not remove comments and processing instructions and to not normalize
- * CRLF/CR to LF. This is used to keep the XML as close to the original as possible
+ * @param removeComments True to remove comments. Pass false to keep the XML as close to the original as possible
+ * @param removeProcInstr True to remove processing instructions. Pass false to keep the XML as close to the original as possible
*/
class DaffodilConstructingLoader private[xml] (
uri: URI,
errorHandler: org.xml.sax.ErrorHandler,
addPositionAttributes: Boolean,
normalizeCRLFtoLF: Boolean,
- noNormalizations: Boolean
+ removeComments: Boolean,
+ removeProcInstr: Boolean
) extends ConstructingParser(
{
// Note: we must open the XML carefully since it might be in some non
@@ -129,7 +130,8 @@ class DaffodilConstructingLoader private[xml] (
errorHandler,
addPositionAttributes,
normalizeCRLFtoLF = true,
- noNormalizations = false
+ removeComments = true,
+ removeProcInstr = true
)
/**
@@ -324,25 +326,29 @@ class DaffodilConstructingLoader private[xml] (
}
/**
- * Drops comments if noNormalizations is false
+ * Drops comments if removeComments is true
+ *
+ * This is optional, controlled by a constructor parameter.
*/
override def comment(pos: Int, s: String): Comment = {
- if (noNormalizations) {
- super.comment(pos, s)
- } else {
+ if (removeComments) {
// returning null drops comments
null
+ } else {
+ super.comment(pos, s)
}
}
/**
- * Drops processing instructions if noNormalizations is false
+ * Drops processing instructions if removeProcInstr is true
+ *
+ * This is optional, controlled by a constructor parameter.
*/
override def procInstr(pos: Int, target: String, txt: String) = {
- if (noNormalizations) {
- super.procInstr(pos, target, txt)
- } else { // returning null drops processing instructions
+ if (removeProcInstr) { // returning null drops processing instructions
null
+ } else {
+ super.procInstr(pos, target, txt)
}
}
diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala
index 7994465b71..c250dfcd03 100644
--- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala
+++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala
@@ -702,43 +702,20 @@ class DaffodilXMLLoader(val errorHandler: org.xml.sax.ErrorHandler)
* @param optSchemaURI Optional URI for XML schema for the XML source document.
* @param addPositionAttributes True to add dafint:file dafint:line attributes to all elements.
* Defaults to false.
- * @param noNormalizations True to not remove comments and processing instructions and to not normalize
- * CRLF/CR to LF. This is used to keep the XML as close to the original as possible
- * @return an scala.xml.Node (Element actually) which is the document element of the source.
- */
- def load(
- source: DaffodilSchemaSource,
- optSchemaURI: Option[URI],
- addPositionAttributes: Boolean = false,
- noNormalizations: Boolean = false
- ): scala.xml.Node =
- load(
- source,
- optSchemaURI,
- addPositionAttributes,
- normalizeCRLFtoLF = true,
- noNormalizations
- )
-
- /**
- * package private constructor gives access to normalizeCRLFtoLF feature.
- *
- * @param source The URI for the XML document which may be a XML or DFDL schema, or just XML data.
- * @param optSchemaURI Optional URI for XML schema for the XML source document.
- * @param addPositionAttributes True to add dafint:file dafint:line attributes to all elements.
- * Defaults to false.
* @param normalizeCRLFtoLF True to normalize CRLF and isolated CR to LF. This should usually be true,
* but some special case situations may require preservation of CRLF/CR.
- * @param noNormalizations True to not remove comments and processing instructions and to not normalize
- * CRLF/CR to LF. This is used to keep the XML as close to the original as possible
+ * @param removeComments True to remove comments. Pass false to keep the XML as close to the original as possible
+ * @param removeProcInstr True to remove processing instructions. Pass false to keep the XML as close to the original as possible
+ *
* @return an scala.xml.Node (Element actually) which is the document element of the source.
*/
- private[xml] def load(
+ def load(
source: DaffodilSchemaSource,
optSchemaURI: Option[URI],
- addPositionAttributes: Boolean,
- normalizeCRLFtoLF: Boolean,
- noNormalizations: Boolean
+ addPositionAttributes: Boolean = false,
+ normalizeCRLFtoLF: Boolean = true,
+ removeComments: Boolean = true,
+ removeProcInstr: Boolean = true
): scala.xml.Node = {
//
// First we invoke the validator to explicitly validate the XML against
@@ -832,7 +809,8 @@ class DaffodilXMLLoader(val errorHandler: org.xml.sax.ErrorHandler)
errorHandler,
addPositionAttributes,
normalizeCRLFtoLF,
- noNormalizations
+ removeComments,
+ removeProcInstr
)
val res =
try {
diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
index de6144050a..b08a594010 100644
--- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
+++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
@@ -600,6 +600,14 @@ object XMLUtils {
def removeComments(e: Node): Node = {
e match {
+ case x @ Elem(
+ null,
+ XMLTextInfoset.stringAsXml,
+ Null,
+ NamespaceBinding(null, null | "", _),
+ _*
+ ) =>
+ x
case Elem(prefix, label, attribs, scope, child*) => {
val newChildren = child.filterNot { _.isInstanceOf[Comment] }.map { removeComments(_) }
Elem(prefix, label, attribs, scope, true, newChildren*)
@@ -646,35 +654,31 @@ object XMLUtils {
private def removeMixedWhitespace(ns: Node): Node = {
if (!ns.isInstanceOf[Elem]) return ns
- def dropWhitespace(e: Node): Node = {
- val children = e.child
- val noWhitespace = children
- .filter {
- //
- // So this is a bit strange, but we're dropping nodes that are Empty String.
- //
- // In XML we cannot tell where there is a Text("") child, from with Nil children
- //
- case Text("") => false // drop empty strings
- case Text(data) if data.matches("""\s*""") => false
- case _ => true
- }
- .map(dropWhitespace)
- e match {
- case elem: Elem => elem.copy(child = noWhitespace)
- case _ => e
- }
- }
-
ns match {
+ // NOTE: this is specifically for the stringAsXml feature as we avoid
+ // making changes to any of its children except removing any surrounding
+ // whitespace, requiring that stringAsXml in the infoset match results exactly.
case e @ Elem(
null,
XMLTextInfoset.stringAsXml,
Null,
NamespaceBinding(null, null | "", _),
_*
- ) =>
- dropWhitespace(e)
+ ) => {
+ val (elemChildren, nonElemChildren) = e.child.partition {
+ _.isInstanceOf[Elem]
+ }
+ if (elemChildren.length != 1)
+ throw new Exception("stringAsXml must contain a single child element.")
+ nonElemChildren.foreach {
+ case Text(data) if data.matches("""\s*""") => // no-op, empty text siblings are fine
+ case x =>
+ throw new Exception(
+ "%s is some kind of mixed content not allowed as a stringAsXml child".format(x)
+ )
+ }
+ e.asInstanceOf[Elem].copy(child = elemChildren)
+ }
case _ => {
val e = ns.asInstanceOf[Elem]
val children = e.child
@@ -850,23 +854,11 @@ object XMLUtils {
* - Removes unnecessary whitespace
*/
def normalize(n: Node): Node = {
- n match {
- case x @ Elem(
- null,
- XMLTextInfoset.stringAsXml,
- Null,
- NamespaceBinding(null, null | "", _),
- _*
- ) =>
- x
- case _ => {
- val noComments = removeComments(n)
- val noPCData = convertPCDataToText(noComments)
- val combinedText = coalesceAllAdjacentTextNodes(noPCData)
- val noMixedWS = removeMixedWhitespace(combinedText)
- noMixedWS
- }
- }
+ val noComments = removeComments(n)
+ val noPCData = convertPCDataToText(noComments)
+ val combinedText = coalesceAllAdjacentTextNodes(noPCData)
+ val noMixedWS = removeMixedWhitespace(combinedText)
+ noMixedWS
}
class XMLDifferenceException(message: String) extends Exception(message)
@@ -1031,26 +1023,13 @@ Differences were (path, expected, actual):
} else if (checkPrefixes && prefixA != prefixB) {
// different prefix
List((zPath + "/" + labelA + "@prefix", prefixA, prefixB))
- } else if (
- checkPrefixes && prefixA != null && a.getNamespace(prefixA) != nsbA.getURI(prefixA)
- ) {
- // prefix doesn't resolve to namespace
+ } else if (checkPrefixes && a.scope.getURI(prefixA) != b.scope.getURI(prefixB)) {
+ // prefixes don't resolve to the same namespace
List(
(
- zPath + "/" + labelA + "@prefix-uri",
- nsbA.getURI(prefixA),
- a.getNamespace(prefixA)
- )
- )
- } else if (
- checkPrefixes && prefixB != null && b.getNamespace(prefixB) != nsbB.getURI(prefixB)
- ) {
- // prefix doesn't resolve to namespace
- List(
- (
- zPath + "/" + labelA + "@prefix-uri",
- nsbB.getURI(prefixB),
- b.getNamespace(prefixB)
+ zPath + "/" + labelA + "@prefix-namespace",
+ a.scope.getURI(prefixA),
+ b.scope.getURI(prefixB)
)
)
} else if (checkNamespaces && mappingsA != mappingsB) {
@@ -1135,6 +1114,28 @@ Differences were (path, expected, actual):
computeTextDiff(zPath, tA, tB, maybeType, maybeFloatEpsilon, maybeDoubleEpsilon)
thisDiff
}
+ case (cA: Comment, cB: Comment) => {
+ val thisDiff = computeTextDiff(
+ zPath,
+ cA.toString,
+ cB.toString,
+ maybeType,
+ maybeFloatEpsilon,
+ maybeDoubleEpsilon
+ )
+ thisDiff
+ }
+ case (pcA: PCData, pcB: PCData) => {
+ val thisDiff = computeTextDiff(
+ zPath,
+ pcA.toString,
+ pcB.toString,
+ maybeType,
+ maybeFloatEpsilon,
+ maybeDoubleEpsilon
+ )
+ thisDiff
+ }
case (pA: ProcInstr, pB: ProcInstr) => {
val ProcInstr(tA1label, tA1content) = pA
val ProcInstr(tB1label, tB1content) = pB
diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/ScalaXMLInfosetOutputter.scala b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/ScalaXMLInfosetOutputter.scala
index 4abe32f378..e30534fcc7 100644
--- a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/ScalaXMLInfosetOutputter.scala
+++ b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/ScalaXMLInfosetOutputter.scala
@@ -19,9 +19,7 @@ package org.apache.daffodil.runtime1.infoset
import scala.collection.mutable.ListBuffer
import scala.xml.MetaData
-import scala.xml.NamespaceBinding
import scala.xml.Null
-import scala.xml.PrefixedAttribute
import scala.xml.UnprefixedAttribute
import org.apache.daffodil.api.DFDLPrimType
@@ -56,16 +54,6 @@ class ScalaXMLInfosetOutputter(showFreedInfo: Boolean = false)
resultNode = Maybe(root(0))
}
- private def getScope(diElem: DIElement): NamespaceBinding = {
- val minScope = diElem.metadata.minimizedScope
- // if including xsi:type is enabled, ensure the xsi namespace is defined on the root element
- if (getIncludeDataType() && stack.length == 1 && minScope.getURI("xsi") == null) {
- NamespaceBinding("xsi", XMLUtils.XSI_NAMESPACE, minScope)
- } else {
- minScope
- }
- }
-
private def getAttributes(diElem: DIElement): MetaData = {
val nilAttr = if (diElem.isNilled) XMLUtils.xmlNilAttribute else Null
val freedAttr =
@@ -92,14 +80,7 @@ class ScalaXMLInfosetOutputter(showFreedInfo: Boolean = false)
} else {
nilAttr
}
- val typedAttr =
- if (getIncludeDataType() && diElem.isSimple) {
- val primName = diElem.erd.optPrimType.get.name
- new PrefixedAttribute("xsi", "type", "xs:" + primName, freedAttr)
- } else {
- freedAttr
- }
- typedAttr
+ freedAttr
}
override def startSimple(se: InfosetSimpleElement): Unit = {
@@ -124,7 +105,7 @@ class ScalaXMLInfosetOutputter(showFreedInfo: Boolean = false)
diSimple.metadata.prefix,
diSimple.metadata.name,
attributes,
- getScope(diSimple),
+ diSimple.metadata.minimizedScope,
minimizeEmpty = true,
children*
)
@@ -149,7 +130,7 @@ class ScalaXMLInfosetOutputter(showFreedInfo: Boolean = false)
diComplex.metadata.prefix,
diComplex.metadata.name,
attributes,
- getScope(diComplex),
+ diComplex.metadata.minimizedScope,
minimizeEmpty = true,
children*
)
diff --git a/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLLoader.scala b/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLLoader.scala
index 2f4f9a827b..1471da3a3d 100644
--- a/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLLoader.scala
+++ b/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLLoader.scala
@@ -176,16 +176,14 @@ class TestXMLLoader {
ss,
None,
addPositionAttributes = false,
- normalizeCRLFtoLF = false,
- noNormalizations = false
+ normalizeCRLFtoLF = false
)
val xmlFromDafLoaderNormalized =
loader.load(
ss,
None,
addPositionAttributes = false,
- normalizeCRLFtoLF = true,
- noNormalizations = false
+ normalizeCRLFtoLF = true
)
{
diff --git a/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLUtils.scala b/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLUtils.scala
index 3e70d7129d..2c9690f4b1 100644
--- a/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLUtils.scala
+++ b/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLUtils.scala
@@ -98,6 +98,17 @@ class TestXMLUtils {
assertEquals("ns2", b)
}
+ @Test def testPrefixNSDiff(): Unit = {
+ // the same prefix bound to different namespaces should error
+ val d1 = a
+ val d2 = a
+ val diffs = XMLUtils.computeDiff(d1, d2, checkPrefixes = true)
+ val Seq((path, a, b)) = diffs
+ assertEquals("/a@prefix-namespace", path)
+ assertEquals("someprefix", a)
+ assertEquals("someotherprefix", b)
+ }
+
@Test def testNamespaceDiff(): Unit = {
// different namespace mappings should error
val d1 = a
diff --git a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
index cbf96e2d9a..f36a2a4909 100644
--- a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
+++ b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
@@ -2800,7 +2800,13 @@ case class DFDLInfoset(di: Node, parent: Infoset) {
val testSuite = testCase.parent
val before = testSuite.loadingExceptions.clone()
- val elem = loader.load(infosetSrc, None, noNormalizations = true) // no schema
+ val elem = loader.load(
+ infosetSrc,
+ None,
+ normalizeCRLFtoLF = false,
+ removeComments = false,
+ removeProcInstr = false
+ ) // no schema
//
// TODO: DAFFODIL-288 validate the infoset also
// You can pass the optDataSchema, which appears to be the correct thing
diff --git a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala
index 9d043092d0..da0d134a67 100644
--- a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala
+++ b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala
@@ -41,6 +41,7 @@ import org.apache.daffodil.lib.util.MaybeULong
import org.apache.daffodil.lib.xml.DaffodilSAXParserFactory
import org.apache.daffodil.lib.xml.XMLUtils
import org.apache.daffodil.lib.xml.XMLUtils.XMLDifferenceException
+import org.apache.daffodil.processor.tdml
import org.apache.daffodil.runtime1.iapi.*
import org.apache.daffodil.runtime1.iapi.DFDL.DaffodilUnhandledSAXException
import org.apache.daffodil.runtime1.iapi.DFDL.DaffodilUnparseContentHandler
@@ -238,7 +239,7 @@ class DaffodilTDMLDFDLProcessor private[tdml] (
): TDMLUnparseResult = {
val dafpr = parseResult.asInstanceOf[DaffodilTDMLParseResult]
val inputter = dafpr.inputter
- val resNode = dafpr.getScalaResult
+ val resNode = dafpr.getResult
unparse(inputter, resNode, outStream)
}
@@ -268,8 +269,6 @@ class DaffodilTDMLDFDLProcessor private[tdml] (
): TDMLParseResult = {
val outputter = if (tdmlApiInfosetsEnv == "all") {
TDMLInfosetOutputterAll()
- } else if (tdmlApiInfosetsEnv == "scala") {
- TDMLInfosetOutputterScala()
} else {
TDMLInfosetOutputterXML()
}
@@ -310,7 +309,14 @@ class DaffodilTDMLDFDLProcessor private[tdml] (
xri.parse(sis)
if (!actual.isError && !errorHandler.isError) {
- val actualOutputArray = outputter.getScalaResult.toString.getBytes("UTF-8")
+ // we use the scala result because both the ScalaInfosetOutputter and
+ // the SAXInfosetOutputter do not implement stringAsXml,
+ // which helps to avoid any differences caused by the stringAsXml conversions.
+ val actualOutputArray = outputter
+ .asInstanceOf[tdml.TDMLInfosetOutputterAll]
+ .getScalaResult
+ .toString
+ .getBytes("UTF-8")
val baos = new ByteArrayOutputStream(actualOutputArray.length)
baos.write(actualOutputArray)
verifySameParseOutput(baos, saxOutputStream)
@@ -397,6 +403,12 @@ class DaffodilTDMLDFDLProcessor private[tdml] (
XMLUtils.compareAndReport(
dpParseXMLNodeOutput,
saxParseXMLNodeOutput,
+ // we no longer checkNamespaces because SAX outputs the same namespaces as
+ // the XMLTextInfosetOutputter but not the scalaXMLInfosetOutputter, so checking
+ // namespaces fails in the DAFFODIL_TDML_API_INFOSETS='all' case due to differences
+ // in the scalaXMLInfosetOutputter namespaces, probably having to do with
+ // minimizeScope issues
+ // checkNamespaces = true,
checkPrefixes = true
)
} catch {
@@ -437,7 +449,6 @@ final class DaffodilTDMLParseResult(actual: ParseResult, outputter: TDMLInfosetO
extends TDMLParseResult {
override def getResult: Node = outputter.getResult
- def getScalaResult: Node = outputter.getScalaResult
override def getBlobPaths: java.util.List[Path] = outputter.getBlobPaths
diff --git a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetInputter.scala b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetInputter.scala
index 9ea2a7d9d9..e9097b4413 100644
--- a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetInputter.scala
+++ b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetInputter.scala
@@ -39,14 +39,20 @@ class TDMLInfosetInputter(
override def getEventType(): InfosetInputterEventType = {
val res = inputter.getEventType()
if (!others.forall(_.getEventType() == res))
- throw TDMLException("getEventType does not match", Some(implString))
+ throw TDMLException(
+ s"getEventType does not match\n${others.zip(others.map(_.getEventType)).mkString("\n")}",
+ Some(implString)
+ )
res
}
override def getLocalName(): String = {
val res = inputter.getLocalName()
if (!others.forall(_.getLocalName() == res))
- throw TDMLException("getLocalName does not match", Some(implString))
+ throw TDMLException(
+ s"getLocalName does not match\n${others.zip(others.map(_.getLocalName)).mkString("\n")}",
+ Some(implString)
+ )
res
}
@@ -65,7 +71,10 @@ class TDMLInfosetInputter(
}
}
if (!othersMatch)
- throw TDMLException("getNamespaceURI does not match", Some(implString))
+ throw TDMLException(
+ s"getNamespaceURI does not match\n${others.filter(_.getSupportsNamespaces).map(o => (o, o.getNamespaceURI)).mkString("\n")}",
+ Some(implString)
+ )
res
}
@@ -100,7 +109,7 @@ class TDMLInfosetInputter(
if (!othersmatch)
throw TDMLException(
- s"getSimpleText does not match for $res ${others.zip(otherStrings).mkString("\n")}",
+ s"getSimpleText does not match for $res\n${others.zip(otherStrings).mkString("\n")}",
Some(implString)
)
@@ -130,14 +139,20 @@ class TDMLInfosetInputter(
override def isNilled(): JBoolean = {
val res = inputter.isNilled()
if (!others.forall(_.isNilled() == res))
- throw TDMLException("isNilled does not match", Some(implString))
+ throw TDMLException(
+ s"isNilled does not match\n${others.zip(others.map(_.isNilled)).mkString("\n")}",
+ Some(implString)
+ )
res
}
override def hasNext(): Boolean = {
val res = inputter.hasNext()
if (!others.forall(_.hasNext() == res))
- throw TDMLException("hasNext does not match", Some(implString))
+ throw TDMLException(
+ s"hasNext does not match\n${others.zip(others.map(_.hasNext)).mkString("\n")}",
+ Some(implString)
+ )
res
}
diff --git a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala
index 6f185c7b01..a04262955f 100644
--- a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala
+++ b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala
@@ -35,49 +35,19 @@ import org.apache.daffodil.runtime1.infoset.W3CDOMInfosetOutputter
import org.apache.daffodil.runtime1.infoset.XMLTextInfosetInputter
import org.apache.daffodil.runtime1.infoset.XMLTextInfosetOutputter
-class TDMLInfosetOutputterScala(
- scalaOut: ScalaXMLInfosetOutputter,
- override val xmlStream: ByteArrayOutputStream,
- xmlOut: XMLTextInfosetOutputter
-) extends TeeInfosetOutputter(Seq(scalaOut, xmlOut)*)
- with TDMLInfosetOutputter {
-
- override def getResult: Node =
- scala.xml.XML.load(new ByteArrayInputStream(xmlStream.toByteArray))
-
- override def toInfosetInputter: TDMLInfosetInputter = {
- val scalaIn = new ScalaXMLInfosetInputter(scalaOut.getResult())
- new TDMLInfosetInputter(scalaIn, Seq())
- }
-
- override def getScalaResult: Node = scalaOut.getResult()
-}
-
-object TDMLInfosetOutputterScala {
- def apply(): TDMLInfosetOutputterScala = {
- val scalaOut = new ScalaXMLInfosetOutputter()
- scalaOut.setIncludeDataType(true)
- val baos = new ByteArrayOutputStream()
- val xmlOut = new XMLTextInfosetOutputter(baos, true)
- new TDMLInfosetOutputterScala(scalaOut, baos, xmlOut)
- }
-}
-
object TDMLInfosetOutputterXML {
def apply(): TDMLInfosetOutputterXML = {
val baos = new ByteArrayOutputStream()
- val xmlOut = new XMLTextInfosetOutputter(baos, true)
+ val xmlOut = new XMLTextInfosetOutputter(baos, false)
xmlOut.setIncludeDataType(true)
- val scalaOut = new ScalaXMLInfosetOutputter()
- new TDMLInfosetOutputterXML(baos, xmlOut, scalaOut)
+ new TDMLInfosetOutputterXML(baos, xmlOut)
}
}
class TDMLInfosetOutputterXML(
override val xmlStream: ByteArrayOutputStream,
- xmlOut: XMLTextInfosetOutputter,
- scalaOut: ScalaXMLInfosetOutputter
-) extends TeeInfosetOutputter(Seq(xmlOut, scalaOut)*)
+ xmlOut: XMLTextInfosetOutputter
+) extends TeeInfosetOutputter(Seq(xmlOut)*)
with TDMLInfosetOutputter {
override def getResult: Node =
@@ -87,8 +57,6 @@ class TDMLInfosetOutputterXML(
val xmlIn = new XMLTextInfosetInputter(new ByteArrayInputStream(xmlStream.toByteArray))
new TDMLInfosetInputter(xmlIn, Seq())
}
-
- override def getScalaResult: Node = scalaOut.getResult()
}
class TDMLInfosetOutputterAll(
@@ -133,9 +101,9 @@ object TDMLInfosetOutputterAll {
val jdomOut = new JDOMInfosetOutputter()
val w3cdomOut = new W3CDOMInfosetOutputter()
val jsonOut = new JsonInfosetOutputter(jsonStream, false)
- val xmlOut = new XMLTextInfosetOutputter(xmlStream, true)
+ val xmlOut = new XMLTextInfosetOutputter(xmlStream, false)
- Seq(scalaOut, jdomOut, w3cdomOut, jsonOut, xmlOut).foreach { out =>
+ Seq(jdomOut, w3cdomOut, jsonOut, xmlOut).foreach { out =>
out.setIncludeDataType(true)
}
@@ -156,7 +124,6 @@ trait TDMLInfosetOutputter extends api.infoset.InfosetOutputter {
def xmlStream: ByteArrayOutputStream
def getResult: Node
- def getScalaResult: Node
def toInfosetInputter: TDMLInfosetInputter
}
diff --git a/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLITdml.scala b/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLITdml.scala
index 1f0e062f32..046fbe6fb2 100644
--- a/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLITdml.scala
+++ b/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLITdml.scala
@@ -45,7 +45,7 @@ class TestCLITdml {
"daffodil-test/src/test/resources/org/apache/daffodil/section06/entities/Entities.tdml"
)
- val envs = Map("DAFFODIL_TDML_API_INFOSETS" -> "scala")
+ val envs = Map("DAFFODIL_TDML_API_INFOSETS" -> "xml")
runCLI(args"test -i -t $tdml byte_entities_6_08", envs = envs) { cli =>
// parse
diff --git a/daffodil-test/src/test/resources/META-INF/services/org.apache.daffodil.api.validation.ValidatorFactory b/daffodil-test/src/test/resources/META-INF/services/org.apache.daffodil.api.validation.ValidatorFactory
index 047377250c..a12f49a78a 100644
--- a/daffodil-test/src/test/resources/META-INF/services/org.apache.daffodil.api.validation.ValidatorFactory
+++ b/daffodil-test/src/test/resources/META-INF/services/org.apache.daffodil.api.validation.ValidatorFactory
@@ -13,5 +13,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-org.apache.daffodil.infoset.TestStringAsXmlNamespacedValidatorFactory
-org.apache.daffodil.infoset.TestStringAsXmlNoNamespaceValidatorFactory
+org.apache.daffodil.infoset.TestStringAsXmlValidatorFactory
diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml
index 04a2269769..7d71f77ef9 100644
--- a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml
+++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml
@@ -23,11 +23,11 @@
xmlns:dfdl="http://www.ogf.org/dfdl/dfdl-1.0/"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:ex="http://example.com"
- defaultValidation="off">
+ defaultValidation="on">
+ validation="TestStringAsXmlValidator">
stringAsXml/namespaced/binMessage_01.dat
@@ -37,8 +37,7 @@
+ model="/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessage.dfdl.xsd">
stringAsXml/namespaced/binMessage_01.dat
@@ -51,8 +50,7 @@
+ model="/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessage.dfdl.xsd">
stringAsXml/namespaced/binMessage_03.dat
@@ -62,8 +60,7 @@
+ model="/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessage.dfdl.xsd">
stringAsXml/namespaced/binMessage_08.dat
@@ -75,7 +72,7 @@
+ validation="TestStringAsXmlValidator">
stringAsXml/nonamespace/binMessage_01.dat
diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/section07/variables/variables_01.tdml b/daffodil-test/src/test/resources/org/apache/daffodil/section07/variables/variables_01.tdml
index 6d636c6af1..545f47029f 100644
--- a/daffodil-test/src/test/resources/org/apache/daffodil/section07/variables/variables_01.tdml
+++ b/daffodil-test/src/test/resources/org/apache/daffodil/section07/variables/variables_01.tdml
@@ -48,7 +48,7 @@
- 42
+ 42
diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidator.scala b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidator.scala
index 658baf0ea7..560329ad87 100644
--- a/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidator.scala
+++ b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidator.scala
@@ -18,44 +18,33 @@ package org.apache.daffodil.infoset
import java.io.InputStream
import java.net.URL
+import java.util.Properties
import org.apache.daffodil.api.validation.ValidationHandler
import org.apache.daffodil.api.validation.Validator
-import org.apache.daffodil.lib.util.Misc
+import org.apache.daffodil.api.validation.ValidatorFactory
import org.apache.daffodil.validation.XercesValidator
-object TestStringAsXmlNamespacedValidator {
- val name = "TestStringAsXmlNamespacedValidator"
+object TestStringAsXmlValidator {
+ val name = "TestStringAsXmlValidator"
}
-class TestStringAsXmlNamespacedValidator extends Validator {
-
- val schemaURL: URL = Misc
- .getRequiredResource(
- "/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessageWithXmlPayload.xsd"
- )
- .toURL
+class TestStringAsXmlValidator(schemaURL: String) extends Validator {
+ private lazy val xercesValidator = XercesValidator.fromURL(new URL(schemaURL))
override def validateXML(document: InputStream, vh: ValidationHandler): Unit = {
- val v = XercesValidator.fromURL(schemaURL)
- v.validateXML(document, vh)
+ xercesValidator.validateXML(document, vh)
}
}
-object TestStringAsXmlNoNamespaceValidator {
- val name = "TestStringAsXmlNoNamespaceValidator"
-}
-
-class TestStringAsXmlNoNamespaceValidator extends Validator {
+class TestStringAsXmlValidatorFactory extends ValidatorFactory {
- val schemaURL: URL = Misc
- .getRequiredResource(
- "/org/apache/daffodil/infoset/stringAsXml/nonamespace/xsd/binMessageWithXmlPayload.xsd"
- )
- .toURL
+ override def name: String = TestStringAsXmlValidator.name
- override def validateXML(document: InputStream, vh: ValidationHandler): Unit = {
- val v = XercesValidator.fromURL(schemaURL)
- v.validateXML(document, vh)
+ override def make(config: Properties) = {
+ val dfdlSchema = config.getProperty(name)
+ // assumes the validation XSD path is the same as the DFDL schema path but with a different suffix
+ val xsdSchema = dfdlSchema.replace(".dfdl.xsd", "WithXmlPayload.xsd")
+ new TestStringAsXmlValidator(xsdSchema)
}
}
diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidatorFactory.scala b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidatorFactory.scala
deleted file mode 100644
index e4fb252c94..0000000000
--- a/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidatorFactory.scala
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.daffodil.infoset
-
-import java.util.Properties
-
-import org.apache.daffodil.api.validation.ValidatorFactory
-
-class TestStringAsXmlNamespacedValidatorFactory extends ValidatorFactory {
- override def name: String = TestStringAsXmlNamespacedValidator.name
-
- override def make(config: Properties) = new TestStringAsXmlNamespacedValidator
-}
-
-class TestStringAsXmlNoNamespaceValidatorFactory extends ValidatorFactory {
- override def name: String = TestStringAsXmlNoNamespaceValidator.name
-
- override def make(config: Properties) = new TestStringAsXmlNoNamespaceValidator
-}
From 47912e48bc1b445aca61454a41c8b3a700f67e39 Mon Sep 17 00:00:00 2001
From: olabusayoT <50379531+olabusayoT@users.noreply.github.com>
Date: Sat, 16 May 2026 15:54:43 -0400
Subject: [PATCH 3/7] fixup! fixup!
- ensure stringAsXml file line endings are not normalized on Windows
- change generic exception to InvalidInfosetException
---
.gitattributes | 4 +++-
.../src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala | 3 ++-
2 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/.gitattributes b/.gitattributes
index b49c2777e5..4538f0d789 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -14,4 +14,6 @@
# limitations under the License.
# Do not include KEYS in archived source releases
-/KEYS export-ignore
+/KEYS export-ignore
+# ensure stringAsXml file line endings are not normalized in windows
+/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/** binary
\ No newline at end of file
diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
index b08a594010..b161ff862b 100644
--- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
+++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
@@ -42,6 +42,7 @@ import org.apache.daffodil.lib.iapi.URISchemaSource
import org.apache.daffodil.lib.schema.annotation.props.LookupLocation
import org.apache.daffodil.lib.util.Maybe
import org.apache.daffodil.lib.util.Misc
+import org.apache.daffodil.runtime1.infoset.InvalidInfosetException
import org.apache.daffodil.runtime1.infoset.XMLTextInfoset
import org.apache.commons.io.IOUtils
@@ -669,7 +670,7 @@ object XMLUtils {
_.isInstanceOf[Elem]
}
if (elemChildren.length != 1)
- throw new Exception("stringAsXml must contain a single child element.")
+ throw new InvalidInfosetException("stringAsXml must contain a single child element.")
nonElemChildren.foreach {
case Text(data) if data.matches("""\s*""") => // no-op, empty text siblings are fine
case x =>
From 6ddad8d791a3f73ddad82dc79a671476760b99f4 Mon Sep 17 00:00:00 2001
From: olabusayoT <50379531+olabusayoT@users.noreply.github.com>
Date: Mon, 18 May 2026 17:12:43 -0400
Subject: [PATCH 4/7] fixup! fixup! fixup!
- make XMLUtils normalize CRLF to LF in text nodes when normalizing expected/actual infosets for comparison, except inside stringAsXml elements
DEPRECATION/COMPATIBILITY
This code checks the actual infoset for the presence of the XMLTextInfoset.stringAsXml element (currently stringAsXML) and won't normalize CRLF to LF within that element
---
.gitattributes | 2 +-
.../apache/daffodil/lib/xml/XMLUtils.scala | 46 +++++++++++++++++--
2 files changed, 43 insertions(+), 5 deletions(-)
diff --git a/.gitattributes b/.gitattributes
index 4538f0d789..51ddbe2634 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -16,4 +16,4 @@
# Do not include KEYS in archived source releases
/KEYS export-ignore
# ensure stringAsXml file line endings are not normalized in windows
-/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/** binary
\ No newline at end of file
+/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/** -text
\ No newline at end of file
diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
index b161ff862b..9800fb4fb4 100644
--- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
+++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
@@ -648,6 +648,43 @@ object XMLUtils {
res
}
+ /**
+ * normalizes CRLF to LF within text nodes in non-stringAsXML elements
+ */
+ private def normalizeCRLFtoLF(ns:Node): Node = {
+ if (!ns.isInstanceOf[Elem]) return ns
+
+ ns match {
+ // NOTE: this is specifically for the stringAsXml feature as we avoid
+ // making changes to any of its children requiring that stringAsXml in
+ // the infoset match results exactly.
+ case e @ Elem(
+ null,
+ XMLTextInfoset.stringAsXml,
+ Null,
+ NamespaceBinding(null, null | "", _),
+ _*
+ ) => {
+ e
+ }
+ case _ => {
+ val e = ns.asInstanceOf[Elem]
+ val children = e.child
+ val normalized = children.map {
+ case Text(data) if data.contains("\r") => {
+ val replaced = data.replaceAll("\r\n", "\n").replaceAll("\r", "\n")
+ Text(replaced)
+ }
+ case c => c
+ }.map(normalizeCRLFtoLF)
+ val res =
+ if (normalized eq children) e
+ else e.copy(child = normalized)
+ res
+ }
+ }
+ }
+
/**
* removes insignificant whitespace from between elements
*/
@@ -859,7 +896,8 @@ object XMLUtils {
val noPCData = convertPCDataToText(noComments)
val combinedText = coalesceAllAdjacentTextNodes(noPCData)
val noMixedWS = removeMixedWhitespace(combinedText)
- noMixedWS
+ val noCRLFs = normalizeCRLFtoLF(noMixedWS)
+ noCRLFs
}
class XMLDifferenceException(message: String) extends Exception(message)
@@ -900,11 +938,11 @@ Actual (attributes %s for diff)
Differences were (path, expected, actual):
%s""".format(
(if (checkPrefixes || checkNamespaces) "compared for diff"
- else "stripped"),
+ else "stripped"),
(if (checkPrefixes || checkNamespaces) expected
- else removeAttributes(expected).toString),
+ else removeAttributes(expected).toString),
(if (checkPrefixes || checkNamespaces) "compared"
- else "ignored"),
+ else "ignored"),
actual,
diffs.map { _.toString }.mkString("- ", "\n- ", "\n")
)
From 1e8ba75e4eb376b7f29e9f58e5f9c80f98e37d0c Mon Sep 17 00:00:00 2001
From: olabusayoT <50379531+olabusayoT@users.noreply.github.com>
Date: Mon, 18 May 2026 17:35:23 -0400
Subject: [PATCH 5/7] fixup! fixup! fixup! fixup!
- reformat code
---
.../apache/daffodil/lib/xml/XMLUtils.scala | 34 ++++++++++---------
1 file changed, 18 insertions(+), 16 deletions(-)
diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
index 9800fb4fb4..8c761b543a 100644
--- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
+++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
@@ -651,7 +651,7 @@ object XMLUtils {
/**
* normalizes CRLF to LF within text nodes in non-stringAsXML elements
*/
- private def normalizeCRLFtoLF(ns:Node): Node = {
+ private def normalizeCRLFtoLF(ns: Node): Node = {
if (!ns.isInstanceOf[Elem]) return ns
ns match {
@@ -659,24 +659,26 @@ object XMLUtils {
// making changes to any of its children requiring that stringAsXml in
// the infoset match results exactly.
case e @ Elem(
- null,
- XMLTextInfoset.stringAsXml,
- Null,
- NamespaceBinding(null, null | "", _),
- _*
- ) => {
+ null,
+ XMLTextInfoset.stringAsXml,
+ Null,
+ NamespaceBinding(null, null | "", _),
+ _*
+ ) => {
e
}
case _ => {
val e = ns.asInstanceOf[Elem]
val children = e.child
- val normalized = children.map {
- case Text(data) if data.contains("\r") => {
- val replaced = data.replaceAll("\r\n", "\n").replaceAll("\r", "\n")
- Text(replaced)
+ val normalized = children
+ .map {
+ case Text(data) if data.contains("\r") => {
+ val replaced = data.replaceAll("\r\n", "\n").replaceAll("\r", "\n")
+ Text(replaced)
+ }
+ case c => c
}
- case c => c
- }.map(normalizeCRLFtoLF)
+ .map(normalizeCRLFtoLF)
val res =
if (normalized eq children) e
else e.copy(child = normalized)
@@ -938,11 +940,11 @@ Actual (attributes %s for diff)
Differences were (path, expected, actual):
%s""".format(
(if (checkPrefixes || checkNamespaces) "compared for diff"
- else "stripped"),
+ else "stripped"),
(if (checkPrefixes || checkNamespaces) expected
- else removeAttributes(expected).toString),
+ else removeAttributes(expected).toString),
(if (checkPrefixes || checkNamespaces) "compared"
- else "ignored"),
+ else "ignored"),
actual,
diffs.map { _.toString }.mkString("- ", "\n- ", "\n")
)
From bd27e1583053d05a08c97b89dd06fd1780488457 Mon Sep 17 00:00:00 2001
From: olabusayoT <50379531+olabusayoT@users.noreply.github.com>
Date: Wed, 20 May 2026 11:34:57 -0400
Subject: [PATCH 6/7] fixup! fixup! fixup! fixup! fixup! fixup!
- Introduced `binMessageA` element and relevant schema changes, including `stringAsXmlGroupA` and updated XML payload references, to test an inline comment in stringAsXml.
- Enhanced `XMLUtils` to streamline `CRLF` normalization and `stringAsXml` element identification.
- Adjusted `.gitattributes` to prevent line ending normalization for specific test files.
---
.gitattributes | 2 +-
.../apache/daffodil/lib/xml/XMLUtils.scala | 91 ++++++++----------
.../apache/daffodil/infoset/stringAsXML.tdml | 12 +++
.../namespaced/binMessage_01.dat.xml | 3 +
.../stringAsXml/namespaced/binMessage_01a.dat | Bin 0 -> 916 bytes
.../namespaced/binMessage_01a.dat.xml | 41 ++++++++
.../namespaced/xsd/binMessage.dfdl.xsd | 28 +++++-
.../xsd/binMessageWithXmlPayload.xsd | 27 ++++++
.../namespaced/xsd/stringAsXmlWrapper.xsd | 12 +++
.../stringAsXml/namespaced/xsd/xmlPayload.xsd | 8 ++
.../infoset/TestStringAsXmlTDML.scala | 1 +
11 files changed, 172 insertions(+), 53 deletions(-)
create mode 100644 daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat
create mode 100644 daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat.xml
diff --git a/.gitattributes b/.gitattributes
index 51ddbe2634..cf1911d4fc 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -16,4 +16,4 @@
# Do not include KEYS in archived source releases
/KEYS export-ignore
# ensure stringAsXml file line endings are not normalized in windows
-/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/** -text
\ No newline at end of file
+/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml -text
\ No newline at end of file
diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
index 8c761b543a..fb0b706fbe 100644
--- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
+++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
@@ -601,14 +601,7 @@ object XMLUtils {
def removeComments(e: Node): Node = {
e match {
- case x @ Elem(
- null,
- XMLTextInfoset.stringAsXml,
- Null,
- NamespaceBinding(null, null | "", _),
- _*
- ) =>
- x
+ case x : Elem if isStringAsXmlElem(x) => x
case Elem(prefix, label, attribs, scope, child*) => {
val newChildren = child.filterNot { _.isInstanceOf[Comment] }.map { removeComments(_) }
Elem(prefix, label, attribs, scope, true, newChildren*)
@@ -648,42 +641,47 @@ object XMLUtils {
res
}
+ private def isStringAsXmlElem(ns: Node): Boolean = {
+ ns match {
+ case e @ Elem(
+ null,
+ XMLTextInfoset.stringAsXml,
+ Null,
+ NamespaceBinding(null, null | "", _),
+ _*
+ ) => true
+ case _ => false
+ }
+ }
+
/**
* normalizes CRLF to LF within text nodes in non-stringAsXML elements
+ *
+ * Some fields in infosets could contain LFs, which could be changed to CRLFs
+ * on Windows due to git's autocrlf feature. Since infoset outputters always
+ * output LF, we need to undo what git might do and normalize those CRLFs
+ * to LF.
*/
private def normalizeCRLFtoLF(ns: Node): Node = {
- if (!ns.isInstanceOf[Elem]) return ns
-
ns match {
// NOTE: this is specifically for the stringAsXml feature as we avoid
// making changes to any of its children requiring that stringAsXml in
// the infoset match results exactly.
- case e @ Elem(
- null,
- XMLTextInfoset.stringAsXml,
- Null,
- NamespaceBinding(null, null | "", _),
- _*
- ) => {
- e
- }
- case _ => {
- val e = ns.asInstanceOf[Elem]
+ case e: Elem if isStringAsXmlElem(e) => e
+ case e: Elem => {
val children = e.child
- val normalized = children
- .map {
- case Text(data) if data.contains("\r") => {
- val replaced = data.replaceAll("\r\n", "\n").replaceAll("\r", "\n")
- Text(replaced)
- }
- case c => c
- }
- .map(normalizeCRLFtoLF)
- val res =
+ val normalized = children.map(normalizeCRLFtoLF)
+ val res = {
if (normalized eq children) e
else e.copy(child = normalized)
+ }
res
}
+ case Text(data) if data.contains("\r") => {
+ val replaced = data.replaceAll("\r\n", "\n").replaceAll("\r", "\n")
+ Text(replaced)
+ }
+ case _ => ns
}
}
@@ -692,19 +690,11 @@ object XMLUtils {
*/
private def removeMixedWhitespace(ns: Node): Node = {
- if (!ns.isInstanceOf[Elem]) return ns
-
ns match {
// NOTE: this is specifically for the stringAsXml feature as we avoid
// making changes to any of its children except removing any surrounding
// whitespace, requiring that stringAsXml in the infoset match results exactly.
- case e @ Elem(
- null,
- XMLTextInfoset.stringAsXml,
- Null,
- NamespaceBinding(null, null | "", _),
- _*
- ) => {
+ case e: Elem if isStringAsXmlElem(e) => {
val (elemChildren, nonElemChildren) = e.child.partition {
_.isInstanceOf[Elem]
}
@@ -719,8 +709,7 @@ object XMLUtils {
}
e.asInstanceOf[Elem].copy(child = elemChildren)
}
- case _ => {
- val e = ns.asInstanceOf[Elem]
+ case e: Elem => {
val children = e.child
val noMixedChildren =
if (children.exists(_.isInstanceOf[Elem])) {
@@ -751,8 +740,8 @@ object XMLUtils {
else e.copy(child = noMixedChildren)
res
}
+ case _ => ns
}
-
}
/**
@@ -1157,23 +1146,23 @@ Differences were (path, expected, actual):
}
case (cA: Comment, cB: Comment) => {
val thisDiff = computeTextDiff(
- zPath,
+ zPath + "/@comment",
cA.toString,
cB.toString,
- maybeType,
- maybeFloatEpsilon,
- maybeDoubleEpsilon
+ None,
+ None,
+ None
)
thisDiff
}
case (pcA: PCData, pcB: PCData) => {
val thisDiff = computeTextDiff(
- zPath,
+ zPath + "/@PCDATA",
pcA.toString,
pcB.toString,
- maybeType,
- maybeFloatEpsilon,
- maybeDoubleEpsilon
+ None,
+ None,
+ None
)
thisDiff
}
diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml
index 7d71f77ef9..9e88bc3126 100644
--- a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml
+++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml
@@ -83,4 +83,16 @@
Value '=invalid field' is not facet-valid
+
+
+
+ stringAsXml/namespaced/binMessage_01a.dat
+
+
+ stringAsXml/namespaced/binMessage_01a.dat.xml
+
+
+
diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml
index c54b830fc8..921df82034 100644
--- a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml
+++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml
@@ -1,4 +1,7 @@
+
1
diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat
new file mode 100644
index 0000000000000000000000000000000000000000..1a31a12c3c5425b271dba091229a0c8adc10815c
GIT binary patch
literal 916
zcma)4L2lbH5bT++*sy?gl4ZGZP{K@O}$#{?u&CFw|hQoqrcbtOxp
zllBmusNwF+aCc&IbA3b7Ol4D2ef{Ja_7u4qt_I=Q=CzF;pI^PWdtC-3PZ?sRaHVc2*V!R)JEsR
zTGR%qtTV^lp)J(MhP@PJ!P2Y8?U&n!+s9q^3r|lB8tD$mH{f}}9q-^YI(NW97y;5k
zC0kLTrgA9h8Ewr`bE<|(3Qy;|4jRe0DxlIx_HEB?t1Wut)ig(jst
zff0N3uh3!wv(l}B!Hie+8Yl$t=l8#T+xroleQb;MFt@wih>`)>-g~UC94~|_rrK#3
zp4B__{T{ny^=Kv)?p_wquVi9RqjRhnS&=1Kt`_f&BW|SsQgazg^E!ZCYQ`_cjSMku
zt<31bxjKoWrfKHQr@1!I(Q>hfqaR=IzU2o|@vw^@$EjnutxVjh78z}AB9-goJM!Xu
gx*$tcN+tR{`#AfW)uodNt$1r`bbbOOG5J9L0<7~FZ~y=R
literal 0
HcmV?d00001
diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat.xml b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat.xml
new file mode 100644
index 0000000000..c9b89ad5ad
--- /dev/null
+++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat.xml
@@ -0,0 +1,41 @@
+
+
+
+ 1
+
+
+
+
+
+ with here is mixed content
+ spaces spaces and more mixed content
+ and more mixed content
+
+ entity references: < > & " ' ©
+ CR
LF
+CRLF
+end
+ CR
+ LF
+ CRLF
+ =invalid field
+
+
+
+ 5
+
+
+ 2
+
+
+
+
+
+ 5
+
+
+
+
+ 1
+
+
diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessage.dfdl.xsd b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessage.dfdl.xsd
index a7d456c16f..1f27c8dcdb 100644
--- a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessage.dfdl.xsd
+++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessage.dfdl.xsd
@@ -72,5 +72,31 @@
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessageWithXmlPayload.xsd b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessageWithXmlPayload.xsd
index 3b46c663ca..3118b06280 100644
--- a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessageWithXmlPayload.xsd
+++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessageWithXmlPayload.xsd
@@ -50,6 +50,33 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/stringAsXmlWrapper.xsd b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/stringAsXmlWrapper.xsd
index 3c7cd0bffe..87ad24db76 100644
--- a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/stringAsXmlWrapper.xsd
+++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/stringAsXmlWrapper.xsd
@@ -77,6 +77,18 @@ validate XML embedded in data and subsequently embedded into the infoset.
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/xmlPayload.xsd b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/xmlPayload.xsd
index 2f92d344c5..870d89b890 100644
--- a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/xmlPayload.xsd
+++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/xmlPayload.xsd
@@ -42,5 +42,13 @@
+
+
+
+
+
+
+
+
diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlTDML.scala b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlTDML.scala
index 62f9392338..0495d584c6 100644
--- a/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlTDML.scala
+++ b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlTDML.scala
@@ -34,4 +34,5 @@ class TestStringAsXmlTDML extends TdmlTests {
@Test def stringAsXml_04 = test
@Test def stringAsXml_09 = test
@Test def stringAsXml_10 = test
+ @Test def stringAsXml_11 = test
}
From ee366fbe64b50f86346ccd82c2f5fb6eebbc8d82 Mon Sep 17 00:00:00 2001
From: olabusayoT <50379531+olabusayoT@users.noreply.github.com>
Date: Thu, 21 May 2026 13:11:26 -0400
Subject: [PATCH 7/7] fixup! fixup! fixup! fixup! fixup! fixup! fixup!
- add new test files to rat
- reformat code
- add missing comment to test file
---
.../org/apache/daffodil/lib/xml/XMLUtils.scala | 15 ++++++++-------
.../stringAsXml/namespaced/binMessage_01.dat | Bin 821 -> 790 bytes
.../namespaced/binMessage_01.dat.xml | 5 +----
.../namespaced/binMessage_01.dat.xml.dat | Bin 776 -> 790 bytes
.../stringAsXml/namespaced/binMessage_01a.dat | Bin 916 -> 930 bytes
.../namespaced/binMessage_01a.dat.xml | 2 +-
project/Rat.scala | 6 ++++++
7 files changed, 16 insertions(+), 12 deletions(-)
diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
index fb0b706fbe..d2a8179126 100644
--- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
+++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
@@ -601,7 +601,7 @@ object XMLUtils {
def removeComments(e: Node): Node = {
e match {
- case x : Elem if isStringAsXmlElem(x) => x
+ case x: Elem if isStringAsXmlElem(x) => x
case Elem(prefix, label, attribs, scope, child*) => {
val newChildren = child.filterNot { _.isInstanceOf[Comment] }.map { removeComments(_) }
Elem(prefix, label, attribs, scope, true, newChildren*)
@@ -644,12 +644,13 @@ object XMLUtils {
private def isStringAsXmlElem(ns: Node): Boolean = {
ns match {
case e @ Elem(
- null,
- XMLTextInfoset.stringAsXml,
- Null,
- NamespaceBinding(null, null | "", _),
- _*
- ) => true
+ null,
+ XMLTextInfoset.stringAsXml,
+ Null,
+ NamespaceBinding(null, null | "", _),
+ _*
+ ) =>
+ true
case _ => false
}
}
diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat
index bdb4f0b65c4fc391ab49ae2e5840a76ea33fcb9e..c5b1045188d2b25cdf5e23046c69b2601944dc20 100644
GIT binary patch
delta 280
zcmdnWHjT~8kju=>fXl|dA~#2&EVZaOGe6H(-B8a!T_H6uIX@*cFWpu>G{jBULfzhu
z%cj7pC_leM0jM~y*s9Q0skA81svxm4CqFSoX=0DGiIJ_6iIPHMNlB5Rt&&1nVoqtQ
zf|4DVf`W~ruC79IeqKppW?r#^bC8c)u|i^AiUN?{cvzQ_&rU%tF}J{4K}kVf;n2#-
z{7iBd6)Dy>X_=`xDRw??Hu?}27gPjf08C6x8K@E_k(!qR)GWiuHMxsPbMh7@Z9P*i
PBQ8TjV!b%|1(PoTsl-&7
delta 305
zcmbQnww0~kkjvcIkjuutA~#2&EVZaOGe6H($xzQgNg*{aIX@*cFWpu>G{jBULfzhu
zOF@ClM$yIJIV3W`RiP+9zeFJ#C}yMR>KEb}5~+}vn47AgF0a_C(3V%dv?$N2Ah9whKQTppqL;Lcp{=??Sz=CUs)9O5
zzm1}04C}jgd%Qi4VIj&z@)~=JK3E{6G#>?X>$U7Y|3TCWo|h6
HB$F=yunJTH
diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml
index 921df82034..669b020959 100644
--- a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml
+++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml
@@ -1,14 +1,11 @@
-
1
-
+
with here is mixed content
spaces spaces and more mixed content
diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml.dat b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml.dat
index 92703251fc6ec21860a3cbd5b8c6c14a95052eb7..c5b1045188d2b25cdf5e23046c69b2601944dc20 100644
GIT binary patch
delta 38
scmeBRo5sd!$Yo|`Fp;%EB(EefGp|^|ImpMYSRpYlMFB`}?5<@30MC;Ps{jB1
delta 24
fcmbQn*1^VV$Yo|?Hj%Y~EjKqeHLqmj%vvS@P_YMo
diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat
index 1a31a12c3c5425b271dba091229a0c8adc10815c..9ea0e13f4b93a72dd20924a47cda359a016d5730 100644
GIT binary patch
delta 38
scmbQjzKEUGkju=>U?OXSNM1=|W?r#^bC8c)u|i^AiUN?{*v-id0Mcv=QUCw|
delta 24
fcmZ3)K82mtkju=(Y$9s|TW)S{YF^34nVif3QXU5-
diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat.xml b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat.xml
index c9b89ad5ad..875d7f4ceb 100644
--- a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat.xml
+++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat.xml
@@ -5,7 +5,7 @@
-
+
with here is mixed content
spaces spaces and more mixed content
diff --git a/project/Rat.scala b/project/Rat.scala
index 825a661523..e39a695743 100644
--- a/project/Rat.scala
+++ b/project/Rat.scala
@@ -123,9 +123,15 @@ object Rat {
file(
"daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat"
),
+ file(
+ "daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat"
+ ),
file(
"daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml"
),
+ file(
+ "daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat.xml"
+ ),
file(
"daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml.dat"
),