diff --git a/.gitattributes b/.gitattributes index b49c2777e5..cf1911d4fc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -14,4 +14,6 @@ # limitations under the License. # Do not include KEYS in archived source releases -/KEYS export-ignore +/KEYS export-ignore +# ensure stringAsXml file line endings are not normalized in windows +/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml -text \ No newline at end of file diff --git a/daffodil-core/src/main/resources/org/apache/daffodil/xsd/tdml.xsd b/daffodil-core/src/main/resources/org/apache/daffodil/xsd/tdml.xsd index 8a8441a1e2..0b6397666f 100644 --- a/daffodil-core/src/main/resources/org/apache/daffodil/xsd/tdml.xsd +++ b/daffodil-core/src/main/resources/org/apache/daffodil/xsd/tdml.xsd @@ -224,11 +224,21 @@ - - - - - + + + + + + + + + + + + + + + diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala index 8c1af3f61d..a8c97963cd 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala @@ -94,13 +94,16 @@ object Position { * behavior of normalizing CRLF to LF, and solitary CR to LF. * Defaults to true. Should only be changed in special circumstances * as not normalizing CRLFs is non-standard for XML. - * + * @param removeComments True to remove comments. This is used to keep the XML as close to the original as possible + * @param removeProcInstr True to remove processing instructions. This is used to keep the XML as close to the original as possible */ class DaffodilConstructingLoader private[xml] ( uri: URI, errorHandler: org.xml.sax.ErrorHandler, addPositionAttributes: Boolean, - normalizeCRLFtoLF: Boolean + normalizeCRLFtoLF: Boolean, + removeComments: Boolean, + removeProcInstr: Boolean ) extends ConstructingParser( { // Note: we must open the XML carefully since it might be in some non @@ -122,7 +125,14 @@ class DaffodilConstructingLoader private[xml] ( errorHandler: org.xml.sax.ErrorHandler, addPositionAttributes: Boolean = false ) = - this(uri, errorHandler, addPositionAttributes, normalizeCRLFtoLF = true) + this( + uri, + errorHandler, + addPositionAttributes, + normalizeCRLFtoLF = true, + removeComments = true, + removeProcInstr = true + ) /** * Ensures that DOCTYPES aka DTDs, if encountered, are rejected. @@ -316,19 +326,30 @@ class DaffodilConstructingLoader private[xml] ( } /** - * Drops comments + * Drops comments if removeComments is true + * + * This is optional controlled by a constructor parameter. */ override def comment(pos: Int, s: String): Comment = { - // returning null drops comments - null + if (removeComments) { + // returning null drops comments + null + } else { + super.comment(pos, s) + } } /** - * Drops processing instructions + * Drops processing instructions if removeProcInstr is false + * + * This is optional controlled by a constructor parameter. */ override def procInstr(pos: Int, target: String, txt: String) = { - // returning null drops processing instructions - null + if (removeProcInstr) { // returning null drops processing instructions + null + } else { + super.procInstr(pos, target, txt) + } } private def parseXMLPrologAttributes( diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala index 0b32d1accc..c250dfcd03 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala @@ -702,31 +702,20 @@ class DaffodilXMLLoader(val errorHandler: org.xml.sax.ErrorHandler) * @param optSchemaURI Optional URI for XML schema for the XML source document. * @param addPositionAttributes True to add dafint:file dafint:line attributes to all elements. * Defaults to false. - * @return an scala.xml.Node (Element actually) which is the document element of the source. - */ - def load( - source: DaffodilSchemaSource, - optSchemaURI: Option[URI], - addPositionAttributes: Boolean = false - ): scala.xml.Node = - load(source, optSchemaURI, addPositionAttributes, normalizeCRLFtoLF = true) - - /** - * package private constructor gives access to normalizeCRLFtoLF feature. - * - * @param source The URI for the XML document which may be a XML or DFDL schema, or just XML data. - * @param optSchemaURI Optional URI for XML schema for the XML source document. - * @param addPositionAttributes True to add dafint:file dafint:line attributes to all elements. - * Defaults to false. * @param normalizeCRLFtoLF True to normalize CRLF and isolated CR to LF. This should usually be true, * but some special case situations may require preservation of CRLF/CR. + * @param removeComments True to remove comments. This is used to keep the XML as close to the original as possible + * @param removeProcInstr True to remove processing instructions. This is used to keep the XML as close to the original as possible + * * @return an scala.xml.Node (Element actually) which is the document element of the source. */ - private[xml] def load( + def load( source: DaffodilSchemaSource, optSchemaURI: Option[URI], - addPositionAttributes: Boolean, - normalizeCRLFtoLF: Boolean + addPositionAttributes: Boolean = false, + normalizeCRLFtoLF: Boolean = true, + removeComments: Boolean = true, + removeProcInstr: Boolean = true ): scala.xml.Node = { // // First we invoke the validator to explicitly validate the XML against @@ -819,7 +808,9 @@ class DaffodilXMLLoader(val errorHandler: org.xml.sax.ErrorHandler) source.uriForLoading, errorHandler, addPositionAttributes, - normalizeCRLFtoLF + normalizeCRLFtoLF, + removeComments, + removeProcInstr ) val res = try { diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala index da67ac01bb..d2a8179126 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala @@ -42,6 +42,8 @@ import org.apache.daffodil.lib.iapi.URISchemaSource import org.apache.daffodil.lib.schema.annotation.props.LookupLocation import org.apache.daffodil.lib.util.Maybe import org.apache.daffodil.lib.util.Misc +import org.apache.daffodil.runtime1.infoset.InvalidInfosetException +import org.apache.daffodil.runtime1.infoset.XMLTextInfoset import org.apache.commons.io.IOUtils import org.xml.sax.XMLReader @@ -599,6 +601,7 @@ object XMLUtils { def removeComments(e: Node): Node = { e match { + case x: Elem if isStringAsXmlElem(x) => x case Elem(prefix, label, attribs, scope, child*) => { val newChildren = child.filterNot { _.isInstanceOf[Comment] }.map { removeComments(_) } Elem(prefix, label, attribs, scope, true, newChildren*) @@ -638,40 +641,108 @@ object XMLUtils { res } + private def isStringAsXmlElem(ns: Node): Boolean = { + ns match { + case e @ Elem( + null, + XMLTextInfoset.stringAsXml, + Null, + NamespaceBinding(null, null | "", _), + _* + ) => + true + case _ => false + } + } + + /** + * normalizes CRLF to LF within text nodes in non-stringAsXML elements + * + * Some fields in infosets could contain LFs, but could be changed to CRLF + * in Windows due to git's autocrlf feature. And since infoset outputters + * always output LF we need to undo with git might do and normalize those CRLF's + * to LF. + */ + private def normalizeCRLFtoLF(ns: Node): Node = { + ns match { + // NOTE: this is specifically for the stringAsXml feature as we avoid + // making changes to any of its children requiring that stringAsXml in + // the infoset match results exactly. + case e: Elem if isStringAsXmlElem(e) => e + case e: Elem => { + val children = e.child + val normalized = children.map(normalizeCRLFtoLF) + val res = { + if (normalized eq children) e + else e.copy(child = normalized) + } + res + } + case Text(data) if data.contains("\r") => { + val replaced = data.replaceAll("\r\n", "\n").replaceAll("\r", "\n") + Text(replaced) + } + case _ => ns + } + } + /** * removes insignificant whitespace from between elements */ private def removeMixedWhitespace(ns: Node): Node = { - if (!ns.isInstanceOf[Elem]) return ns - val e = ns.asInstanceOf[Elem] - val children = e.child - val noMixedChildren = - if (children.exists(_.isInstanceOf[Elem])) { - children - .filter { - case Text(data) if data.matches("""\s*""") => false - case Text(data) => - throw new Exception("Element %s contains mixed data: %s".format(e.label, data)) - case _ => true - } - .map(removeMixedWhitespace) - } else { - children.filter { - // - // So this is a bit strange, but we're dropping nodes that are Empty String. - // - // In XML we cannot tell where there is a Text("") child, from with Nil children - // - case Text("") => false // drop empty strings - case _ => true + ns match { + // NOTE: this is specifically for the stringAsXml feature as we avoid + // making changes to any of its children except removing any surrounding + // whitespace, requiring that stringAsXml in the infoset match results exactly. + case e: Elem if isStringAsXmlElem(e) => { + val (elemChildren, nonElemChildren) = e.child.partition { + _.isInstanceOf[Elem] + } + if (elemChildren.length != 1) + throw new InvalidInfosetException("stringAsXml must contain a single child element.") + nonElemChildren.foreach { + case Text(data) if data.matches("""\s*""") => // no-op, empty text siblings are fine + case x => + throw new Exception( + "%s is some kind of mixed content not allowed as a stringAsXml child".format(x) + ) } + e.asInstanceOf[Elem].copy(child = elemChildren) } + case e: Elem => { + val children = e.child + val noMixedChildren = + if (children.exists(_.isInstanceOf[Elem])) { + children + .filter { + case Text(data) if data.matches("""\s*""") => false + case Text(data) => + throw new Exception( + "Element %s contains mixed data: %s".format(e.label, data) + ) + case _ => true + } + .map(removeMixedWhitespace) + } else { + children.filter { + // + // So this is a bit strange, but we're dropping nodes that are Empty String. + // + // In XML we cannot tell where there is a Text("") child, from with Nil children + // + case Text("") => false // drop empty strings + case _ => true + } + } - val res = - if (noMixedChildren eq children) e - else e.copy(child = noMixedChildren) - res + val res = + if (noMixedChildren eq children) e + else e.copy(child = noMixedChildren) + res + } + case _ => ns + } } /** @@ -700,6 +771,15 @@ object XMLUtils { ): NodeSeq = { val res = n match { + case e @ Elem( + null, + XMLTextInfoset.stringAsXml, + Null, + NamespaceBinding(null, null | "", _), + _* + ) => + e + case e @ Elem(prefix, label, attributes, scope, children*) => { val filteredScope = if (ns.length > 0) filterScope(scope, ns) else xml.TopScope @@ -808,7 +888,8 @@ object XMLUtils { val noPCData = convertPCDataToText(noComments) val combinedText = coalesceAllAdjacentTextNodes(noPCData) val noMixedWS = removeMixedWhitespace(combinedText) - noMixedWS + val noCRLFs = normalizeCRLFtoLF(noMixedWS) + noCRLFs } class XMLDifferenceException(message: String) extends Exception(message) @@ -973,6 +1054,15 @@ Differences were (path, expected, actual): } else if (checkPrefixes && prefixA != prefixB) { // different prefix List((zPath + "/" + labelA + "@prefix", prefixA, prefixB)) + } else if (checkPrefixes && a.scope.getURI(prefixA) != b.scope.getURI(prefixB)) { + // prefixes doesn't resolve to same namespace + List( + ( + zPath + "/" + labelA + "@prefix-namespace", + a.scope.getURI(prefixA), + b.scope.getURI(prefixB) + ) + ) } else if (checkNamespaces && mappingsA != mappingsB) { // different namespace bindings List((zPath + "/" + labelA + "@xmlns", mappingsA, mappingsB)) @@ -1055,6 +1145,28 @@ Differences were (path, expected, actual): computeTextDiff(zPath, tA, tB, maybeType, maybeFloatEpsilon, maybeDoubleEpsilon) thisDiff } + case (cA: Comment, cB: Comment) => { + val thisDiff = computeTextDiff( + zPath + "/@comment", + cA.toString, + cB.toString, + None, + None, + None + ) + thisDiff + } + case (pcA: PCData, pcB: PCData) => { + val thisDiff = computeTextDiff( + zPath + "/@PCDATA", + pcA.toString, + pcB.toString, + None, + None, + None + ) + thisDiff + } case (pA: ProcInstr, pB: ProcInstr) => { val ProcInstr(tA1label, tA1content) = pA val ProcInstr(tB1label, tB1content) = pB diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/ScalaXMLInfosetOutputter.scala b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/ScalaXMLInfosetOutputter.scala index 4abe32f378..e30534fcc7 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/ScalaXMLInfosetOutputter.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/ScalaXMLInfosetOutputter.scala @@ -19,9 +19,7 @@ package org.apache.daffodil.runtime1.infoset import scala.collection.mutable.ListBuffer import scala.xml.MetaData -import scala.xml.NamespaceBinding import scala.xml.Null -import scala.xml.PrefixedAttribute import scala.xml.UnprefixedAttribute import org.apache.daffodil.api.DFDLPrimType @@ -56,16 +54,6 @@ class ScalaXMLInfosetOutputter(showFreedInfo: Boolean = false) resultNode = Maybe(root(0)) } - private def getScope(diElem: DIElement): NamespaceBinding = { - val minScope = diElem.metadata.minimizedScope - // if including xsi:type is enabled, ensure the xsi namespace is defined on the root element - if (getIncludeDataType() && stack.length == 1 && minScope.getURI("xsi") == null) { - NamespaceBinding("xsi", XMLUtils.XSI_NAMESPACE, minScope) - } else { - minScope - } - } - private def getAttributes(diElem: DIElement): MetaData = { val nilAttr = if (diElem.isNilled) XMLUtils.xmlNilAttribute else Null val freedAttr = @@ -92,14 +80,7 @@ class ScalaXMLInfosetOutputter(showFreedInfo: Boolean = false) } else { nilAttr } - val typedAttr = - if (getIncludeDataType() && diElem.isSimple) { - val primName = diElem.erd.optPrimType.get.name - new PrefixedAttribute("xsi", "type", "xs:" + primName, freedAttr) - } else { - freedAttr - } - typedAttr + freedAttr } override def startSimple(se: InfosetSimpleElement): Unit = { @@ -124,7 +105,7 @@ class ScalaXMLInfosetOutputter(showFreedInfo: Boolean = false) diSimple.metadata.prefix, diSimple.metadata.name, attributes, - getScope(diSimple), + diSimple.metadata.minimizedScope, minimizeEmpty = true, children* ) @@ -149,7 +130,7 @@ class ScalaXMLInfosetOutputter(showFreedInfo: Boolean = false) diComplex.metadata.prefix, diComplex.metadata.name, attributes, - getScope(diComplex), + diComplex.metadata.minimizedScope, minimizeEmpty = true, children* ) diff --git a/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLLoader.scala b/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLLoader.scala index 3cb5caeb3e..1471da3a3d 100644 --- a/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLLoader.scala +++ b/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLLoader.scala @@ -172,9 +172,19 @@ class TestXMLLoader { // and toString will print them out into the text with the preserved. // val xmlFromDafLoaderNonNormalized = - loader.load(ss, None, addPositionAttributes = false, normalizeCRLFtoLF = false) + loader.load( + ss, + None, + addPositionAttributes = false, + normalizeCRLFtoLF = false + ) val xmlFromDafLoaderNormalized = - loader.load(ss, None, addPositionAttributes = false, normalizeCRLFtoLF = true) + loader.load( + ss, + None, + addPositionAttributes = false, + normalizeCRLFtoLF = true + ) { // compare to the regular scala XML loader diff --git a/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLUtils.scala b/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLUtils.scala index 3e70d7129d..2c9690f4b1 100644 --- a/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLUtils.scala +++ b/daffodil-core/src/test/scala/org/apache/daffodil/lib/xml/test/unit/TestXMLUtils.scala @@ -98,6 +98,17 @@ class TestXMLUtils { assertEquals("ns2", b) } + @Test def testPrefixNSDiff(): Unit = { + // different prefix should error, even though the namespace is the same + val d1 = a + val d2 = a + val diffs = XMLUtils.computeDiff(d1, d2, checkPrefixes = true) + val Seq((path, a, b)) = diffs + assertEquals("/a@prefix-namespace", path) + assertEquals("someprefix", a) + assertEquals("someotherprefix", b) + } + @Test def testNamespaceDiff(): Unit = { // different namespace mappings should error val d1 = a diff --git a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala index 0549c61ac1..f36a2a4909 100644 --- a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala +++ b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala @@ -2800,7 +2800,13 @@ case class DFDLInfoset(di: Node, parent: Infoset) { val testSuite = testCase.parent val before = testSuite.loadingExceptions.clone() - val elem = loader.load(infosetSrc, None) // no schema + val elem = loader.load( + infosetSrc, + None, + normalizeCRLFtoLF = false, + removeComments = false, + removeProcInstr = false + ) // no schema // // TODO: DAFFODIL-288 validate the infoset also // You can pass the optDataSchema, which appears to be the correct thing diff --git a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala index 696a1ab8e8..da0d134a67 100644 --- a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala +++ b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala @@ -41,6 +41,7 @@ import org.apache.daffodil.lib.util.MaybeULong import org.apache.daffodil.lib.xml.DaffodilSAXParserFactory import org.apache.daffodil.lib.xml.XMLUtils import org.apache.daffodil.lib.xml.XMLUtils.XMLDifferenceException +import org.apache.daffodil.processor.tdml import org.apache.daffodil.runtime1.iapi.* import org.apache.daffodil.runtime1.iapi.DFDL.DaffodilUnhandledSAXException import org.apache.daffodil.runtime1.iapi.DFDL.DaffodilUnparseContentHandler @@ -173,7 +174,7 @@ class DaffodilTDMLDFDLProcessor private[tdml] ( private def blobPrefix = "" private def blobSuffix = ".bin" - private lazy val tdmlApiInfosetsEnv = sys.env.getOrElse("DAFFODIL_TDML_API_INFOSETS", "scala") + private lazy val tdmlApiInfosetsEnv = sys.env.getOrElse("DAFFODIL_TDML_API_INFOSETS", "xml") override def withTracing(bool: Boolean): DaffodilTDMLDFDLProcessor = { copy(dp = newTracing(bool)) @@ -269,7 +270,7 @@ class DaffodilTDMLDFDLProcessor private[tdml] ( val outputter = if (tdmlApiInfosetsEnv == "all") { TDMLInfosetOutputterAll() } else { - TDMLInfosetOutputterScala() + TDMLInfosetOutputterXML() } outputter.setBlobAttributes(blobDir, blobPrefix, blobSuffix) @@ -308,7 +309,17 @@ class DaffodilTDMLDFDLProcessor private[tdml] ( xri.parse(sis) if (!actual.isError && !errorHandler.isError) { - verifySameParseOutput(outputter.xmlStream, saxOutputStream) + // we use the scala result because both the ScalaInfosetOutputter and + // the SAXInfosetOutputter do not implement stringAsXml, + // which helps to avoid any differences cause by the stringAsXml conversions. + val actualOutputArray = outputter + .asInstanceOf[tdml.TDMLInfosetOutputterAll] + .getScalaResult + .toString + .getBytes("UTF-8") + val baos = new ByteArrayOutputStream(actualOutputArray.length) + baos.write(actualOutputArray) + verifySameParseOutput(baos, saxOutputStream) } val dpParseDiag = actual.getDiagnostics.asScala.map(_.toString()).toSeq val saxParseDiag = errorHandler.getDiagnostics.asScala.map(_.toString()).toSeq @@ -392,7 +403,12 @@ class DaffodilTDMLDFDLProcessor private[tdml] ( XMLUtils.compareAndReport( dpParseXMLNodeOutput, saxParseXMLNodeOutput, - checkNamespaces = true, + // we no longer checkNamespaces because SAX outputs the same namespaces as + // the XMLTextInfosetOutputter but not the scalaXMLInfosetOutputter, so checking + // namespaces fails in the DAFFODIL_TDML_API_INFOSETS='all' case due to differences + // in the scalaXMLInfosetOutputter namespaces, probably having to do with + // minimizeScope issues + // checkNamespaces = true, checkPrefixes = true ) } catch { diff --git a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetInputter.scala b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetInputter.scala index 518f38961b..e9097b4413 100644 --- a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetInputter.scala +++ b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetInputter.scala @@ -27,32 +27,37 @@ import org.apache.daffodil.lib.util.Misc import org.apache.daffodil.lib.xml.XMLUtils import org.apache.daffodil.runtime1.dpath.NodeInfo import org.apache.daffodil.runtime1.infoset.JsonInfosetInputter -import org.apache.daffodil.runtime1.infoset.ScalaXMLInfosetInputter import org.apache.daffodil.tdml.TDMLException class TDMLInfosetInputter( - val scalaInputter: ScalaXMLInfosetInputter, + val inputter: api.infoset.InfosetInputter, others: Seq[api.infoset.InfosetInputter] ) extends api.infoset.InfosetInputter { private def implString: String = "daffodil" override def getEventType(): InfosetInputterEventType = { - val res = scalaInputter.getEventType() + val res = inputter.getEventType() if (!others.forall(_.getEventType() == res)) - throw TDMLException("getEventType does not match", Some(implString)) + throw TDMLException( + s"getEventType does not match\n${others.zip(others.map(_.getEventType)).mkString("\n")}", + Some(implString) + ) res } override def getLocalName(): String = { - val res = scalaInputter.getLocalName() + val res = inputter.getLocalName() if (!others.forall(_.getLocalName() == res)) - throw TDMLException("getLocalName does not match", Some(implString)) + throw TDMLException( + s"getLocalName does not match\n${others.zip(others.map(_.getLocalName)).mkString("\n")}", + Some(implString) + ) res } override def getNamespaceURI(): String = { - val res = scalaInputter.getNamespaceURI() + val res = inputter.getNamespaceURI() val resIsEmpty = res == null || res == "" val othersMatch = others.forall { i => if (!i.getSupportsNamespaces) { @@ -66,7 +71,10 @@ class TDMLInfosetInputter( } } if (!othersMatch) - throw TDMLException("getNamespaceURI does not match", Some(implString)) + throw TDMLException( + s"getNamespaceURI does not match\n${others.filter(_.getSupportsNamespaces).map(o => (o, o.getNamespaceURI)).mkString("\n")}", + Some(implString) + ) res } @@ -74,7 +82,7 @@ class TDMLInfosetInputter( primType: NodeInfo.Kind, runtimeProperties: java.util.Map[String, String] ): String = { - val res = scalaInputter.getSimpleText(primType, runtimeProperties) + val res = inputter.getSimpleText(primType, runtimeProperties) val resIsEmpty = res == null || res == "" val otherStrings = others.map { i => // Note in an unparserTestCase, there are no others (infoset inputters), because the input infoset is @@ -100,7 +108,10 @@ class TDMLInfosetInputter( } if (!othersmatch) - throw TDMLException("getSimpleText does not match", Some(implString)) + throw TDMLException( + s"getSimpleText does not match for $res\n${others.zip(otherStrings).mkString("\n")}", + Some(implString) + ) if (primType.isInstanceOf[NodeInfo.AnyURI.Kind]) { try { @@ -126,26 +137,32 @@ class TDMLInfosetInputter( } override def isNilled(): JBoolean = { - val res = scalaInputter.isNilled() + val res = inputter.isNilled() if (!others.forall(_.isNilled() == res)) - throw TDMLException("isNilled does not match", Some(implString)) + throw TDMLException( + s"isNilled does not match\n${others.zip(others.map(_.isNilled)).mkString("\n")}", + Some(implString) + ) res } override def hasNext(): Boolean = { - val res = scalaInputter.hasNext() + val res = inputter.hasNext() if (!others.forall(_.hasNext() == res)) - throw TDMLException("hasNext does not match", Some(implString)) + throw TDMLException( + s"hasNext does not match\n${others.zip(others.map(_.hasNext)).mkString("\n")}", + Some(implString) + ) res } override def next(): Unit = { - scalaInputter.next() + inputter.next() others.foreach(_.next()) } override def fini(): Unit = { - scalaInputter.fini() + inputter.fini() others.foreach(_.fini()) } diff --git a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala index cf913d6877..a04262955f 100644 --- a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala +++ b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala @@ -19,7 +19,6 @@ package org.apache.daffodil.processor.tdml import java.io.ByteArrayInputStream import java.io.ByteArrayOutputStream -import java.nio.charset.Charset import scala.xml.Node import org.apache.daffodil.api @@ -36,29 +35,27 @@ import org.apache.daffodil.runtime1.infoset.W3CDOMInfosetOutputter import org.apache.daffodil.runtime1.infoset.XMLTextInfosetInputter import org.apache.daffodil.runtime1.infoset.XMLTextInfosetOutputter -class TDMLInfosetOutputterScala(scalaOut: ScalaXMLInfosetOutputter) - extends TeeInfosetOutputter(Seq(scalaOut)*) - with TDMLInfosetOutputter { +object TDMLInfosetOutputterXML { + def apply(): TDMLInfosetOutputterXML = { + val baos = new ByteArrayOutputStream() + val xmlOut = new XMLTextInfosetOutputter(baos, false) + xmlOut.setIncludeDataType(true) + new TDMLInfosetOutputterXML(baos, xmlOut) + } +} - override def getResult: Node = scalaOut.getResult() +class TDMLInfosetOutputterXML( + override val xmlStream: ByteArrayOutputStream, + xmlOut: XMLTextInfosetOutputter +) extends TeeInfosetOutputter(Seq(xmlOut)*) + with TDMLInfosetOutputter { - override lazy val xmlStream: ByteArrayOutputStream = { - val bos = new ByteArrayOutputStream() - bos.write(getResult.toString().getBytes(Charset.defaultCharset())) - bos - } + override def getResult: Node = + scala.xml.XML.load(new ByteArrayInputStream(xmlStream.toByteArray)) override def toInfosetInputter: TDMLInfosetInputter = { - val scalaIn = new ScalaXMLInfosetInputter(scalaOut.getResult()) - new TDMLInfosetInputter(scalaIn, Seq()) - } -} - -object TDMLInfosetOutputterScala { - def apply(): TDMLInfosetOutputterScala = { - val scalaOut = new ScalaXMLInfosetOutputter() - scalaOut.setIncludeDataType(true) - new TDMLInfosetOutputterScala(scalaOut) + val xmlIn = new XMLTextInfosetInputter(new ByteArrayInputStream(xmlStream.toByteArray)) + new TDMLInfosetInputter(xmlIn, Seq()) } } @@ -73,7 +70,9 @@ class TDMLInfosetOutputterAll( ) extends TeeInfosetOutputter(Seq(xmlOut, scalaOut, jdomOut, w3cdomOut, jsonOut)*) with TDMLInfosetOutputter { - override def getResult: Node = scalaOut.getResult() + def getScalaResult: Node = scalaOut.getResult() + override def getResult: Node = + scala.xml.XML.load(new ByteArrayInputStream(xmlStream.toByteArray)) override def toInfosetInputter: TDMLInfosetInputter = { val scalaIn = new ScalaXMLInfosetInputter(scalaOut.getResult()) @@ -82,10 +81,14 @@ class TDMLInfosetOutputterAll( val jsonIn = new JsonInfosetInputter(new ByteArrayInputStream(jsonStream.toByteArray)) val xmlIn = new XMLTextInfosetInputter(new ByteArrayInputStream(xmlStream.toByteArray)) val nullIn = { - val events = NullInfosetInputter.toEvents(new ByteArrayInputStream(xmlStream.toByteArray)) + val events = NullInfosetInputter.toEvents( + new ByteArrayInputStream( + scalaOut.getResult().toString().getBytes("UTF-8") + ) + ) new NullInfosetInputter(events) } - new TDMLInfosetInputter(scalaIn, Seq(jdomIn, w3cdomIn, jsonIn, xmlIn, nullIn)) + new TDMLInfosetInputter(xmlIn, Seq(jdomIn, w3cdomIn, jsonIn, scalaIn, nullIn)) } } @@ -100,7 +103,7 @@ object TDMLInfosetOutputterAll { val jsonOut = new JsonInfosetOutputter(jsonStream, false) val xmlOut = new XMLTextInfosetOutputter(xmlStream, false) - Seq(scalaOut, jdomOut, w3cdomOut, jsonOut, xmlOut).foreach { out => + Seq(jdomOut, w3cdomOut, jsonOut, xmlOut).foreach { out => out.setIncludeDataType(true) } diff --git a/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLITdml.scala b/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLITdml.scala index 1f0e062f32..046fbe6fb2 100644 --- a/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLITdml.scala +++ b/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLITdml.scala @@ -45,7 +45,7 @@ class TestCLITdml { "daffodil-test/src/test/resources/org/apache/daffodil/section06/entities/Entities.tdml" ) - val envs = Map("DAFFODIL_TDML_API_INFOSETS" -> "scala") + val envs = Map("DAFFODIL_TDML_API_INFOSETS" -> "xml") runCLI(args"test -i -t $tdml byte_entities_6_08", envs = envs) { cli => // parse diff --git a/daffodil-test/src/test/resources/META-INF/services/org.apache.daffodil.api.validation.ValidatorFactory b/daffodil-test/src/test/resources/META-INF/services/org.apache.daffodil.api.validation.ValidatorFactory new file mode 100644 index 0000000000..a12f49a78a --- /dev/null +++ b/daffodil-test/src/test/resources/META-INF/services/org.apache.daffodil.api.validation.ValidatorFactory @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +org.apache.daffodil.infoset.TestStringAsXmlValidatorFactory diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml new file mode 100644 index 0000000000..9e88bc3126 --- /dev/null +++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXML.tdml @@ -0,0 +1,98 @@ + + + + + + + + stringAsXml/namespaced/binMessage_01.dat + + + stringAsXml/namespaced/binMessage_01.dat.xml + + + + + + stringAsXml/namespaced/binMessage_01.dat + + + stringAsXml/namespaced/binMessage_01.dat.xml + + + Element 'xmlStr' is a simple type + + + + + + stringAsXml/namespaced/binMessage_03.dat + + + Unexpected character + + + + + + stringAsXml/namespaced/binMessage_08.dat + + + Undeclared general entity "name" + + + + + + + stringAsXml/nonamespace/binMessage_01.dat + + + stringAsXml/nonamespace/binMessage_01.dat.xml + + + Value '=invalid field' is not facet-valid + + + + + + stringAsXml/namespaced/binMessage_01a.dat + + + stringAsXml/namespaced/binMessage_01a.dat.xml + + + + diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat index bdb4f0b65c..c5b1045188 100644 Binary files a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat and b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat differ diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml index c54b830fc8..669b020959 100644 --- a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml +++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml @@ -5,7 +5,7 @@ - + with here is mixed content spaces spaces and more mixed content diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml.dat b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml.dat index 92703251fc..c5b1045188 100644 Binary files a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml.dat and b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml.dat differ diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat new file mode 100644 index 0000000000..9ea0e13f4b Binary files /dev/null and b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat differ diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat.xml b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat.xml new file mode 100644 index 0000000000..875d7f4ceb --- /dev/null +++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat.xml @@ -0,0 +1,41 @@ + + + + 1 + + + + + + with here is mixed content + spaces spaces and more mixed content + and more mixed content + + entity references: < > & " ' © + CR LF +CRLF +end + CR + LF + CRLF + =invalid field + + + + 5 + + + 2 + + + + + + 5 + + + + + 1 + + diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessage.dfdl.xsd b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessage.dfdl.xsd index a7d456c16f..1f27c8dcdb 100644 --- a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessage.dfdl.xsd +++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessage.dfdl.xsd @@ -72,5 +72,31 @@ - + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessageWithXmlPayload.xsd b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessageWithXmlPayload.xsd index 3b46c663ca..3118b06280 100644 --- a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessageWithXmlPayload.xsd +++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/binMessageWithXmlPayload.xsd @@ -50,6 +50,33 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/stringAsXmlWrapper.xsd b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/stringAsXmlWrapper.xsd index 3c7cd0bffe..87ad24db76 100644 --- a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/stringAsXmlWrapper.xsd +++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/stringAsXmlWrapper.xsd @@ -77,6 +77,18 @@ validate XML embedded in data and subsequently embedded into the infoset. + + + + + + + + + + + + diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/xmlPayload.xsd b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/xmlPayload.xsd index 2f92d344c5..870d89b890 100644 --- a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/xmlPayload.xsd +++ b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/xsd/xmlPayload.xsd @@ -42,5 +42,13 @@ + + + + + + + + diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/section07/variables/variables_01.tdml b/daffodil-test/src/test/resources/org/apache/daffodil/section07/variables/variables_01.tdml index 6d636c6af1..545f47029f 100644 --- a/daffodil-test/src/test/resources/org/apache/daffodil/section07/variables/variables_01.tdml +++ b/daffodil-test/src/test/resources/org/apache/daffodil/section07/variables/variables_01.tdml @@ -48,7 +48,7 @@ - 42 + 42 diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlTDML.scala b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlTDML.scala new file mode 100644 index 0000000000..0495d584c6 --- /dev/null +++ b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlTDML.scala @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.daffodil.infoset + +import org.apache.daffodil.junit.tdml.TdmlSuite +import org.apache.daffodil.junit.tdml.TdmlTests + +import org.junit.Test + +object TestStringAsXmlTDML extends TdmlSuite { + val tdmlResource = "/org/apache/daffodil/infoset/stringAsXML.tdml" +} + +class TestStringAsXmlTDML extends TdmlTests { + val tdmlSuite = TestStringAsXmlTDML + + @Test def stringAsXml_01_a = test + @Test def stringAsXml_01_b = test + @Test def stringAsXml_04 = test + @Test def stringAsXml_09 = test + @Test def stringAsXml_10 = test + @Test def stringAsXml_11 = test +} diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidator.scala b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidator.scala new file mode 100644 index 0000000000..560329ad87 --- /dev/null +++ b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXmlValidator.scala @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.daffodil.infoset + +import java.io.InputStream +import java.net.URL +import java.util.Properties + +import org.apache.daffodil.api.validation.ValidationHandler +import org.apache.daffodil.api.validation.Validator +import org.apache.daffodil.api.validation.ValidatorFactory +import org.apache.daffodil.validation.XercesValidator + +object TestStringAsXmlValidator { + val name = "TestStringAsXmlValidator" +} + +class TestStringAsXmlValidator(schemaURL: String) extends Validator { + private lazy val xercesValidator = XercesValidator.fromURL(new URL(schemaURL)) + + override def validateXML(document: InputStream, vh: ValidationHandler): Unit = { + xercesValidator.validateXML(document, vh) + } +} + +class TestStringAsXmlValidatorFactory extends ValidatorFactory { + + override def name: String = TestStringAsXmlValidator.name + + override def make(config: Properties) = { + val dfdlSchema = config.getProperty(name) + // assumes the validation XSD path is in the same as the DFDL schema but with a different suffix + val xsdSchema = dfdlSchema.replace(".dfdl.xsd", "WithXmlPayload.xsd") + new TestStringAsXmlValidator(xsdSchema) + } +} diff --git a/project/Rat.scala b/project/Rat.scala index 825a661523..e39a695743 100644 --- a/project/Rat.scala +++ b/project/Rat.scala @@ -123,9 +123,15 @@ object Rat { file( "daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat" ), + file( + "daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat" + ), file( "daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml" ), + file( + "daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01a.dat.xml" + ), file( "daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml.dat" ),