#!perl
# working test script to explore MSXML usage.
##################################################
use strict;
use Win32::OLE;
Win32::OLE->Option(Warn => 2);
use XML::XPath;
$| = 1;
my $contentType;
my $inputXML = $ARGV[0];
if ($inputXML =~ /OVAL/) { $contentType = 'OVAL'; }
# Verify that the input XML file exists.
unless (-e $inputXML) { print "File $inputXML does not exist."; exit; }
# Load the root node.
my $nodes = XML::XPath->new($inputXML);
my $nodeSet = $nodes->find("/");
# Ensure that we only find one node.
if ($nodeSet->size < 1) {
print "Root node not found during validation of $inputXML.";
exit;
}
elsif ($nodeSet->size > 1) {
print "More than one root node found during validation of $inputXML.";
exit;
}
print "Root node exists\n";
# Get the root node.
my $rootNode = $nodeSet->get_node(1);
# Get the node as a string.
my $rootNodeString = $rootNode->toString(1);
my $schemaLocations;
my %schemaLocations;
if ($rootNodeString =~ /schemaLocation\s*=\s*"(.*?)"/i) { $schemaLocations = $1; }
else { print "Could not find schemaLocation in $inputXML."; exit; }
print "schemaLocation found.\n";
# print "$schemaLocations\n";
# '!spaces' then 'space(s)' then '!spaces' then 'space(s)'
my $num_schemata;
while ($schemaLocations =~ /\s*([^\s]+?)\s+([^\s]+)\s*/g) { $schemaLocations{$1} = $2; $num_schemata++; }
if ($num_schemata > 0) { print "Successfully built schema locations hash.\n"; }
else { die "SchemaLocations hash is empty.\n"; }
# Verify that all of the files exist in our schema directory.
foreach my $uri (keys(%schemaLocations) )
{
my $sl = $schemaLocations{$uri};
$sl =~ s/.*[\\\/](.*$)/$1/gis;
if ($contentType eq 'OVAL') { $sl = '../Schemas/oval_5.7/'. $sl; }
else { $sl = './Schemas/'. $sl; }
if (!(-e $sl)) { print "Could not find schema " . $sl. ".\n"; }
else { $schemaLocations{$uri} = $sl; } # Now, points to local copy of the schema
}
# Create a schema cache. MSXML 6.0 must be installed, 3.0 will not work.
my $schemaCache = Win32::OLE->new("MSXML2.XMLSchemaCache.6.0");
unless ($schemaCache) { die "MSXML 6.0 is not installed."; }
print "MSXML 6.0 is installed and schemaCache created.\n";
# Set properties of the schema cache.
$schemaCache->{async} = "false";
$schemaCache->{validateOnParse} = "1";
$schemaCache->{validateOnLoad} = "1";
# Load each schema into the cache.
foreach my $uri (keys(%schemaLocations)) { $schemaCache->add($uri, $schemaLocations{$uri}); }
$schemaCache->add('garbage');
# Create a DOM object. MSXML 6.0 must be installed, 3.0 will not work.
my $xmlDoc = Win32::OLE->new('MSXML2.DOMDocument.6.0');
unless ($xmlDoc) { die "MSXML 6.0 is not installed."; }
print "Setting DOM object properties.\n";
$xmlDoc->{async} = "false";
$xmlDoc->{validateOnParse} = "true";
$xmlDoc->{schemas} = $schemaCache;
# All required schema references have been addressed.
# Remove 'schemaLocation from root element to absolutely prevent
# any possible redundant use by MSXML libs
my $xmlString;
open(XML,"<$inputXML") or die "Failed to open XML file, $!\n";
while () { $xmlString .= $_; }
close XML;
if ($xmlString =~ s/xsi:schemaLocation\s*=\s*".*?"\s*//is) { print "Nulled out schemaLocation.\n"; }
else { print "Failed to null out schemaLocation\n"; }
print "Validating the input XML file against the cached schemas.\n";
unless ($xmlDoc->LoadXML("$xmlString"))
{
my $Rs = $xmlDoc->{parseError}->{reason};
my $Ln = $xmlDoc->{parseError}->{line};
my $Ps = $xmlDoc->{parseError}->{linePos};
my $Tx = $xmlDoc->{parseError}->{srcText};
print "Did not load at line $Ln, pos $Ps, reason: $Rs, text: $Tx\n";
my $parseError = $xmlDoc->{parseError};
exit;
}
print "XML load successful, XML file is validated.\n";
No comments:
Post a Comment