// blob: fa81813f637ed90b9cc20886c3f6ac33cc6686e4 [file] [log] [blame]
/*
******************************************************************************
* Copyright (C) 2003, International Business Machines Corporation and *
* others. All Rights Reserved. *
******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/tool/xmlcomparator/Attic/XMLValidator.java,v $
* $Date: 2003/05/14 18:37:16 $
* $Revision: 1.2 $
*
******************************************************************************
*/
package com.ibm.icu.dev.tool.xmlcomparator;
/**
* @author Ram
*
* To change this generated comment edit the template variable "typecomment":
* Window>Preferences>Java>Templates.
* To enable and disable the creation of type comments go to
* Window>Preferences>Java>Code Generation.
*/
/**
* @author ram
*
* This tool validates XML files against their DTD, because IE 6 does not do a good job of it.
*/
import java.io.*;
import java.io.File;
import java.io.FileReader;
import java.io.BufferedReader;
// DOM imports
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Text;
// Needed JAXP classes
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
// SAX2 imports
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
/**
 * Command-line tool that parses each XML file named on the command line with a
 * validating, namespace-aware JAXP parser and reports validation problems on
 * the standard streams.
 *
 * When a file cannot be parsed as XML, its contents are read as plain text and
 * wrapped in a synthetic <code>&lt;out&gt;</code> element instead.
 */
public class XMLValidator {

    /**
     * Prefixes of strings that are treated as already being URLs and are
     * therefore returned unchanged by {@link #filenameToURL(String)}.
     */
    private static final String[] URL_PREFIXES = {
        "file:", "http:", "https:", "ftp:", "gopher:", "mailto:", "news:", "telnet:"
    };

    /**
     * Validates every file named in <code>args</code>.
     *
     * @param args file names (or URLs) of the XML documents to validate
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            System.out.println("No files specified. Validation failed");
            return;
        }
        for (int i = 0; i < args.length; i++) {
            // Announce the file first so that the diagnostics printed while
            // parsing appear under the heading of the file they belong to.
            System.out.println("Processing file " + args[i]);
            parse(args[i]);
        }
    }

    /**
     * Utility method to translate a String filename to a URL.
     *
     * Note: This method is not necessarily proven to get the correct URL for
     * every possible kind of filename. It does not escape characters that are
     * illegal in URLs, and it only recognises the URL schemes listed in
     * {@link #URL_PREFIXES}.
     *
     * If the name is null, null is returned. If the name starts with one of
     * the recognised schemes it is returned as-is (the assumption is that it
     * is already a URL). Otherwise it is treated as a local file path and
     * converted to a <code>file:///</code> URL.
     *
     * @param filename local path of a file, or a string that is already a URL
     * @return a <code>file:///</code> URL, the same string if it appears to
     *         already be a URL, or null if <code>filename</code> is null
     */
    public static String filenameToURL(String filename) {
        if (filename == null) {
            return null;
        }
        // Don't translate a string that already looks like a URL.
        for (int i = 0; i < URL_PREFIXES.length; i++) {
            if (filename.startsWith(URL_PREFIXES[i])) {
                return filename;
            }
        }
        File file = new File(filename);
        String path;
        try {
            // This normally gives a better (absolute, normalised) path ...
            path = file.getCanonicalPath();
        } catch (IOException ioe) {
            // ... but fall back to the absolute path when it cannot be resolved.
            path = file.getAbsolutePath();
        }
        // URLs must explicitly use only forward slashes.
        if (File.separatorChar == '\\') {
            path = path.replace('\\', '/');
        }
        // We always want exactly three slashes after "file:"; a path that
        // already starts with "/" contributes the third one itself.
        if (path.startsWith("/")) {
            return "file://" + path;
        }
        return "file:///" + path;
    }

    /**
     * Parses the named file, converting the name to a URL first.
     *
     * @param filename path or URL of the document to parse
     * @return the parsed document, the plain-text fallback document, or null
     *         if neither could be produced
     */
    static Document parse(String filename) {
        // Force file references to be URIs if needed.
        String docURI = filenameToURL(filename);
        return parse(new InputSource(docURI), filename);
    }

    /**
     * Parses <code>docSrc</code> with a validating, namespace-aware parser.
     * Warnings and validation errors are printed to <code>System.err</code>;
     * a fatal error (or any other exception) aborts the XML parse, is reported
     * on <code>System.out</code>, and triggers the plain-text fallback.
     *
     * @param docSrc   the source to parse as XML
     * @param filename local file to read as text when XML parsing fails; when
     *                 null no fallback is attempted
     * @return the parsed document, the plain-text fallback document, or null
     *         if neither could be produced
     */
    static Document parse(InputSource docSrc, String filename) {
        DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance();
        // Always set namespaces on, and validate against the DTD.
        dfactory.setNamespaceAware(true);
        dfactory.setValidating(true);

        try {
            DocumentBuilder docBuilder = dfactory.newDocumentBuilder();
            docBuilder.setErrorHandler(new ReportingErrorHandler());
            return docBuilder.parse(docSrc);
        } catch (Exception e) {
            // Covers SAXException, IOException, ParserConfigurationException
            // and runtime failures; JVM Errors are deliberately not caught.
            System.out.println("ERROR :" + e.toString());
        }

        // There is no HTML-to-DOM parser available, so the only remaining
        // fallback is to wrap the raw text of the file in a document.
        if (filename == null) {
            return null;
        }
        return parseAsText(dfactory, filename);
    }

    /**
     * Reads <code>filename</code> line by line (using the platform default
     * charset) and wraps the text, with each line terminated by a newline, in
     * a new document whose root element is <code>&lt;out&gt;</code>.
     *
     * @return the wrapper document, or null if the file could not be read or
     *         the document could not be created
     */
    private static Document parseAsText(DocumentBuilderFactory dfactory, String filename) {
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(filename));
            StringBuffer buffer = new StringBuffer();
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                buffer.append(line);
                buffer.append("\n"); // Put in the newlines as well
            }
            Document doc = dfactory.newDocumentBuilder().newDocument();
            Element outElem = doc.createElement("out");
            outElem.appendChild(doc.createTextNode(buffer.toString()));
            doc.appendChild(outElem);
            return doc;
        } catch (Exception e) {
            // Best effort: report the failure instead of hiding it, and let
            // the caller see a null document as before.
            System.err.println("Could not read " + filename + " as text: " + e);
            return null;
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }
    }

    /**
     * Error handler that prints warnings and validation errors to
     * <code>System.err</code> and lets parsing continue, but rethrows fatal
     * errors so that the parse is aborted.
     */
    private static final class ReportingErrorHandler implements ErrorHandler {
        public void warning(SAXParseException e) throws SAXException {
            System.err.println("Warning: " + e.getMessage());
        }

        public void error(SAXParseException e) throws SAXException {
            // NOTE(review): getPublicId() is the public identifier of the
            // entity being parsed, not an element name, and is often null;
            // the message text is kept unchanged for output compatibility.
            System.err.println("Element " + e.getPublicId() + " is not valid because " + e.getMessage());
            System.err.println("Error: " + "at line " + e.getLineNumber() + ", column " + e.getColumnNumber());
        }

        public void fatalError(SAXParseException e) throws SAXException {
            throw e;
        }
    }
}