src/com/ibm/icu/dev/tool/xmlcomparator/XMLValidator.java - external/github.com/unicode-org/icu - Git at Google

 /*
 ******************************************************************************
 * Copyright (C) 2003-2004, International Business Machines Corporation and   *
 * others. All Rights Reserved.                                               *
 ******************************************************************************
 */

 package com.ibm.icu.dev.tool.xmlcomparator;

 /**
  * @author Ram
  *
  * To change this generated comment edit the template variable "typecomment":
  * Window>Preferences>Java>Templates.
  * To enable and disable the creation of type comments go to
  * Window>Preferences>Java>Code Generation.
  */
 /**
  * @author ram
  *
  * This tool validates XML against a DTD, because IE 6 does not do a good job of it.
  */
 import java.io.*;
 import java.io.File;
 import java.io.FileReader;
 import java.io.BufferedReader;


 // DOM imports
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 import org.w3c.dom.Text;

 // Needed JAXP classes
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;

 // SAX2 imports
 import org.xml.sax.ErrorHandler;
 import org.xml.sax.InputSource;
 import org.xml.sax.SAXException;
 import org.xml.sax.SAXParseException;

 /**
  * Command-line tool that parses XML files with a validating,
  * namespace-aware DOM parser and reports validation problems on the console.
  */
 public class XMLValidator {
 	/** Scheme prefixes that mark a name as already being a URL. */
 	private static final String[] URL_PREFIXES = {
 		"file:", "http:", "https:", "ftp:", "gopher:", "mailto:", "news:", "telnet:"
 	};

 	/**
 	 * Validates every file named on the command line.
 	 *
 	 * @param args file names (or URLs) of the XML documents to validate
 	 */
 	public static void main(String[] args) {
 		if (args.length == 0) {
 			System.out.println("No files specified. Validation failed");
 			return;
 		}
 		for (int i = 0; i < args.length; i++) {
 			// Announce the file first so that the diagnostics printed while
 			// parsing show up underneath the file they belong to.
 			System.out.println("Processing file " + args[i]);
 			parse(args[i]);
 		}
 	}

 	/**
 	 * Utility method to translate a String filename to a URL.
 	 *
 	 * Note: this is a cheap conversion, not a complete one; for example
 	 * characters that are illegal in URLs (such as spaces) are not escaped.
 	 *
 	 * If the name is null, return null.
 	 * If the name starts with a common URI scheme (file, http, https, ftp,
 	 * gopher, mailto, news, telnet), return the name as-is on the
 	 * assumption that it is already a URL.
 	 * Otherwise convert it to a file:/// URL built from the canonical
 	 * path, or from the absolute path when the canonical path cannot be
 	 * determined.
 	 *
 	 * @param filename local path/filename of a file, or a URL
 	 * @return a file:/// URL, the same string if it appears to
 	 * already be a URL, or null if filename is null
 	 */
 	public static String filenameToURL(String filename) {
 		if (filename == null) {
 			return null;
 		}

 		// Don't translate a string that already looks like a URL
 		for (int i = 0; i < URL_PREFIXES.length; i++) {
 			if (filename.startsWith(URL_PREFIXES[i])) {
 				return filename;
 			}
 		}

 		File f = new File(filename);
 		String path;
 		try {
 			// This normally gives a better path
 			path = f.getCanonicalPath();
 		} catch (IOException ioe) {
 			// Backup for the cases where the canonical path cannot be resolved
 			path = f.getAbsolutePath();
 		}

 		// URLs must explicitly use only forward slashes
 		if (File.separatorChar == '\\') {
 			path = path.replace('\\', '/');
 		}

 		// An absolute Unix path already supplies the third slash;
 		// anything else (e.g. "C:/...") needs all three.
 		if (path.startsWith("/")) {
 			return "file://" + path;
 		}
 		return "file:///" + path;
 	}

 	/**
 	 * Parses the named file, converting the name to a URL first if needed.
 	 *
 	 * @param filename local file name or URL of the document
 	 * @return the parsed document, the plain-text fallback document, or
 	 * null if neither could be produced
 	 */
 	static Document parse(String filename) {
 		String docURI = filenameToURL(filename);
 		return parse(new InputSource(docURI), filename);
 	}

 	/**
 	 * Parses a document with a validating, namespace-aware parser.
 	 * Warnings and validation errors are printed to System.err and do not
 	 * stop the parse. If parsing fails and filename is not null, the file
 	 * is instead read as plain text and wrapped in a single "out" element.
 	 *
 	 * @param docSrc the source to parse as XML
 	 * @param filename local file name used for the plain-text fallback;
 	 * may be null, in which case no fallback is attempted
 	 * @return the parsed document, the fallback document, or null if
 	 * both attempts failed
 	 */
 	static Document parse(InputSource docSrc, String filename) {
 		DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance();
 		// Always set namespaces on
 		dfactory.setNamespaceAware(true);
 		dfactory.setValidating(true);

 		// Prints warnings and validation errors instead of aborting on
 		// them; only fatal errors stop the parse.
 		ErrorHandler reportingHandler = new ErrorHandler() {
 			public void warning(SAXParseException e) throws SAXException {
 				System.err.println("Warning: " + e.getMessage());
 			}
 			public void error(SAXParseException e) throws SAXException {
 				// getPublicId() is the public identifier of the entity being
 				// parsed and is frequently null; the line/column below is
 				// the reliable locator.
 				System.err.println("Element " + e.getPublicId() + " is not valid because " + e.getMessage());
 				System.err.println("Error: " + "at line " + e.getLineNumber() + ", column " + e.getColumnNumber());
 			}
 			public void fatalError(SAXParseException e) throws SAXException {
 				throw e;
 			}
 		};

 		try {
 			// First, attempt to parse as XML (preferred)...
 			DocumentBuilder docBuilder = dfactory.newDocumentBuilder();
 			docBuilder.setErrorHandler(reportingHandler);
 			return docBuilder.parse(docSrc);
 		} catch (Exception se) {
 			System.out.println("ERROR :" + se.toString());
 		}

 		// There is no HTML-to-DOM parser available, so the only remaining
 		// fallback is to read the file as plain text.
 		if (filename == null) {
 			return null;
 		}
 		try {
 			return parseAsText(filename, dfactory);
 		} catch (Exception e) {
 			// Best effort: report the failure rather than hiding it, and
 			// signal "no document" to the caller.
 			System.err.println("Could not read " + filename + " as text: " + e);
 			return null;
 		}
 	}

 	/**
 	 * Reads a file line by line and wraps its text in a new document whose
 	 * root element is "out". Every line is terminated with "\n".
 	 */
 	private static Document parseAsText(String filename, DocumentBuilderFactory dfactory)
 			throws IOException, javax.xml.parsers.ParserConfigurationException {
 		StringBuffer buffer = new StringBuffer();
 		BufferedReader br = new BufferedReader(new FileReader(filename));
 		try {
 			String line;
 			while ((line = br.readLine()) != null) {
 				buffer.append(line);
 				buffer.append("\n"); // Put in the newlines as well
 			}
 		} finally {
 			// Closing the BufferedReader also closes the underlying FileReader
 			br.close();
 		}

 		Document doc = dfactory.newDocumentBuilder().newDocument();
 		Element outElem = doc.createElement("out");
 		Text textNode = doc.createTextNode(buffer.toString());
 		outElem.appendChild(textNode);
 		doc.appendChild(outElem);
 		return doc;
 	}
 }
	/*
	******************************************************************************
	* Copyright (C) 2003-2004, International Business Machines Corporation and *
	* others. All Rights Reserved. *
	******************************************************************************
	*/

	package com.ibm.icu.dev.tool.xmlcomparator;

	/**
	* @author Ram
	*
	* To change this generated comment edit the template variable "typecomment":
	* Window>Preferences>Java>Templates.
	* To enable and disable the creation of type comments go to
	* Window>Preferences>Java>Code Generation.
	*/
	/**
	* @author ram
	*
	* This tool validates XML against a DTD, because IE 6 does not do a good job of it.
	*/
	import java.io.*;
	import java.io.File;
	import java.io.FileReader;
	import java.io.BufferedReader;


	// DOM imports
	import org.w3c.dom.Document;
	import org.w3c.dom.Element;
	import org.w3c.dom.Text;

	// Needed JAXP classes
	import javax.xml.parsers.DocumentBuilder;
	import javax.xml.parsers.DocumentBuilderFactory;

	// SAX2 imports
	import org.xml.sax.ErrorHandler;
	import org.xml.sax.InputSource;
	import org.xml.sax.SAXException;
	import org.xml.sax.SAXParseException;

	/**
	 * Command-line tool that parses XML files with a validating,
	 * namespace-aware DOM parser and reports validation problems on the console.
	 */
	public class XMLValidator {
		/** Scheme prefixes that mark a name as already being a URL. */
		private static final String[] URL_PREFIXES = {
			"file:", "http:", "https:", "ftp:", "gopher:", "mailto:", "news:", "telnet:"
		};

		/**
		 * Validates every file named on the command line.
		 *
		 * @param args file names (or URLs) of the XML documents to validate
		 */
		public static void main(String[] args) {
			if (args.length == 0) {
				System.out.println("No files specified. Validation failed");
				return;
			}
			for (int i = 0; i < args.length; i++) {
				// Announce the file first so that the diagnostics printed while
				// parsing show up underneath the file they belong to.
				System.out.println("Processing file " + args[i]);
				parse(args[i]);
			}
		}

		/**
		 * Utility method to translate a String filename to a URL.
		 *
		 * Note: this is a cheap conversion, not a complete one; for example
		 * characters that are illegal in URLs (such as spaces) are not escaped.
		 *
		 * If the name is null, return null.
		 * If the name starts with a common URI scheme (file, http, https, ftp,
		 * gopher, mailto, news, telnet), return the name as-is on the
		 * assumption that it is already a URL.
		 * Otherwise convert it to a file:/// URL built from the canonical
		 * path, or from the absolute path when the canonical path cannot be
		 * determined.
		 *
		 * @param filename local path/filename of a file, or a URL
		 * @return a file:/// URL, the same string if it appears to
		 * already be a URL, or null if filename is null
		 */
		public static String filenameToURL(String filename) {
			if (filename == null) {
				return null;
			}

			// Don't translate a string that already looks like a URL
			for (int i = 0; i < URL_PREFIXES.length; i++) {
				if (filename.startsWith(URL_PREFIXES[i])) {
					return filename;
				}
			}

			File f = new File(filename);
			String path;
			try {
				// This normally gives a better path
				path = f.getCanonicalPath();
			} catch (IOException ioe) {
				// Backup for the cases where the canonical path cannot be resolved
				path = f.getAbsolutePath();
			}

			// URLs must explicitly use only forward slashes
			if (File.separatorChar == '\\') {
				path = path.replace('\\', '/');
			}

			// An absolute Unix path already supplies the third slash;
			// anything else (e.g. "C:/...") needs all three.
			if (path.startsWith("/")) {
				return "file://" + path;
			}
			return "file:///" + path;
		}

		/**
		 * Parses the named file, converting the name to a URL first if needed.
		 *
		 * @param filename local file name or URL of the document
		 * @return the parsed document, the plain-text fallback document, or
		 * null if neither could be produced
		 */
		static Document parse(String filename) {
			String docURI = filenameToURL(filename);
			return parse(new InputSource(docURI), filename);
		}

		/**
		 * Parses a document with a validating, namespace-aware parser.
		 * Warnings and validation errors are printed to System.err and do not
		 * stop the parse. If parsing fails and filename is not null, the file
		 * is instead read as plain text and wrapped in a single "out" element.
		 *
		 * @param docSrc the source to parse as XML
		 * @param filename local file name used for the plain-text fallback;
		 * may be null, in which case no fallback is attempted
		 * @return the parsed document, the fallback document, or null if
		 * both attempts failed
		 */
		static Document parse(InputSource docSrc, String filename) {
			DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance();
			// Always set namespaces on
			dfactory.setNamespaceAware(true);
			dfactory.setValidating(true);

			// Prints warnings and validation errors instead of aborting on
			// them; only fatal errors stop the parse.
			ErrorHandler reportingHandler = new ErrorHandler() {
				public void warning(SAXParseException e) throws SAXException {
					System.err.println("Warning: " + e.getMessage());
				}
				public void error(SAXParseException e) throws SAXException {
					// getPublicId() is the public identifier of the entity being
					// parsed and is frequently null; the line/column below is
					// the reliable locator.
					System.err.println("Element " + e.getPublicId() + " is not valid because " + e.getMessage());
					System.err.println("Error: " + "at line " + e.getLineNumber() + ", column " + e.getColumnNumber());
				}
				public void fatalError(SAXParseException e) throws SAXException {
					throw e;
				}
			};

			try {
				// First, attempt to parse as XML (preferred)...
				DocumentBuilder docBuilder = dfactory.newDocumentBuilder();
				docBuilder.setErrorHandler(reportingHandler);
				return docBuilder.parse(docSrc);
			} catch (Exception se) {
				System.out.println("ERROR :" + se.toString());
			}

			// There is no HTML-to-DOM parser available, so the only remaining
			// fallback is to read the file as plain text.
			if (filename == null) {
				return null;
			}
			try {
				return parseAsText(filename, dfactory);
			} catch (Exception e) {
				// Best effort: report the failure rather than hiding it, and
				// signal "no document" to the caller.
				System.err.println("Could not read " + filename + " as text: " + e);
				return null;
			}
		}

		/**
		 * Reads a file line by line and wraps its text in a new document whose
		 * root element is "out". Every line is terminated with "\n".
		 */
		private static Document parseAsText(String filename, DocumentBuilderFactory dfactory)
				throws IOException, javax.xml.parsers.ParserConfigurationException {
			StringBuffer buffer = new StringBuffer();
			BufferedReader br = new BufferedReader(new FileReader(filename));
			try {
				String line;
				while ((line = br.readLine()) != null) {
					buffer.append(line);
					buffer.append("\n"); // Put in the newlines as well
				}
			} finally {
				// Closing the BufferedReader also closes the underlying FileReader
				br.close();
			}

			Document doc = dfactory.newDocumentBuilder().newDocument();
			Element outElem = doc.createElement("out");
			Text textNode = doc.createTextNode(buffer.toString());
			outElem.appendChild(textNode);
			doc.appendChild(outElem);
			return doc;
		}
	}