/**
* RDFaExtractorCore.java
*
* Created on 2006-11-12
*
* Distribution and Usage
* ======================
* This software may be used in terms of the CC Attribution 2.5 License
* as stated at http://creativecommons.org/licenses/by/2.5/
*
* Credits
* =======
* This software is built on top of other software which I gratefully acknowledge:
* Jena 2 (cf. http://jena.sourceforge.net/license.html for details) is used for RDF processing
* and JDOM (cf. JDOM Project http://www.jdom.org/ for details) is used for XML processing.
*
*/
package org.sw_app.sw;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Vector;
import org.jdom.Attribute;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;
import org.jdom.input.SAXBuilder;
import org.jdom.output.XMLOutputter;
import org.jdom.xpath.XPath;
import org.jdom.Document;
import com.hp.hpl.jena.rdf.model.AnonId;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.RDFWriter;
import com.hp.hpl.jena.rdf.model.ReifiedStatement;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.util.FileManager;
import com.hp.hpl.jena.vocabulary.RDF;
/**
* RDFaExtractorCore
*
* This class implements the RDFa syntax as specified at
* <a href="http://www.w3.org/2006/07/SWD/RDFa/syntax/">http://www.w3.org/2006/07/SWD/RDFa/syntax/</a>.
* It processes either a single (local or remote) XHTML document or a list of such documents that may contain RDFa attributes.
* The output of the processing is an RDF graph that contains all RDF triples that are implicit in the input XHTML.
*
* @author Michael Hausenblas
* @see <a href="http://creativecommons.org/licenses/by/2.5/">CC Attribution 2.5 License</a>
*/
public class RDFaExtractorCore {
/**
* Turns XHTML tree processing display ON/OFF:
*
*/
boolean doDumpXHTMLProcessing = false;
/**
* The URI of the input HTML document.
*/
URI inputDocURI = null;
/**
* The main RDF graph.
*/
Model rdfGraph = null;
/**
* Base URI for serializing.
*/
String baseURI = "";
/**
* Maps namespace prefixes to namespace URIs, as collected from the elements of the input document.
*/
HashMap nsMap = null;
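/**
* The names of the RDFa attributes handled by this extractor:
* about, property, rel, rev, href, content (and datatype, only in conjunction with property),
* followed by the names of the XHTML elements that receive special treatment (meta, link, html).
*/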
public static final String RDFa_about = "about";
public static final String RDFa_prop = "property";
public static final String RDFa_rel = "rel";
public static final String RDFa_rev = "rev";
public static final String RDFa_href = "href";
public static final String RDFa_content = "content";
public static final String RDFa_datatype = "datatype";
public static final String RDFa_element_meta = "meta";
public static final String RDFa_element_link = "link";
public static final String HTML_root = "html";
// OLD VERSION, looks at all RDFa elements
/*
public static final String RDFa_elementSelector = "//*[@" + RDFa_about + " | "+
"@" + RDFa_prop + " | "+
"@" + RDFa_rel + " | "+
"@" + RDFa_rev + " | "+
"@" + RDFa_href + " | "+
"@" + RDFa_content + "]";
*/
/**
* NEW version: only look at attributes that CAN trigger a triple.
* These are: property, rel, rev
*/
public static final String RDFa_elementSelector = "//*[@" + RDFa_prop + " | "+
"@" + RDFa_rel + " | "+
"@" + RDFa_rev + "]";
public static final String XMLLiteral = "XMLLiteral";
/**
* Creates an extractor with a fresh, empty RDF graph, an empty base URI and an empty namespace map.
*
* @param doDumpXHTMLProcessing whether details of the XHTML tree processing are printed to standard output
*/
public RDFaExtractorCore(boolean doDumpXHTMLProcessing){
    this.doDumpXHTMLProcessing = doDumpXHTMLProcessing;
    this.nsMap = new HashMap();
    this.baseURI = "";
    this.rdfGraph = ModelFactory.createDefaultModel();
}
/**
* Reads the current input document (locally or remote via URL) and creates a JDOM representation out of it.
* On success the document URL is also stored as the base URI.
* All failures are reported on standard output instead of being propagated.
*
* @return the parsed document, or <code>null</code> if no input document is set, its URI cannot be
*         turned into a URL, or the document cannot be read or is not well-formed
*/
private Document createHMTLDocument() {
    URI inputURI = this.getInputDocument();
    if(inputURI == null) return null; // nothing to parse

    System.out.println("Parsing HTML file result : ");

    // Resolve the URL first so that URL problems are reported with the offending URI
    // (at this point no file name has been derived yet).
    URL inFileURL = null;
    try {
        inFileURL = inputURI.toURL();
    }
    catch (IllegalArgumentException iae) { // URI.toURL() rejects non-absolute URIs with an unchecked exception
        System.out.println(inputURI + " seems NOT to be a well-formed URL.");
        iae.printStackTrace();
        return null;
    }
    catch (MalformedURLException mue) {
        System.out.println(inputURI + " seems NOT to be a well-formed URL.");
        mue.printStackTrace();
        return null;
    }

    SAXBuilder builder = new SAXBuilder();
    Document doc = null;
    String docLabel = inFileURL.toExternalForm(); // name used in diagnostics
    try {
        if(inFileURL.getProtocol().startsWith("file")){ // workaround due to JDOM bug; use local file path
            String inFileName = new File(inFileURL.getFile()).getAbsolutePath();
            docLabel = inFileName;
            doc = builder.build(inFileName);
            System.out.println(inFileName + " seems to be well-formed.");
        }
        else { // use URL/HTTP access
            doc = builder.build(inFileURL);
            System.out.println(inFileURL.toExternalForm() + " seems to be well-formed.");
        }
        // TODO: xml:base URI handling
        this.setBaseURI(inFileURL.toExternalForm()); // use the document URL as base URI
    }
    catch (JDOMException je) {
        System.out.println(docLabel + " seems NOT to be well-formed.");
        System.out.println(je.getMessage());
    }
    catch (IOException ioe) {
        System.out.println("Could not check " + docLabel);
        System.out.println(" due to " + ioe.getMessage());
    }
    return doc;
}
/**
* Serializes the current RDF graph into a file, using the current base URI.
* For formats whose name starts with "RDF" the XML declaration is switched on and the tab width set to 1.
* A failure to open the file is reported via a stack trace; the file handle is returned in any case.
*
* @param fileName path of the file to write to
* @param serFormat name of the serialization format, passed on to the graph's writer lookup
* @return the file object for <code>fileName</code>
*/
public File serializeGraph(String fileName, String serFormat){
    File tmpFile = new File(fileName);
    RDFWriter w = this.rdfGraph.getWriter(serFormat);
    if(serFormat != null && serFormat.startsWith("RDF")){
        w.setProperty("showXMLDeclaration","true");
        w.setProperty("tab","1");
    }
    PrintWriter out = null;
    try {
        out = new PrintWriter(tmpFile);
        w.write(this.rdfGraph, out, this.getBaseURI());
        out.flush();
    }
    catch (FileNotFoundException e) {
        e.printStackTrace();
    }
    finally {
        // Always release the file; without closing, buffered output may never reach the disk.
        if(out != null) out.close();
    }
    return tmpFile;
}
/**
* Processes a list of XHTML documents one after the other and prints, for each of them,
* the base URI, a dump of the resulting RDF graph and the processing time to standard output.
* Setting the next input document resets the graph, so every dump only covers one document.
*
* @param docList the input documents; every entry has to be a {@link URI}
* @param outFormat the serialization format used for the graph dump
* @param doDumpDetails currently not evaluated
*/
public void processMultiple(Vector docList, String outFormat, boolean doDumpDetails){
    for (Iterator iter = docList.iterator(); iter.hasNext();) {
        URI docURI = (URI) iter.next();
        this.setInputDocument(docURI);

        // Take both time stamps around process() itself, so that failed documents
        // also report a meaningful (non-negative, non-stale) duration.
        long start = System.currentTimeMillis();
        Model result = this.process();
        long end = System.currentTimeMillis();

        if(result != null){
            System.out.println("\n===========================================================");
            System.out.println("\nUsing BASE URI: " + this.getBaseURI());
            System.out.println("\nResulting RDF graph : ");
            System.out.println(this.getRDFGraphDump(outFormat));
        }
        else {
            System.out.println("HTML input document not valid!");
        }
        int diff = (int) (end - start);
        System.out.println("Process time: " + diff + "ms");
    }
}
/**
* Parses the current input document and processes every element that carries a
* property, rel or rev attribute. Namespace declarations found in the document are
* registered first, and the base URI is registered as the empty prefix of the graph.
*
* @return the RDF graph, or <code>null</code> if no input document is set, the document
*         could not be parsed, or it contains no element with a triggering RDFa attribute
*/
public Model process() {
    if(this.getInputDocument() == null) return null; // no input document set

    Document doc = this.createHMTLDocument();
    if(doc == null) return null; // parsing failed; the reason has already been reported

    // handle global namespaces
    if(this.doDumpXHTMLProcessing) System.out.println("\nResult of processing XHTML top level namespaces :");
    this.addNSofElements(doc);
    this.rdfGraph.setNsPrefix("", this.getBaseURI()); // add the base URI of the document as the empty prefix

    // process XHTML tree
    if(this.doDumpXHTMLProcessing) System.out.println("\nResult of processing XHTML tree :");
    try {
        // select all XHTML elements that have relevant RDFa attributes
        XPath elementsWithAttributes = XPath.newInstance(RDFaExtractorCore.RDFa_elementSelector);
        List rdfaElementList = elementsWithAttributes.selectNodes(doc);
        if(rdfaElementList.isEmpty()){ // no RDFa elements found
            if(this.doDumpXHTMLProcessing) System.out.println("Found no RDFa in the input HTML document.");
            return null;
        }
        // process all RDFa elements iteratively
        for (Iterator itEwA = rdfaElementList.iterator(); itEwA.hasNext();) {
            processElement((Element) itEwA.next());
        }
    }
    catch (JDOMException e) {
        // best effort: report the XPath problem and return whatever has been extracted so far
        e.printStackTrace();
    }
    return this.rdfGraph;
}
/**
* Processes a single XHTML element: evaluates its RDFa attributes (and, where needed, those of
* its parent or ancestors) to establish subject, predicate and object, and adds one triple to the
* RDF graph for each of @property, @rel and @rev that is present.
* A meta/link element nested in a meta/link parent without @about yields reified triples.
*
* @param e The current XHTML element being processed.
*/
private void processElement(Element e){
    Property pProp = null;
    Property pRel = null;
    Property pRev = null;
    Resource s = null;
    RDFNode o = null;
    String propURI = "";
    String relURI = "";
    String revURI = "";
    String aboutURI = "";
    String contentVal ="";
    String contentType ="";
    String hrefURI = "";
    String lang = "";
    boolean doReification = false;

    if(this.doDumpXHTMLProcessing) System.out.println(" Processing element: " + e.getName());

    // === FIRST STEP: evaluate the attributes to get the URIs and values ===
    // I. Establishing the predicate
    propURI = this.evaluateAttributeAtElement(e, RDFaExtractorCore.RDFa_prop); // try to extract predicate from @property
    relURI = this.evaluateAttributeAtElement(e, RDFaExtractorCore.RDFa_rel); // try to extract predicate from @rel
    revURI = this.evaluateAttributeAtElement(e, RDFaExtractorCore.RDFa_rev); // try to extract predicate from @rev

    // II. Establishing the subject/object
    aboutURI = this.evaluateAttributeAtElement(e, RDFaExtractorCore.RDFa_about); // try to extract subject from @about
    // handle xml:base
    // NOTE(review): the xml:base value is appended AFTER the @about value; resolving @about
    // against xml:base would normally put the base first - confirm the intended behaviour.
    String ownBase = this.getXMLBaseFromElement(e);
    if(!aboutURI.equals("") && ownBase != null) aboutURI += ownBase;

    if(aboutURI.equals("")) { // the element itself has NO @about
        if(this.isMetaOrLink(e)) { // meta/link: try to extract subject/object from the parent
            Element parent = e.getParentElement();
            if(parent == null) {
                // e is the root element: nothing to inherit from, the document itself is the subject/object
            }
            else if(this.isMetaOrLink(parent)) { // parent is a meta/link itself
                aboutURI = this.evaluateAttributeAtElement(parent, RDFaExtractorCore.RDFa_about);
                if(aboutURI.equals("")) { // parent does not have an @about
                    doReification = true;
                }
            }
            else { // any other parent element
                aboutURI = this.evaluateAttributeAtElement(parent, RDFaExtractorCore.RDFa_about);
                if(aboutURI.equals("")) { // parent does not have an @about
                    aboutURI = this.getXMLIDFromElement(parent); // try the parent's (xml:)id
                    if(aboutURI == null) { // parent has no id either
                        aboutURI = this.createBlankNodeID(parent); // use the parent element as a bNode
                    }
                }
                String parentBase = this.getXMLBaseFromElement(parent);
                if(parentBase != null){ // parent has a xml:base; use this instead of bNode (?)
                    aboutURI = parentBase;
                }
            }
        }
        else { // any other element: look for the closest ancestor with an @about
            // Stop at the html element (whose own @about is not evaluated) or when the ancestor
            // chain ends; in both cases the document itself ("") is the subject/object.
            Element ancestor = e.getParentElement();
            while(ancestor != null && !ancestor.getName().equals(RDFaExtractorCore.HTML_root)){
                aboutURI = this.evaluateAttributeAtElement(ancestor, RDFaExtractorCore.RDFa_about);
                if(!aboutURI.equals("")) break; // found an @about at an ancestor
                ancestor = ancestor.getParentElement();
            }
        }
    }

    // III. Establishing the object/subject
    // CASE literal object
    if(!propURI.equals("")){ // we have a @property set, check for a literal value
        // two cases have to be distinguished: without or with @datatype
        contentType = this.evaluateAttributeAtElement(e, RDFaExtractorCore.RDFa_datatype); // try to determine @datatype
        // @content holds a literal, not a URI reference: read it verbatim instead of
        // running it through the CURIE/QName normalisation, which would mangle the text.
        Attribute contentAttr = e.getAttribute(RDFaExtractorCore.RDFa_content);
        contentVal = (contentAttr == null) ? "" : contentAttr.getValue();
        if(this.doDumpXHTMLProcessing && contentAttr != null) System.out.println(" @" + RDFaExtractorCore.RDFa_content + "=" + contentVal);
        lang = this.getXMLLanguageFromElement(e); // try to extract @xml:lang from element
        if(contentType.equals("")) { // element has NO @datatype
            if(contentVal.equals("")) { // element has NO @content
                // TODO: make XMLLiteral handling spec conformant
                contentVal = e.getTextTrim();
                contentType = RDFaExtractorCore.XMLLiteral;
            }
            // else: plain literal; use contentVal as extracted above
        }
        else { // element has a @datatype
            if(contentVal.equals("")) { // element has NO @content
                contentVal = e.getTextNormalize(); // try to extract object value from element's content
            }
            // else: use contentVal as extracted above
        }
    }

    // CASE URI ref object/subject
    if(!relURI.equals("") || !revURI.equals("") ){ // we have a @rel/@rev set, check for an object/subject resource (URI ref)
        if(!this.isMetaOrLink(e)) {
            hrefURI = this.evaluateAttributeAtElement(e, RDFaExtractorCore.RDFa_href); // try to extract object/subject value from @href
        }
        else {
            Element parent = e.getParentElement();
            // only a meta/link parent contributes its @href
            if(parent != null && this.isMetaOrLink(parent)) {
                hrefURI = this.evaluateAttributeAtElement(parent, RDFaExtractorCore.RDFa_href);
            }
        }
    }

    // === SECOND STEP: update the RDF graph according to the found S, P, O ===
    if(this.doDumpXHTMLProcessing) System.out.print(" Adding TRIPLE: ");

    // CASE @property
    if(!propURI.equals("")) {
        s = this.createResourceFromURIRef(aboutURI); // create the subject
        pProp = this.rdfGraph.createProperty(propURI); // create the predicate
        // create the object using a literal
        if(lang == null){ // create simple literal without @xml:lang
            o = this.rdfGraph.createLiteral(contentVal);
        }
        else { // create a literal with @xml:lang
            o = this.rdfGraph.createLiteral(contentVal, lang);
        }
        if(!contentType.equals("")){ // a typed literal object replaces the literal created above
            if(contentType.equals(RDFaExtractorCore.XMLLiteral)) { // we have an XMLLiteral
                o = this.rdfGraph.createLiteral(contentVal, true);
            }
            else if(contentType.equals("plaintext")) { // @datatype is plaintext
                o = this.rdfGraph.createLiteral(contentVal); // create simple literal without datatype
            }
            else { // @datatype is an XSD (?)
                o = this.rdfGraph.createTypedLiteral(contentVal, contentType); // create typed literal
            }
        }
        if(doReification){ // a link/meta element inside a link/meta element
            this.addReifiedTriple(s, pProp, o);
        }
        else {
            this.addTriple(s, pProp, o);
        }
    }

    // CASE @rel
    if(!relURI.equals("")){
        s = this.createResourceFromURIRef(aboutURI); // create the subject using a resource (URI ref)
        pRel = this.rdfGraph.createProperty(relURI); // create the predicate
        o = this.createResourceFromURIRef(hrefURI); // create the object using a resource (URI ref)
        if(doReification){ // a link/meta element inside a link/meta element
            this.addReifiedTriple(s, pRel, o);
        }
        else {
            this.addTriple(s, pRel, o);
        }
    }

    // CASE @rev (subject and object swapped compared to @rel)
    if(!revURI.equals("")) {
        s = this.createResourceFromURIRef(hrefURI); // create the subject using a resource (URI ref)
        pRev = this.rdfGraph.createProperty(revURI);
        o = this.createResourceFromURIRef(aboutURI); // create the object using a resource (URI ref)
        if(doReification){ // a link/meta element inside a link/meta element
            this.addReifiedTriple(s, pRev, o);
        }
        else {
            this.addTriple(s, pRev, o);
        }
    }
}
/**
* Creates a resource in the RDF graph for the given reference.
* A reference starting with "_" is treated as a bNode whose ID is the remainder of the string;
* anything else is used as the URI of the resource.
*
* @param aURIREf the URI reference or "_"-prefixed bNode ID
* @return the resource created in the graph
*/
private Resource createResourceFromURIRef(String aURIREf){
    if(!aURIREf.startsWith("_")){ // a URI indeed
        return this.rdfGraph.createResource(aURIREf);
    }
    // a bNode: drop the leading marker and use the rest as anonymous ID
    String bNodeLabel = aURIREf.substring(1);
    return this.rdfGraph.createResource(new AnonId(bNodeLabel));
}
/**
* Serializes the child elements of an element into one string.
* Only child elements are taken into account (text directly inside the element is not).
*
* @param e the element whose children are serialized
* @return the concatenated XML of all child elements; empty if there are none
*/
private String createXMLLiteralFromElement(Element e) {
    XMLOutputter serializer = new XMLOutputter();
    StringBuffer literal = new StringBuffer();
    for (Iterator it = e.getChildren().iterator(); it.hasNext();) {
        Element childElement = (Element) it.next();
        literal.append(serializer.outputString(childElement));
    }
    return literal.toString();
}
/**
* bNode IDs handed out so far, keyed by the element they were created for.
* Weak keys, so that elements of documents no longer in use can be collected.
* NOTE(review): relies on JDOM elements comparing by identity - confirm for the JDOM version in use.
*/
private final java.util.Map blankNodeIDs = new java.util.WeakHashMap();
/**
* Running number that keeps IDs created within the same millisecond apart.
*/
private int blankNodeCounter = 0;
/**
* Returns the bNode ID ("_"-prefixed) for an element.
* The same element always gets the same ID, so that all children referring to one parent
* end up with the same bNode; different elements always get different IDs.
*
* @param parent the element to be represented by a bNode
* @return the bNode ID of the element
*/
private String createBlankNodeID(Element parent) {
    String id = (String) this.blankNodeIDs.get(parent);
    if(id == null) { // first request for this element
        id = "_" + parent.getName() + System.currentTimeMillis() + "n" + (this.blankNodeCounter++);
        this.blankNodeIDs.put(parent, id);
    }
    return id;
}
/**
* Adds the triple (s, p, o) to the RDF graph and, if dumping is switched on, prints it.
*
* @param s the subject
* @param p the predicate
* @param o the object
*/
private void addTriple(Resource s, Property p, RDFNode o){
    Statement stmt = this.rdfGraph.createStatement(s, p, o);
    this.rdfGraph.add(stmt);
    if(!this.doDumpXHTMLProcessing) return;
    System.out.println(" Added triple: " + stmt.toString());
}
/**
* Creates a reified statement for the triple (s, p, o) in the RDF graph, adds the statement
* obtained from it to the graph and, if dumping is switched on, prints the triple.
*
* @param s the subject
* @param p the predicate
* @param o the object
*/
private void addReifiedTriple(Resource s, Property p, RDFNode o){
    Statement stmt = this.rdfGraph.createStatement(s, p, o);
    ReifiedStatement reified = this.rdfGraph.createReifiedStatement(stmt);
    Statement underlying = reified.getStatement();
    this.rdfGraph.add(underlying);
    if(!this.doDumpXHTMLProcessing) return;
    System.out.println(" Added triple: " + stmt.toString());
}
/**
* Looks up the ID of an element: first the id attribute in the XML namespace (xml:id),
* then the id attribute without namespace.
*
* @param e the element to inspect
* @return the ID, or <code>null</code> if the element has neither attribute
*/
private String getXMLIDFromElement(Element e){
    String qualifiedID = e.getAttributeValue("id", Namespace.XML_NAMESPACE);
    if(qualifiedID != null) return qualifiedID;
    return e.getAttributeValue("id"); // fall back to the attribute without namespace
}
/**
* Looks up the language of an element: first the lang attribute in the XML namespace (xml:lang),
* then the lang attribute without namespace. Ancestors are not consulted.
*
* @param e the element to inspect
* @return the language, or <code>null</code> if the element has neither attribute
*/
private String getXMLLanguageFromElement(Element e){
    String qualifiedLang = e.getAttributeValue("lang", Namespace.XML_NAMESPACE);
    if(qualifiedLang != null) return qualifiedLang;
    return e.getAttributeValue("lang"); // fall back to the attribute without namespace
}
/**
* Looks up the base attribute in the XML namespace (xml:base) of an element.
*
* @param e the element to inspect
* @return the value of xml:base, or <code>null</code> if the element has no such attribute
*/
private String getXMLBaseFromElement(Element e){
    String xmlBase = e.getAttributeValue("base", Namespace.XML_NAMESPACE);
    return xmlBase;
}
/**
* Reads an attribute (without namespace) of an element and normalises its value:
* values recognised as CURIE go through the CURIE normalisation, all others through the
* QName normalisation. If dumping is switched on, a found attribute is printed.
*
* @param e the element to inspect
* @param aName the name of the attribute
* @return the normalised value, or the empty string if the element has no such attribute
*/
private String evaluateAttributeAtElement(Element e, String aName) {
    Attribute attr = e.getAttribute(aName);
    if(attr == null) return ""; // element has no attribute with the specified name

    String normalised;
    if(this.isCURIE(attr)) {
        normalised = this.normaliseCURIE(attr);
    }
    else {
        normalised = this.normaliseQName(attr);
    }
    if(this.doDumpXHTMLProcessing) System.out.println(" @" + aName + "=" + normalised);
    return normalised;
}
/**
* Tells whether an element is a meta or a link element (judged by its local name).
*
* @param e the element to check; may be <code>null</code> (e.g. the missing parent of a root element)
* @return <code>true</code> for a meta or link element, <code>false</code> otherwise and for <code>null</code>
*/
private boolean isMetaOrLink(Element e){
    if(e == null) return false; // no element, hence neither meta nor link
    String name = e.getName();
    return name.equals(RDFaExtractorCore.RDFa_element_meta) || name.equals(RDFaExtractorCore.RDFa_element_link);
}
/**
* Tells whether an attribute value is written as a CURIE, i.e. starts with "[".
*
* @param a the attribute to check
* @return <code>true</code> if the value starts with "["
*/
private boolean isCURIE(Attribute a){
    String value = a.getValue();
    return value.startsWith("[");
}
/**
* Visits every element of the document and registers its additional namespace declarations.
* A failing XPath evaluation is reported via a stack trace.
*
* @param doc the document whose elements are visited
*/
private void addNSofElements(Document doc){
    try {
        XPath allElementsPath = XPath.newInstance("//*");
        List allElements = allElementsPath.selectNodes(doc);
        for (Iterator it = allElements.iterator(); it.hasNext();) {
            this.addNSofSingleElement((Element) it.next());
        }
    }
    catch (JDOMException e) {
        e.printStackTrace();
    }
}
/**
* Registers the additional namespace declarations of one element: every prefix/URI pair
* is put into the global namespace map and set as a prefix on the RDF graph.
* A prefix declared again later replaces the earlier mapping.
*
* @param e the element whose additional namespaces are registered
*/
private void addNSofSingleElement(Element e) {
    for (Iterator it = e.getAdditionalNamespaces().iterator(); it.hasNext();) {
        Namespace declared = (Namespace) it.next();
        String prefix = declared.getPrefix();
        String uri = declared.getURI();
        this.nsMap.put(prefix, uri); // add to global NS map
        this.rdfGraph.setNsPrefix(prefix, uri); // add to RDF graph
        if(this.doDumpXHTMLProcessing) System.out.println(" added NS: " + prefix + " = "+ uri);
    }
}
/**
* Converts a CURIE of the form [PREFIX:LOCALPART] to its normal form.
* <ul>
* <li>A prefix starting with "_" denotes a bNode; the result is the prefix followed by the local part.</li>
* <li>A prefix known to the namespace map is replaced by its namespace URI.</li>
* <li>An unknown prefix is left unexpanded (PREFIX:LOCALPART), rather than producing a bogus URI.</li>
* <li>Without a ":" the text between the brackets is returned as is; a missing "]" is tolerated.</li>
* </ul>
* A value that is no CURIE is returned unchanged, except that a fragment ("#...") is appended to the base URI.
*
* @param currentA the attribute holding the value
* @return the normalised value
*/
private String normaliseCURIE(Attribute currentA) {
    String aVal = currentA.getValue();

    if(!aVal.startsWith("[")){ // no CURIE
        if(aVal.startsWith("#")) return this.getBaseURI() + aVal; // a fragment - append to base URI
        return aVal; // a URI already
    }

    // this is a CURIE: [NS:LOCALPART]
    int closing = aVal.indexOf("]");
    String inner = (closing < 0) ? aVal.substring(1) : aVal.substring(1, closing);
    int colon = inner.indexOf(":");
    if(colon < 0) return inner; // no prefix separator; nothing to expand

    String prefix = inner.substring(0, colon);
    String localPart = inner.substring(colon + 1);
    if(prefix.startsWith("_")){ // bNode: keep the marker in front of the local part
        return prefix + localPart;
    }
    Object nsURI = this.nsMap.get(prefix); // try to look up namespace
    if(nsURI == null) return inner; // unknown prefix: leave unexpanded
    return nsURI + localPart;
}
/**
* Normalises an attribute value that is not written as a CURIE.
* <ul>
* <li>A value of the form PREFIX:LOCALPART (no "/", no whitespace) whose prefix is known to the
* namespace map is expanded to namespace URI + local part.</li>
* <li>A value starting with "_" is taken as a bNode reference ("_:name"); its first two characters are removed.</li>
* <li>Everything else (absolute URIs, schemes such as mailto:, unknown prefixes) is returned unchanged.</li>
* </ul>
* NOTE(review): unlike the CURIE normalisation, the bNode case drops the "_" marker, so the
* result is later treated as a URI - confirm that this is intended.
*
* @param currentA the attribute holding the value
* @return the normalised value
*/
private String normaliseQName(Attribute currentA){
    String aVal = currentA.getValue();

    if(aVal.startsWith("_")){ // a bNode
        return (aVal.length() >= 2) ? aVal.substring(2) : "";
    }

    int colon = aVal.indexOf(":");
    boolean looksLikeQName = colon > 0
        && aVal.indexOf("/") < 0
        && aVal.indexOf(" ") < 0;
    if(looksLikeQName){ // a QName to expand, provided the prefix has been declared
        Object nsURI = this.nsMap.get(aVal.substring(0, colon));
        if(nsURI != null) return nsURI + aVal.substring(colon + 1);
    }
    return aVal; // a URI already
}
/**
* Sets the base URI.
*
* @param aBaseURI the new base URI
*/
private void setBaseURI(String aBaseURI) {
    baseURI = aBaseURI;
}
/**
* Returns the base URI.
*
* @return the current base URI; the empty string until a document has been parsed successfully
*/
public String getBaseURI() {
    return baseURI;
}
/**
* Sets the input document and resets the state left over from a previous document:
* a non-empty RDF graph is emptied and a non-empty namespace map is cleared.
*
* @param inputDocumentURI the URI of the document to process next
*/
public void setInputDocument(URI inputDocumentURI) {
    inputDocURI = inputDocumentURI;
    boolean graphHasTriples = !rdfGraph.isEmpty();
    if(graphHasTriples) {
        rdfGraph.removeAll();
    }
    boolean mapHasEntries = !nsMap.isEmpty();
    if(mapHasEntries) {
        nsMap.clear();
    }
}
/**
* Returns the URI of the input document.
*
* @return the URI of the current input document, or <code>null</code> if none has been set
*/
public URI getInputDocument() {
    return inputDocURI;
}
/**
* Returns the RDF graph.
*
* @return the RDF graph, or <code>null</code> if it is empty
*/
public Model getRDFGraph(){
    return this.rdfGraph.isEmpty() ? null : this.rdfGraph;
}
/**
* Serializes the RDF graph into a string.
*
* @param outFormat the serialization format passed to the graph's write operation
* @return the serialized graph
*/
public String getRDFGraphDump(String outFormat){
    StringWriter buffer = new StringWriter();
    this.rdfGraph.write(buffer, outFormat);
    String dump = buffer.toString();
    return dump;
}
/**
* Test driver: processes the numbered local test case documents in the configured range
* and prints the resulting graphs as N-TRIPLE.
*
* @param args not evaluated
*/
public static void main(String[] args) {
    // web HTML input test files
    String webBaseDir = "http://dev.torrez.us/public/2006/rdfa/tests/";
    // Status of the test cases (N.htm):
    //  1: mixed testcase; not so sure if correct (@role)?
    //  2: property, about, element-content :: PASSED
    //  3: container, property, about, element-content :: PASSED
    //  4: bNode container, property, about, element-content :: PASSED
    //  5: xml:base container, property + rel :: PASSED
    //  6: CURIE with namespace :: PASSED
    //  7: rel, and rev :: PASSED
    //  8: rel, rev, and property + about, and content :: PASSED
    //  9: xml:base container, rdf:type, href, etc. :: PASSED
    // 10: CURIE, empty about :: PASSED
    // 11: rel, and about with mailto: (no namespace) :: PASSED
    // 12: rel+rev, and about with mailto: (no namespace) :: PASSED
    // 13: XMLLiteral :: children content handling OPEN ISSUE
    // 14: xml:lang without @datatype directly at element :: PASSED
    // 15: xml:lang at ancestor :: OPEN ISSUE
    // 16: datatype=xsd:int :: PASSED
    // 17: link, meta, bNode (with rel and property) :: PASSED
    // 18: datatype=plaintext :: PASSED
    // 19: bNode references (output same as 17) :: PASSED
    // 20: bNode references S/P mutual :: PASSED
    // 21: rel with global about :: PASSED
    // 22: mixed with CURIE, URI refs, etc. :: OPEN ISSUE
    // 23: meta in link (reification) :: OPEN ISSUE

    // local HTML input test files
    String baseDir = "file://C:/project/MMSEM-XG/sandbox/RDFaXtractor/workspace/RDFaX/testcases/";

    // range of test cases to run (both ends inclusive)
    int startDocNum = 23;
    int endDocNum = 23;

    RDFaExtractorCore rdfx = new RDFaExtractorCore(false);
    // the list of testcases (input document URIs)
    Vector inputDocURIList = new Vector();
    try {
        int docNum = startDocNum;
        while (docNum <= endDocNum) {
            inputDocURIList.add(new URI(baseDir + docNum + ".htm"));
            docNum++;
        }
        rdfx.processMultiple(inputDocURIList, "N-TRIPLE", true);
    }
    catch (URISyntaxException e) {
        e.printStackTrace();
    }
}
}