Commit e22f8b89 authored by jhammen

define attribute tags, set MAX_ARTICLES to unlimited

parent 557abbfa
......@@ -23,10 +23,12 @@ import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import javax.xml.bind.JAXBException;
import javax.xml.stream.XMLOutputFactory;
......@@ -65,13 +67,14 @@ import de.fau.cs.osr.ptk.common.jxpath.AstNodePointerFactory;
public class Wiktionary2XML implements ExpansionCallback {
private static final int MAX_ARTICLES = 14095; // Integer.MAX_VALUE;
private static final int MAX_ARTICLES = Integer.MAX_VALUE;
private final WikiXMLReader wikiXMLReader;
private final WikiConfig wikiConfig;
private final WtEngineImpl engine;
private final Map<String, FullPage> templateMap = new HashMap<String, FullPage>();
private final List<String> pagePrefix2Ignore;
private final Set<String> attributeTags;
private XMLStreamWriter xmlWriter;
private String singleArticleTitle = null;
......@@ -89,6 +92,8 @@ public class Wiktionary2XML implements ExpansionCallback {
pagePrefix2Ignore.add("Index:");
pagePrefix2Ignore.add("Template:");
pagePrefix2Ignore.add("Wiktionary:");
attributeTags = new HashSet<String>();
attributeTags.add("root");
}
public String fileUrl(PageTitle pageTitle, int width, int height) {
......@@ -400,13 +405,15 @@ public class Wiktionary2XML implements ExpansionCallback {
}
else if (child instanceof WtXmlElement) {
WtXmlElement element = (WtXmlElement) child;
String name = element.getName();
String existing = attributeMap.get(name);
String tagContent = renderText(element.getBody());
if(existing == null) {
attributeMap.put(name, tagContent);
} else {
attributeMap.put(name, existing + "," + tagContent);
String name = element.getName();
if(attributeTags.contains(name)) {
String existing = attributeMap.get(name);
if(existing == null) {
attributeMap.put(name, tagContent);
} else {
attributeMap.put(name, existing + "," + tagContent);
}
}
ret.append(tagContent);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment