Révision 2787
| tmp/org.txm.core/src/java/org/txm/scripts/importer/SAttributesListener.groovy (revision 2787) | ||
|---|---|---|
| 1 |
package org.txm.scripts.importer |
|
| 2 |
|
|
| 3 |
import java.util.ArrayList; |
|
| 4 |
import java.util.HashMap; |
|
| 5 |
|
|
| 6 |
import javax.xml.stream.*; |
|
| 7 |
|
|
| 8 |
import org.txm.utils.io.IOUtils |
|
| 9 |
|
|
| 10 |
/** |
|
| 11 |
* Read an XML file and find out : XML elements, their attributes and recursivity level |
|
| 12 |
* All element and attribute names are lowercased |
|
| 13 |
* |
|
| 14 |
* @author mdecorde |
|
| 15 |
* |
|
| 16 |
*/ |
|
| 17 |
class SAttributesListener {

	// Results are kept per instance. They used to be static while every constructor
	// reset them, so creating a second listener silently wiped the results of the first.
	// NOTE(review): the declared value type is ArrayList, but startElement() stores
	// HashSet instances; Groovy does not check generics at runtime, so this works for
	// dynamic callers only.

	/** Attribute names found for each structure (element) name. */
	private HashMap<String,ArrayList<String>> structs = new HashMap<String, ArrayList<String>>();
	/** Number of currently open elements for each structure name. */
	private HashMap<String, Integer> structsCountProf = new HashMap<String, Integer>();
	/** Highest number of simultaneously open elements seen for each structure name. */
	private HashMap<String, Integer> structsMaxProf = new HashMap<String, Integer>();
	/** Path of the currently open structures; always starts and ends with "/". */
	private String structPath = "/";
	/** Reader queried by startElement() for the attributes of the current element. */
	private XMLStreamReader parser;

	/** Creates a listener with empty results and no parser. */
	SAttributesListener() {
	}

	/**
	 * Creates a listener with empty results.
	 *
	 * @param parser the reader queried for the attributes of each started element
	 */
	SAttributesListener(XMLStreamReader parser) {
		this();
		this.parser = parser;
	}

	/**
	 * Makes this listener share the result maps of another listener.
	 * Despite its name, nothing is copied into 'another': this listener drops its own
	 * maps and uses the ones of 'another' from now on.
	 *
	 * @param another the listener whose result maps are adopted
	 */
	public void appendResultsTo(SAttributesListener another) {
		// '.@' reads the fields directly so that the getStructs() side effects are not triggered
		structs = another.@structs;
		structsCountProf = another.@structsCountProf;
		structsMaxProf = another.@structsMaxProf;
	}

	/**
	 * Sets the reader queried for element attributes.
	 *
	 * @param parser the reader to use
	 */
	public void start(def parser) {
		this.parser = parser;
	}

	// Element names that are not reported as structures
	def W = "w";
	def ANA = "ana";
	def FORM = "form";

	/**
	 * Call this method for each START_ELEMENT stax event
	 * @param localname the element localname
	 */
	public void startElement(String localname) {
		// Locale.ROOT keeps the result independent from the platform locale
		localname = localname.toLowerCase(Locale.ROOT);

		if (localname.equals(W)) return;
		if (localname.equals(ANA)) return;
		if (localname.equals(FORM)) return;

		structPath += localname + "/"

		def attrs = structs.get(localname)
		if (attrs == null) {
			attrs = new HashSet();
			structs.put(localname, attrs);
		}

		// update the current and the maximum nesting level of this structure
		Integer current = structsCountProf.get(localname)
		int prof = (current == null ? 0 : current) + 1
		structsCountProf.put(localname, prof)
		Integer max = structsMaxProf.get(localname)
		if (max == null || max < prof) {
			structsMaxProf.put(localname, prof)
		}

		// collect the attribute names of the element the parser is positioned on
		if (parser != null) {
			for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
				attrs << parser.getAttributeLocalName(i).toLowerCase(Locale.ROOT);
			}
		}
	}

	/**
	 * Call this method for each END_ELEMENT stax event
	 * @param localname the element localname
	 */
	public void endElement(String localname) {
		localname = localname.toLowerCase(Locale.ROOT);

		if (localname.equals(W)) return;
		if (localname.equals(ANA)) return;
		if (localname.equals(FORM)) return;

		// "/" means that no structure is open: nothing to close
		if (structPath.length() <= 1) return;

		// Remove the whole last segment ("name/"). Only the trailing "/" used to be
		// removed, so the path kept growing with every element of the corpus.
		int idx = structPath.lastIndexOf("/", structPath.length() - 2)
		structPath = structPath.substring(0, idx + 1)

		// an element that was never started has no counter; never go below zero
		Integer count = structsCountProf.get(localname)
		if (count != null && count > 0) {
			structsCountProf.put(localname, count - 1)
		}
	}

	boolean firstGetStructs = true;

	/**
	 * Returns the structures found so far with their attribute names.
	 * The returned map is the internal one, not a copy. On the first call its keys
	 * are rewritten in lowercase.
	 *
	 * @return the attribute names of each structure
	 */
	public HashMap<String,ArrayList<String>> getStructs() {
		// NOTE(review): this tests the number of currently open "div" elements, which is
		// back to 0 once a document has been entirely read; structsMaxProf may have been
		// intended -- confirm before changing.
		Integer divDepth = structsCountProf.get("div")
		if (divDepth != null) {
			for (int level = 1 ; level <= 6 ; level++) {
				if (divDepth >= level) structs.remove("div" + level)
			}
		}

		if (firstGetStructs) {
			firstGetStructs = false;
			// iterate over a copy of the keys since the map is modified in the loop
			for (String key : new ArrayList<String>(structs.keySet())) {
				def value = structs.get(key);
				structs.remove(key)
				structs.put(key.toLowerCase(Locale.ROOT), value);
			}
		}

		return structs;
	}

	boolean firstGetstructsCountProf = true;

	/**
	 * Returns, for each structure, its maximum nesting level minus one
	 * (0 when the structure was never nested in itself).
	 *
	 * @return a new map, independent from the internal counters
	 */
	public HashMap<String, Integer> getProfs() {
		HashMap<String, Integer> clone = new HashMap<String, Integer>();
		for (String key : structsMaxProf.keySet()) {
			Integer max = structsMaxProf.get(key)
			clone.put(key, (max != null && max > 0) ? max - 1 : 0)
		}
		return clone;
	}

	/**
	 * Scans a file with a new listener.
	 *
	 * @param xmlFile the XML file to read
	 * @return the listener holding the results
	 */
	public static SAttributesListener scanFile(File xmlFile) {
		return scanFile(xmlFile, null)
	}

	/**
	 * Sets the reader queried for element attributes.
	 *
	 * @param parser the reader to use
	 */
	public void setParser(def parser) {
		this.parser = parser;
	}

	/**
	 * Reads an XML file and reports to a listener every element found from a "text"
	 * start tag to the matching end tag.
	 *
	 * @param xmlFile the XML file to read
	 * @param parentListener results are appended to the parentListener if any
	 * @return parentListener if it is not null, a new listener otherwise
	 */
	public static SAttributesListener scanFile(File xmlFile, SAttributesListener parentListener) {
		final String TEXT = "text";
		boolean insideText = false;

		def inputData = xmlFile.toURI().toURL().openStream();
		def parser = null;
		SAttributesListener listener;
		try {
			parser = XMLInputFactory.newInstance().createXMLStreamReader(inputData);

			if (parentListener != null) {
				listener = parentListener;
				listener.setParser(parser)
			} else {
				listener = new SAttributesListener(parser);
			}

			for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
				if (event == XMLStreamConstants.START_ELEMENT) {
					if (TEXT.equals(parser.getLocalName())) insideText = true;
					if (insideText) listener.startElement(parser.getLocalName())
				} else if (event == XMLStreamConstants.END_ELEMENT) {
					if (insideText) listener.endElement(parser.getLocalName())
					if (TEXT.equals(parser.getLocalName())) insideText = false;
				}
			}
		} finally {
			// release the reader and the stream even when the parsing fails
			try {
				if (parser != null) parser.close();
			} finally {
				inputData.close();
			}
		}

		return listener
	}

	/**
	 * Scans the non hidden "*.xml" files found directly in a directory and gathers
	 * the results in a single listener.
	 *
	 * @param xmlDirectory the directory containing the XML files
	 * @param wordTag the name of the word element, which is not reported as a structure
	 * @return the listener holding the results of all the files
	 * @throws IOException if the content of xmlDirectory cannot be listed
	 */
	public static SAttributesListener scanFiles(File xmlDirectory, String wordTag) {
		SAttributesListener listener = new SAttributesListener()
		listener.W = wordTag

		// listFiles() returns null when the path is not a directory or cannot be read
		def xmlFiles = xmlDirectory.listFiles(IOUtils.HIDDENFILE_FILTER)
		if (xmlFiles == null) {
			throw new IOException("Cannot list the files of " + xmlDirectory)
		}

		for (File xmlFile : xmlFiles) {
			if (xmlFile.isFile() && !xmlFile.isHidden() && xmlFile.getName().toLowerCase().endsWith(".xml")) {
				scanFile(xmlFile, listener); // results saved in 'listener' data
			}
		}

		return listener;
	}
}
|
| tmp/org.txm.core/src/java/org/txm/core/results/TXMResult.java (revision 2787) | ||
|---|---|---|
| 1947 | 1947 |
* Gets a message indicating the start of computing. |
| 1948 | 1948 |
* Dedicated to indicate the start of computing and, for example, the parameters used for the computing. |
| 1949 | 1949 |
* |
| 1950 |
* Warning: at this point it is not known whether all parameters are set, so implementations must guard against null values to avoid a NullPointerException |
|
| 1951 |
* |
|
| 1950 | 1952 |
* @return a message indicating the start of computing |
| 1951 | 1953 |
*/ |
| 1952 |
// FIXME: SJ: this method must become abstract in TXM0.8.1 and implemented by subclasses |
|
| 1954 |
// FIXME: SJ: this method must become abstract in TXM 0.8.1 and implemented by subclasses
|
|
| 1953 | 1955 |
public String getComputingStartMessage() {
|
| 1954 | 1956 |
return NLS.bind("Computing {0}...", this.getClass().getSimpleName());
|
| 1955 | 1957 |
} |
| tmp/org.txm.core/src/java/org/txm/importer/SAttributesListener.java (revision 2787) | ||
|---|---|---|
| 1 |
package org.txm.importer; |
|
| 2 |
|
|
| 3 |
import java.io.File; |
|
| 4 |
import java.io.IOException; |
|
| 5 |
import java.io.InputStream; |
|
| 6 |
import java.net.MalformedURLException; |
|
| 7 |
import java.util.ArrayList; |
|
| 8 |
import java.util.HashMap; |
|
| 9 |
import java.util.HashSet; |
|
| 10 |
|
|
| 11 |
import javax.xml.stream.*; |
|
| 12 |
|
|
| 13 |
import org.txm.utils.io.IOUtils; |
|
| 14 |
|
|
| 15 |
/** |
|
| 16 |
* Read an XML file and find out : XML elements, their attributes and recursivity level |
|
| 17 |
* All element and attribute names are lowercased |
|
| 18 |
* |
|
| 19 |
* @author mdecorde |
|
| 20 |
* |
|
| 21 |
*/ |
|
| 22 |
public class SAttributesListener {
|
|
| 23 |
|
|
| 24 |
public HashMap<String,HashSet<String>> structs = new HashMap<String, HashSet<String>>(); |
|
| 25 |
public HashSet<String> anatypes = new HashSet<String>(); |
|
| 26 |
public HashMap<String, Integer> structsCountProf = new HashMap<String, Integer>(); |
|
| 27 |
public HashMap<String, Integer> structsMaxProf = new HashMap<String, Integer>(); |
|
| 28 |
private String structPath = "/"; |
|
| 29 |
private XMLStreamReader parser; |
|
| 30 |
|
|
| 31 |
SAttributesListener() {
|
|
| 32 |
structs = new HashMap<String, HashSet<String>>(); |
|
| 33 |
structsCountProf = new HashMap<String, Integer>(); |
|
| 34 |
structsMaxProf = new HashMap<String, Integer>(); |
|
| 35 |
structPath = "/"; |
|
| 36 |
anatypes = new HashSet<String>(); // store scanned word attributes |
|
| 37 |
} |
|
| 38 |
|
|
| 39 |
SAttributesListener(XMLStreamReader parser) {
|
|
| 40 |
this(); |
|
| 41 |
this.parser = parser; |
|
| 42 |
} |
|
| 43 |
|
|
| 44 |
public void appendResultsTo(SAttributesListener another) {
|
|
| 45 |
structs = another.structs; |
|
| 46 |
structsCountProf = another.structsCountProf; |
|
| 47 |
structsMaxProf = another.structsMaxProf; |
|
| 48 |
anatypes = another.anatypes; // store scanned word attributes |
|
| 49 |
} |
|
| 50 |
|
|
| 51 |
public void start(XMLStreamReader parser) {
|
|
| 52 |
this.parser = parser; |
|
| 53 |
} |
|
| 54 |
|
|
| 55 |
String W = "w"; |
|
| 56 |
String ANA = "ana"; |
|
| 57 |
String FORM = "form"; |
|
| 58 |
/** |
|
| 59 |
* Call this method for each START_ELEMENT stax event |
|
| 60 |
* @param localname the element localname |
|
| 61 |
*/ |
|
| 62 |
public void startElement(String localname) {
|
|
| 63 |
localname = localname.toLowerCase(); |
|
| 64 |
|
|
| 65 |
//String localname = parser.getLocalName(); |
|
| 66 |
if(localname.equals(W)) return; |
|
| 67 |
if(localname.equals(ANA)) return; |
|
| 68 |
if(localname.equals(FORM)) return; |
|
| 69 |
|
|
| 70 |
structPath += localname+"/"; |
|
| 71 |
//println "add: "+structPath |
|
| 72 |
HashSet<String> attrs = structs.get(localname); |
|
| 73 |
if (!structs.containsKey(localname)) {
|
|
| 74 |
attrs = new HashSet<String>(); |
|
| 75 |
structs.put(localname, attrs); |
|
| 76 |
structsCountProf.put(localname, 0); |
|
| 77 |
structsMaxProf.put(localname, 0); |
|
| 78 |
} //else {
|
|
| 79 |
|
|
| 80 |
// get structure recursion |
|
| 81 |
int prof = structsCountProf.get(localname)+1; |
|
| 82 |
structsCountProf.put(localname, prof); |
|
| 83 |
if (structsMaxProf.get(localname) < prof) {
|
|
| 84 |
structsMaxProf.put(localname, prof); |
|
| 85 |
} |
|
| 86 |
|
|
| 87 |
// get the structure attributes |
|
| 88 |
for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
|
|
| 89 |
attrs.add(parser.getAttributeLocalName(i).toLowerCase()); |
|
| 90 |
} |
|
| 91 |
} |
|
| 92 |
|
|
| 93 |
/** |
|
| 94 |
* Call this method for each END_ELEMENT stax event |
|
| 95 |
* @param localname the element localname |
|
| 96 |
*/ |
|
| 97 |
public void endElement(String localname) {
|
|
| 98 |
localname = localname.toLowerCase(); |
|
| 99 |
//String localname = parser.getLocalName(); |
|
| 100 |
if(localname.equals(W)) return; |
|
| 101 |
if(localname.equals(ANA)) return; |
|
| 102 |
if(localname.equals(FORM)) return; |
|
| 103 |
|
|
| 104 |
if (structPath.length() > 1) {
|
|
| 105 |
int idx = structPath.lastIndexOf("/");
|
|
| 106 |
if (idx > 0) {
|
|
| 107 |
structPath = structPath.substring(0, idx); |
|
| 108 |
//println "end of $localname "+(structsCountProf.get(localname)) |
|
| 109 |
//if (structsCountProf.get(localname) != null) |
|
| 110 |
structsCountProf.put(localname, structsCountProf.get(localname)-1); |
|
| 111 |
} |
|
| 112 |
//println "pop: "+structPath |
|
| 113 |
} |
|
| 114 |
} |
|
| 115 |
|
|
| 116 |
// boolean firstGetStructs = true; |
|
| 117 |
public HashMap<String,HashSet<String>> getStructs() {
|
|
| 118 |
if (structsCountProf.containsKey("div")) {
|
|
| 119 |
if (structsCountProf.get("div") > 0)
|
|
| 120 |
structs.remove("div1");
|
|
| 121 |
if (structsCountProf.get("div") > 1)
|
|
| 122 |
structs.remove("div2");
|
|
| 123 |
if (structsCountProf.get("div") > 2)
|
|
| 124 |
structs.remove("div3");
|
|
| 125 |
if (structsCountProf.get("div") > 3)
|
|
| 126 |
structs.remove("div4");
|
|
| 127 |
if (structsCountProf.get("div") > 4)
|
|
| 128 |
structs.remove("div5");
|
|
| 129 |
if (structsCountProf.get("div") > 5)
|
|
| 130 |
structs.remove("div6");
|
|
| 131 |
} |
|
| 132 |
// if (firstGetStructs) {
|
|
| 133 |
// firstGetStructs = false; |
|
| 134 |
// fix min&maj names for CQP |
|
| 135 |
ArrayList<String> keys = new ArrayList<String>(); |
|
| 136 |
keys.addAll(structs.keySet()); |
|
| 137 |
for (String key : keys) {
|
|
| 138 |
HashSet<String> value = structs.get(key); |
|
| 139 |
structs.remove(key); |
|
| 140 |
structs.put(key.toLowerCase(), value); |
|
| 141 |
} |
|
| 142 |
// } |
|
| 143 |
|
|
| 144 |
return structs; |
|
| 145 |
} |
|
| 146 |
|
|
| 147 |
boolean firstGetstructsCountProf = true; |
|
| 148 |
public HashMap<String, Integer> getProfs() {
|
|
| 149 |
|
|
| 150 |
// if (firstGetstructsCountProf) {
|
|
| 151 |
// firstGetstructsCountProf = false; |
|
| 152 |
// def keys = [] |
|
| 153 |
// keys.addAll(structsCountProf.keySet()); |
|
| 154 |
// for( String key : keys) {
|
|
| 155 |
// def value = structsCountProf.get(key); |
|
| 156 |
// structsCountProf.remove(key) |
|
| 157 |
// structsCountProf.put(key.toLowerCase(), value); |
|
| 158 |
// } |
|
| 159 |
// } |
|
| 160 |
HashMap<String, Integer> clone = new HashMap<String, Integer>(); |
|
| 161 |
for (String key : structsMaxProf.keySet()) {
|
|
| 162 |
if (structsMaxProf.get(key) > 0) |
|
| 163 |
clone.put(key, structsMaxProf.get(key)-1); |
|
| 164 |
else |
|
| 165 |
clone.put(key, 0); |
|
| 166 |
} |
|
| 167 |
return clone; |
|
| 168 |
} |
|
| 169 |
|
|
| 170 |
public void initialize(ArrayList<String> pattributes, HashMap<String, HashSet<String>> sAttributesMap, HashMap<String, Integer> sAttributesProfs) {
|
|
| 171 |
this.anatypes.addAll(pattributes); |
|
| 172 |
for (String s : sAttributesMap.keySet()) {
|
|
| 173 |
this.structsMaxProf.put(s, sAttributesProfs.get(s)); |
|
| 174 |
this.structsCountProf.put(s, 0); |
|
| 175 |
this.structs.put(s, sAttributesMap.get(s)); |
|
| 176 |
} |
|
| 177 |
} |
|
| 178 |
|
|
| 179 |
public HashSet<String> getAnatypes() {
|
|
| 180 |
return anatypes; |
|
| 181 |
} |
|
| 182 |
|
|
| 183 |
// public SAttributesListener scanFile(File xmlFile) throws MalformedURLException, IOException, XMLStreamException {
|
|
| 184 |
// return scanFile(xmlFile, this); |
|
| 185 |
// } |
|
| 186 |
|
|
| 187 |
public void setParser(XMLStreamReader parser) {
|
|
| 188 |
this.parser = parser; |
|
| 189 |
} |
|
| 190 |
|
|
| 191 |
/** |
|
| 192 |
* Merge results in the parentListener |
|
| 193 |
* |
|
| 194 |
* @param xmlFile |
|
| 195 |
* @param parentListener results are appended to the parentListener if any |
|
| 196 |
* @return |
|
| 197 |
* @throws IOException |
|
| 198 |
* @throws MalformedURLException |
|
| 199 |
* @throws XMLStreamException |
|
| 200 |
*/ |
|
| 201 |
public SAttributesListener scanFile(File xmlFile) throws MalformedURLException, IOException, XMLStreamException {
|
|
| 202 |
|
|
| 203 |
boolean startText = false; |
|
| 204 |
boolean startWord = false; |
|
| 205 |
InputStream inputData = xmlFile.toURI().toURL().openStream(); |
|
| 206 |
XMLInputFactory factory = XMLInputFactory.newInstance(); |
|
| 207 |
XMLStreamReader parser = factory.createXMLStreamReader(inputData); |
|
| 208 |
|
|
| 209 |
// SAttributesListener listener; |
|
| 210 |
// if (parentListener != null) {
|
|
| 211 |
// listener = parentListener; |
|
| 212 |
// listener.setParser(parser); |
|
| 213 |
// } else {
|
|
| 214 |
// listener = new SAttributesListener(parser); |
|
| 215 |
// } |
|
| 216 |
String TEXT = "text"; |
|
| 217 |
String ANA = "ana"; |
|
| 218 |
String TYPE = "type"; |
|
| 219 |
//HashSet<String> types = new HashSet<String>(); |
|
| 220 |
this.setParser(parser); |
|
| 221 |
for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
|
|
| 222 |
if (event == XMLStreamConstants.START_ELEMENT) { // start elem
|
|
| 223 |
if (TEXT.equals(parser.getLocalName())) startText = true; |
|
| 224 |
|
|
| 225 |
if (startText) this.startElement(parser.getLocalName()); |
|
| 226 |
|
|
| 227 |
if (this.W.equals(parser.getLocalName())) {
|
|
| 228 |
startWord = true; |
|
| 229 |
} else if (startWord && ANA.equals(parser.getLocalName())) { // ana elem
|
|
| 230 |
for (int i = 0 ; i < parser.getAttributeCount(); i++) { // find @type
|
|
| 231 |
if (TYPE.equals(parser.getAttributeLocalName(i))) { // @type
|
|
| 232 |
this.anatypes.add(parser.getAttributeValue(i).substring(1)); //remove the # |
|
| 233 |
break; |
|
| 234 |
} |
|
| 235 |
} |
|
| 236 |
} |
|
| 237 |
} else if (event == XMLStreamConstants.END_ELEMENT) { // end elem
|
|
| 238 |
if (startText) this.endElement(parser.getLocalName()); |
|
| 239 |
if (TEXT.equals(parser.getLocalName())) startText = false; |
|
| 240 |
|
|
| 241 |
if (this.W.equals(parser.getLocalName())) {
|
|
| 242 |
startWord = false; |
|
| 243 |
} |
|
| 244 |
} |
|
| 245 |
} |
|
| 246 |
if (parser != null) parser.close(); |
|
| 247 |
if (inputData != null) inputData.close(); |
|
| 248 |
|
|
| 249 |
return this; |
|
| 250 |
} |
|
| 251 |
|
|
| 252 |
/** |
|
| 253 |
* scan the XML files of a directory to list the structures with their properties and levels. Also list the word properties |
|
| 254 |
* @param xmlDirectory |
|
| 255 |
* @param wordTag |
|
| 256 |
* @return |
|
| 257 |
* @throws XMLStreamException |
|
| 258 |
* @throws IOException |
|
| 259 |
* @throws MalformedURLException |
|
| 260 |
*/ |
|
| 261 |
public static SAttributesListener scanFiles(File xmlDirectory, String wordTag) throws MalformedURLException, IOException, XMLStreamException {
|
|
| 262 |
SAttributesListener listener = new SAttributesListener(); |
|
| 263 |
listener.W = wordTag; |
|
| 264 |
for (File xmlFile : xmlDirectory.listFiles(IOUtils.HIDDENFILE_FILTER)) {
|
|
| 265 |
if (xmlFile.isFile() && !xmlFile.isHidden() && xmlFile.getName().toLowerCase().endsWith(".xml")) {
|
|
| 266 |
listener.scanFile(xmlFile); // results saved in 'listener' data |
|
| 267 |
// println "LISTENER RESULT with ${xmlFile.getName()}: "+listener
|
|
| 268 |
// println " prof: "+listener.getStructs() |
|
| 269 |
// println " prof: "+listener.getProfs() |
|
| 270 |
// println " path: "+listener.structPath |
|
| 271 |
} |
|
| 272 |
} |
|
| 273 |
|
|
| 274 |
return listener; |
|
| 275 |
} |
|
| 276 |
} |
|
| 0 | 277 | |
| tmp/org.txm.core/src/java/org/txm/importer/xtz/Compiler.java (revision 2787) | ||
|---|---|---|
| 28 | 28 |
inputDirectory = new File(module.getBinaryDirectory(), "txm/"+module.getCorpusName()); |
| 29 | 29 |
cqpDirectory = new File(module.getBinaryDirectory(), "cqp"); |
| 30 | 30 |
outputDirectory = new File(module.getBinaryDirectory(), "data"); |
| 31 |
registryDirectory = new File(module.getBinaryDirectory(), "registry"); |
|
| 32 | 31 |
dataDirectory = new File(outputDirectory, module.getCorpusName()); |
| 32 |
registryDirectory = new File(module.getBinaryDirectory(), "registry"); |
|
| 33 | 33 |
|
| 34 | 34 |
DeleteDir.deleteDirectory(outputDirectory); |
| 35 | 35 |
outputDirectory.mkdirs(); |
| 36 |
|
|
| 37 |
DeleteDir.deleteDirectory(dataDirectory); |
|
| 38 | 36 |
dataDirectory.mkdirs(); |
| 39 | 37 |
|
| 40 | 38 |
DeleteDir.deleteDirectory(registryDirectory); |
| ... | ... | |
| 42 | 40 |
|
| 43 | 41 |
if (!module.isUpdatingCorpus()) {
|
| 44 | 42 |
DeleteDir.deleteDirectory(cqpDirectory); |
| 45 |
cqpDirectory.mkdir();
|
|
| 46 |
}
|
|
| 43 |
}
|
|
| 44 |
cqpDirectory.mkdir();
|
|
| 47 | 45 |
} |
| 48 | 46 |
|
| 49 | 47 |
@Override |
Formats disponibles : Unified diff