16 |
16 |
*/
|
17 |
17 |
public class XTZCompilerStep extends Step {
|
18 |
18 |
|
19 |
|
// ---- string constants shared by the step ----
// ID is compared against attribute names in process(); QUOTE closes the
// attribute values written to the output.
static String FORM = "form"
static String ANA = "ana"
static String ID = "id"
static String TYPE = "type"
static String TAB = "\t"
static String QUOTE = "\""

// ---- files handled by this step ----
// source XML file, opened by the constructor
File xmlFile
// target file, handed to createOutput() by process()
File cqpFile

// ---- identification values received by the constructor ----
String textname
String corpusname
String projectname

// ---- normalization switches (see the matching setters) ----
boolean normalizeAttributeValues = false
boolean normalizeAnaValues = true
boolean normalizeFormValues = true

// ---- reading and writing state ----
// raw stream opened on xmlFile
def inputData
XMLInputFactory factory
// StAX reader created over inputData
XMLStreamReader parser
// writer created by createOutput()
OutputStreamWriter output

// ---- "ana" bookkeeping ----
def anavalues = new LinkedHashMap()
// received by the constructor
def anatypes

// word tag; "w" unless the constructor receives another value
String WTAG = "w"
|
42 |
42 |
|
43 |
43 |
/**
 * Switches the trimming of XML attribute values on or off.
 * When enabled, process() trims each attribute value before writing it.
 *
 * @param n true to trim attribute values, false to write them unchanged
 */
public void setNormalizeAttributeValues(boolean n) {
	normalizeAttributeValues = n
}
|
46 |
46 |
|
47 |
47 |
/**
 * Switches the trimming of "ana" text on or off.
 * When enabled, process() trims each text chunk before appending it to
 * the current ana value; otherwise the chunk is appended unchanged.
 *
 * @param n true to trim ana text, false to keep it as read
 */
public void setNormalizeAnaValues(boolean n) {
	normalizeAnaValues = n
}
|
50 |
50 |
|
51 |
51 |
/**
 * Stores the form normalization switch.
 * NOTE(review): presumably controls trimming of the word form text in
 * process(), like the ana switch - the usage is not visible here, confirm.
 *
 * @param n the new value of the normalizeFormValues flag
 */
public void setNormalizeFormValues(boolean n) {
	normalizeFormValues = n
}
|
54 |
54 |
|
55 |
55 |
/**
 * Stores the step parameters and opens a StAX stream reader on the XML file.
 *
 * A failure while opening the file or creating the reader is reported on
 * System.err and not rethrown; in that case the parser field stays null
 * and the input stream, if it was opened, is closed again.
 *
 * @param xmlFile the XML file to read
 * @param cqpFile the file process() passes to createOutput()
 * @param textname stored in the textname field
 * @param corpusname stored in the corpusname field
 * @param projectname stored in the projectname field
 * @param anatypes stored in the anatypes field
 * @param wtag replaces the default value of WTAG
 */
public XTZCompilerStep(File xmlFile, File cqpFile, String textname, String corpusname, String projectname, def anatypes, def wtag) {
	this.xmlFile = xmlFile
	this.cqpFile = cqpFile
	this.textname = textname
	this.corpusname = corpusname
	this.projectname = projectname
	this.anatypes = anatypes
	this.WTAG = wtag

	try {
		inputData = xmlFile.toURI().toURL().openStream()
		factory = XMLInputFactory.newInstance()
		parser = factory.createXMLStreamReader(inputData)
	} catch (Exception ex) {
		System.err.println("Exception while parsing $xmlFile : "+ex)

		// No reader exists to consume the stream, so release the file
		// handle here instead of leaking it.
		if (inputData != null) {
			try {
				inputData.close()
			} catch (IOException ignored) {
				// best effort: the original failure is already reported
			}
		}
	}
}
|
72 |
72 |
|
... | ... | |
79 |
79 |
*/
|
80 |
80 |
private boolean createOutput(File f) {
	try {
		// build the writer layer by layer: file -> buffer -> UTF-8 encoder
		def fileStream = new FileOutputStream(f)
		def bufferedStream = new BufferedOutputStream(fileStream)
		output = new OutputStreamWriter(bufferedStream, "UTF-8")
	} catch (Exception e) {
		// report the problem and signal the failure to the caller
		System.err.println(e)
		return false
	}
	return true
}
|
89 |
89 |
|
... | ... | |
94 |
94 |
* @param fileName the file name
|
95 |
95 |
* @return true, if successful
|
96 |
96 |
*/
|
97 |
|
public boolean process()
|
98 |
|
{
|
|
97 |
public boolean process() {
|
99 |
98 |
if (!createOutput(cqpFile)) {
|
100 |
|
return false;
|
|
99 |
return false
|
101 |
100 |
}
|
102 |
101 |
|
103 |
|
String headvalue=""
|
104 |
|
String vAna = "";
|
105 |
|
String vForm = "";
|
106 |
|
String wordid= "";
|
107 |
|
String vHead = "";
|
|
102 |
String headvalue = ""
|
|
103 |
String vAna = ""
|
|
104 |
String vForm = ""
|
|
105 |
String wordid= ""
|
|
106 |
String vHead = ""
|
108 |
107 |
|
109 |
108 |
int p_id = 0;
|
110 |
109 |
int s_id = 0;
|
... | ... | |
125 |
124 |
int nWords = 0;
|
126 |
125 |
try {
|
127 |
126 |
String localname;
|
128 |
|
for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next())
|
129 |
|
{
|
|
127 |
for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
|
130 |
128 |
switch (event) {
|
131 |
129 |
case XMLStreamConstants.START_ELEMENT:
|
132 |
130 |
localname = parser.getLocalName().toLowerCase();
|
... | ... | |
139 |
137 |
String attrname = parser.getAttributeLocalName(i);
|
140 |
138 |
String attrvalue = parser.getAttributeValue(i)
|
141 |
139 |
|
142 |
|
if (normalizeAttributeValues)
|
|
140 |
if (normalizeAttributeValues) {
|
143 |
141 |
attrvalue = attrvalue.trim();
|
144 |
|
|
145 |
|
if (attrname != ID)
|
146 |
|
output.write(" "+attrname.toLowerCase()+"=\""+attrvalue+QUOTE)
|
|
142 |
}
|
|
143 |
if (attrname != ID) {
|
|
144 |
output.write(" "+attrname.toLowerCase()+"=\""+attrvalue.replace("\"", """)+QUOTE)
|
|
145 |
}
|
147 |
146 |
}
|
148 |
147 |
output.write(">\n");
|
149 |
148 |
|
... | ... | |
180 |
179 |
default:
|
181 |
180 |
if (!foundtei || !foundtext) break;
|
182 |
181 |
|
183 |
|
output.write("<"+localname);
|
|
182 |
output.write("<"+localname)
|
184 |
183 |
|
185 |
184 |
for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
|
186 |
|
String attrname = parser.getAttributeLocalName(i);
|
|
185 |
String attrname = parser.getAttributeLocalName(i)
|
187 |
186 |
|
188 |
187 |
String attrvalue = parser.getAttributeValue(i)
|
189 |
|
if (normalizeAttributeValues)
|
190 |
|
attrvalue = attrvalue.trim();
|
191 |
|
|
192 |
|
output.write(" "+attrname.toLowerCase()+"=\""+attrvalue+QUOTE)
|
|
188 |
if (normalizeAttributeValues) {
|
|
189 |
attrvalue = attrvalue.trim()
|
|
190 |
}
|
|
191 |
output.write(" "+attrname.toLowerCase()+"=\""+attrvalue.replace("\"", """)+QUOTE)
|
193 |
192 |
}
|
194 |
193 |
if (parser.getAttributeCount() == 0) { // add the n attribute
|
195 |
|
if (!ncounts.containsKey(localname)) ncounts.put(localname, 0);
|
196 |
|
int ncount = ncounts.get(localname);
|
197 |
|
ncounts.put(localname, ncount+1);
|
|
194 |
if (!ncounts.containsKey(localname)) ncounts.put(localname, 0)
|
|
195 |
int ncount = ncounts.get(localname)
|
|
196 |
ncounts.put(localname, ncount+1)
|
198 |
197 |
output.write(" n=\""+ncount+QUOTE)
|
199 |
198 |
}
|
200 |
|
output.write(">\n");
|
|
199 |
output.write(">\n")
|
201 |
200 |
}
|
202 |
201 |
break;
|
203 |
202 |
|
... | ... | |
251 |
250 |
}
|
252 |
251 |
}
|
253 |
252 |
if (flagAna) {
|
254 |
|
if (normalizeAnaValues)
|
|
253 |
if (normalizeAnaValues) {
|
255 |
254 |
anavalue += parser.getText().trim();
|
256 |
|
else
|
|
255 |
} else {
|
257 |
256 |
anavalue += parser.getText();
|
|
257 |
}
|
258 |
258 |
}
|
259 |
259 |
}
|
260 |
260 |
break;
|