Révision 3288
TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/cqp/URSUnits2CQPStructMacro.groovy (revision 3288) | ||
---|---|---|
1 |
// STANDARD DECLARATIONS |
|
2 |
package org.txm.macro.urs.prototypes.cqp |
|
3 |
|
|
4 |
import org.kohsuke.args4j.* |
|
5 |
import groovy.transform.Field |
|
6 |
import org.txm.annotation.urs.* |
|
7 |
import org.txm.importer.ValidateXml |
|
8 |
import org.txm.rcp.swt.widget.parameters.* |
|
9 |
import org.txm.searchengine.cqp.corpus.* |
|
10 |
import visuAnalec.elements.* |
|
11 |
|
|
12 |
// BEGINNING OF PARAMETERS |
|
13 |
|
|
14 |
// Open the parameters input dialog box |
|
15 |
//if (!ParametersDialog.open(this)) return; |
|
16 |
|
|
17 |
// Writes the URS units of the selected main corpus into its XML-TXM source files:
// first the "milestone" units (empty elements, see MileStoneInserter), then the
// other units as wrapping elements (see UnitsInserter). Units successfully
// written get their "written" property set to "true" so they are skipped next time.

// The macro only works on a main corpus selected in the Corpora view.
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Selection must be a Corpus"
	return
}

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

def texts = corpus.getCorpusTextIdsList()
def texts_startlimits = corpus.getTextStartLimits()
def texts_endlimits = corpus.getTextEndLimits()

for (int i = 0 ; i < texts.size() ; i++) {

	def text_id = texts[i]
	def text_start = texts_startlimits[i]
	def text_end = texts_endlimits[i]

	println "Processing annotations of "+text_id+"..."

	File projectDirectory = corpus.getProject().getProjectDirectory()
	File xmltxmFile = new File(projectDirectory, "txm/"+corpus.getID()+"/"+text_id+".xml")
	File xmltxmFileCopy = new File(projectDirectory, text_id+"_copy.xml")

	if (!xmltxmFile.exists()) {
		println "Warning: no text file found: "+xmltxmFile
		continue
	}

	// Conditions shared by both passes: the unit has a non-blank "type",
	// lies inside the current text and has not been written yet.
	def isCandidate = { Unite u ->
		def type = u.getProp("type")
		type != null && type.trim().length() > 0 &&
				text_start <= u.getDeb() && u.getFin() < text_end &&
				!("true".equals(u.getProp("written")))
	}

	// WRITE MILESTONES UNITS

	println "-> MILESTONES UNITS"

	def units = []
	for (String unit_type : analecCorpus.getStructure().getTypes(Unite.class)) {
		units.addAll(analecCorpus.getUnites(unit_type).findAll { isCandidate(it) && "yes".equals(it.getProp("milestone")) })
	}

	if (units.size() == 0) {
		println "No milestones to write"
	} else {
		try {
			println "processing milestones Units ${text_id} and its units "+units.size()
			MileStoneInserter inserter = new MileStoneInserter(corpus, xmltxmFile, units)
			if (inserter.process(xmltxmFileCopy) && ValidateXml.test(xmltxmFileCopy)) {
				commitUnits(corpus, xmltxmFile, xmltxmFileCopy, units)
				println "Done, "+units.size()+ " milestones written"
			} else {
				println "Error while processing milestones $xmltxmFile file"
				moveToErrorDirectory(projectDirectory, xmltxmFile, xmltxmFileCopy)
			}
		} catch(Exception e) {
			println "Error while processing milestones $xmltxmFile file: "+e
			e.printStackTrace()
			moveToErrorDirectory(projectDirectory, xmltxmFile, xmltxmFileCopy)
		}
	}

	// WRITE NON MILESTONES UNITS

	println "-> OTHER UNITS"

	for (String unit_type : analecCorpus.getStructure().getTypes(Unite.class)) {

		// Everything that is not a milestone: "milestone" is absent, "no" or any value but "yes".
		// (The previous test, !"no".equals(...), excluded the units explicitly flagged
		// milestone=no and let unwritten milestones through.)
		def corpus_units = analecCorpus.getUnites(unit_type).findAll { isCandidate(it) && !"yes".equals(it.getProp("milestone")) }

		if (corpus_units.size() == 0) continue

		println "processing Units ${text_id} and its $unit_type units "+corpus_units.size()
		try {
			// built inside the try: the constructor queries the CQP engine and may fail
			UnitsInserter inserter2 = new UnitsInserter(corpus, xmltxmFile, corpus_units, unit_type)
			if (inserter2.process(xmltxmFileCopy) && ValidateXml.test(xmltxmFileCopy)) {
				commitUnits(corpus, xmltxmFile, xmltxmFileCopy, corpus_units)
				println "Done, "+corpus_units.size()+ " units written"
			} else {
				println "Error while processing units $xmltxmFile file"
				moveToErrorDirectory(projectDirectory, xmltxmFile, xmltxmFileCopy)
			}
		} catch(Exception e) {
			println "Error while processing units $xmltxmFile file: "+e
			e.printStackTrace()
			moveToErrorDirectory(projectDirectory, xmltxmFile, xmltxmFileCopy)
		}
	}
}

/**
 * Replaces the XML-TXM file with the freshly generated copy, then flags the units
 * as written and saves the URS corpus.
 *
 * The move replaces the target in one call, so the original file is never deleted
 * before its replacement is in place, and a failure raises an exception instead of
 * being silently ignored (the units are then NOT flagged as written).
 *
 * @param corpus the corpus whose URS annotations are saved
 * @param xmltxmFile the XML-TXM file to replace
 * @param xmltxmFileCopy the validated result file
 * @param units the units that were written in the result file
 */
def commitUnits(MainCorpus corpus, File xmltxmFile, File xmltxmFileCopy, List units) {
	java.nio.file.Files.move(xmltxmFileCopy.toPath(), xmltxmFile.toPath(), java.nio.file.StandardCopyOption.REPLACE_EXISTING)

	for (Unite unit : units) {
		unit.getProps()["written"] = "true"
	}
	URSCorpora.saveCorpus(corpus)
}

/**
 * Moves the (possibly partial or invalid) result file to the "error" directory of
 * the project so it can be inspected, replacing any previous error file of the same name.
 *
 * @param projectDirectory the project directory
 * @param xmltxmFile the XML-TXM file that was being processed (gives the error file name)
 * @param xmltxmFileCopy the result file to move
 */
def moveToErrorDirectory(File projectDirectory, File xmltxmFile, File xmltxmFileCopy) {
	File error = new File(projectDirectory, "error/"+xmltxmFile.getName())
	error.getParentFile().mkdirs()
	println " moving created file to $error"
	error.delete()
	if (xmltxmFileCopy.exists() && !xmltxmFileCopy.renameTo(error)) {
		println "Warning: could not move $xmltxmFileCopy to $error"
	}
}
TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/cqp/WordUnitsInserter.groovy (revision 3288) | ||
---|---|---|
1 |
package org.txm.macro.urs.prototypes.cqp |
|
2 |
|
|
3 |
import java.io.IOException |
|
4 |
import java.util.Date |
|
5 |
import java.util.LinkedHashMap |
|
6 |
|
|
7 |
import javax.xml.stream.XMLStreamException |
|
8 |
|
|
9 |
import org.txm.Toolbox |
|
10 |
import org.txm.importer.StaxIdentityParser |
|
11 |
import org.txm.macro.urs.AnalecUtils |
|
12 |
import org.txm.scripts.importer.GetAttributeValue |
|
13 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
14 |
import org.txm.searchengine.cqp.corpus.MainCorpus |
|
15 |
import visuAnalec.elements.Unite |
|
16 |
|
|
17 |
/**
 * Copies an XML-TXM file and adds the properties of URS units to the words they cover,
 * as <txm:ana type="#PROPERTY" resp="..."> children of the <w> elements.
 *
 * When a word already has an <ana> of the same type, the unit values are merged with
 * the existing value (space separated) and the resp attribute is set to "#txm".
 */
public class WordUnitsInserter extends StaxIdentityParser {

	/** Property names that are never written as word annotations. */
	private static final List<String> RESERVED_PROPERTIES = ["type", "written", "milestone"]

	List<Unite> units
	String[] ids
	File inputFile
	def id2Units = [:] // word id -> units covering this word
	def unit_properties // "*" or the property names to write (String[])
	def unit_word // ALL START END
	LinkedHashMap<String, String> anaValues = new LinkedHashMap<String, String>() // pending values of the current word

	boolean inAna = false
	String ana_type, ana_resp, ana_value
	boolean inW = false
	String word_id = null

	/**
	 * @param corpus the corpus, used to resolve corpus positions to word ids
	 * @param inputFile the XML-TXM file to read
	 * @param units the units to write; this list is sorted in place by start then end position
	 * @param unit_properties "*" for all the unit properties, or a comma separated list of property names
	 * @param unit_word "START" to annotate the first word of each unit, "END" the last one, anything else all the words
	 */
	public WordUnitsInserter(MainCorpus corpus, File inputFile, List<Unite> units, String unit_properties, String unit_word) {
		super(inputFile)
		this.inputFile = inputFile

		this.unit_properties = unit_properties
		this.unit_word = unit_word
		if (!("*".equals(unit_properties))) {
			// tolerate spaces around the commas and empty items ("a, b,,c")
			this.unit_properties = unit_properties.split(",").collect { it.trim() }.findAll { it.length() > 0 } as String[]
		}

		this.units = units
		this.units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }

		for (Unite u : this.units) {
			int[] positions
			if ("START".equals(unit_word)) {
				positions = [u.getDeb()] as int[]
			} else if ("END".equals(unit_word)) {
				positions = [u.getFin()] as int[]
			} else {
				positions = (u.getDeb()..u.getFin()) as int[]
			}

			ids = CQPSearchEngine.getCqiClient().cpos2Str(corpus.getProperty("id").getQualifiedName(), positions)

			for (String id : ids) {
				if (id == null) continue // position without word id, nothing to attach the unit to

				if (!id2Units.containsKey(id)) {
					id2Units[id] = []
				}
				id2Units[id] << u
			}
		}
	}

	/**
	 * On <w>: collects the values to add to the word from the units covering it.
	 * On <ana> inside a <w>: starts buffering the annotation instead of copying it,
	 * it is written (possibly merged) by processEndElement().
	 */
	@Override
	protected void processStartElement() throws XMLStreamException, IOException {

		if ("w".equals(localname)) {

			inW = true
			word_id = this.getParserAttributeValue("id")

			if (id2Units.containsKey(word_id)) {
				for (Unite u : id2Units[word_id]) {
					def props = u.getProps()
					def propNames = "*".equals(unit_properties) ? props.keySet() : unit_properties

					for (String p : propNames) {
						if (RESERVED_PROPERTIES.contains(p)) continue

						def value = props.get(p)
						if (value == null) continue // the unit does not have this property: don't write "null"

						anaValues[p] = ((anaValues[p] ?: "") + " " + value).trim()
					}
				}
				if (anaValues.size() > 0) println anaValues
			}

			super.processStartElement() // write the tag

		} else if ("ana".equals(localname) && inW) {

			String type = this.getParserAttributeValue("type")
			if (type == null || type.length() == 0) {
				// cannot be merged without a type: copy the element as it is
				super.processStartElement()
				return
			}

			inAna = true
			ana_type = type.startsWith("#") ? type.substring(1) : type
			ana_resp = this.getParserAttributeValue("resp")
			ana_value = ""

		} else {
			super.processStartElement()
		}
	}

	/** Buffers the text of the current <ana>, copies any other text. */
	@Override
	public void processCharacters() throws XMLStreamException {
		if (inAna) {
			ana_value += parser.getText().trim()
		} else {
			super.processCharacters()
		}
	}

	/**
	 * On </ana>: writes the buffered annotation, merged with the pending unit value of the same type if any.
	 * On </w>: writes the pending unit values that did not match an existing annotation, then closes the word.
	 */
	@Override
	protected void processEndElement() throws XMLStreamException {

		if ("w".equals(localname)) {

			// write the values that were not merged with an existing annotation
			for (String type : anaValues.keySet()) {
				writer.writeStartElement("txm:ana")
				writer.writeAttribute("type", "#" + type)
				writer.writeAttribute("resp", "#txm")
				writer.writeCharacters(anaValues[type])
				writer.writeEndElement()
			}

			anaValues.clear()
			super.processEndElement() // finally close the word
			inW = false

		} else if ("ana".equals(localname) && inW && inAna) {

			String value = ana_value.trim()
			if (anaValues.containsKey(ana_type)) {
				// the annotation value is updated with the unit values: TXM becomes responsible of it
				ana_resp = "#txm"
				value = (anaValues[ana_type]+" "+value).trim()
				anaValues.remove(ana_type)
			}

			writer.writeStartElement("txm:ana")
			writer.writeAttribute("type", "#" + ana_type)
			writer.writeAttribute("resp", ana_resp)
			writer.writeCharacters(value)
			writer.writeEndElement()

			inAna = false
			ana_type = null
			ana_resp = null
			ana_value = null

		} else {
			super.processEndElement()
		}
	}
}
TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/cqp/MileStoneInserter.groovy (revision 3288) | ||
---|---|---|
1 |
package org.txm.macro.urs.prototypes.cqp |
|
2 |
|
|
3 |
import java.io.IOException |
|
4 |
|
|
5 |
import javax.xml.stream.XMLStreamException |
|
6 |
|
|
7 |
import org.txm.importer.StaxIdentityParser |
|
8 |
import org.txm.macro.urs.AnalecUtils |
|
9 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
10 |
import org.txm.searchengine.cqp.corpus.MainCorpus |
|
11 |
import visuAnalec.elements.Unite |
|
12 |
|
|
13 |
/**
 * Copies an XML-TXM file and writes the given URS units as milestones (empty elements)
 * before or after the word located at the start position of each unit, depending on
 * the "position" property of the unit ("before" or "after").
 *
 * The element name is the "type" property of the unit; its other properties are
 * written as attributes.
 */
public class MileStoneInserter extends StaxIdentityParser {

	List<Unite> units
	String[] ids
	File inputFile
	def id2Units = [:] // word id -> milestone units anchored on this word

	boolean start = false // true once the <text> element is reached
	String word_id = null // id of the <w> being processed, null outside a word

	/**
	 * @param corpus the corpus, used to resolve the unit start positions to word ids
	 * @param inputFile the XML-TXM file to read
	 * @param units the milestone units to write; this list is sorted in place by start then end position
	 */
	public MileStoneInserter(MainCorpus corpus, File inputFile, List<Unite> units) {
		super(inputFile)
		this.inputFile = inputFile

		this.units = units
		this.units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }

		int[] positions = new int[units.size()]
		for (int i = 0 ; i < units.size() ; i++) {
			positions[i] = units.get(i).getDeb()
		}

		ids = CQPSearchEngine.getCqiClient().cpos2Str(corpus.getProperty("id").getQualifiedName(), positions)

		// ids[i] is the id of the word at the start of units[i]
		for (int i = 0 ; i < ids.length ; i++) {
			String id = ids[i]
			if (id == null) continue

			if (!id2Units.containsKey(id)) id2Units[id] = []
			id2Units[id] << units[i]
		}
	}

	/** Writes the "before" milestones of a word just before its <w> start tag is copied. */
	@Override
	protected void processStartElement() throws XMLStreamException, IOException {

		if ("text".equals(localname)) {
			start = true
		} else if ("w".equals(localname) && start) {
			word_id = getParserAttributeValue("id")
			if (word_id == null) {
				println "Warning: found <w> without id at line "+parser.getLocation().getLineNumber()+" in "+inputFile
			} else {
				writeAllUnits(word_id, "before")
			}
		}

		super.processStartElement()
	}

	/**
	 * Writes the milestones anchored on a word for one side of the word.
	 *
	 * @param id the word id
	 * @param position "before" or "after": only the units whose "position" property has this value are written
	 */
	protected void writeAllUnits(String id, String position) {

		def wordUnits = id2Units[id]
		if (wordUnits == null) return // no units to write

		for (Unite currentUnit : wordUnits) {

			// skip only this unit: the next ones of the same word may be on the requested side
			if (!position.equals(currentUnit.getProp("position"))) continue

			writer.writeStartElement(currentUnit.getProp("type"))
			HashMap props = currentUnit.getProps()
			for (String p : props.keySet()) {
				if (p.equals("type")) continue // ignore the type since written in tag name
				writer.writeAttribute(p, ""+props.get(p))
			}
			writer.writeEndElement()
		}
	}

	/** Writes the "after" milestones of a word just after its </w> end tag is copied. */
	@Override
	protected void processEndElement() throws XMLStreamException {
		super.processEndElement()

		if ("w".equals(localname)) {
			if (start && word_id != null) {
				writeAllUnits(word_id, "after")
			}
			word_id = null
		}
	}
}
TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/cqp/UnitsInserter.groovy (revision 3288) | ||
---|---|---|
1 |
package org.txm.macro.urs.prototypes.cqp |
|
2 |
|
|
3 |
import java.io.IOException |
|
4 |
|
|
5 |
import javax.xml.stream.XMLStreamException |
|
6 |
|
|
7 |
import org.txm.importer.StaxIdentityParser |
|
8 |
import org.txm.macro.urs.AnalecUtils |
|
9 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
10 |
import org.txm.searchengine.cqp.corpus.MainCorpus |
|
11 |
import visuAnalec.elements.Unite |
|
12 |
|
|
13 |
/**
 * Copies an XML-TXM file and writes the given URS units as elements wrapping the
 * words they cover: the element is opened before the <w> of the unit start position
 * and closed after the </w> of the unit end position.
 *
 * The element name is the "type" property of the unit; its other properties are
 * written as attributes.
 */
public class UnitsInserter extends StaxIdentityParser {

	List<Unite> units
	String[] ids
	File inputFile
	def open_id2Units = [:] // word id -> units to open before this word (List)
	def close_id2Units = [:] // word id -> units to close after this word (Set)
	def writing_units = [] // units currently opened in the output
	def writing_stacks = [] // parallel to writing_units, see writeUnit()

	def stack = "" // path of the element being read: "/a/b/c"

	def writing_start, writing_end // never assigned in this class, kept for compatibility

	def positions2id = [:] // used to relocate end of units

	boolean start = false // true once the <text> element is reached
	String word_id = null // id of the <w> being processed, null outside a word

	/**
	 * @param corpus the corpus, used to resolve the unit positions to word ids
	 * @param inputFile the XML-TXM file to read
	 * @param units the units to write; this list is sorted in place by start position, longest unit first
	 * @param type not used by this class
	 */
	public UnitsInserter(MainCorpus corpus, File inputFile, List<Unite> units, String type) {
		super(inputFile)
		this.inputFile = inputFile
		this.units = units

		// same start: the unit ending last is opened first so it contains the others
		this.units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: b.getFin() <=> a.getFin() }

		// words opening the units
		int[] positions = new int[units.size()]
		for (int i = 0 ; i < units.size() ; i++) {
			positions[i] = units.get(i).getDeb()
		}
		ids = CQPSearchEngine.getCqiClient().cpos2Str(corpus.getProperty("id").getQualifiedName(), positions)
		for (int i = 0 ; i < ids.length ; i++) {
			String id = ids[i]
			if (id == null) continue

			positions2id[positions[i]] = id
			if (!open_id2Units.containsKey(id)) open_id2Units[id] = []
			open_id2Units[id] << units[i]
		}

		// words closing the units
		positions = new int[units.size()]
		for (int i = 0 ; i < units.size() ; i++) {
			positions[i] = units.get(i).getFin()
		}
		ids = CQPSearchEngine.getCqiClient().cpos2Str(corpus.getProperty("id").getQualifiedName(), positions)
		for (int i = 0 ; i < ids.length ; i++) {
			String id = ids[i]
			if (id == null) continue

			positions2id[positions[i]] = id
			if (!close_id2Units.containsKey(id)) close_id2Units[id] = new HashSet<Unite>()
			close_id2Units[id] << units[i]
		}
	}

	/** Tracks the element path and opens the units starting on a word before its <w> tag is copied. */
	@Override
	protected void processStartElement() throws XMLStreamException, IOException {

		stack += "/"+localname

		if ("text".equals(localname)) {
			start = true
		} else if ("w".equals(localname) && start) {
			word_id = getParserAttributeValue("id")
			if (word_id == null) {
				println "Warning: found <w> without id at line "+parser.getLocation().getLineNumber()+" in "+inputFile
			} else {
				writeOpenUnits()
			}
		}

		super.processStartElement()
	}

	/**
	 * Opens the units starting on the current word.
	 *
	 * A unit ending after an already opened unit cannot be nested in it: it is also
	 * registered to be closed on the last word of that opened unit.
	 */
	protected void writeOpenUnits() {

		def toWrite = open_id2Units[word_id]
		if (toWrite == null) return

		for (Unite unite : toWrite) {

			for (int i = 0 ; i < writing_units.size() ; i++) {
				Unite u = writing_units.get(i)
				if (unite.getFin() > u.getFin()) {
					// close the unite at the same moment as u
					String id = positions2id[u.getFin()]
					def closing = (id == null) ? null : close_id2Units[id]
					if (closing != null) {
						closing << unite
					} else {
						// the end of u has no word id: nothing to register the early close on
						println "Warning: cannot relocate the end of a unit, no word id at position "+u.getFin()+" in "+inputFile
					}
				}
			}

			writeUnit(unite)
		}
	}

	/**
	 * Closes opened units: inside a word, the units registered to be closed on this word;
	 * outside a word, the units whose writing_stacks entry equals the current element path.
	 */
	protected void writeCloseUnits() {
		if (word_id != null) {
			def toClose = close_id2Units[word_id]
			if (toClose != null) {
				for (int i = 0 ; i < writing_units.size() ; i++) {
					Unite u = writing_units.get(i)
					if (toClose.contains(u)) {
						writing_stacks.remove(i)
						writing_units.remove(i)
						writer.writeEndElement()
						i-- // the next unit moved to index i
					}
				}
			}
		} else {
			for (int i = 0 ; i < writing_stacks.size() ; i++) {
				if (writing_stacks[i].equals(stack)) {
					writing_stacks.remove(i)
					writing_units.remove(i)
					writer.writeEndElement()
					i-- // the next unit moved to index i
				}
			}
		}
	}

	/**
	 * Opens the element of a unit and writes its attributes.
	 *
	 * @param currentUnit the unit to open
	 */
	protected void writeUnit(Unite currentUnit) {

		writing_units << currentUnit
		// NOTE(review): the unit itself is stored here while processEndElement() and
		// writeCloseUnits() compare the writing_stacks entries with the String path "stack";
		// presumably an element path was meant - confirm before relying on the path based close.
		writing_stacks << currentUnit

		// The start element is written unconditionally: it used to be guarded by a
		// brace-less "if (currentUnit.getDeb() > writing_start)" which applied to this call
		// only (not to the attributes below) and compared against a never assigned field.
		writer.writeStartElement(currentUnit.getProp("type"))
		HashMap props = currentUnit.getProps()
		for (String p : props.keySet()) {
			if (p.equals("type")) continue // ignore the type since written in tag name
			writer.writeAttribute(p, ""+props.get(p))
		}
	}

	/** Closes the units ending on a word after its </w> tag is copied and maintains the element path. */
	@Override
	protected void processEndElement() throws XMLStreamException {

		if (writing_stacks.size() > 0 && writing_stacks[-1].equals(stack)) {
			writeCloseUnits()
		}

		super.processEndElement()

		// remove "/localname" from the path
		stack = stack.substring(0, stack.length() - localname.length() - 1)

		if ("w".equals(localname)) {
			if (start && word_id != null) {
				writeCloseUnits()
			}
			word_id = null
		}
	}
}
TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/cqp/URSUnits2CQPWordsMacro.groovy (revision 3288) | ||
---|---|---|
1 |
package org.txm.macro.urs.prototypes.cqp |
|
2 |
|
|
3 |
import org.kohsuke.args4j.* |
|
4 |
import groovy.transform.Field |
|
5 |
import org.txm.annotation.urs.* |
|
6 |
import org.txm.importer.ValidateXml |
|
7 |
import org.txm.rcp.swt.widget.parameters.* |
|
8 |
import org.txm.searchengine.cqp.corpus.* |
|
9 |
import visuAnalec.elements.* |
|
10 |
|
|
11 |
// BEGINNING OF PARAMETERS |
|
12 |
|
|
13 |
// Open the parameters input dialog box |
|
14 |
//if (!ParametersDialog.open(this)) return; |
|
15 |
|
|
16 |
// Writes properties of URS units on the words of the XML-TXM source files of the
// selected main corpus (see WordUnitsInserter). Units successfully written get
// their "written" property set to "true" so they are skipped next time.

// The macro only works on a main corpus selected in the Corpora view.
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Selection must be a Corpus"
	return
}

@Field @Option(name="unit_type", usage="Value of the 'type' property of the units to write", widget="String", required=true, def="word")
String unit_type

@Field @Option(name="unit_properties", usage="Unit properties to write: * for all of them, or a comma separated list of property names", widget="String", required=true, def="*")
String unit_properties

@Field @Option(name="unit_word", usage="Words of the unit to annotate: START (first word), END (last word) or ALL (all the words)", widget="StringArray", metaVar="START END ALL", required=true, def="START")
String unit_word

if (!ParametersDialog.open(this)) return

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

def texts = corpus.getCorpusTextIdsList()
def texts_startlimits = corpus.getTextStartLimits()
def texts_endlimits = corpus.getTextEndLimits()

for (int i = 0 ; i < texts.size() ; i++) {

	def text_id = texts[i]
	def text_start = texts_startlimits[i]
	def text_end = texts_endlimits[i]

	println "Processing annotations of "+text_id+"..."

	File projectDirectory = corpus.getProject().getProjectDirectory()
	File xmltxmFile = new File(projectDirectory, "txm/"+corpus.getID()+"/"+text_id+".xml")
	File xmltxmFileCopy = new File(projectDirectory, text_id+"_copy.xml")

	if (!xmltxmFile.exists()) {
		println "Warning: no text file found: "+xmltxmFile
		continue
	}

	for (String uType : analecCorpus.getStructure().getTypes(Unite.class)) {

		// units of the requested type, inside the current text and not written yet
		def corpus_units = analecCorpus.getUnites(uType).findAll() {
			unit_type.equals(it.getProp("type")) && text_start <= it.getDeb() && it.getFin() < text_end && !("true".equals(it.getProp("written")))
		}

		if (corpus_units.size() == 0) continue

		try {
			println " processing word Units ${text_id} and its '$uType' units ("+corpus_units.size()+")"
			WordUnitsInserter inserter = new WordUnitsInserter(corpus, xmltxmFile, corpus_units, unit_properties, unit_word)
			if (inserter.process(xmltxmFileCopy) && ValidateXml.test(xmltxmFileCopy)) {
				// Replace the file in one call: the original is never deleted before its
				// replacement is in place and a failure raises an exception (handled below)
				// instead of flagging the units as written for nothing.
				java.nio.file.Files.move(xmltxmFileCopy.toPath(), xmltxmFile.toPath(), java.nio.file.StandardCopyOption.REPLACE_EXISTING)

				for (Unite unit : corpus_units) {
					unit.getProps()["written"] = "true"
				}
				URSCorpora.saveCorpus(corpus)
				println "Done, "+corpus_units.size()+ " units written"

			} else {
				println "Error while processing the XML-TXM $xmltxmFile file"
				File error = new File(projectDirectory, "error/"+xmltxmFile.getName())
				error.getParentFile().mkdirs()
				println " moving created file to $error"
				error.delete()
				xmltxmFileCopy.renameTo(error)
			}
		} catch(Exception e) {
			println "Error while processing word units of the XML-TXM $xmltxmFile file: "+e
			e.printStackTrace()
			File error = new File(projectDirectory, "error/"+xmltxmFile.getName())
			error.getParentFile().mkdirs()
			println " moving created file to $error"
			error.delete()
			xmltxmFileCopy.renameTo(error)
		}
	}
}
TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/exploit/AllMesuresMacro.groovy (revision 3288) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.prototypes.exploit |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
|
|
10 |
import groovy.transform.Field |
|
11 |
|
|
12 |
import org.txm.* |
|
13 |
import org.txm.rcp.swt.widget.parameters.* |
|
14 |
import org.txm.annotation.urs.* |
|
15 |
import org.txm.searchengine.cqp.corpus.* |
|
16 |
import org.apache.commons.lang.StringUtils; |
|
17 |
|
|
18 |
// BEGINNING OF PARAMETERS |
|
19 |
|
|
20 |
// Runs a fixed series of URS measure macros on every main corpus selected in the
// Corpora view and saves one line per corpus in a TSV file.

@Field @Option(name="tsvFile",usage="", widget="FileSave", required=true, def="result.tsv")
File tsvFile

@Field @Option(name="default_schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
String default_schema_ursql

@Field @Option(name="default_minimum_schema_size", usage="", widget="Integer", required=true, def="3")
int default_minimum_schema_size

@Field @Option(name="schema_property_display_name",usage="", widget="String", required=false, def="REF")
String schema_property_display_name

@Field @Option(name="default_unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
String default_unit_ursql

@Field @Option(name="default_word_property", usage="", widget="String", required=false, def="word")
String default_word_property

@Field @Option(name="default_pos_property", usage="", widget="String", required=false, def="CATEGORIE")
String default_pos_property

if (!ParametersDialog.open(this)) return;
// END OF PARAMETERS

println "Corpora selections: "+corpusViewSelections

table = [] // one list of cells per corpus
mesures = [] // names of the executed measures, filled by execMesure()

for (def corpus : corpusViewSelections) {
	if (!(corpus instanceof MainCorpus)) continue; // only main corpora are measured

	// the line starts with the corpus name and is registered right away
	def line = [corpus.getID()]
	table << line

	println "*** Computing mesures for $corpus" // each macro return a "result" and a "data"

	// The measures in execution order: the macro to run, its parameters and
	// the entry of the returned value to store in the table.
	def steps = [
		[macro: UnitsReferentialDensityMacro, cell: "result", args: [
				"unit_ursql":default_unit_ursql,
			]],
		[macro: SchemaLengthsMacro, cell: "result", args: [
				"schema_ursql":default_schema_ursql,
				"minimum_schema_size":default_minimum_schema_size,
				"unit_ursql":default_unit_ursql,
			]],
		[macro: NumberOfSchemaMacro, cell: "result", args: [
				"schema_ursql":default_schema_ursql,
				"minimum_schema_size":default_minimum_schema_size,
				"unit_ursql":default_unit_ursql,
			]],
		[macro: UnitsStabilityScoreMacro, cell: "result", args: [
				"schema_ursql":default_schema_ursql,
				"minimum_schema_size":default_minimum_schema_size,
				"schema_property_display_name":schema_property_display_name,
				"unit_ursql":default_unit_ursql+"@CATEGORIE=GN Défini|GN Démonstratif|Nom Propre",
				"word_property":default_word_property,
			]],
		[macro: UnitsInterDistanceMacro, cell: "result", args: [
				"schema_ursql":default_schema_ursql,
				"minimum_schema_size":default_minimum_schema_size,
				"unit_ursql":default_unit_ursql,
			]],
		[macro: NatureOfTheFirstUnitMacro, cell: "data", args: [
				"schema_ursql":default_schema_ursql,
				"minimum_schema_size":default_minimum_schema_size,
				"unit_ursql":default_unit_ursql,
				"word_property":default_pos_property,
			]],
		[macro: GrammaticalCategoryMacro, cell: "data", args: [
				"schema_ursql":default_schema_ursql,
				"minimum_schema_size":default_minimum_schema_size,
				"schema_property_display_name":schema_property_display_name,
				"unit_ursql":default_unit_ursql,
				"word_property":default_pos_property,
			]],
	]

	for (def step : steps) {
		params = step["args"]
		returnedValue = execMesure(step["macro"], line, corpus, params)
		line << returnedValue[step["cell"]]
	}
}

// WRITE RESULTS IN THE TSV FILE
tsvFile.withWriter("UTF-8") { writer ->
	// header: an empty cell above the corpus names, then the measure names
	writer.println "\t"+mesures.join("\t")
	for (def cells : table) {
		writer.println cells.join("\t")
	}
}

println "Done. Results are saved in ${tsvFile.getAbsolutePath()} file."
|
123 |
|
|
124 |
// UTILITY FUNCTIONS |
|
125 |
/**
 * Runs a measure macro on a corpus and registers its name as a column of the result table.
 *
 * The column name is the macro class name up to "Macro". It is registered only once:
 * this function is called for each selected corpus, and registering the name at each
 * call made the TSV header longer than the table lines.
 *
 * @param mesure the class of the macro to run
 * @param line the table line of the corpus (not modified here, the caller stores the result)
 * @param corpus the corpus given to the macro as "corpusViewSelection"
 * @param params the macro parameters, given as "args"
 * @return the value returned by the macro, never null
 * @throws Exception if the macro returns null
 */
def execMesure(def mesure, def line, def corpus, def params) {
	def simpleName = mesure.getSimpleName()
	int suffix = simpleName.indexOf("Macro")
	def m = (suffix >= 0) ? simpleName.substring(0, suffix) : simpleName // no "Macro" in the name: use it as it is

	if (!mesures.contains(m)) mesures << m

	println "***** ${mesures.indexOf(m) + 1}- $m with parameters: $params"
	def r = gse.run(mesure, ["args":params, "corpusViewSelection":corpus, "monitor":monitor])
	if (r == null) throw new Exception("Null result");
	return r;
}
TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/exploit/NatureOfTheFirstUnitMacro.groovy (revision 3288) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.prototypes.exploit |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.* |
|
11 |
import org.txm.macro.urs.AnalecUtils |
|
12 |
import visuAnalec.elements.* |
|
13 |
import org.txm.rcp.swt.widget.parameters.* |
|
14 |
import org.txm.annotation.urs.* |
|
15 |
import org.txm.searchengine.cqp.corpus.* |
|
16 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
17 |
import org.apache.commons.lang.StringUtils; |
|
18 |
|
|
19 |
if (!(corpusViewSelection instanceof CQPCorpus)) { |
|
20 |
println "Corpora selection is not a Corpus" |
|
21 |
return; |
|
22 |
} |
|
23 |
|
|
24 |
// BEGINNING OF PARAMETERS |
|
25 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE") |
|
26 |
String schema_ursql |
|
27 |
|
|
28 |
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3") |
|
29 |
int minimum_schema_size |
|
30 |
|
|
31 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION") |
|
32 |
String unit_ursql |
|
33 |
|
|
34 |
@Field @Option(name="word_property", usage="", widget="String", required=false, def="CATEGORIE") |
|
35 |
String word_property |
|
36 |
|
|
37 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
|
38 |
debug |
|
39 |
|
|
40 |
if (!ParametersDialog.open(this)) return; |
|
41 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
|
42 |
|
|
43 |
|
|
44 |
CQPCorpus corpus = corpusViewSelection |
|
45 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
|
46 |
|
|
47 |
if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) { |
|
48 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus." |
|
49 |
return; |
|
50 |
} |
|
51 |
|
|
52 |
if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) { |
|
53 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus." |
|
54 |
return; |
|
55 |
} |
|
56 |
|
|
57 |
def CQI = CQPSearchEngine.getCqiClient() |
|
58 |
|
|
59 |
def prop = corpus.getProperty(word_property) |
|
60 |
|
|
61 |
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999);

// number of schemas per value found on their first unit
def freqs = [:]

for (def schema : schemas) {

	def allUnites = schema.getUnitesSousjacentesNonTriees()
	def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)
	if (units.size() == 0) continue;

	// The accessor name says the units are not sorted ("NonTriees"), so element 0 is not
	// guaranteed to be the first unit of the text: take the one with the smallest start
	// (then end) position instead. min() does not modify the collection.
	// NOTE(review): assumes AnalecUtils.filterElements does not already sort - confirm.
	def unit = units.min { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }

	String forme = null;
	if (prop == null) { // word_property is the analec unit property to use
		forme = unit.getProp(word_property)
	} else { // word_property is a corpus property: join its values over the positions of the unit
		int[] pos = null;
		if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()]
		else pos = unit.getDeb()..unit.getFin()

		forme = StringUtils.join(CQI.cpos2Str(prop.getQualifiedName(), pos), " ") // ids is enough
	}

	freqs[forme] = (freqs[forme] ?: 0) + 1;
}

println "Index des natures de premier maillon :"
int max = 0;
def result = "";
// values are listed by decreasing frequency; `result` keeps the most frequent one
for (def forme : freqs.keySet().sort() {it -> -freqs[it]}) {
	println "$forme\t"+freqs[forme]
	if (max < freqs[forme]) {
		max = freqs[forme]
		result = "$forme: "+freqs[forme]
	}
}

["result": result, "data": freqs]
TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/exploit/NumberOfSchemaMacro.groovy (revision 3288) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.prototypes.exploit |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.rcp.swt.widget.parameters.* |
|
11 |
import org.txm.annotation.urs.* |
|
12 |
import visuAnalec.elements.* |
|
13 |
import org.txm.searchengine.cqp.corpus.* |
|
14 |
import org.txm.macro.urs.AnalecUtils |
|
15 |
|
|
16 |
if (!(corpusViewSelection instanceof CQPCorpus)) { |
|
17 |
println "Corpora selection is not a Corpus" |
|
18 |
return; |
|
19 |
} |
|
20 |
|
|
21 |
// BEGINNING OF PARAMETERS |
|
22 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE") |
|
23 |
String schema_ursql |
|
24 |
|
|
25 |
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3") |
|
26 |
int minimum_schema_size |
|
27 |
|
|
28 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
|
29 |
debug |
|
30 |
|
|
31 |
if (!ParametersDialog.open(this)) return; |
|
32 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
|
33 |
|
|
34 |
|
|
35 |
CQPCorpus corpus = corpusViewSelection |
|
36 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
|
37 |
|
|
38 |
// check Schema parameters |
|
39 |
if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) { |
|
40 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus." |
|
41 |
return; |
|
42 |
} |
|
43 |
|
|
44 |
// schemas selected by AnalecUtils for the given URSQL and size arguments
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999);

final int nSchemas = schemas.size();
println "Nombre de chaînes de référence d'un texte : $nSchemas"

// value of the script: the count, plus the selected schemas themselves
["result":nSchemas, "data":schemas]
TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/exploit/GrammaticalCategoryMacro.groovy (revision 3288) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.prototypes.exploit |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.* |
|
11 |
import org.txm.macro.urs.AnalecUtils |
|
12 |
import visuAnalec.elements.* |
|
13 |
import org.txm.rcp.swt.widget.parameters.* |
|
14 |
import org.txm.annotation.urs.* |
|
15 |
import org.txm.searchengine.cqp.* |
|
16 |
import org.txm.searchengine.cqp.corpus.* |
|
17 |
import org.apache.commons.lang.StringUtils; |
|
18 |
|
|
19 |
// BEGINNING OF PARAMETERS |
|
20 |
|
|
21 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE") |
|
22 |
String schema_ursql |
|
23 |
|
|
24 |
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3") |
|
25 |
int minimum_schema_size |
|
26 |
|
|
27 |
@Field @Option(name="schema_display_property_name",usage="", widget="String", required=false, def="REF") |
|
28 |
String schema_display_property_name |
|
29 |
|
|
30 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION") |
|
31 |
String unit_ursql |
|
32 |
|
|
33 |
@Field @Option(name="property", usage="", widget="String", required=false, def="CATEGORIE") |
|
34 |
String property |
|
35 |
|
|
36 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
|
37 |
debug |
|
38 |
|
|
39 |
if (!(corpusViewSelection instanceof CQPCorpus)) { |
|
40 |
println "Corpora selection is not a Corpus" |
|
41 |
return; |
|
42 |
} |
|
43 |
|
|
44 |
// Open the parameters input dialog box |
|
45 |
if (!ParametersDialog.open(this)) return; |
|
46 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
|
47 |
|
|
48 |
// END OF PARAMETERS |
|
49 |
|
|
50 |
// Typed to match the `instanceof CQPCorpus` guard above (as the sibling macros do);
// a MainCorpus-typed variable rejected any other accepted selection.
CQPCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) {
	println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
	return;
}

if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) {
	println "** $unit_ursql unit URSQL cannot be computed in the corpus."
	return;
}

def CQI = CQPSearchEngine.getCqiClient()

// null when the corpus has no property named `property`: the value is then read from the units
def prop = corpus.getProperty(property)

def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999);

// frequencies over all schemas
def allFreqs = [:]
def n = 0
for (def schema : schemas) {
	n++

	// frequencies for this schema only
	def freqs = [:]

	def allUnites = schema.getUnitesSousjacentesNonTriees()
	def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)

	for (def unit : units) { // no need to sort units

		String forme = null;
		if (prop == null) { // property is the analec unit property to use
			forme = unit.getProp(property)
		} else { // property is a corpus property: join its values over the positions of the unit
			int[] pos = null;
			if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()]
			else pos = unit.getDeb()..unit.getFin()

			forme = StringUtils.join(CQI.cpos2Str(prop.getQualifiedName(), pos), " ") // ids is enough
		}

		freqs[forme] = (freqs[forme] ?: 0) + 1;
		allFreqs[forme] = (allFreqs[forme] ?: 0) + 1;
	}

	if (schema_display_property_name != null) {
		println "Index des natures de $unit_ursql de '"+schema.getProp(schema_display_property_name)+"' : "
	} else {
		println "Index des natures de $schema_ursql - $n : "
	}

	// decreasing frequency, then key order
	for (def forme : freqs.sort() { a, b -> -a.value <=> -b.value ?: a.key <=> b.key }) {
		println forme.key+"\t"+forme.value
	}
}

int max = 0;
def result = "";

println "Index des natures de $schema_ursql : "
for (def forme : allFreqs.sort() { a, b -> -a.value <=> -b.value ?: a.key <=> b.key }) {
	println forme.key+"\t"+forme.value
	if (max < forme.value) {
		max = forme.value
		// `forme` is a map entry: interpolating it whole produced "key=value: value"
		result = "${forme.key}: "+forme.value
	}
}

return ["result":result, "data":allFreqs]
TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/misc/CompUnitPropertiesMacro.groovy (revision 3288) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macroprototypes.urs.misc |
|
7 |
|
|
8 |
import org.apache.commons.lang.StringUtils; |
|
9 |
import org.kohsuke.args4j.* |
|
10 |
import groovy.transform.Field |
|
11 |
import org.txm.Toolbox; |
|
12 |
import org.txm.rcp.swt.widget.parameters.* |
|
13 |
import org.txm.annotation.urs.* |
|
14 |
import org.txm.searchengine.cqp.AbstractCqiClient; |
|
15 |
import org.txm.searchengine.cqp.corpus.* |
|
16 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
17 |
import visuAnalec.donnees.Structure; |
|
18 |
import visuAnalec.elements.Unite; |
|
19 |
|
|
20 |
if (!(corpusViewSelection instanceof MainCorpus)) { |
|
21 |
println "Corpora selection is not a Corpus" |
|
22 |
return; |
|
23 |
} |
|
24 |
|
|
25 |
// BEGINNING OF PARAMETERS |
|
26 |
@Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION") |
|
27 |
String unit_type |
|
28 |
|
|
29 |
@Field @Option(name="print_diff",usage="", widget="Boolean", required=true, def="true") |
|
30 |
boolean print_diff |
|
31 |
|
|
32 |
@Field @Option(name="unit_property_name1", usage="", widget="String", required=false, def="CATEGORIE") |
|
33 |
String unit_property_name1 |
|
34 |
|
|
35 |
@Field @Option(name="unit_property_name2", usage="", widget="String", required=false, def="CATEGORIE_ORIG") |
|
36 |
String unit_property_name2 |
|
37 |
|
|
38 |
if (!ParametersDialog.open(this)) return; |
|
39 |
|
|
40 |
int n = 1; // 1-based rank of the unit being examined, shown in the diff lines
int nDiff = 0;
MainCorpus corpus = corpusViewSelection
AbstractCqiClient CQI = CQPSearchEngine.getCqiClient();
def word = corpus.getWordProperty()
def analecCorpus = URSCorpora.getCorpus(corpus);

def units = analecCorpus.getUnites(unit_type)
// examine the units by start position, then by end position
units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
for (Unite unit : units) {
	int[] pos = null
	if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()]
	else pos = (unit.getDeb()..unit.getFin())
	def form = StringUtils.join(CQI.cpos2Str(word.getQualifiedName(), pos), " ")
	def props = unit.getProps()
	def v1 = props.get(unit_property_name1);
	def v2 = props.get(unit_property_name2);

	if (v1 != v2) {
		if (print_diff) println "$n - ${unit.getDeb()} -> ${unit.getFin()} - $props : $form"
		nDiff++
	}
	n++
}

// n started at 1, so after the loop it is one more than the number of units examined
int nUnits = n - 1
if (nDiff == 0) println "$unit_property_name1 and $unit_property_name2 have the same values."
else println "$unit_property_name1 and $unit_property_name2 have $nDiff/$nUnits different values."
TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/misc/RelationsListMacro.groovy (revision 3288) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macroprototypes.urs.misc |
|
7 |
|
|
8 |
import org.apache.commons.lang.StringUtils; |
|
9 |
import org.apache.tools.ant.types.resources.selectors.InstanceOf; |
|
10 |
import org.kohsuke.args4j.* |
|
11 |
|
|
12 |
import groovy.transform.Field |
|
13 |
|
|
14 |
import org.txm.Toolbox; |
|
15 |
import org.txm.rcp.swt.widget.parameters.* |
|
16 |
import org.txm.annotation.urs.* |
|
17 |
import org.txm.searchengine.cqp.AbstractCqiClient; |
|
18 |
import org.txm.searchengine.cqp.corpus.* |
|
19 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
20 |
|
|
21 |
import visuAnalec.donnees.Structure; |
|
22 |
import visuAnalec.elements.Relation |
|
23 |
import visuAnalec.elements.Unite; |
|
24 |
|
|
25 |
if (!(corpusViewSelection instanceof MainCorpus)) { |
|
26 |
println "Corpora selection is not a Corpus" |
|
27 |
return; |
|
28 |
} |
|
29 |
|
|
30 |
// BEGINNING OF PARAMETERS |
|
31 |
@Field @Option(name="relation_type",usage="", widget="String", required=true, def="ANAPHORE") |
|
32 |
String relation_type |
|
33 |
|
|
34 |
if (!ParametersDialog.open(this)) return; |
|
35 |
|
|
36 |
MainCorpus corpus = corpusViewSelection |
|
37 |
AbstractCqiClient CQI = CQPSearchEngine.getCqiClient(); |
|
38 |
def word = corpus.getWordProperty() |
|
39 |
visuAnalec.donnees.Corpus analecCorpus = URSCorpora.getCorpus(corpus); |
|
40 |
|
|
41 |
int n = 1; // 1-based rank printed in front of each relation

// A non-empty relation_type restricts the listing to that type. The previous loop
// walked over every declared relation type and never used the parameter.
def relations = null
if (relation_type.length() > 0) {
	relations = analecCorpus.getRelations(relation_type) ?: []
} else {
	relations = analecCorpus.getToutesRelations()
}

for (Relation relation : relations) {
	def unit1 = relation.getElt1();
	def unit2 = relation.getElt2();
	def props = relation.getProps()
	if (unit1 instanceof Unite && unit2 instanceof Unite) {
		// both ends are units: print the words they cover
		int[] pos1 = null
		if (unit1.getDeb() == unit1.getFin()) pos1 = [unit1.getDeb()]
		else pos1 = (unit1.getDeb()..unit1.getFin())
		def form1 = StringUtils.join(CQI.cpos2Str(word.getQualifiedName(), pos1), " ")

		int[] pos2 = null
		if (unit2.getDeb() == unit2.getFin()) pos2 = [unit2.getDeb()]
		else pos2 = (unit2.getDeb()..unit2.getFin())
		def form2 = StringUtils.join(CQI.cpos2Str(word.getQualifiedName(), pos2), " ")

		println "$n - $props : $form1 -> $form2"
	} else {
		println "$n - $props"
	}
	n++
}
TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/misc/UnitTypesInSchemaMacro.groovy (revision 3288) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macroprototypes.urs.misc |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.rcp.swt.widget.parameters.* |
|
11 |
import org.txm.annotation.urs.* |
|
12 |
import org.txm.searchengine.cqp.corpus.* |
|
13 |
|
|
14 |
if (!(corpusViewSelection instanceof MainCorpus)) { |
|
15 |
println "Corpora selection is not a Corpus" |
|
16 |
return; |
|
17 |
} |
|
18 |
|
|
19 |
// BEGINNING OF PARAMETERS |
|
20 |
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="CHAINE") |
|
21 |
String schema_type |
|
22 |
|
|
23 |
if (!ParametersDialog.open(this)) return; |
|
24 |
|
|
25 |
MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus);

// units of every schema of the requested type; a unit collected from several schemas appears several times
def unitesInSchema = []
for (def schema : analecCorpus.getSchemas(schema_type)) {
	def unites = schema.getUnitesSousjacentes()
	unitesInSchema.addAll(unites)
}

// report the units that were collected more than once
def counts = unitesInSchema.countBy() { it };
for (def c : counts.keySet()) {
	if (counts[c] > 1) println "ERROR UNIT IN MULTIPLE SCHEMA["+c.getDeb()+", "+c.getFin()+"]="+c.getProps()+" in "+c.getSchemas().collect() {it.getProps()}
}

// count the distinct units per unit type
def map = new HashMap()
def set = new HashSet()
set.addAll(unitesInSchema)
for (def s : set.collect { it.getType() }) {
	if (!map.containsKey(s)) map[s] = 0;
	map[s] = map[s] +1
}

// Map.sort passes each entry to a one-parameter closure; the previous `map[it]` used the
// entry as a key, always got null and left the map unsorted. Sort on the count instead.
println "Unites types: "+map.sort() { it.value }
TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/misc/UnitTypesNotInSchemaMacro.groovy (revision 3288) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macroprototypes.urs.misc |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.rcp.swt.widget.parameters.* |
|
11 |
import org.txm.annotation.urs.* |
|
12 |
import org.txm.searchengine.cqp.corpus.* |
|
13 |
|
|
14 |
if (!(corpusViewSelection instanceof MainCorpus)) { |
|
15 |
println "Corpora selection is not a Corpus" |
|
16 |
return; |
|
17 |
} |
|
18 |
|
|
19 |
// BEGINNING OF PARAMETERS |
|
20 |
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="CHAINE") |
|
21 |
String schema_type |
|
22 |
if (!ParametersDialog.open(this)) return; |
|
23 |
|
|
24 |
MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus);

// units that belong to at least one schema of the requested type
def unitesInSchema = new HashSet()
for (def schema : analecCorpus.getSchemas(schema_type)) {
	unitesInSchema.addAll(schema.getUnitesSousjacentes())
}
def allUnites = analecCorpus.getToutesUnites()
println "unites: "+allUnites.size()
println "unites in schema: "+unitesInSchema.size()

// number of units outside those schemas, per unit type
def set = new HashMap()
for (def u : allUnites) {
	if (unitesInSchema.contains(u)) continue;

	def type = u.getType()
	set[type] = (set[type] ?: 0) + 1
}

// Map.sort passes each entry to a one-parameter closure; the previous `set[it]` used the
// entry as a key, always got null and left the map unsorted. Sort on the count instead.
println "unites not in schema: "+set.sort() { it.value }
TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/misc/UnitsProgressionMacro.groovy (revision 3288) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macroprototypes.urs.misc |
|
7 |
|
|
8 |
import java.util.ArrayList; |
|
9 |
import java.util.List; |
|
10 |
|
|
11 |
import org.apache.commons.lang.StringUtils |
|
12 |
import org.jfree.chart.JFreeChart |
|
13 |
import org.jfree.chart.plot.XYPlot |
|
14 |
import org.kohsuke.args4j.* |
|
15 |
|
|
16 |
import groovy.transform.Field |
|
17 |
|
|
18 |
import org.txm.Toolbox |
|
19 |
import org.txm.progression.core.chartsengine.jfreechart.themes.highcharts.renderers.ProgressionItemSelectionRenderer |
|
20 |
import org.txm.progression.core.functions.Progression |
|
21 |
import org.txm.rcp.swt.widget.parameters.* |
|
22 |
import org.txm.annotation.urs.* |
|
23 |
import org.txm.chartsengine.rcp.editors.ChartEditor |
|
24 |
import org.txm.macro.urs.AnalecUtils |
|
25 |
import org.txm.searchengine.cqp.AbstractCqiClient |
|
26 |
import org.txm.searchengine.cqp.corpus.* |
|
27 |
import org.txm.searchengine.cqp.corpus.query.Match; |
|
28 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery |
|
29 |
import org.txm.rcp.Application |
|
30 |
import org.txm.rcp.IImageKeys |
|
31 |
|
|
32 |
import visuAnalec.donnees.Structure |
|
33 |
import visuAnalec.elements.* |
|
34 |
|
|
35 |
def scriptName = this.class.getSimpleName()
def parent
def selection = []
if (!(corpusViewSelection instanceof CQPCorpus)) {
	println "** $scriptName please select a Corpus to run the macro"
	// stop here: without this return the script went on with a selection that is not a corpus
	return
}
selection << corpusViewSelection
parent = corpusViewSelection
|
43 |
|
|
44 |
// BEGINNING OF PARAMETERS |
|
45 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="CHAINE") |
|
46 |
String schema_ursql |
|
47 |
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3") |
|
48 |
int minimum_schema_size |
|
49 |
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999") |
|
50 |
int maximum_schema_size |
|
51 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION") |
|
52 |
String unit_ursql |
|
53 |
@Field @Option(name="limit_distance_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
|
54 |
int limit_distance_in_schema |
|
55 |
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div") |
|
56 |
limit_cql |
|
57 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
|
58 |
boolean strict_inclusion |
|
59 |
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
|
60 |
int limit_distance |
|
61 |
@Field @Option(name="unit_property_display", usage="Unit property to count", widget="String", required=true, def="CATEGORIE") |
|
62 |
String unit_property_display |
|
63 |
@Field @Option(name="struct_name", usage="Structure to display", widget="String", required=true, def="div") |
|
64 |
String struct_name |
|
65 |
@Field @Option(name="struct_prop", usage="Structure property to display", widget="String", required=true, def="n") |
|
66 |
String struct_prop |
|
67 |
@Field @Option(name="line_width", usage="line width", widget="Integer", required=true, def="1") |
|
68 |
int line_width = 2 |
|
69 |
@Field @Option(name="bande_width", usage="bande width", widget="Float", required=true, def="1.0f") |
|
70 |
float bande_width = 1.0f |
|
71 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
|
72 |
debug |
|
73 |
if (!ParametersDialog.open(this)) return |
|
74 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
|
75 |
|
|
76 |
|
|
77 |
// fully qualified: this script's import list does not include CQPSearchEngine
def CQI = org.txm.searchengine.cqp.CQPSearchEngine.getCqiClient()
|
78 |
|
|
79 |
def queries = [] |
|
80 |
def queryResults = [] |
|
81 |
def informations = [] |
|
82 |
for (def corpus : selection) { |
|
83 |
|
|
84 |
mainCorpus = corpus.getMainCorpus() |
|
85 |
|
|
86 |
def word = mainCorpus.getWordProperty() |
|
87 |
def analecCorpus = URSCorpora.getCorpus(mainCorpus.getName()) |
|
88 |
|
|
89 |
def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, |
|
90 |
unit_ursql, limit_distance_in_schema, limit_cql, strict_inclusion, limit_distance); |
|
91 |
|
|
92 |
def query = "" |
|
93 |
if (limit_cql != null && !limit_cql.getQueryString().equals("\"\"")) query += limit_cql |
|
94 |
if (schema_ursql != null && schema_ursql.length() > 0) { if (query.length() > 0) query += " & "; query += ""+schema_ursql+ " >"} |
|
95 |
if (unit_ursql != null && unit_ursql.length() > 0) query += " "+unit_ursql |
|
96 |
query = new CQLQuery(query) |
|
97 |
int[] starts = new int[selectedUnits.size()]; |
|
98 |
int[] ends = new int[selectedUnits.size()]; |
|
99 |
def unitsinformations = [] |
|
100 |
int n = 0; |
|
101 |
for (Unite unite : selectedUnits) { |
|
102 |
starts[n] = unite.getDeb() |
|
103 |
ends[n] = unite.getFin() |
|
104 |
unitsinformations << AnalecUtils.toString(CQI, word, unite); |
|
105 |
n++ |
|
106 |
} |
|
107 |
def queryResult = new FakeQueryResult(corpus.getID(), corpus, query, starts, ends, null) |
|
108 |
queries << query |
|
109 |
queryResults << queryResult |
|
110 |
informations << unitsinformations |
|
111 |
|
|
112 |
if (unit_property_display != null && unit_property_display.length() > 0) { |
|
113 |
def propvalues = [:] |
|
114 |
for (def unit : selectedUnits) { |
|
115 |
def v = unit.getProp(unit_property_display) |
|
116 |
if (v == null) v = "<null>" |
|
117 |
else if (v.length() == 0) v = "<empty>" |
|
118 |
|
Formats disponibles : Unified diff