Révision 2032
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/BuildDemocratCorpusVersion1Macro.groovy (revision 2032) | ||
---|---|---|
66 | 66 |
|
67 | 67 |
def xpathProperties = new Properties() |
68 | 68 |
xpathProperties.load(IOUtils.getReader(xpathFile)) |
69 |
// add default xpaths for measures |
|
70 |
for (def s : ["number of words", "number of coreference chains", "number of referring chains", "number of referring expressions"]) { |
|
71 |
if (!xpathProperties.containsKey(s)) { |
|
72 |
xpathProperties.put(s,"/tei:TEI/tei:teiHeader/tei:fileDesc/tei:extent/tei:measure[@unit=\"$s\"]/@quantity") |
|
73 |
} |
|
74 |
} |
|
75 | 69 |
|
76 | 70 |
println "xpath properties: "+xpathProperties.keySet() |
77 | 71 |
|
... | ... | |
154 | 148 |
|
155 | 149 |
|
156 | 150 |
println " compute measures..." |
157 |
def data = new ComputeDemocratMeasureMetadata().getStats(corpus) |
|
151 |
def data = new ComputeDemocratMeasureMetadata().getStats(corpus, xpathProperties)
|
|
158 | 152 |
|
159 | 153 |
Metadata2TEiHeader mthm = new Metadata2TEiHeader(debug); |
160 | 154 |
for (def h : header) data[h] = csvReader.get(h) |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/ComputeDemocratMeasureMetadata.groovy (revision 2032) | ||
---|---|---|
1 | 1 |
package org.txm.macro.urs.democrat |
2 | 2 |
|
3 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
3 | 4 |
import org.txm.searchengine.cqp.corpus.MainCorpus |
5 |
import org.txm.searchengine.cqp.corpus.StructuralUnit |
|
4 | 6 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery |
5 | 7 |
import visuAnalec.donnees.Corpus |
6 | 8 |
import org.txm.annotation.urs.URSCorpora |
9 |
import org.txm.properties.core.functions.* |
|
7 | 10 |
|
8 | 11 |
class ComputeDemocratMeasureMetadata { |
9 |
def getStats(MainCorpus corpus) { |
|
12 |
|
|
13 |
def ignored = ["txmcorpus", "text", "body"] |
|
14 |
|
|
15 |
def getStats(MainCorpus corpus, def xpathProperties) { |
|
10 | 16 |
def data = [:] |
11 | 17 |
|
12 | 18 |
data["number of words"] = corpus.query(new CQLQuery("[]"), "TMP", false).getNMatch(); |
... | ... | |
23 | 29 |
data["number of referring chains"] = refchaines.size() |
24 | 30 |
data["number of referring expressions"] = analecCorpus.getUnites("MENTION").size() |
25 | 31 |
|
32 |
// add default xpaths for measures |
|
33 |
for (String s : ["number of words", "number of coreference chains", "number of referring chains", "number of referring expressions"]) { |
|
34 |
if (!xpathProperties.containsKey(s)) { |
|
35 |
xpathProperties.put(s,"/tei:TEI/tei:teiHeader/tei:fileDesc/tei:extent/tei:measure[@unit=\"$s\"]/@quantity") |
|
36 |
} |
|
37 |
} |
|
38 |
|
|
39 |
// add structure measures |
|
40 |
for (StructuralUnit su : corpus.getStructuralUnits()) { |
|
41 |
if (!ignored.contains(su.getName())) { |
|
42 |
String s = "structure "+su.getName() |
|
43 |
data[s] = corpus.query(new CQLQuery("<"+su.getName()+">[]"), "TMP", false).getNMatch(); |
|
44 |
if (!xpathProperties.containsKey(s)) { |
|
45 |
xpathProperties.put(s,"/tei:TEI/tei:teiHeader/tei:fileDesc/tei:extent/tei:measure[@unit=\"$s\"]/@quantity") |
|
46 |
} |
|
47 |
} |
|
48 |
} |
|
49 |
|
|
26 | 50 |
return data |
27 | 51 |
} |
28 | 52 |
} |
Formats disponibles : Unified diff