Révision 2032
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/BuildDemocratCorpusVersion1Macro.groovy (revision 2032) | ||
|---|---|---|
| 66 | 66 |
|
| 67 | 67 |
def xpathProperties = new Properties() |
| 68 | 68 |
xpathProperties.load(IOUtils.getReader(xpathFile)) |
| 69 |
// add default xpaths for measures |
|
| 70 |
for (def s : ["number of words", "number of coreference chains", "number of referring chains", "number of referring expressions"]) {
|
|
| 71 |
if (!xpathProperties.containsKey(s)) {
|
|
| 72 |
xpathProperties.put(s,"/tei:TEI/tei:teiHeader/tei:fileDesc/tei:extent/tei:measure[@unit=\"$s\"]/@quantity") |
|
| 73 |
} |
|
| 74 |
} |
|
| 75 | 69 |
|
| 76 | 70 |
println "xpath properties: "+xpathProperties.keySet() |
| 77 | 71 |
|
| ... | ... | |
| 154 | 148 |
|
| 155 | 149 |
|
| 156 | 150 |
println " compute measures..." |
| 157 |
def data = new ComputeDemocratMeasureMetadata().getStats(corpus) |
|
| 151 |
def data = new ComputeDemocratMeasureMetadata().getStats(corpus, xpathProperties)
|
|
| 158 | 152 |
|
| 159 | 153 |
Metadata2TEiHeader mthm = new Metadata2TEiHeader(debug); |
| 160 | 154 |
for (def h : header) data[h] = csvReader.get(h) |
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/ComputeDemocratMeasureMetadata.groovy (revision 2032) | ||
|---|---|---|
| 1 | 1 |
package org.txm.macro.urs.democrat |
| 2 | 2 |
|
| 3 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
| 3 | 4 |
import org.txm.searchengine.cqp.corpus.MainCorpus |
| 5 |
import org.txm.searchengine.cqp.corpus.StructuralUnit |
|
| 4 | 6 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery |
| 5 | 7 |
import visuAnalec.donnees.Corpus |
| 6 | 8 |
import org.txm.annotation.urs.URSCorpora |
| 9 |
import org.txm.properties.core.functions.* |
|
| 7 | 10 |
|
| 8 | 11 |
class ComputeDemocratMeasureMetadata {
|
| 9 |
def getStats(MainCorpus corpus) {
|
|
| 12 |
|
|
| 13 |
def ignored = ["txmcorpus", "text", "body"] |
|
| 14 |
|
|
| 15 |
def getStats(MainCorpus corpus, def xpathProperties) {
|
|
| 10 | 16 |
def data = [:] |
| 11 | 17 |
|
| 12 | 18 |
data["number of words"] = corpus.query(new CQLQuery("[]"), "TMP", false).getNMatch();
|
| ... | ... | |
| 23 | 29 |
data["number of referring chains"] = refchaines.size() |
| 24 | 30 |
data["number of referring expressions"] = analecCorpus.getUnites("MENTION").size()
|
| 25 | 31 |
|
| 32 |
// add default xpaths for measures |
|
| 33 |
for (String s : ["number of words", "number of coreference chains", "number of referring chains", "number of referring expressions"]) {
|
|
| 34 |
if (!xpathProperties.containsKey(s)) {
|
|
| 35 |
xpathProperties.put(s,"/tei:TEI/tei:teiHeader/tei:fileDesc/tei:extent/tei:measure[@unit=\"$s\"]/@quantity") |
|
| 36 |
} |
|
| 37 |
} |
|
| 38 |
|
|
| 39 |
// add structure measures |
|
| 40 |
for (StructuralUnit su : corpus.getStructuralUnits()) {
|
|
| 41 |
if (!ignored.contains(su.getName())) {
|
|
| 42 |
String s = "structure "+su.getName() |
|
| 43 |
data[s] = corpus.query(new CQLQuery("<"+su.getName()+">[]"), "TMP", false).getNMatch();
|
|
| 44 |
if (!xpathProperties.containsKey(s)) {
|
|
| 45 |
xpathProperties.put(s,"/tei:TEI/tei:teiHeader/tei:fileDesc/tei:extent/tei:measure[@unit=\"$s\"]/@quantity") |
|
| 46 |
} |
|
| 47 |
} |
|
| 48 |
} |
|
| 49 |
|
|
| 26 | 50 |
return data |
| 27 | 51 |
} |
| 28 | 52 |
} |
Formats disponibles : Unified diff