Révision 2032

tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/BuildDemocratCorpusVersion1Macro.groovy (revision 2032)
66 66

  
67 67
def xpathProperties = new Properties()
68 68
xpathProperties.load(IOUtils.getReader(xpathFile))
69
// add default xpaths for measures
70
for (def s : ["number of words", "number of coreference chains", "number of referring chains", "number of referring expressions"]) {
71
	if (!xpathProperties.containsKey(s)) {
72
		xpathProperties.put(s,"/tei:TEI/tei:teiHeader/tei:fileDesc/tei:extent/tei:measure[@unit=\"$s\"]/@quantity")
73
	}
74
}
75 69

  
76 70
println "xpath properties: "+xpathProperties.keySet()
77 71

  
......
154 148
	
155 149
	
156 150
	println " compute measures..."
157
	def data = new ComputeDemocratMeasureMetadata().getStats(corpus)
151
	def data = new ComputeDemocratMeasureMetadata().getStats(corpus, xpathProperties)
158 152
	
159 153
	Metadata2TEiHeader mthm = new Metadata2TEiHeader(debug);
160 154
	for (def h : header) data[h] = csvReader.get(h)
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/ComputeDemocratMeasureMetadata.groovy (revision 2032)
1 1
package org.txm.macro.urs.democrat
2 2

  
3
import org.txm.searchengine.cqp.CQPSearchEngine
3 4
import org.txm.searchengine.cqp.corpus.MainCorpus
5
import org.txm.searchengine.cqp.corpus.StructuralUnit
4 6
import org.txm.searchengine.cqp.corpus.query.CQLQuery
5 7
import visuAnalec.donnees.Corpus
6 8
import org.txm.annotation.urs.URSCorpora
9
import org.txm.properties.core.functions.*
7 10

  
8 11
class ComputeDemocratMeasureMetadata {
9
	def getStats(MainCorpus corpus) {
12
	
13
	def ignored = ["txmcorpus", "text", "body"]
14
	
15
	def getStats(MainCorpus corpus, def xpathProperties) {
10 16
		def data = [:]
11 17
		
12 18
		data["number of words"] = corpus.query(new CQLQuery("[]"), "TMP", false).getNMatch();
......
23 29
		data["number of referring chains"] = refchaines.size()
24 30
		data["number of referring expressions"] = analecCorpus.getUnites("MENTION").size()
25 31
		
32
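		// register a default XPath for each built-in measure unless the caller already supplied one -- NOTE(review): the stored value is a GString, which Properties.getProperty() does not return; confirm consumers read it with get()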
33
		for (String s : ["number of words", "number of coreference chains", "number of referring chains", "number of referring expressions"]) {
34
			if (!xpathProperties.containsKey(s)) {
35
				xpathProperties.put(s,"/tei:TEI/tei:teiHeader/tei:fileDesc/tei:extent/tei:measure[@unit=\"$s\"]/@quantity")
36
			}
37
		}
38
		
39
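		// add one "structure <name>" measure (match count of the query <name>[]) per structural unit not listed in 'ignored', with a default XPath if none is set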
40
		for (StructuralUnit su : corpus.getStructuralUnits()) {
41
			if (!ignored.contains(su.getName())) {
42
				String s = "structure "+su.getName()
43
				data[s] = corpus.query(new CQLQuery("<"+su.getName()+">[]"), "TMP", false).getNMatch();
44
				if (!xpathProperties.containsKey(s)) {
45
					xpathProperties.put(s,"/tei:TEI/tei:teiHeader/tei:fileDesc/tei:extent/tei:measure[@unit=\"$s\"]/@quantity")
46
				}
47
			}
48
		}
49
		
26 50
		return data
27 51
	}
28 52
}

Formats disponibles : Unified diff