Révision 3351

TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/cqp/URSUnits2CQPWordsMacro.groovy (revision 3351)
19 19
}
20 20

  
21 21
@Field @Option(name="unit_type", usage="Corpus name in uppercase", widget="String", required=true, def="Entity")
22
String unit_type
22
		String unit_type
23 23

  
24
@Field @Option(name="unit_properties", usage="Corpus name in uppercase", widget="String", required=true, def="Property")
25
String unit_properties
24
@Field @Option(name="unit_properties", usage=" * or comma separated list of properties names", widget="String", required=true, def="Property")
25
		String unit_properties
26
		
27
@Field @Option(name="unit_internal_properties", usage="Corpus name in uppercase", widget="Boolean", required=true, def="false")
28
def unit_internal_properties
26 29

  
27
@Field @Option(name="unit_word", usage="Corpus name in uppercase", widget="StringArray", metaVar="START	END", required=true, def="START")
28
String unit_word
30
@Field @Option(name="unit_word", usage="CQP word position to annotate", widget="StringArray", metaVar="START	END", required=true, def="START")
31
		String unit_word
29 32

  
30 33
if (!ParametersDialog.open(this)) return
31 34

  
32
MainCorpus corpus = corpusViewSelection
35
def corpus = corpusViewSelection
33 36
def analecCorpus = URSCorpora.getCorpus(corpus)
34 37

  
35 38
def texts = corpus.getCorpusTextIdsList();
......
39 42
for (int i = 0 ; i < texts.size() ; i++) {
40 43
	
41 44
	println "Processing annotations of "+texts[i]+"..."
42

  
45
	
43 46
	def text_id = texts[i]
44 47
	def text_start = texts_startlimits[i]
45 48
	def text_end = texts_endlimits[i]
46

  
49
	
47 50
	File xmltxmFile = new File(corpus.getProject().getProjectDirectory(), "txm/"+corpus.getID()+"/"+text_id+".xml")
48 51
	File xmltxmFileCopy = new File(corpus.getProject().getProjectDirectory(), text_id+"_copy.xml")
49

  
52
	
50 53
	if (!xmltxmFile.exists()) {
51 54
		println "Warning: no text file found: "+xmltxmFile
52 55
		continue
53 56
	}
54 57
	
55
	for (String uType : analecCorpus.getStructure().getTypes(Unite.class)) {
56
		def corpus_units = []
57
		ArrayList<Unite> all_units = analecCorpus.getUnites(uType)
58
		corpus_units = all_units.findAll() {
59
			unit_type.equals(it.getProp("type")) && text_start <= it.getDeb() && it.getFin() < text_end
60
		}
61
		
62
		if (corpus_units.size() == 0) continue;
63
		
64
		try {
65
			println " processing word Units ${text_id} and its '$uType' units ("+corpus_units.size()+")"
66
			WordUnitsInserter inserter = new WordUnitsInserter(corpus, xmltxmFile, corpus_units, unit_properties, unit_word);
67
			if (inserter.process(xmltxmFileCopy) && ValidateXml.test(xmltxmFileCopy)) {
68
				xmltxmFile.delete()
69
				xmltxmFileCopy.renameTo(xmltxmFile)
70

  
71
				println "Done, "+corpus_units.size()+ " units written"
72

  
73
			} else {
74
				println "Error while processing the XML-TXM $xmltxmFile file"
75
				File error = new File(corpus.getProject().getProjectDirectory(), "error/"+xmltxmFile.getName())
76
				error.getParentFile().mkdirs()
77
				println "	moving created file to $error"
78
				error.delete()
79
				xmltxmFileCopy.renameTo(error)
80
			}
81
		} catch(Exception e) {
82
			println "Error while processing milestones $xmltxmFile file: "+e
83
			e.printStackTrace();
58
	def corpus_units = []
59
	ArrayList<Unite> all_units = analecCorpus.getUnites(unit_type)
60
	corpus_units = all_units.findAll() {
61
		text_start <= it.getDeb() && it.getFin() < text_end
62
	}
63
	
64
	if (corpus_units.size() == 0) {
65
		println " No '$unit_type' unit found."
66
		continue;
67
	}
68
	
69
	try {
70
		println " Processing word Units ${text_id} and its '$unit_type' units ("+corpus_units.size()+")"
71
		WordUnitsInserter inserter = new WordUnitsInserter(corpus, xmltxmFile, corpus_units, unit_properties, unit_word, unit_internal_properties);
72
		if (inserter.process(xmltxmFileCopy) && ValidateXml.test(xmltxmFileCopy)) {
73
			xmltxmFile.delete()
74
			xmltxmFileCopy.renameTo(xmltxmFile)
75
			
76
			println "Done, "+corpus_units.size()+ " units written"
77
			
78
		} else {
79
			println "Error while processing the XML-TXM $xmltxmFile file"
84 80
			File error = new File(corpus.getProject().getProjectDirectory(), "error/"+xmltxmFile.getName())
85 81
			error.getParentFile().mkdirs()
86 82
			println "	moving created file to $error"
87 83
			error.delete()
88 84
			xmltxmFileCopy.renameTo(error)
89 85
		}
86
	} catch(Exception e) {
87
		println "Error while processing milestones $xmltxmFile file: "+e
88
		e.printStackTrace();
89
		File error = new File(corpus.getProject().getProjectDirectory(), "error/"+xmltxmFile.getName())
90
		error.getParentFile().mkdirs()
91
		println "	moving created file to $error"
92
		error.delete()
93
		xmltxmFileCopy.renameTo(error)
90 94
	}
91 95
}
TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/cqp/WordUnitsInserter.groovy (revision 3351)
21 21
	File inputFile
22 22
	def id2Units = [:]
23 23
	def unit_properties // "*" or list (String)
24
	def unit_internal_properties = false
24 25
	def unit_word // ALL START END
25 26
	LinkedHashMap<String, String> anaValues = new LinkedHashMap<String, String>();
26 27

  
27
	public WordUnitsInserter(MainCorpus corpus, File inputFile, List<Unite> units, String unit_properties, String unit_word) {
28
	public WordUnitsInserter(MainCorpus corpus, File inputFile, List<Unite> units, String unit_properties, String unit_word, boolean unit_internal_properties) {
28 29
		super(inputFile)
29 30
		this.inputFile = inputFile
30 31
		
32
		this.unit_internal_properties = unit_internal_properties
31 33
		this.unit_properties = unit_properties
32 34
		this.unit_word = unit_word
33 35
		if (!("*".equals(unit_properties))) {
......
84 86
						
85 87
						if (!anaValues.containsKey(p)) {
86 88
							anaValues[p] = ""
87
							ana_resp = "#txm"
89
							ana_resp = "#urs"
88 90
						}
89 91
						anaValues[p] = (anaValues[p]+" "+props.get(p)).trim()
90 92
					}
93
					if (unit_internal_properties) {
94
	
95
						anaValues["urs-type"] = u.getType()
96
						anaValues["urs-start"] = ""+u.getDeb()
97
						anaValues["urs-end"] = ""+u.getFin()
98
					}
91 99
				}
92 100
				if (anaValues.size() > 0) println anaValues
93 101
			}

Formats disponibles : Unified diff