Révision 476

tmp/org.txm.oriflamms.rcp/.classpath (revision 476)
1
<?xml version="1.0" encoding="UTF-8"?>
2
<classpath>
3
	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7"/>
4
	<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins">
5
		<accessrules>
6
			<accessrule kind="accessible" pattern="**"/>
7
		</accessrules>
8
	</classpathentry>
9
	<classpathentry kind="src" path="src"/>
10
	<classpathentry kind="src" path="res"/>
11
	<classpathentry kind="output" path="bin"/>
12
</classpath>
0 13

  
tmp/org.txm.oriflamms.rcp/META-INF/MANIFEST.MF (revision 476)
1
Manifest-Version: 1.0
2
Bundle-ManifestVersion: 2
3
Bundle-Name: Oriflamms
4
Bundle-SymbolicName: Oriflamms;singleton:=true
5
Bundle-Version: 1.0.0.qualifier
6
Bundle-Activator: oriflamms.Activator
7
Require-Bundle: org.txm.core;bundle-version="0.7.0",
8
 org.txm.rcp,
9
 org.eclipse.ui,
10
 org.eclipse.core.runtime,
11
 org.txm.searchengine.cqp.core,
12
 org.txm.utils
13
Bundle-RequiredExecutionEnvironment: JavaSE-1.6
14
Bundle-ActivationPolicy: lazy
15
Bundle-Vendor: Textometrie.org
0 16

  
tmp/org.txm.oriflamms.rcp/.project (revision 476)
1
<?xml version="1.0" encoding="UTF-8"?>
2
<projectDescription>
3
	<name>OriflammsRCP</name>
4
	<comment></comment>
5
	<projects>
6
	</projects>
7
	<buildSpec>
8
		<buildCommand>
9
			<name>org.eclipse.jdt.core.javabuilder</name>
10
			<arguments>
11
			</arguments>
12
		</buildCommand>
13
		<buildCommand>
14
			<name>org.eclipse.pde.ManifestBuilder</name>
15
			<arguments>
16
			</arguments>
17
		</buildCommand>
18
		<buildCommand>
19
			<name>org.eclipse.pde.SchemaBuilder</name>
20
			<arguments>
21
			</arguments>
22
		</buildCommand>
23
	</buildSpec>
24
	<natures>
25
		<nature>org.eclipse.pde.PluginNature</nature>
26
		<nature>org.eclipse.jdt.core.javanature</nature>
27
	</natures>
28
</projectDescription>
0 29

  
tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/UpdateCorpusImagePaths.java (revision 476)
1
// STANDARD DECLARATIONS
2
package org.txm.oriflamms.functions;
3

  
4
import java.io.File;
5
import java.io.FileFilter;
6
import java.io.IOException;
7

  
8
import javax.xml.stream.XMLStreamException;
9

  
10
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
11
import org.txm.searchengine.cqp.corpus.MainCorpus;
12
import org.txm.searchengine.cqp.serverException.CqiServerError;
13

  
14
public class UpdateCorpusImagePaths {
15
	
16
	public boolean process(MainCorpus corpus, String newpathprefix) throws CqiClientException, IOException, CqiServerError, XMLStreamException {
17

  
18
		File binDir = corpus.getBaseDirectory();
19
		File htmlDir = new File(binDir, "HTML");
20
		if (!htmlDir.exists()) {
21
			System.out.println("Error: no html directory: "+htmlDir);
22
			return false;
23
		}
24
		File corpushtmlDir = new File(htmlDir, corpus.getName());
25
		if (!corpushtmlDir.exists()) {
26
			System.out.println("Error: no html corpus directory: "+corpushtmlDir);
27
			return false;
28
		}
29
		File corpusdefaulthtmlDir = new File(corpushtmlDir, "facsimile");
30
		if (!corpusdefaulthtmlDir.exists()) {
31
			System.out.println("Error: no html 'facsimile' corpus directory: "+corpusdefaulthtmlDir);
32
			return false;
33
		}
34
		if (newpathprefix == null) {
35
			newpathprefix = corpusdefaulthtmlDir.getAbsolutePath()+"/images/";
36
			System.out.println("No image path prefix specified, using corpus 'facsimile' HTML directory path: "+newpathprefix);
37
		}
38
		
39
		File[] files = corpusdefaulthtmlDir.listFiles(new FileFilter() {
40
			@Override
41
			public boolean accept(File f) {
42
				return f.isFile() && f.getName().endsWith(".html") && !f.isHidden();
43
			}
44
		});
45
		
46
		if (files == null || files.length == 0) {
47
			System.out.println("Error: no html files in HTML default corpus directory: "+corpusdefaulthtmlDir);
48
			return false;
49
		}
50
		
51
		for (File htmlFile : files) {
52
			UpdateHTMLFileImagePaths uhfip = new UpdateHTMLFileImagePaths(htmlFile, newpathprefix);
53
			File outhtmlFile = new File(htmlFile.getAbsolutePath()+".tmp");
54
			if (!uhfip.process(outhtmlFile)) {
55
				System.out.println("Fail to process HTML file: "+htmlFile);
56
				return false;
57
			}
58
			
59
			if (htmlFile.delete() && outhtmlFile.renameTo(htmlFile)) {
60
				//ok
61
			} else {
62
				System.out.println("Fail to replace HTML file: "+htmlFile+" with "+outhtmlFile);
63
				return false;
64
			}
65
		}
66
		return true;
67
	}
68
}
0 69

  
tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/BuildAllProjects.java (revision 476)
1
// STANDARD DECLARATIONS
2
package org.txm.oriflamms.functions;
3

  
4
import java.io.File;
5
import java.util.HashMap;
6

  
7
/**
 * Batch driver meant to build several Oriflamms projects in one run.
 *
 * Work in progress: the per-project parameters and the actual build call are
 * still commented out, so only the project names are printed.
 */
public class BuildAllProjects {

	/**
	 * Declares the projects to build and iterates over them.
	 *
	 * @param projectsDirectory directory expected to contain the project source files (not read yet)
	 * @param xslDirectory directory expected to contain the XSL stylesheets (not read yet)
	 */
	public BuildAllProjects(File projectsDirectory, File xslDirectory) {

		// explicit type arguments: the bundle manifest declares JavaSE-1.6, which has no diamond operator
		HashMap<String, HashMap<String, Object>> todo = new HashMap<String, HashMap<String, Object>>();

		HashMap<String, Object> c1 = new HashMap<String, Object>();
		//["xmlFile":new File(projectsDirectory, "Charrette_Ms_A.xml"),xslFile:new File(projectsDirectory, "oriflamms-convert-mss-dates-oriflammsxml.xsl"),imageDirectory:new File(projectsDirectory, "images"), createArchive:false],
		todo.put("CHARETTE", c1);
		HashMap<String, Object> c2 = new HashMap<String, Object>();
		//"CHARETTEBIS":["xmlFile":new File(projectsDirectory, "Charrette_Ms_A.xml"),xslFile:new File(projectsDirectory, "oriflamms-convert-mss-dates-oriflammsxml.xsl"),imageDirectory:new File(projectsDirectory, "images"), createArchive:false],
		todo.put("CHARETTEBIS", c2);

		for (String k : todo.keySet()) {
			System.out.println("*** BUILD "+k+" ***");
			try {
				//TEI2Project p = new TEI2Project();
			} catch(Exception e) {
				// one failing project must not prevent the following ones from being built
				System.out.println( "ERROR WHILE PROCESSING "+k+": "+e);
				e.printStackTrace();
			}
		}
	}

	/**
	 * Developer entry point using hard-coded local paths.
	 */
	public static void main(String[] args) {
		File projectsDirectory = new File("/home/mdecorde/TEMP/testori/corpus/");
		File xslDirectory = new File("/home/mdecorde/TXM/scripts/macro/org/txm/macro/oriflamms/prepare");
		// all the work is done by the constructor
		new BuildAllProjects(projectsDirectory, xslDirectory);
	}
}
37

  
0 38

  
tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/AbbreviationsAndLines.java (revision 476)
1
// STANDARD DECLARATIONS
2
package org.txm.oriflamms.functions;
3

  
4
import java.io.File;
5
import java.io.IOException;
6

  
7
import org.txm.Toolbox;
8
import org.txm.searchengine.cqp.AbstractCqiClient;
9
import org.txm.searchengine.cqp.CQPEngine;
10
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
11
import org.txm.searchengine.cqp.clientExceptions.UnexpectedAnswerException;
12
import org.txm.searchengine.cqp.corpus.Corpus;
13
import org.txm.searchengine.cqp.serverException.CqiServerError;
14

  
15
public class AbbreviationsAndLines extends OriflammsFunction {
16

  
17
	protected int dist_start;
18
	protected int dist_end;
19

  
20
	public AbbreviationsAndLines(Corpus corpus, File tsvFile, int dist_start, int dist_end) throws CqiClientException, IOException, CqiServerError {
21
		super(corpus, tsvFile);
22
	}
23
	
24
	public boolean process() throws CqiClientException, IOException, CqiServerError {
25

  
26
		dist_start = Math.abs(dist_start);
27
		dist_end = Math.abs(dist_end);
28
		
29
		System.out.println("Dénombrement des abbréviations de "+corpus+" en '"+(wordCorpus?"mots":"lettres")+"' pour des distances au début de "+dist_start+" et à la fin "+dist_end+" de la ligne");
30
		return super.process();
31
	}
32

  
33
	boolean processLine(String text_id, String pb_id, String cb_id, String lb_id, int length, Object[] infos) {
34

  
35
		String[] abbrNs = (String[]) infos[0];
36
		String[] allLetters = (String[]) infos[1];
37
		String[] alignableLetters = (String[]) infos[2];
38
		String[] characters = (String[]) infos[3];
39
		String[] words = (String[]) infos[4];
40
		
41
		int Nabbr = 0, NsupAbbr = 0, Ntotal = 0;
42
		int p2 = length - dist_end;
43
		if (p2 < 0) p2 = length+1;
44
		if (p2 < dist_start) p2 = dist_start;
45

  
46
		for (int i = 0 ; i < length ; i++) {
47

  
48
			if (i == dist_start) {
49
				writer.println(text_id+"\t"+pb_id+"\t"+cb_id+"\t"+lb_id+"\ts\t"+Nabbr+"\t"+NsupAbbr+"\t"+Ntotal+"\t"+(100.0*(float)NsupAbbr/(float)Ntotal));
50

  
51
				Nabbr = 0;
52
				NsupAbbr = 0;
53
				Ntotal = 0;
54
			} 
55
			if (i == p2) {
56
				writer.println(text_id+"\t"+pb_id+"\t"+cb_id+"\t"+lb_id+"\tm\t"+Nabbr+"\t"+NsupAbbr+"\t"+Ntotal+"\t"+(100.0*(float)NsupAbbr/(float)Ntotal));
57

  
58
				Nabbr = 0;
59
				NsupAbbr = 0;
60
				Ntotal = 0;
61
			}
62

  
63
			Nabbr += Integer.parseInt(abbrNs[i]);
64
			NsupAbbr += allLetters[i].length() - characters[i].length();
65
			Ntotal += allLetters[i].length();
66

  
67
		}
68
		writer.println(text_id+"\t"+pb_id+"\t"+cb_id+"\t"+lb_id+"\te\t"+Nabbr+"\t"+NsupAbbr+"\t"+Ntotal+"\t"+(100.0*(float)NsupAbbr/(float)Ntotal));
69
		return true;
70
	}
71

  
72
	Object[] getInfos(int from ,int length) throws UnexpectedAnswerException, IOException, CqiServerError {
73

  
74
		int[] positions = new int[length];
75
		for (int i = 0 ; i < length ; i++) positions[i] = from++;
76
		AbstractCqiClient CQI = CQPEngine.getCqiClient();
77

  
78
		String[] abbrNs = CQI.cpos2Str(abbrn.getQualifiedName(), positions);
79
		String[] allLetters = CQI.cpos2Str(lettersAll.getQualifiedName(), positions);
80
		String[] alignableLetters = CQI.cpos2Str(lettersAlignable.getQualifiedName(), positions);
81
		String[] characters = CQI.cpos2Str(charactersP.getQualifiedName(), positions);
82
		String[] words = CQI.cpos2Str(form.getQualifiedName(), positions);
83

  
84
		String[][] rez = {abbrNs,allLetters,alignableLetters,characters,words};
85
		return rez;
86
	}
87
}
0 88

  
tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/AbbreviationsAndSementics.java (revision 476)
1
// STANDARD DECLARATIONS
2
package org.txm.oriflamms.functions;
3

  
4
import java.io.File;
5
import java.io.IOException;
6
import java.util.ArrayList;
7
import java.util.HashMap;
8

  
9
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
10
import org.txm.searchengine.cqp.clientExceptions.UnexpectedAnswerException;
11
import org.txm.searchengine.cqp.corpus.Corpus;
12
import org.txm.searchengine.cqp.corpus.StructuralUnit;
13
import org.txm.searchengine.cqp.corpus.StructuralUnitProperty;
14
import org.txm.searchengine.cqp.serverException.CqiServerError;
15

  
16
public class AbbreviationsAndSementics extends OriflammsFunction {
17

  
18
	protected String entities;
19
	protected String[] entitiesArray;
20
	protected ArrayList<StructuralUnitProperty> structures;
21

  
22
	public AbbreviationsAndSementics(Corpus corpus, File tsvFile, String entities) throws CqiClientException, IOException,
23
			CqiServerError {
24
		super(corpus, tsvFile);
25
		
26
		entitiesArray = entities.split(",");
27
		structures = new ArrayList<StructuralUnitProperty>();
28
		for (String structName : entitiesArray) {
29
			StructuralUnit su = corpus.getStructuralUnit(structName);
30
			if (su != null) {
31
				StructuralUnitProperty sup = su.getProperty("n");
32
				if (sup != null)
33
					structures.add(sup);
34
			}
35
		}
36

  
37
	}
38
	
39
	public boolean process() throws CqiClientException, IOException, CqiServerError {
40
		
41
		System.out.println("Dénombrement des abbréviations de "+corpus+" en '"+(wordCorpus?"mots":"lettres")+"' pour les entités "+entities);
42
		if (structures.size() == 0) {
43
			System.out.println("Erreur: pas de structures disponibles pour les entités suivantes "+entities);
44
			return false;
45
		}
46
		return super.process();
47
	}
48

  
49
	Object[] getInfos(int from ,int length) throws UnexpectedAnswerException, IOException, CqiServerError {
50

  
51
		int[] positions = new int[length];
52
		for (int i = 0 ; i < length ; i++) positions[i] = from++;
53

  
54
		String[] abbrNs = CQI.cpos2Str(abbrn.getQualifiedName(), positions);
55
		String[] allLetters = CQI.cpos2Str(lettersAll.getQualifiedName(), positions);
56
		String[] alignableLetters = CQI.cpos2Str(lettersAlignable.getQualifiedName(), positions);
57
		String[] characters = CQI.cpos2Str(charactersP.getQualifiedName(), positions);
58
		String[] words = CQI.cpos2Str(form.getQualifiedName(), positions);
59
		HashMap<String, int[]> structuresPositions = new HashMap<String, int[]>();
60
		for (StructuralUnitProperty structProp : structures) {
61
			structuresPositions.put(structProp.getFullName(), CQI.cpos2Struc(structProp.getQualifiedName(), positions));
62
		}
63

  
64
		Object[] rez = {abbrNs,allLetters,alignableLetters,characters,words, structuresPositions};
65
		return rez;
66
	}
67

  
68
	boolean processLine(String text_id, String pb_id, String cb_id, String lb_id, int length, Object[] infos) {
69

  
70
		String[] abbrNs = (String[]) infos[0];
71
		String[] allLetters = (String[]) infos[1];
72
		String[] alignableLetters = (String[]) infos[2];
73
		String[] characters = (String[]) infos[3];
74
		String[] words = (String[]) infos[4];
75
		HashMap<String, int[]> structuresPositions = (HashMap<String, int[]>) infos[5];
76

  
77
		int NabbrTotal = 0, NsupAbbrTotal = 0, NtotalTotal = 0;
78
		for (int i = 0 ; i < length ; i++) {
79
			NabbrTotal += Integer.parseInt(abbrNs[i]);
80
			NsupAbbrTotal += allLetters[i].length() - characters[i].length();
81
			NtotalTotal += allLetters[i].length();
82
		}
83

  
84
		for (StructuralUnitProperty strutcProp : structures) {
85
			int Nabbr = 0, NsupAbbr = 0, Ntotal = 0;
86
			int[] structureP = structuresPositions.get(strutcProp.getFullName());
87

  
88
			for (int i = 0 ; i < length ; i++) {
89
				if (structureP[i] >= 0) { // the position is in the structure
90
					Nabbr += Integer.parseInt(abbrNs[i]);
91
					NsupAbbr += allLetters[i].length() - characters[i].length();
92
					Ntotal += allLetters[i].length();
93
				}
94
			}
95

  
96
			writer.println(text_id+"\t"+pb_id+"\t"+cb_id+"\t"+lb_id+"\t"+strutcProp.getStructuralUnit()+"\t"+Nabbr+"\t"+NsupAbbr+"\t"+Ntotal+"\t"+((float)NsupAbbr/(float)Ntotal));
97
			NabbrTotal -= Nabbr;
98
			NsupAbbrTotal -= NsupAbbr;
99
			NtotalTotal -= Ntotal;
100
		}
101

  
102
		int Nabbr = 0, NsupAbbr = 0, Ntotal = 0;	
103
		writer.println(text_id+"\t"+pb_id+"\t"+cb_id+"\t"+lb_id+"\t#REST\t"+NabbrTotal+"\t"+NsupAbbrTotal+"\t"+NtotalTotal+"\t"+((float)NsupAbbrTotal/(float)NtotalTotal));
104
		return true;
105
	}
106
}
0 107

  
tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/CoordsProjection.java (revision 476)
1
package org.txm.oriflamms.functions;
2

  
3
import java.io.File;
4
import java.io.IOException;
5
import java.util.HashMap;
6

  
7
import javax.xml.stream.XMLStreamException;
8

  
9
import org.txm.importer.StaxIdentityParser;
10
import org.txm.importer.StaxParser;
11

  
12
class CoordsProjection extends StaxIdentityParser {
13

  
14
	File xmlFile;
15
	File img_links_directory;
16
	File zones_directory;
17

  
18
	String wordTag;
19
	String textname;
20
	String milestone;
21

  
22
	String current_img_file = "";
23
	String current_zone_file = "";
24

  
25
	String xmlType;
26
	String group;
27

  
28
	HashMap<String, String[]> zones = new HashMap<String, String[]>();
29
	HashMap<String, String> links = new HashMap<String, String>();
30

  
31
	public CoordsProjection(File xmlFile, File img_links_directory, File zones_directory, String wordTag) throws IOException, XMLStreamException {
32
		super(xmlFile);
33

  
34
		this.xmlFile = xmlFile;
35
		this.img_links_directory = img_links_directory;
36
		this.zones_directory = zones_directory;
37
		this.wordTag = wordTag;
38

  
39
		textname = xmlFile.getName();
40
		int idx = textname.indexOf(".xml");
41
		if (idx > 0) textname = textname.substring(0, idx);
42

  
43
		idx = textname.indexOf("-w");
44
		if (idx > 0) {
45
			textname = textname.substring(0, idx);
46
			xmlType = "word";
47
		}
48

  
49
		idx = textname.indexOf("-c");
50
		if (idx > 0) {
51
			textname = textname.substring(0, idx);
52
			xmlType = "character";
53
		}
54
	}
55

  
56
	public void processStartElement() throws XMLStreamException, IOException {
57
		super.processStartElement();
58
		if (localname.equals("milestone")) {
59
			String id = "";
60
			String unit= "";
61
			for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
62
				if (parser.getAttributeLocalName(i).equals("id")) {
63
					id = parser.getAttributeValue(i);
64
				} else if (parser.getAttributeLocalName(i).equals("unit")) {
65
					unit = parser.getAttributeValue(i);
66
				}
67
			}
68

  
69
			if (unit.equals("surface")) {
70
				milestone = id;
71
			}
72
		} else if (localname.equals(wordTag)) {
73
			String id = "";
74
			for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
75
				if (parser.getAttributeLocalName(i).equals("id")) {
76
					id = parser.getAttributeValue(i);
77
					break;
78
				}
79
			}
80

  
81
			// load next data if needed
82
			String img_file_name = textname+"_"+milestone+"-links.xml";
83
			if (!current_img_file.equals(img_file_name)) { // rebuild hashmaps
84
				String zone_file_name = textname+"_"+milestone+"-zones.xml";
85
				loadNextData(img_file_name, zone_file_name);
86
			}
87

  
88
			//			println "Find coords for word_id="+id+" in "+img_file_name+" and "+zone_file_name
89
			//			println "zone: "+links[id]
90
			//			println "coords: "+zones[links[id]]
91
			if (zones.size() > 0 && links.size() > 0) {
92
				String[] coords = zones.get(links.get(id));
93
				if (coords != null) {
94
					if (coords[0] == null || coords[1] == null || coords[2] == null || coords[3] == null) {
95
						System.out.println("WARNING one of coordinates is missing: "+coords);
96
					} else {
97
						try {
98
							writer.writeAttribute("x1", coords[0]);
99
							writer.writeAttribute("y1", coords[1]);
100
							writer.writeAttribute("x2", coords[2]);
101
							writer.writeAttribute("y2", coords[3]);
102
						} catch (XMLStreamException e) {
103
							// TODO Auto-generated catch block
104
							e.printStackTrace();
105
						}
106
					}
107
				} else {
108
					System.out.println("WARNING No group for word id="+id+" and link id="+links.get(id)+" in text "+textname);
109
				}
110
			}
111
		}
112
	}
113

  
114
	protected void loadNextData(String img_file_name, String zone_file_name) {
115
		File img_link_file = new File(img_links_directory, img_file_name);
116
		File zone_file = new File(zones_directory, zone_file_name);
117

  
118
		zones.clear();
119
		links.clear();
120
		if (zone_file.exists()) {
121
			StaxParser pZones = new StaxParser(zone_file) {
122
						public void processStartElement() {
123
							if (localname.equals("zone")) {
124
								String type = "";
125
								String idZone = "";
126
								String ulx = "", uly = "", lrx = "", lry = "";
127

  
128
								for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
129
									if (parser.getAttributeLocalName(i).equals("id")) {
130
										idZone = parser.getAttributeValue(i);
131
									} else if (parser.getAttributeLocalName(i).equals("type")) {
132
										type = parser.getAttributeValue(i);
133
									} else if (parser.getAttributeLocalName(i).equals("ulx")) {
134
										ulx = parser.getAttributeValue(i);
135
									} else if (parser.getAttributeLocalName(i).equals("uly")) {
136
										uly = parser.getAttributeValue(i);
137
									} else if (parser.getAttributeLocalName(i).equals("lrx")) {
138
										lrx = parser.getAttributeValue(i);
139
									} else if (parser.getAttributeLocalName(i).equals("lry")) {
140
										lry = parser.getAttributeValue(i);
141
									}
142
								}
143

  
144
								if (type.equals(xmlType)) {
145
									zones.put(idZone, new String[]{ulx, uly, lrx, lry});
146
								}
147

  
148
							}
149
						}
150
					};
151
			pZones.process();
152
		}
153
		if (img_link_file.exists()) {
154
			StaxParser pLinks = new StaxParser(img_link_file) {
155
						public void processStartElement() {
156
							if (localname.equals("linkGrp")) {
157
								for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
158
									if (parser.getAttributeLocalName(i).equals("type")) {
159
										group = parser.getAttributeValue(i);
160
										break;
161
									}
162
								}
163
							} else if (localname.equals("link") && group.startsWith(xmlType)) {
164
								String target = "";
165

  
166
								for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
167
									if (parser.getAttributeLocalName(i).equals("target")) {
168
										target = parser.getAttributeValue(i);
169
										break;
170
									}
171
								}
172

  
173
								String[] split = target.split(" ");
174
								links.put(split[0].substring(4), split[1].substring(4));
175
							}
176
						}
177
					};
178
			pLinks.process();
179
		}
180
		//println "zones size: "+zones.size()
181
		//println "links size: "+links.size()
182

  
183
		current_img_file = img_file_name;
184
	}
185

  
186
	public static void main(String[] args) {
187
		File corpusDirectory = new File("/home/mdecorde/TEMP/testori/FontenatTestAlignement");
188
		File xmlFile = new File(corpusDirectory, "txm/FontenayTest-w/FontenayTest-w.xml");
189
		File img_links_directory = new File(corpusDirectory, "img_links");
190
		File zones_directory = new File(corpusDirectory, "zones");
191

  
192
		File outputFile = new File(corpusDirectory, "txm/FontenayTest-w/FontenayTest-w-coords2.xml");
193

  
194
		CoordsProjection cp;
195
		try {
196
			cp = new CoordsProjection(xmlFile, img_links_directory, zones_directory, "w");
197
			System.out.println(cp.process(outputFile));
198
		} catch (IOException e) {
199
			// TODO Auto-generated catch block
200
			e.printStackTrace();
201
		} catch (XMLStreamException e) {
202
			// TODO Auto-generated catch block
203
			e.printStackTrace();
204
		}
205
		
206
	}
207
}
0 208

  
tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/TEI2Project.java (revision 476)
1
// STANDARD DECLARATIONS
2
package org.txm.oriflamms.functions;
3

  
4
import java.io.File;
5
import java.io.IOException;
6
import java.io.PrintWriter;
7
import java.util.Arrays;
8

  
9
import javax.xml.stream.XMLStreamException;
10
import javax.xml.transform.TransformerException;
11

  
12
import org.eclipse.core.runtime.Platform;
13
import org.osgi.framework.Bundle;
14
import org.txm.Toolbox;
15
import org.txm.functions.ProgressWatcher;
16
import org.txm.importer.ApplyXsl2;
17
import org.txm.importer.WriteIdAndNAttributes;
18
import org.txm.utils.BundleUtils;
19
import org.txm.utils.DeleteDir;
20
import org.txm.utils.io.FileCopy;
21
import org.txm.utils.io.IOUtils;
22
import org.txm.utils.logger.Log;
23
import org.txm.utils.zip.Zip;
24

  
25
public class TEI2Project {
26

  
27
	File xmlFile;
28
	File xslFile;
29
	File imagesDirectory;
30
	boolean createArchive;
31

  
32
	public TEI2Project(File xmlFile, File xslFile, File imagesDirectory, boolean createArchive) {
33
		this.xmlFile = xmlFile;
34
		this.xslFile = xslFile;
35
		this.imagesDirectory = imagesDirectory;
36
		this.createArchive = createArchive;
37
	}
38
	
39
	public boolean process(ProgressWatcher monitor) throws IOException, XMLStreamException, TransformerException {
40
		if (!xmlFile.exists()) {
41
			System.out.println("Could not read input XML input file: "+xmlFile);
42
			return false;
43
		}
44

  
45
		File oriflammsMacroDirectory = new File(BundleUtils.getBundleFile("Oriflamms"), "res");
46
		System.out.println("Ressources files directory: "+oriflammsMacroDirectory);
47
		if (!oriflammsMacroDirectory.exists()) {
48
			System.out.println("Oriflamms macro directory not found: "+oriflammsMacroDirectory);
49
			return false;
50
		}
51
		File xslTokenizer = new File(oriflammsMacroDirectory, "oriflamms-tokenize-words.xsl");
52
		File xslPatchLbInWords = new File(oriflammsMacroDirectory,"oriflamms-patch-words-with-lb.xsl");
53
		File xslMissingMilestones = new File(oriflammsMacroDirectory, "oriflamms-patch-milestones.xsl");
54
		File xslCharactersTokenizer = new File(oriflammsMacroDirectory, "oriflamms-tokenize-chars-1-tag.xsl");
55
		File xslCharactersIdentifier = new File(oriflammsMacroDirectory, "oriflamms-tokenize-chars-2-identify.xsl");
56
		File xslZones = new File(oriflammsMacroDirectory, "oriflamms-convert-transcriptions-orizones.xsl");
57

  
58
		if (!xslTokenizer.exists()  || !xslPatchLbInWords.exists() || !xslMissingMilestones.exists() ||	
59
				!xslCharactersTokenizer.exists() || !xslCharactersIdentifier.exists() || !xslZones.exists()) {
60
			System.out.println("Could not find one of TXM's XSL file : "+
61
					Arrays.asList(xslTokenizer, xslPatchLbInWords, xslMissingMilestones, 
62
					 xslCharactersTokenizer, xslCharactersIdentifier, xslZones));
63

  
64
			System.out.println(Arrays.asList(xslTokenizer.exists(), xslPatchLbInWords.exists(), xslMissingMilestones.exists()
65
			            , xslCharactersTokenizer.exists(), xslCharactersIdentifier.exists(), xslZones.exists()).toString());
66
			            		 return false;
67
		}
68

  
69
		File xmlFileParentDirectory = xmlFile.getParentFile();
70
				String projectName = xmlFile.getName();
71
				if (projectName.indexOf(".") > 0) projectName = projectName.substring(0, projectName.indexOf("."));
72
				File projectDirectory = new File(xmlFileParentDirectory, projectName);
73
		DeleteDir.deleteDirectory(projectDirectory);
74
		if (projectDirectory.exists()) {
75
			System.out.println("Could not delete previous project directory: "+projectDirectory);
76
			 return false;
77
		}
78

  
79
		projectDirectory.mkdir();
80

  
81
		if (!projectDirectory.exists()) {
82
			System.out.println("Could not create project directory: "+projectDirectory);
83
			return false;
84
		}
85

  
86
		System.out.println("Oriflamms project directory: "+projectDirectory);
87

  
88
		File xmlFileCopy = new File(projectDirectory, xmlFile.getName());
89
		System.out.println("Copying XML files: "+xmlFile+" to "+projectDirectory);
90
		CopyXMLFiles cdf = new CopyXMLFiles(xmlFile);
91
		projectDirectory.mkdir();
92
		System.out.println("Files copied: "+cdf.copy(projectDirectory));
93
		if (!xmlFileCopy.exists()) {
94
			System.out.println("Could not copy input XML input file: "+xmlFile+" to "+xmlFileCopy);
95
			 return false;
96
		}
97

  
98
		if (xslFile != null) {
99
			if (xslFile.exists()) {
100
				System.out.println("Applying "+xslFile+" to "+xmlFileCopy+"...");
101
				ApplyXsl2 builder = new ApplyXsl2(xslFile);
102
				if (!builder.process(xmlFileCopy, xmlFileCopy)) {
103
					System.out.println("Failed to process "+xmlFileCopy+" with "+xslFile);
104
					return false;
105
				}
106
			}
107
		}
108

  
109
		File textsDirectory = new File(projectDirectory, "texts");
110
		File imgDirectory = new File(projectDirectory, "img");
111
		File img_linksDirectory = new File(projectDirectory, "img_links");
112
		File ontologiesDirectory = new File(projectDirectory, "ontologies");
113
		File ontologies_linksDirectory = new File(projectDirectory, "ontologies_links");
114
		File zonesDirectory = new File(projectDirectory, "zones");
115
		textsDirectory.mkdir();
116
		imgDirectory.mkdir();
117
		img_linksDirectory.mkdir();
118
		ontologiesDirectory.mkdir();
119
		ontologies_linksDirectory.mkdir();
120
		zonesDirectory.mkdir();
121

  
122
		File xmlWFile = new File(textsDirectory, projectName+"-w.xml");
123
		File xmlWCFile = new File(textsDirectory, projectName+"-c.xml");
124

  
125
		try {
126

  
127
			System.out.println("Applying "+xslMissingMilestones+" to "+xmlWFile+"...");
128
			if (monitor != null) monitor.worked(1, "Applying "+xslMissingMilestones+" to "+xmlWFile+"...");
129
			ApplyXsl2 builder = new ApplyXsl2(xslMissingMilestones);
130
			if (!builder.process(xmlFileCopy, xmlWFile)) {
131
				System.out.println("Failed to process "+xmlWFile+" with "+xslMissingMilestones);
132
				return false;
133
			}
134

  
135
			System.out.println("Applying "+xslTokenizer+" to "+xmlWFile+"...");
136
			if (monitor != null) monitor.worked(15, "Applying "+xslTokenizer+" to "+xmlWFile+"...");
137
			builder = new ApplyXsl2(xslTokenizer);
138
			if (!builder.process(xmlWFile, xmlWFile)) {
139
				System.out.println("Failed to process "+xmlFileCopy+" with "+xslTokenizer);
140
				return false;
141
			}
142

  
143
			System.out.println("Merging words </w><w>");
144
			if (monitor != null) monitor.worked(15, "Merging words </w><w>");
145
			String content = IOUtils.getText(xmlWFile, "UTF-8");
146
			content = content.replaceAll("</w><w[^>]*>", "");
147
			content = content.replaceAll("</w>\\s*(<milestone[^>]*>)?\\s*(<pb[^>]*>)?\\s*(<cb[^>]*>)?\\s*(<lb[^>]*break=\"no\"[^>]*>)\\s*<w[^>]*>", "$1$2$3$4");
148
			try {
149
				PrintWriter writer = IOUtils.getWriter(xmlWFile);
150
						writer.print(content);
151
						writer.close();
152
			} catch (Exception e2) {
153
				System.out.println("Error while fixing words: "+e2);
154
				return false;
155
			}
156

  
157
			System.out.println("Applying "+xslPatchLbInWords+" to "+xmlWFile+"...");
158
			if (monitor != null) monitor.worked(15, "Applying "+xslPatchLbInWords+" to "+xmlWFile+"...");
159
			builder = new ApplyXsl2(xslPatchLbInWords);
160
			if (!builder.process(xmlWFile, xmlWFile)) {
161
				System.out.println("Failed to process "+xmlFileCopy+" with "+xslPatchLbInWords);
162
				return false;
163
			}
164

  
165
			System.out.println("Fixing 'id' and 'n' attributes in "+xmlWFile+"...");
166
			if (monitor != null) monitor.worked(15, "Fixing 'id' and 'n' attributes in "+xmlWFile+"...");
167
			WriteIdAndNAttributes wiana = new WriteIdAndNAttributes(xmlWFile, projectName);
168
			File tmp = new File(xmlWFile.getParentFile(), "tmp_"+xmlWFile.getName());
169
			if (!wiana.process(tmp)) {
170
				System.out.println("Failed to fix id and n attributes with of "+xmlWFile+" file");
171
				return false;
172
			} else {
173
				wiana = null;
174
				xmlWFile.delete();
175
				tmp.renameTo(xmlWFile);
176
				if (tmp.exists()) {
177
					System.out.println("Failed to replace "+xmlWFile+" with result file "+tmp);
178
					return false;
179
				}
180
			}
181

  
182
			System.out.println("Applying "+xslCharactersTokenizer+" to "+xmlWFile+"...");
183
			if (monitor != null) monitor.worked(15, "Applying "+xslCharactersTokenizer+" to "+xmlWFile+"...");
184
			builder = new ApplyXsl2(xslCharactersTokenizer);
185
			if (!builder.process(xmlWFile, xmlWCFile)) {
186
				System.out.println("Failed to process "+xmlWFile+" with "+xslCharactersTokenizer);
187
				return false;
188
			} 
189

  
190
			System.out.println("Applying "+xslCharactersIdentifier+" to "+xmlWCFile+"...");
191
			if (monitor != null) monitor.worked(1, "Applying "+xslCharactersIdentifier+" to "+xmlWCFile+"...");
192
			builder = new ApplyXsl2(xslCharactersIdentifier);
193
			if (!builder.process(xmlWCFile, xmlWCFile)) {
194
				System.out.println("Failed to process "+xmlWCFile+" with "+xslCharactersIdentifier);
195
				return false;
196
			}
197

  
198
			System.out.println("Applying "+xslZones+" to "+xmlWFile+"...");
199
			if (monitor != null) monitor.worked(15, "Applying "+xslZones+" to "+xmlWFile+"...");
200
			builder = new ApplyXsl2(xslZones);
201
			if (!builder.process(xmlWFile, null)) {
202
				System.out.println("Failed to process "+xmlFileCopy+" with "+xslZones);
203
				return false;
204
			}
205

  
206
			if (imagesDirectory.exists() && imagesDirectory.listFiles().length > 0) {
207
				System.out.println("Copying images files from "+imagesDirectory+" to "+imgDirectory+"...");
208
				FileCopy.copyFiles(imagesDirectory, imgDirectory);
209
				File[] files = imgDirectory.listFiles();
210
				if (files != null) System.out.println(""+files.length+" images copied.");
211
			}
212

  
213
			if (createArchive) {
214
				if (monitor != null) monitor.worked(15, "Building Oriflamms binary project... ");
215
				File zipFile = new File(xmlFileParentDirectory, projectName+".oriflamms");
216
				zipFile.delete();
217
				Zip.compress(projectDirectory, zipFile);
218

  
219
				if (zipFile.exists()) {
220
					System.out.println("Project oriflamms exported to "+zipFile);
221
					DeleteDir.deleteDirectory(projectDirectory);
222
				} else {
223
					System.out.println("Fail to export project "+projectDirectory);
224
				}
225
			}
226

  
227
		} catch (Exception e) {
228
			System.out.println("Error while applying a XSL file: "+e);
229
			Log.printStackTrace(e);
230
		}
231
		if (monitor != null) monitor.done();
232
		return true;
233
	}
234
}
0 235

  
tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/UpdateHTMLFileImagePaths.java (revision 476)
1
// STANDARD DECLARATIONS
2
package org.txm.oriflamms.functions;
3

  
4
import java.io.File;
5
import java.io.IOException;
6

  
7
import javax.xml.stream.XMLStreamException;
8

  
9
import org.txm.importer.StaxIdentityParser;
10

  
11
public class UpdateHTMLFileImagePaths extends StaxIdentityParser {
12
	String newpathprefix;
13

  
14
	public UpdateHTMLFileImagePaths(File htmlFile, String newpathprefix) throws IOException, XMLStreamException {
15
		super(htmlFile);
16
		this.newpathprefix = newpathprefix;
17
		
18
		if (newpathprefix.endsWith("/")) newpathprefix = newpathprefix.substring(0, newpathprefix.length()-1);
19
	}
20
	
21
	String src = null;
22
	protected void processStartElement() throws XMLStreamException, IOException {
23
		src = null;
24
		if (localname.equals("img")) {
25
			//System.out.println("start element img");
26
			int n = parser.getAttributeCount();
27
			for (int i = 0 ; i < n ; i++) {
28
				if (parser.getAttributeLocalName(i).equals("src")) {
29
					src = parser.getAttributeValue(i);
30
					//System.out.println("start element img@src="+src);
31
					break;
32
				}
33
			}
34
			
35
			if (src != null) {
36
				int idx = src.lastIndexOf("/");
37
				String name = src.substring(idx);
38
				src = newpathprefix+name;
39
			}
40
		}
41
		
42
		super.processStartElement();	
43
	}
44
	
45
	protected void writeAttributes() throws XMLStreamException {
46
		for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
47
			if (src != null && "src".equals(parser.getAttributeLocalName(i))) {
48
				writeAttribute(parser.getAttributePrefix(i), parser.getAttributeLocalName(i), src);
49
				src = null;
50
			} else {
51
				writeAttribute(parser.getAttributePrefix(i), parser.getAttributeLocalName(i), parser.getAttributeValue(i));
52
			}
53
		}
54
	}
55
	
56
	public static void main(String[] args) {
57
		File htmlFile = new File("/home/mdecorde/TXM/corpora/graal/HTML/GRAAL/ms-colonne/qgraal_cm_160a.html");
58
		File outfile = new File("/home/mdecorde/TXM/corpora/graal/HTML/GRAAL/ms-colonne/qgraal_cm_160a-o.html");
59
		String prefix = "AAAA";
60
		UpdateHTMLFileImagePaths p;
61
		try {
62
			p = new UpdateHTMLFileImagePaths(htmlFile, prefix);
63
			System.out.println(p.process(outfile));
64
		} catch (IOException e) {
65
			// TODO Auto-generated catch block
66
			e.printStackTrace();
67
		} catch (XMLStreamException e) {
68
			// TODO Auto-generated catch block
69
			e.printStackTrace();
70
		}
71
		
72
	}
73
}
0 74

  
tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/Allographs.java (revision 476)
1
// STANDARD DECLARATIONS
2
package org.txm.oriflamms.functions;
3

  
4
import java.io.File;
5
import java.io.IOException;
6
import java.util.HashMap;
7
import java.util.regex.Pattern;
8

  
9
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
10
import org.txm.searchengine.cqp.clientExceptions.UnexpectedAnswerException;
11
import org.txm.searchengine.cqp.corpus.Corpus;
12
import org.txm.searchengine.cqp.corpus.Property;
13
import org.txm.searchengine.cqp.corpus.query.Query;
14
import org.txm.searchengine.cqp.serverException.CqiServerError;
15

  
16
public class Allographs extends OriflammsFunction {
17

  
18
	protected String sign, allograph;
19
	Query query;
20
	private Property sign_property;
21
	private Property allograph_auto_property;
22
	private Property allograph_expert_property;
23

  
24
	Pattern signRegExp, characterRegExp;
25
	
26
	public Allographs(Corpus corpus, File tsvFile, String sign, Query query, String allograph) throws CqiClientException, IOException, CqiServerError {
27
		super(corpus, tsvFile);
28
		
29
		this.query = query;
30
		this.allograph = allograph;
31
		this.sign = sign;
32
	}
33

  
34
	public boolean process() throws CqiClientException, IOException, CqiServerError {
35
		String[] props = {"sign", "allograph-expert", "allograph-auto", "characters"};
36
		System.out.println("Dénombrement des allographes '"+allograph+"' de signe '"+sign+"' dans le contexte '"+query+"'");
37

  
38
		for (String prop : props) {
39
			if (corpus.getProperty(prop) == null) {
40
				System.out.println("Le corpus '"+corpus+"' n'a pas de propriété de mot '"+prop+"'. Abandon.");
41
				return false;
42
			}
43
		}
44

  
45
		sign_property = corpus.getProperty("sign");
46
		allograph_expert_property = corpus.getProperty("allograph-expert");
47
		allograph_auto_property = corpus.getProperty("allograph-auto");
48
		signRegExp = Pattern.compile(sign);
49
		characterRegExp = Pattern.compile(allograph);
50
		
51
		return super.process();
52
	}
53
	
54
	Object[] getInfos(int from ,int length) throws UnexpectedAnswerException, IOException, CqiServerError {
55

  
56
		int[] positions = new int[length];
57
		for (int i = 0 ; i < length ; i++) positions[i] = from++;
58

  
59
		String[] signs = CQI.cpos2Str(sign_property.getQualifiedName(), positions);
60
		String[]  allographs_expert = CQI.cpos2Str(allograph_expert_property.getQualifiedName(), positions);
61
		String[]  allographs_auto = CQI.cpos2Str(allograph_auto_property.getQualifiedName(), positions);
62
		String[]  characters = CQI.cpos2Str(charactersP.getQualifiedName(), positions);
63

  
64
		Object[] rez = {signs,allographs_expert,allographs_auto, characters};
65
		return rez;
66
	}
67

  
68
	boolean processLine(String text_id, String pb_id, String cb_id, String lb_id, int length, Object[] infos) {
69

  
70
		String[] signs = (String[]) infos[0];
71
		String[] allographs_expert = (String[]) infos[1];
72
		String[] allographs_auto = (String[]) infos[2];
73
		String[] characters = (String[]) infos[3];
74
		HashMap<String, HashMap<String, Integer>> count_signs = new HashMap<String, HashMap<String, Integer>>();
75

  
76
		for (int i = 0 ; i < length ; i++) {
77
			String s = signs[i];
78
			String c = characters[i];
79
			
80
			if (signRegExp.matcher(s).find() && characterRegExp.matcher(c).find()) {
81
				if (!count_signs.containsKey(s)) count_signs.put(s, new HashMap<String, Integer>());
82
				HashMap<String, Integer> counts = count_signs.get(s);
83

  
84
				if (!counts.containsKey(c)) counts.put(c, 0);
85
				counts.put(c, counts.get(c) + 1);
86
			}
87
		}
88

  
89
		for (String s : count_signs.keySet()) {
90
			HashMap<String, Integer> counts = count_signs.get(s);
91
			int sum = 0;
92
			for (Integer i : counts.values()) sum += i;
93
			
94
			for (String c : counts.keySet()) {
95
				writer.println(text_id+"\t"+pb_id+"\t"+cb_id+"\t"+lb_id+"\t"+s+"\t"+c+"\t"+counts.get(c)+"\t"+((float)counts.get(c)/(float)sum));
96
			}
97
		}
98
		return true;
99
	}
100
}
0 101

  
tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/CopyXMLFiles.java (revision 476)
1
package org.txm.oriflamms.functions;
2

  
3
import java.io.File;
4
import java.io.IOException;
5
import java.io.InputStream;
6
import java.util.ArrayList;
7

  
8
import javax.xml.stream.XMLInputFactory;
9
import javax.xml.stream.XMLResolver;
10
import javax.xml.stream.XMLStreamConstants;
11
import javax.xml.stream.XMLStreamException;
12
import javax.xml.stream.XMLStreamReader;
13

  
14
import org.txm.utils.io.FileCopy;
15

  
16
public class CopyXMLFiles {
17
	File xmlFile;
18
	File outDir;
19
	ArrayList<File> dtdFiles = new ArrayList<File>();
20
	
21
	public CopyXMLFiles(File xmlFile) {
22
		this.xmlFile = xmlFile;
23
	}
24
	
25
	public ArrayList<File> copy(File outDir) throws IOException, XMLStreamException {
26
		XMLInputFactory factory;
27
		XMLStreamReader parser;
28
		InputStream inputData = xmlFile.toURI().toURL().openStream();
29
		factory = XMLInputFactory.newInstance();
30
		factory.setXMLResolver(new XMLResolver() {
31
			@Override
32
			public Object resolveEntity(String publicID, String systemID,
33
					String baseURI, String namespace) throws XMLStreamException {
34
				File srcFile = new File(xmlFile.getParentFile(), systemID);
35
				dtdFiles.add(srcFile);
36
				try {
37
					return srcFile.toURI().toURL().openStream();
38
				} catch (IOException e) {
39
					// TODO Auto-generated catch block
40
					e.printStackTrace();
41
					return new ArrayList<>();
42
				}
43
			}
44
		});
45
	
46
		parser = factory.createXMLStreamReader(inputData);
47
		
48
		for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
49
			
50
		}
51
		
52
		dtdFiles.add(xmlFile);
53
		for (File dtd : dtdFiles) {
54
			File cpy = new File(outDir, dtd.getName());
55
			FileCopy.copy(dtd, cpy);
56
		}
57
		return dtdFiles;
58
	}
59
	
60
	public static void main(String[] args) throws IOException, XMLStreamException {
61
		File xmlFile = new File("/home/mdecorde/Téléchargements/Inscriptions1.xml");
62
		File outDir = new File("/home/mdecorde/Téléchargements/test");
63
		outDir.mkdir();
64
		CopyXMLFiles cdf = new CopyXMLFiles(xmlFile);
65
		System.out.println(cdf.copy(outDir));
66
	}
67
}
0 68

  
tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/Project2XTZ.java (revision 476)
1
package org.txm.oriflamms.functions;
2

  
3
import java.io.File;
4
import java.io.IOException;
5
import java.util.HashMap;
6

  
7
import javax.xml.parsers.ParserConfigurationException;
8
import javax.xml.stream.XMLStreamException;
9
import javax.xml.transform.TransformerException;
10

  
11
import org.txm.Toolbox;
12
import org.txm.importer.ApplyXsl2;
13
import org.txm.objects.BaseParameters;
14
import org.txm.utils.AsciiUtils;
15
import org.txm.utils.BundleUtils;
16
import org.txm.utils.DeleteDir;
17
import org.txm.utils.io.FileCopy;
18
import org.txm.utils.xml.UpdateXSLParameters;
19
import org.xml.sax.SAXException;
20

  
21
public class Project2XTZ {
22
	File projectDirectory;
23
	public Project2XTZ(File projectDirectory) {
24
		this.projectDirectory = projectDirectory;
25
	}
26

  
27
	public boolean process() throws IOException, TransformerException, ParserConfigurationException, SAXException, XMLStreamException {
28
		File oriflammsMacroDirectory = new File(BundleUtils.getBundleFile("Oriflamms"), "res");
29
		System.out.println("Ressources files directory: "+oriflammsMacroDirectory);
30
		if (!oriflammsMacroDirectory.exists()) {
31
			System.out.println("Oriflamms macro directory not found: "+oriflammsMacroDirectory+". Aborting");
32
			return false;
33
		}
34
		File wFrontXSLFile = new File(oriflammsMacroDirectory, "txm-front-teioriflammsw-xtz.xsl");
35
		if (!wFrontXSLFile.exists()) {
36
			System.out.println("Oriflamms to XML-XTZ front XSL file is missing: "+wFrontXSLFile+". Aborting");
37
			return false;
38
		}
39
		File cFrontXSLFile = new File(oriflammsMacroDirectory, "txm-front-teioriflammsc-xtz.xsl");
40
		if (!cFrontXSLFile.exists()) {
41
			System.out.println("Oriflamms to XML-XTZ front XSL file is missing: "+cFrontXSLFile+". Aborting");
42
			return false;
43
		}
44

  
45
		File cSplitXSLFile = new File(oriflammsMacroDirectory, "1-oriflamms-split-surfaces.xsl");
46
		if (!cSplitXSLFile.exists()) {
47
			System.out.println("Oriflamms to XML-XTZ split XSL file is missing: "+cSplitXSLFile+". Aborting");
48
			return false;
49
		}
50

  
51
		File editionXSLFile1 = new File(oriflammsMacroDirectory, "1-default-html.xsl");
52
		if (!editionXSLFile1.exists()) {
53
			System.out.println("Oriflamms to XML-XTZ edition XSL file is missing: "+editionXSLFile1+".");
54
			return false;
55
		}
56
		File editionXSLFile2 = new File(oriflammsMacroDirectory, "2-default-pager.xsl");
57
		if (!editionXSLFile2.exists()) {
58
			System.out.println("Oriflamms to XML-XTZ edition XSL file is missing: "+editionXSLFile2+".");
59
			return false;
60
		}
61
		File editionXSLFile3 = new File(oriflammsMacroDirectory, "3-facsimile-pager.xsl");
62
		if (!editionXSLFile3.exists()) {
63
			System.out.println("Oriflamms to XML-XTZ edition XSL file is missing: "+editionXSLFile3+".");
64
			return false;
65
		}
66
		File cssDirectory = new File(oriflammsMacroDirectory, "css");
67
		if (!cssDirectory.exists()) {
68
			System.out.println("Oriflamms css directory is missing: "+cssDirectory+".");
69
			return false;
70
		}
71
		File jsDirectory = new File(oriflammsMacroDirectory, "js");
72
		if (!jsDirectory.exists()) {
73
			System.out.println("Oriflamms js directory is missing: "+jsDirectory+".");
74
			return false;
75
		}
76
		File imagesDirectory = new File(oriflammsMacroDirectory, "images");
77
		if (!imagesDirectory.exists()) {
78
			System.out.println("Oriflamms images directory is missing: "+imagesDirectory+".");
79
			return false;
80
		}
81

  
82
		File textDirectory = new File(projectDirectory, "texts");
83

  
84
		File txmDirectory = new File(projectDirectory, "txm");
85
		if (txmDirectory.exists()) DeleteDir.deleteDirectory(txmDirectory);
86
		txmDirectory.mkdir();
87
		if (!txmDirectory.exists()) {
88
			System.out.println("Error: the 'txm' directory could not be created: "+txmDirectory+". Aborting.");
89
			return false;
90
		}
91

  
92
		File wDirectory = null;
93
				File cDirectory = null;
94
				File wFile = null;
95
				File cFile = null;
96

  
97
				File[] xmlFiles = textDirectory.listFiles();
98
				if (xmlFiles == null) return false;
99

  
100
		for (File xmlFile : xmlFiles) {
101
			if (xmlFile.getName().endsWith("-w.xml")) {
102
				String name = xmlFile.getName().substring(0, xmlFile.getName().indexOf(".xml"));
103
						wDirectory = new File(txmDirectory, name);
104
				wFile = xmlFile;
105
			} else if (xmlFile.getName().endsWith("-c.xml")) {
106
				String name = xmlFile.getName().substring(0, xmlFile.getName().indexOf(".xml"));
107
						cDirectory = new File(txmDirectory, name);
108
				cFile = xmlFile;
109
			}
110
		}
111

  
112
		if (wDirectory == null) {
113
			System.out.println("The Word corpus XML file was not found in "+textDirectory+". Aborting.");
114
			return false;
115
		}
116
		if (cDirectory == null) {
117
			System.out.println("The Letter corpus XML file was not found in "+textDirectory+". Aborting.");
118
			return false;
119
		}
120

  
121
		//Create XML-XTZ source directories
122
		wDirectory.mkdirs();
123
		cDirectory.mkdirs();
124

  
125
		// Copy XML files and split character XML file
126
		FileCopy.copy(wFile, new File(wDirectory, wFile.getName()));
127

  
128
		ApplyXsl2 builder = new ApplyXsl2(cSplitXSLFile);
129
		HashMap<String, String> xslParams = new HashMap<String, String>();
130
		xslParams.put("output-directory", cDirectory.getAbsoluteFile().toURI().toString());
131
		for (String name : xslParams.keySet()) builder.setParam(name, xslParams.get(name));
132
		if (!builder.process(cFile, null)) {
133
			System.out.println("Error: fail to split "+cFile);
134
			return false;
135
		}
136
		if (!ApplyXsl2.processImportSources(cFrontXSLFile, ApplyXsl2.listFiles(cDirectory), new HashMap<String, Object>())) {
137
			System.out.println("Error: fail to apply front XSL with "+cDirectory+" files");
138
			return false;
139
		}
140
		// INJECT ontologies 
141
		System.out.println("Injecting ontologies...");
142
		for (File f : cDirectory.listFiles()) {
143
			if (f.getName().startsWith(cDirectory.getName())) {
144
				OntologiesProjection cp = new OntologiesProjection(f, projectDirectory);
145
				File outputFile = new File(cDirectory, "temp.xml");
146
				cp.process(outputFile);
147
				if (outputFile.exists() && f.delete() && outputFile.renameTo(f)) {
148

  
149
				} else {
150
					System.out.println("Failed to replace XML file "+f+" with "+outputFile);
151
					return false;
152
				}
153
			}
154
		}
155

  
156
		// INJECT word's coordinates
157
		System.out.println("Injecting coordinates...");
158
		File xmlFile = new File(wDirectory, wFile.getName());
159
		File img_links_directory = new File(projectDirectory, "img_links");
160
		File zones_directory = new File(projectDirectory, "zones");
161
		File outputFile = new File(wDirectory, "temp.xml");
162
		CoordsProjection cp = new CoordsProjection(xmlFile, img_links_directory, zones_directory, "w");
163
		if (cp.process(outputFile)) {
164
			if (outputFile.exists() && xmlFile.delete() && outputFile.renameTo(xmlFile)) {
165

  
166
			} else {
167
				System.out.println("Failed to replace XML file "+xmlFile+" with "+outputFile);
168
				return false;
169
			}
170
		} else {
171
			System.out.println("Coordinates injection failed. Aborting");
172
			return false;
173
		}
174

  
175
		// Create XSL directories
176

  
177
		File wXSLDirectory = new File(wDirectory, "xsl");
178
		File cXSLDirectory = new File(cDirectory, "xsl");
179

  
180
		//File cSplitXSLDirectory = new File(cXSLDirectory, "1-split-merge")
181
		//cSplitXSLDirectory.mkdirs()
182

  
183
		File wFrontXSLDirectory = new File(wXSLDirectory, "2-front");
184
		//File cFrontXSLDirectory = new File(cXSLDirectory, "2-front")
185
		wFrontXSLDirectory.mkdirs();
186
		//cFrontXSLDirectory.mkdirs()
187

  
188
		// Copy Split XSL file
189
		//File newCSplitXSLFile = new File(cSplitXSLDirectory, cSplitXSLFile.getName())
190
		//FileCopy.copy(cSplitXSLFile, newCSplitXSLFile);
191

  
192
		// Copy Front XSL file
193
		File newWFrontXSLFile = new File(wFrontXSLDirectory, wFrontXSLFile.getName());
194
		//File newCFrontXSLFile = new File(cFrontXSLDirectory, cFrontXSLFile.getName())
195
		FileCopy.copy(wFrontXSLFile, newWFrontXSLFile);
196
		//FileCopy.copy(cFrontXSLFile, newCFrontXSLFile);
197

  
198
		// Copy edition XSL file
199
		File wEditionXSLDirectory = new File(wXSLDirectory, "4-edition");
200
		File cEditionXSLDirectory = new File(cXSLDirectory, "4-edition");
201
		wEditionXSLDirectory.mkdirs();
202
		cEditionXSLDirectory.mkdirs();
203
		File newWEditionXSLFile1 = new File(wEditionXSLDirectory, editionXSLFile1.getName());
204
		File newCEditionXSLFile1 = new File(cEditionXSLDirectory, editionXSLFile1.getName());
205
		FileCopy.copy(editionXSLFile1, newWEditionXSLFile1);
206
		FileCopy.copy(editionXSLFile1, newCEditionXSLFile1);
207
		File newWEditionXSLFile2 = new File(wEditionXSLDirectory, editionXSLFile2.getName());
208
		File newCEditionXSLFile2 = new File(cEditionXSLDirectory, editionXSLFile2.getName());
209
		FileCopy.copy(editionXSLFile2, newWEditionXSLFile2);
210
		FileCopy.copy(editionXSLFile2, newCEditionXSLFile2);
211
		File newWEditionXSLFile3 = new File(wEditionXSLDirectory, editionXSLFile3.getName());
212
		File newCEditionXSLFile3 = new File(cEditionXSLDirectory, editionXSLFile3.getName());
213
		FileCopy.copy(editionXSLFile3, newWEditionXSLFile3);
214
		FileCopy.copy(editionXSLFile3, newCEditionXSLFile3);
215

  
216
		//patch XSL files with image directory path and set the 'word-element' xsl param
217
		File projectImgDirectory = new File(projectDirectory, "img");
218
		HashMap<String, String> parameters = new HashMap<String, String>();
219
		parameters.put("image-directory", projectImgDirectory.getAbsolutePath());
220
		parameters.put("word-element", "w");
221
				System.out.println("update "+newWEditionXSLFile3+" with "+parameters);
222
				UpdateXSLParameters p = new UpdateXSLParameters(newWEditionXSLFile3);
223
		if (!p.process(parameters)) {
224
			System.out.println("Fail to patch "+newWEditionXSLFile3);
225
			return false;
226
		}
227
		parameters = new HashMap<String, String>();
228
		parameters.put("image-directory", projectImgDirectory.getAbsolutePath());
229
		parameters.put("word-element", "c");
230
				System.out.println("update "+newCEditionXSLFile3+" with "+parameters);
231
				UpdateXSLParameters p2 = new UpdateXSLParameters(newCEditionXSLFile3);
232
		if (!p2.process(parameters)) {
233
			System.out.println("Fail to patch "+newCEditionXSLFile3);
234
			return false;
235
		}
236

  
237
		// Copy js and images directories
238
		File wCSSDirectory =  new File(wDirectory, cssDirectory.getName());
239
		wCSSDirectory.mkdir();
240
		File wJsDirectory =  new File(wDirectory, jsDirectory.getName());
241
		wJsDirectory.mkdir();
242
		File wImagesDirectory =  new File(wDirectory, imagesDirectory.getName());
243
		wImagesDirectory.mkdir();
244
		File cCSSDirectory =  new File(cDirectory, cssDirectory.getName());
245
		cCSSDirectory.mkdir();
246
		File cJsDirectory =  new File(cDirectory, jsDirectory.getName());
247
		cJsDirectory.mkdir();
248
		File cImagesDirectory =  new File(cDirectory, imagesDirectory.getName());
249
		cImagesDirectory.mkdir();
250
		FileCopy.copyFiles(cssDirectory, wCSSDirectory);
251
		FileCopy.copyFiles(jsDirectory, wJsDirectory);
252
		FileCopy.copyFiles(imagesDirectory, wImagesDirectory);
253
		FileCopy.copyFiles(cssDirectory, cCSSDirectory);
254
		FileCopy.copyFiles(jsDirectory, cJsDirectory);
255
		FileCopy.copyFiles(imagesDirectory, cImagesDirectory);
256

  
257
		// Prepare import.xml files
258
		File wImportXMLFile = new File(wDirectory, "import.xml");
259
		File cImportXMLFile = new File(cDirectory, "import.xml");
260

  
261
		BaseParameters.createEmptyParams(wImportXMLFile, AsciiUtils.buildId(wDirectory.getName()).toUpperCase());
262
		BaseParameters wParams = new BaseParameters(wImportXMLFile);
263
		wParams.load();
264
		wParams.setSkipTokenization(true);
265
		wParams.setWordElement("w");
266
		wParams.setDoAnnotation(false);
267
		wParams.setAnnotationLang("fr");
268
		wParams.setWordsPerPage(9999999);
269
		wParams.setTextualPlans("", "note", "teiHeader,facsimile","pb,cb,lb");
270
		wParams.getCorpusElement().setAttribute("font", "Junicode");
271
		wParams.getEditionsElement(wParams.getCorpusElement()).setAttribute("default", "default,facsimile");
272
		wParams.getCorpusElement().setAttribute("name", AsciiUtils.buildId(wDirectory.getName()).toUpperCase());
273
		
274

  
275
		BaseParameters.createEmptyParams(cImportXMLFile, AsciiUtils.buildId(cDirectory.getName()).toUpperCase());
276
		BaseParameters cParams = new BaseParameters(cImportXMLFile);
277
		cParams.load();
278
		cParams.setSkipTokenization(true);
279
		cParams.setWordElement("c");
280
		cParams.setDoAnnotation(false);
281
		cParams.setAnnotationLang("fr");
282
		cParams.setWordsPerPage(9999999);
283
		cParams.setTextualPlans("", "note", "teiHeader,facsimile","pb,cb,lb");
284
		cParams.getCorpusElement().setAttribute("font", "Junicode");
285
		cParams.getEditionsElement(cParams.getCorpusElement()).setAttribute("default", "default,facsimile");
286
		cParams.getCorpusElement().setAttribute("name", AsciiUtils.buildId(cDirectory.getName()).toUpperCase());
287
		
288
		return cParams.save() && wParams.save();
289
	}
290
}
0 291

  
tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/OntologiesProjection.java (revision 476)
1
package org.txm.oriflamms.functions;
2

  
3
import java.io.File;
4
import java.io.IOException;
5
import java.io.Serializable;
6
import java.util.ArrayList;
7
import java.util.Arrays;
8
import java.util.HashMap;
9
import java.util.List;
10
import java.util.regex.Pattern;
11

  
12
import javax.xml.stream.XMLStreamException;
13

  
14
import org.txm.importer.StaxIdentityParser;
15
import org.txm.importer.StaxParser;
16

  
17
class OntologiesProjection extends StaxIdentityParser {
18

  
19
	File xmlFile;
20

  
21
	String wordTag;
22
	String textname;
... Ce différentiel a été tronqué car il excède la taille maximale pouvant être affichée.

Formats disponibles : Unified diff