Revision 476

tmp/org.txm.oriflamms.rcp/.settings/org.eclipse.jdt.core.prefs (revision 476)
1
eclipse.preferences.version=1
2
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
3
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
4
org.eclipse.jdt.core.compiler.compliance=1.7
5
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
6
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
7
org.eclipse.jdt.core.compiler.source=1.7
0 8

  
tmp/org.txm.oriflamms.rcp/.classpath (revision 476)
1
<?xml version="1.0" encoding="UTF-8"?>
2
<classpath>
3
	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7"/>
4
	<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins">
5
		<accessrules>
6
			<accessrule kind="accessible" pattern="**"/>
7
		</accessrules>
8
	</classpathentry>
9
	<classpathentry kind="src" path="src"/>
10
	<classpathentry kind="src" path="res"/>
11
	<classpathentry kind="output" path="bin"/>
12
</classpath>
0 13

  
tmp/org.txm.oriflamms.rcp/META-INF/MANIFEST.MF (revision 476)
1
Manifest-Version: 1.0
2
Bundle-ManifestVersion: 2
3
Bundle-Name: Oriflamms
4
Bundle-SymbolicName: Oriflamms;singleton:=true
5
Bundle-Version: 1.0.0.qualifier
6
Bundle-Activator: oriflamms.Activator
7
Require-Bundle: org.txm.core;bundle-version="0.7.0",
8
 org.txm.rcp,
9
 org.eclipse.ui,
10
 org.eclipse.core.runtime,
11
 org.txm.searchengine.cqp.core,
12
 org.txm.utils
13
Bundle-RequiredExecutionEnvironment: JavaSE-1.6
14
Bundle-ActivationPolicy: lazy
15
Bundle-Vendor: Textometrie.org
0 16

  
tmp/org.txm.oriflamms.rcp/.project (revision 476)
1
<?xml version="1.0" encoding="UTF-8"?>
2
<projectDescription>
3
	<name>OriflammsRCP</name>
4
	<comment></comment>
5
	<projects>
6
	</projects>
7
	<buildSpec>
8
		<buildCommand>
9
			<name>org.eclipse.jdt.core.javabuilder</name>
10
			<arguments>
11
			</arguments>
12
		</buildCommand>
13
		<buildCommand>
14
			<name>org.eclipse.pde.ManifestBuilder</name>
15
			<arguments>
16
			</arguments>
17
		</buildCommand>
18
		<buildCommand>
19
			<name>org.eclipse.pde.SchemaBuilder</name>
20
			<arguments>
21
			</arguments>
22
		</buildCommand>
23
	</buildSpec>
24
	<natures>
25
		<nature>org.eclipse.pde.PluginNature</nature>
26
		<nature>org.eclipse.jdt.core.javanature</nature>
27
	</natures>
28
</projectDescription>
0 29

  
tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/TEI2Project.java (revision 476)
1
// STANDARD DECLARATIONS
2
package org.txm.oriflamms.functions;
3

  
4
import java.io.File;
5
import java.io.IOException;
6
import java.io.PrintWriter;
7
import java.util.Arrays;
8

  
9
import javax.xml.stream.XMLStreamException;
10
import javax.xml.transform.TransformerException;
11

  
12
import org.eclipse.core.runtime.Platform;
13
import org.osgi.framework.Bundle;
14
import org.txm.Toolbox;
15
import org.txm.functions.ProgressWatcher;
16
import org.txm.importer.ApplyXsl2;
17
import org.txm.importer.WriteIdAndNAttributes;
18
import org.txm.utils.BundleUtils;
19
import org.txm.utils.DeleteDir;
20
import org.txm.utils.io.FileCopy;
21
import org.txm.utils.io.IOUtils;
22
import org.txm.utils.logger.Log;
23
import org.txm.utils.zip.Zip;
24

  
25
public class TEI2Project {
26

  
27
	File xmlFile;
28
	File xslFile;
29
	File imagesDirectory;
30
	boolean createArchive;
31

  
32
	public TEI2Project(File xmlFile, File xslFile, File imagesDirectory, boolean createArchive) {
33
		this.xmlFile = xmlFile;
34
		this.xslFile = xslFile;
35
		this.imagesDirectory = imagesDirectory;
36
		this.createArchive = createArchive;
37
	}
38
	
39
	public boolean process(ProgressWatcher monitor) throws IOException, XMLStreamException, TransformerException {
40
		if (!xmlFile.exists()) {
41
			System.out.println("Could not read input XML input file: "+xmlFile);
42
			return false;
43
		}
44

  
45
		File oriflammsMacroDirectory = new File(BundleUtils.getBundleFile("Oriflamms"), "res");
46
		System.out.println("Ressources files directory: "+oriflammsMacroDirectory);
47
		if (!oriflammsMacroDirectory.exists()) {
48
			System.out.println("Oriflamms macro directory not found: "+oriflammsMacroDirectory);
49
			return false;
50
		}
51
		File xslTokenizer = new File(oriflammsMacroDirectory, "oriflamms-tokenize-words.xsl");
52
		File xslPatchLbInWords = new File(oriflammsMacroDirectory,"oriflamms-patch-words-with-lb.xsl");
53
		File xslMissingMilestones = new File(oriflammsMacroDirectory, "oriflamms-patch-milestones.xsl");
54
		File xslCharactersTokenizer = new File(oriflammsMacroDirectory, "oriflamms-tokenize-chars-1-tag.xsl");
55
		File xslCharactersIdentifier = new File(oriflammsMacroDirectory, "oriflamms-tokenize-chars-2-identify.xsl");
56
		File xslZones = new File(oriflammsMacroDirectory, "oriflamms-convert-transcriptions-orizones.xsl");
57

  
58
		if (!xslTokenizer.exists()  || !xslPatchLbInWords.exists() || !xslMissingMilestones.exists() ||	
59
				!xslCharactersTokenizer.exists() || !xslCharactersIdentifier.exists() || !xslZones.exists()) {
60
			System.out.println("Could not find one of TXM's XSL file : "+
61
					Arrays.asList(xslTokenizer, xslPatchLbInWords, xslMissingMilestones, 
62
					 xslCharactersTokenizer, xslCharactersIdentifier, xslZones));
63

  
64
			System.out.println(Arrays.asList(xslTokenizer.exists(), xslPatchLbInWords.exists(), xslMissingMilestones.exists()
65
			            , xslCharactersTokenizer.exists(), xslCharactersIdentifier.exists(), xslZones.exists()).toString());
66
			            		 return false;
67
		}
68

  
69
		File xmlFileParentDirectory = xmlFile.getParentFile();
70
				String projectName = xmlFile.getName();
71
				if (projectName.indexOf(".") > 0) projectName = projectName.substring(0, projectName.indexOf("."));
72
				File projectDirectory = new File(xmlFileParentDirectory, projectName);
73
		DeleteDir.deleteDirectory(projectDirectory);
74
		if (projectDirectory.exists()) {
75
			System.out.println("Could not delete previous project directory: "+projectDirectory);
76
			 return false;
77
		}
78

  
79
		projectDirectory.mkdir();
80

  
81
		if (!projectDirectory.exists()) {
82
			System.out.println("Could not create project directory: "+projectDirectory);
83
			return false;
84
		}
85

  
86
		System.out.println("Oriflamms project directory: "+projectDirectory);
87

  
88
		File xmlFileCopy = new File(projectDirectory, xmlFile.getName());
89
		System.out.println("Copying XML files: "+xmlFile+" to "+projectDirectory);
90
		CopyXMLFiles cdf = new CopyXMLFiles(xmlFile);
91
		projectDirectory.mkdir();
92
		System.out.println("Files copied: "+cdf.copy(projectDirectory));
93
		if (!xmlFileCopy.exists()) {
94
			System.out.println("Could not copy input XML input file: "+xmlFile+" to "+xmlFileCopy);
95
			 return false;
96
		}
97

  
98
		if (xslFile != null) {
99
			if (xslFile.exists()) {
100
				System.out.println("Applying "+xslFile+" to "+xmlFileCopy+"...");
101
				ApplyXsl2 builder = new ApplyXsl2(xslFile);
102
				if (!builder.process(xmlFileCopy, xmlFileCopy)) {
103
					System.out.println("Failed to process "+xmlFileCopy+" with "+xslFile);
104
					return false;
105
				}
106
			}
107
		}
108

  
109
		File textsDirectory = new File(projectDirectory, "texts");
110
		File imgDirectory = new File(projectDirectory, "img");
111
		File img_linksDirectory = new File(projectDirectory, "img_links");
112
		File ontologiesDirectory = new File(projectDirectory, "ontologies");
113
		File ontologies_linksDirectory = new File(projectDirectory, "ontologies_links");
114
		File zonesDirectory = new File(projectDirectory, "zones");
115
		textsDirectory.mkdir();
116
		imgDirectory.mkdir();
117
		img_linksDirectory.mkdir();
118
		ontologiesDirectory.mkdir();
119
		ontologies_linksDirectory.mkdir();
120
		zonesDirectory.mkdir();
121

  
122
		File xmlWFile = new File(textsDirectory, projectName+"-w.xml");
123
		File xmlWCFile = new File(textsDirectory, projectName+"-c.xml");
124

  
125
		try {
126

  
127
			System.out.println("Applying "+xslMissingMilestones+" to "+xmlWFile+"...");
128
			if (monitor != null) monitor.worked(1, "Applying "+xslMissingMilestones+" to "+xmlWFile+"...");
129
			ApplyXsl2 builder = new ApplyXsl2(xslMissingMilestones);
130
			if (!builder.process(xmlFileCopy, xmlWFile)) {
131
				System.out.println("Failed to process "+xmlWFile+" with "+xslMissingMilestones);
132
				return false;
133
			}
134

  
135
			System.out.println("Applying "+xslTokenizer+" to "+xmlWFile+"...");
136
			if (monitor != null) monitor.worked(15, "Applying "+xslTokenizer+" to "+xmlWFile+"...");
137
			builder = new ApplyXsl2(xslTokenizer);
138
			if (!builder.process(xmlWFile, xmlWFile)) {
139
				System.out.println("Failed to process "+xmlFileCopy+" with "+xslTokenizer);
140
				return false;
141
			}
142

  
143
			System.out.println("Merging words </w><w>");
144
			if (monitor != null) monitor.worked(15, "Merging words </w><w>");
145
			String content = IOUtils.getText(xmlWFile, "UTF-8");
146
			content = content.replaceAll("</w><w[^>]*>", "");
147
			content = content.replaceAll("</w>\\s*(<milestone[^>]*>)?\\s*(<pb[^>]*>)?\\s*(<cb[^>]*>)?\\s*(<lb[^>]*break=\"no\"[^>]*>)\\s*<w[^>]*>", "$1$2$3$4");
148
			try {
149
				PrintWriter writer = IOUtils.getWriter(xmlWFile);
150
						writer.print(content);
151
						writer.close();
152
			} catch (Exception e2) {
153
				System.out.println("Error while fixing words: "+e2);
154
				return false;
155
			}
156

  
157
			System.out.println("Applying "+xslPatchLbInWords+" to "+xmlWFile+"...");
158
			if (monitor != null) monitor.worked(15, "Applying "+xslPatchLbInWords+" to "+xmlWFile+"...");
159
			builder = new ApplyXsl2(xslPatchLbInWords);
160
			if (!builder.process(xmlWFile, xmlWFile)) {
161
				System.out.println("Failed to process "+xmlFileCopy+" with "+xslPatchLbInWords);
162
				return false;
163
			}
164

  
165
			System.out.println("Fixing 'id' and 'n' attributes in "+xmlWFile+"...");
166
			if (monitor != null) monitor.worked(15, "Fixing 'id' and 'n' attributes in "+xmlWFile+"...");
167
			WriteIdAndNAttributes wiana = new WriteIdAndNAttributes(xmlWFile, projectName);
168
			File tmp = new File(xmlWFile.getParentFile(), "tmp_"+xmlWFile.getName());
169
			if (!wiana.process(tmp)) {
170
				System.out.println("Failed to fix id and n attributes with of "+xmlWFile+" file");
171
				return false;
172
			} else {
173
				wiana = null;
174
				xmlWFile.delete();
175
				tmp.renameTo(xmlWFile);
176
				if (tmp.exists()) {
177
					System.out.println("Failed to replace "+xmlWFile+" with result file "+tmp);
178
					return false;
179
				}
180
			}
181

  
182
			System.out.println("Applying "+xslCharactersTokenizer+" to "+xmlWFile+"...");
183
			if (monitor != null) monitor.worked(15, "Applying "+xslCharactersTokenizer+" to "+xmlWFile+"...");
184
			builder = new ApplyXsl2(xslCharactersTokenizer);
185
			if (!builder.process(xmlWFile, xmlWCFile)) {
186
				System.out.println("Failed to process "+xmlWFile+" with "+xslCharactersTokenizer);
187
				return false;
188
			} 
189

  
190
			System.out.println("Applying "+xslCharactersIdentifier+" to "+xmlWCFile+"...");
191
			if (monitor != null) monitor.worked(1, "Applying "+xslCharactersIdentifier+" to "+xmlWCFile+"...");
192
			builder = new ApplyXsl2(xslCharactersIdentifier);
193
			if (!builder.process(xmlWCFile, xmlWCFile)) {
194
				System.out.println("Failed to process "+xmlWCFile+" with "+xslCharactersIdentifier);
195
				return false;
196
			}
197

  
198
			System.out.println("Applying "+xslZones+" to "+xmlWFile+"...");
199
			if (monitor != null) monitor.worked(15, "Applying "+xslZones+" to "+xmlWFile+"...");
200
			builder = new ApplyXsl2(xslZones);
201
			if (!builder.process(xmlWFile, null)) {
202
				System.out.println("Failed to process "+xmlFileCopy+" with "+xslZones);
203
				return false;
204
			}
205

  
206
			if (imagesDirectory.exists() && imagesDirectory.listFiles().length > 0) {
207
				System.out.println("Copying images files from "+imagesDirectory+" to "+imgDirectory+"...");
208
				FileCopy.copyFiles(imagesDirectory, imgDirectory);
209
				File[] files = imgDirectory.listFiles();
210
				if (files != null) System.out.println(""+files.length+" images copied.");
211
			}
212

  
213
			if (createArchive) {
214
				if (monitor != null) monitor.worked(15, "Building Oriflamms binary project... ");
215
				File zipFile = new File(xmlFileParentDirectory, projectName+".oriflamms");
216
				zipFile.delete();
217
				Zip.compress(projectDirectory, zipFile);
218

  
219
				if (zipFile.exists()) {
220
					System.out.println("Project oriflamms exported to "+zipFile);
221
					DeleteDir.deleteDirectory(projectDirectory);
222
				} else {
223
					System.out.println("Fail to export project "+projectDirectory);
224
				}
225
			}
226

  
227
		} catch (Exception e) {
228
			System.out.println("Error while applying a XSL file: "+e);
229
			Log.printStackTrace(e);
230
		}
231
		if (monitor != null) monitor.done();
232
		return true;
233
	}
234
}
0 235

  
tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/UpdateHTMLFileImagePaths.java (revision 476)
1
// STANDARD DECLARATIONS
2
package org.txm.oriflamms.functions;
3

  
4
import java.io.File;
5
import java.io.IOException;
6

  
7
import javax.xml.stream.XMLStreamException;
8

  
9
import org.txm.importer.StaxIdentityParser;
10

  
11
public class UpdateHTMLFileImagePaths extends StaxIdentityParser {
12
	String newpathprefix;
13

  
14
	public UpdateHTMLFileImagePaths(File htmlFile, String newpathprefix) throws IOException, XMLStreamException {
15
		super(htmlFile);
16
		this.newpathprefix = newpathprefix;
17
		
18
		if (newpathprefix.endsWith("/")) newpathprefix = newpathprefix.substring(0, newpathprefix.length()-1);
19
	}
20
	
21
	String src = null;
22
	protected void processStartElement() throws XMLStreamException, IOException {
23
		src = null;
24
		if (localname.equals("img")) {
25
			//System.out.println("start element img");
26
			int n = parser.getAttributeCount();
27
			for (int i = 0 ; i < n ; i++) {
28
				if (parser.getAttributeLocalName(i).equals("src")) {
29
					src = parser.getAttributeValue(i);
30
					//System.out.println("start element img@src="+src);
31
					break;
32
				}
33
			}
34
			
35
			if (src != null) {
36
				int idx = src.lastIndexOf("/");
37
				String name = src.substring(idx);
38
				src = newpathprefix+name;
39
			}
40
		}
41
		
42
		super.processStartElement();	
43
	}
44
	
45
	protected void writeAttributes() throws XMLStreamException {
46
		for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
47
			if (src != null && "src".equals(parser.getAttributeLocalName(i))) {
48
				writeAttribute(parser.getAttributePrefix(i), parser.getAttributeLocalName(i), src);
49
				src = null;
50
			} else {
51
				writeAttribute(parser.getAttributePrefix(i), parser.getAttributeLocalName(i), parser.getAttributeValue(i));
52
			}
53
		}
54
	}
55
	
56
	public static void main(String[] args) {
57
		File htmlFile = new File("/home/mdecorde/TXM/corpora/graal/HTML/GRAAL/ms-colonne/qgraal_cm_160a.html");
58
		File outfile = new File("/home/mdecorde/TXM/corpora/graal/HTML/GRAAL/ms-colonne/qgraal_cm_160a-o.html");
59
		String prefix = "AAAA";
60
		UpdateHTMLFileImagePaths p;
61
		try {
62
			p = new UpdateHTMLFileImagePaths(htmlFile, prefix);
63
			System.out.println(p.process(outfile));
64
		} catch (IOException e) {
65
			// TODO Auto-generated catch block
66
			e.printStackTrace();
67
		} catch (XMLStreamException e) {
68
			// TODO Auto-generated catch block
69
			e.printStackTrace();
70
		}
71
		
72
	}
73
}
0 74

  
tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/Allographs.java (revision 476)
1
// STANDARD DECLARATIONS
2
package org.txm.oriflamms.functions;
3

  
4
import java.io.File;
5
import java.io.IOException;
6
import java.util.HashMap;
7
import java.util.regex.Pattern;
8

  
9
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
10
import org.txm.searchengine.cqp.clientExceptions.UnexpectedAnswerException;
11
import org.txm.searchengine.cqp.corpus.Corpus;
12
import org.txm.searchengine.cqp.corpus.Property;
13
import org.txm.searchengine.cqp.corpus.query.Query;
14
import org.txm.searchengine.cqp.serverException.CqiServerError;
15

  
16
public class Allographs extends OriflammsFunction {
17

  
18
	protected String sign, allograph;
19
	Query query;
20
	private Property sign_property;
21
	private Property allograph_auto_property;
22
	private Property allograph_expert_property;
23

  
24
	Pattern signRegExp, characterRegExp;
25
	
26
	public Allographs(Corpus corpus, File tsvFile, String sign, Query query, String allograph) throws CqiClientException, IOException, CqiServerError {
27
		super(corpus, tsvFile);
28
		
29
		this.query = query;
30
		this.allograph = allograph;
31
		this.sign = sign;
32
	}
33

  
34
	public boolean process() throws CqiClientException, IOException, CqiServerError {
35
		String[] props = {"sign", "allograph-expert", "allograph-auto", "characters"};
36
		System.out.println("Dénombrement des allographes '"+allograph+"' de signe '"+sign+"' dans le contexte '"+query+"'");
37

  
38
		for (String prop : props) {
39
			if (corpus.getProperty(prop) == null) {
40
				System.out.println("Le corpus '"+corpus+"' n'a pas de propriété de mot '"+prop+"'. Abandon.");
41
				return false;
42
			}
43
		}
44

  
45
		sign_property = corpus.getProperty("sign");
46
		allograph_expert_property = corpus.getProperty("allograph-expert");
47
		allograph_auto_property = corpus.getProperty("allograph-auto");
48
		signRegExp = Pattern.compile(sign);
49
		characterRegExp = Pattern.compile(allograph);
50
		
51
		return super.process();
52
	}
53
	
54
	Object[] getInfos(int from ,int length) throws UnexpectedAnswerException, IOException, CqiServerError {
55

  
56
		int[] positions = new int[length];
57
		for (int i = 0 ; i < length ; i++) positions[i] = from++;
58

  
59
		String[] signs = CQI.cpos2Str(sign_property.getQualifiedName(), positions);
60
		String[]  allographs_expert = CQI.cpos2Str(allograph_expert_property.getQualifiedName(), positions);
61
		String[]  allographs_auto = CQI.cpos2Str(allograph_auto_property.getQualifiedName(), positions);
62
		String[]  characters = CQI.cpos2Str(charactersP.getQualifiedName(), positions);
63

  
64
		Object[] rez = {signs,allographs_expert,allographs_auto, characters};
65
		return rez;
66
	}
67

  
68
	boolean processLine(String text_id, String pb_id, String cb_id, String lb_id, int length, Object[] infos) {
69

  
70
		String[] signs = (String[]) infos[0];
71
		String[] allographs_expert = (String[]) infos[1];
72
		String[] allographs_auto = (String[]) infos[2];
73
		String[] characters = (String[]) infos[3];
74
		HashMap<String, HashMap<String, Integer>> count_signs = new HashMap<String, HashMap<String, Integer>>();
75

  
76
		for (int i = 0 ; i < length ; i++) {
77
			String s = signs[i];
78
			String c = characters[i];
79
			
80
			if (signRegExp.matcher(s).find() && characterRegExp.matcher(c).find()) {
81
				if (!count_signs.containsKey(s)) count_signs.put(s, new HashMap<String, Integer>());
82
				HashMap<String, Integer> counts = count_signs.get(s);
83

  
84
				if (!counts.containsKey(c)) counts.put(c, 0);
85
				counts.put(c, counts.get(c) + 1);
86
			}
87
		}
88

  
89
		for (String s : count_signs.keySet()) {
90
			HashMap<String, Integer> counts = count_signs.get(s);
91
			int sum = 0;
92
			for (Integer i : counts.values()) sum += i;
93
			
94
			for (String c : counts.keySet()) {
95
				writer.println(text_id+"\t"+pb_id+"\t"+cb_id+"\t"+lb_id+"\t"+s+"\t"+c+"\t"+counts.get(c)+"\t"+((float)counts.get(c)/(float)sum));
96
			}
97
		}
98
		return true;
99
	}
100
}
0 101

  
tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/CopyXMLFiles.java (revision 476)
1
package org.txm.oriflamms.functions;
2

  
3
import java.io.File;
4
import java.io.IOException;
5
import java.io.InputStream;
6
import java.util.ArrayList;
7

  
8
import javax.xml.stream.XMLInputFactory;
9
import javax.xml.stream.XMLResolver;
10
import javax.xml.stream.XMLStreamConstants;
11
import javax.xml.stream.XMLStreamException;
12
import javax.xml.stream.XMLStreamReader;
13

  
14
import org.txm.utils.io.FileCopy;
15

  
16
public class CopyXMLFiles {
17
	File xmlFile;
18
	File outDir;
19
	ArrayList<File> dtdFiles = new ArrayList<File>();
20
	
21
	public CopyXMLFiles(File xmlFile) {
22
		this.xmlFile = xmlFile;
23
	}
24
	
25
	public ArrayList<File> copy(File outDir) throws IOException, XMLStreamException {
26
		XMLInputFactory factory;
27
		XMLStreamReader parser;
28
		InputStream inputData = xmlFile.toURI().toURL().openStream();
29
		factory = XMLInputFactory.newInstance();
30
		factory.setXMLResolver(new XMLResolver() {
31
			@Override
32
			public Object resolveEntity(String publicID, String systemID,
33
					String baseURI, String namespace) throws XMLStreamException {
34
				File srcFile = new File(xmlFile.getParentFile(), systemID);
35
				dtdFiles.add(srcFile);
36
				try {
37
					return srcFile.toURI().toURL().openStream();
38
				} catch (IOException e) {
39
					// TODO Auto-generated catch block
40
					e.printStackTrace();
41
					return new ArrayList<>();
42
				}
43
			}
44
		});
45
	
46
		parser = factory.createXMLStreamReader(inputData);
47
		
48
		for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
49
			
50
		}
51
		
52
		dtdFiles.add(xmlFile);
53
		for (File dtd : dtdFiles) {
54
			File cpy = new File(outDir, dtd.getName());
55
			FileCopy.copy(dtd, cpy);
56
		}
57
		return dtdFiles;
58
	}
59
	
60
	public static void main(String[] args) throws IOException, XMLStreamException {
61
		File xmlFile = new File("/home/mdecorde/Téléchargements/Inscriptions1.xml");
62
		File outDir = new File("/home/mdecorde/Téléchargements/test");
63
		outDir.mkdir();
64
		CopyXMLFiles cdf = new CopyXMLFiles(xmlFile);
65
		System.out.println(cdf.copy(outDir));
66
	}
67
}
0 68

  
tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/Project2XTZ.java (revision 476)
1
package org.txm.oriflamms.functions;
2

  
3
import java.io.File;
4
import java.io.IOException;
5
import java.util.HashMap;
6

  
7
import javax.xml.parsers.ParserConfigurationException;
8
import javax.xml.stream.XMLStreamException;
9
import javax.xml.transform.TransformerException;
10

  
11
import org.txm.Toolbox;
12
import org.txm.importer.ApplyXsl2;
13
import org.txm.objects.BaseParameters;
14
import org.txm.utils.AsciiUtils;
15
import org.txm.utils.BundleUtils;
16
import org.txm.utils.DeleteDir;
17
import org.txm.utils.io.FileCopy;
18
import org.txm.utils.xml.UpdateXSLParameters;
19
import org.xml.sax.SAXException;
20

  
21
public class Project2XTZ {
22
	File projectDirectory;
23
	public Project2XTZ(File projectDirectory) {
24
		this.projectDirectory = projectDirectory;
25
	}
26

  
27
	public boolean process() throws IOException, TransformerException, ParserConfigurationException, SAXException, XMLStreamException {
28
		File oriflammsMacroDirectory = new File(BundleUtils.getBundleFile("Oriflamms"), "res");
29
		System.out.println("Ressources files directory: "+oriflammsMacroDirectory);
30
		if (!oriflammsMacroDirectory.exists()) {
31
			System.out.println("Oriflamms macro directory not found: "+oriflammsMacroDirectory+". Aborting");
32
			return false;
33
		}
34
		File wFrontXSLFile = new File(oriflammsMacroDirectory, "txm-front-teioriflammsw-xtz.xsl");
35
		if (!wFrontXSLFile.exists()) {
36
			System.out.println("Oriflamms to XML-XTZ front XSL file is missing: "+wFrontXSLFile+". Aborting");
37
			return false;
38
		}
39
		File cFrontXSLFile = new File(oriflammsMacroDirectory, "txm-front-teioriflammsc-xtz.xsl");
40
		if (!cFrontXSLFile.exists()) {
41
			System.out.println("Oriflamms to XML-XTZ front XSL file is missing: "+cFrontXSLFile+". Aborting");
42
			return false;
43
		}
44

  
45
		File cSplitXSLFile = new File(oriflammsMacroDirectory, "1-oriflamms-split-surfaces.xsl");
46
		if (!cSplitXSLFile.exists()) {
47
			System.out.println("Oriflamms to XML-XTZ split XSL file is missing: "+cSplitXSLFile+". Aborting");
48
			return false;
49
		}
50

  
51
		File editionXSLFile1 = new File(oriflammsMacroDirectory, "1-default-html.xsl");
52
		if (!editionXSLFile1.exists()) {
53
			System.out.println("Oriflamms to XML-XTZ edition XSL file is missing: "+editionXSLFile1+".");
54
			return false;
55
		}
56
		File editionXSLFile2 = new File(oriflammsMacroDirectory, "2-default-pager.xsl");
57
		if (!editionXSLFile2.exists()) {
58
			System.out.println("Oriflamms to XML-XTZ edition XSL file is missing: "+editionXSLFile2+".");
59
			return false;
60
		}
61
		File editionXSLFile3 = new File(oriflammsMacroDirectory, "3-facsimile-pager.xsl");
62
		if (!editionXSLFile3.exists()) {
63
			System.out.println("Oriflamms to XML-XTZ edition XSL file is missing: "+editionXSLFile3+".");
64
			return false;
65
		}
66
		File cssDirectory = new File(oriflammsMacroDirectory, "css");
67
		if (!cssDirectory.exists()) {
68
			System.out.println("Oriflamms css directory is missing: "+cssDirectory+".");
69
			return false;
70
		}
71
		File jsDirectory = new File(oriflammsMacroDirectory, "js");
72
		if (!jsDirectory.exists()) {
73
			System.out.println("Oriflamms js directory is missing: "+jsDirectory+".");
74
			return false;
75
		}
76
		File imagesDirectory = new File(oriflammsMacroDirectory, "images");
77
		if (!imagesDirectory.exists()) {
78
			System.out.println("Oriflamms images directory is missing: "+imagesDirectory+".");
79
			return false;
80
		}
81

  
82
		File textDirectory = new File(projectDirectory, "texts");
83

  
84
		File txmDirectory = new File(projectDirectory, "txm");
85
		if (txmDirectory.exists()) DeleteDir.deleteDirectory(txmDirectory);
86
		txmDirectory.mkdir();
87
		if (!txmDirectory.exists()) {
88
			System.out.println("Error: the 'txm' directory could not be created: "+txmDirectory+". Aborting.");
89
			return false;
90
		}
91

  
92
		File wDirectory = null;
93
				File cDirectory = null;
94
				File wFile = null;
95
				File cFile = null;
96

  
97
				File[] xmlFiles = textDirectory.listFiles();
98
				if (xmlFiles == null) return false;
99

  
100
		for (File xmlFile : xmlFiles) {
101
			if (xmlFile.getName().endsWith("-w.xml")) {
102
				String name = xmlFile.getName().substring(0, xmlFile.getName().indexOf(".xml"));
103
						wDirectory = new File(txmDirectory, name);
104
				wFile = xmlFile;
105
			} else if (xmlFile.getName().endsWith("-c.xml")) {
106
				String name = xmlFile.getName().substring(0, xmlFile.getName().indexOf(".xml"));
107
						cDirectory = new File(txmDirectory, name);
108
				cFile = xmlFile;
109
			}
110
		}
111

  
112
		if (wDirectory == null) {
113
			System.out.println("The Word corpus XML file was not found in "+textDirectory+". Aborting.");
114
			return false;
115
		}
116
		if (cDirectory == null) {
117
			System.out.println("The Letter corpus XML file was not found in "+textDirectory+". Aborting.");
118
			return false;
119
		}
120

  
121
		//Create XML-XTZ source directories
122
		wDirectory.mkdirs();
123
		cDirectory.mkdirs();
124

  
125
		// Copy XML files and split character XML file
126
		FileCopy.copy(wFile, new File(wDirectory, wFile.getName()));
127

  
128
		ApplyXsl2 builder = new ApplyXsl2(cSplitXSLFile);
129
		HashMap<String, String> xslParams = new HashMap<String, String>();
130
		xslParams.put("output-directory", cDirectory.getAbsoluteFile().toURI().toString());
131
		for (String name : xslParams.keySet()) builder.setParam(name, xslParams.get(name));
132
		if (!builder.process(cFile, null)) {
133
			System.out.println("Error: fail to split "+cFile);
134
			return false;
135
		}
136
		if (!ApplyXsl2.processImportSources(cFrontXSLFile, ApplyXsl2.listFiles(cDirectory), new HashMap<String, Object>())) {
137
			System.out.println("Error: fail to apply front XSL with "+cDirectory+" files");
138
			return false;
139
		}
140
		// INJECT ontologies 
141
		System.out.println("Injecting ontologies...");
142
		for (File f : cDirectory.listFiles()) {
143
			if (f.getName().startsWith(cDirectory.getName())) {
144
				OntologiesProjection cp = new OntologiesProjection(f, projectDirectory);
145
				File outputFile = new File(cDirectory, "temp.xml");
146
				cp.process(outputFile);
147
				if (outputFile.exists() && f.delete() && outputFile.renameTo(f)) {
148

  
149
				} else {
150
					System.out.println("Failed to replace XML file "+f+" with "+outputFile);
151
					return false;
152
				}
153
			}
154
		}
155

  
156
		// INJECT word's coordinates
157
		System.out.println("Injecting coordinates...");
158
		File xmlFile = new File(wDirectory, wFile.getName());
159
		File img_links_directory = new File(projectDirectory, "img_links");
160
		File zones_directory = new File(projectDirectory, "zones");
161
		File outputFile = new File(wDirectory, "temp.xml");
162
		CoordsProjection cp = new CoordsProjection(xmlFile, img_links_directory, zones_directory, "w");
163
		if (cp.process(outputFile)) {
164
			if (outputFile.exists() && xmlFile.delete() && outputFile.renameTo(xmlFile)) {
165

  
166
			} else {
167
				System.out.println("Failed to replace XML file "+xmlFile+" with "+outputFile);
168
				return false;
169
			}
170
		} else {
171
			System.out.println("Coordinates injection failed. Aborting");
172
			return false;
173
		}
174

  
175
		// Create XSL directories
176

  
177
		File wXSLDirectory = new File(wDirectory, "xsl");
178
		File cXSLDirectory = new File(cDirectory, "xsl");
179

  
180
		//File cSplitXSLDirectory = new File(cXSLDirectory, "1-split-merge")
181
		//cSplitXSLDirectory.mkdirs()
182

  
183
		File wFrontXSLDirectory = new File(wXSLDirectory, "2-front");
184
		//File cFrontXSLDirectory = new File(cXSLDirectory, "2-front")
185
		wFrontXSLDirectory.mkdirs();
186
		//cFrontXSLDirectory.mkdirs()
187

  
188
		// Copy Split XSL file
189
		//File newCSplitXSLFile = new File(cSplitXSLDirectory, cSplitXSLFile.getName())
190
		//FileCopy.copy(cSplitXSLFile, newCSplitXSLFile);
191

  
192
		// Copy Front XSL file
193
		File newWFrontXSLFile = new File(wFrontXSLDirectory, wFrontXSLFile.getName());
194
		//File newCFrontXSLFile = new File(cFrontXSLDirectory, cFrontXSLFile.getName())
195
		FileCopy.copy(wFrontXSLFile, newWFrontXSLFile);
196
		//FileCopy.copy(cFrontXSLFile, newCFrontXSLFile);
197

  
198
		// Copy edition XSL file
199
		File wEditionXSLDirectory = new File(wXSLDirectory, "4-edition");
200
		File cEditionXSLDirectory = new File(cXSLDirectory, "4-edition");
201
		wEditionXSLDirectory.mkdirs();
202
		cEditionXSLDirectory.mkdirs();
203
		File newWEditionXSLFile1 = new File(wEditionXSLDirectory, editionXSLFile1.getName());
204
		File newCEditionXSLFile1 = new File(cEditionXSLDirectory, editionXSLFile1.getName());
205
		FileCopy.copy(editionXSLFile1, newWEditionXSLFile1);
206
		FileCopy.copy(editionXSLFile1, newCEditionXSLFile1);
207
		File newWEditionXSLFile2 = new File(wEditionXSLDirectory, editionXSLFile2.getName());
208
		File newCEditionXSLFile2 = new File(cEditionXSLDirectory, editionXSLFile2.getName());
209
		FileCopy.copy(editionXSLFile2, newWEditionXSLFile2);
210
		FileCopy.copy(editionXSLFile2, newCEditionXSLFile2);
211
		File newWEditionXSLFile3 = new File(wEditionXSLDirectory, editionXSLFile3.getName());
212
		File newCEditionXSLFile3 = new File(cEditionXSLDirectory, editionXSLFile3.getName());
213
		FileCopy.copy(editionXSLFile3, newWEditionXSLFile3);
214
		FileCopy.copy(editionXSLFile3, newCEditionXSLFile3);
215

  
216
		//patch XSL files with image directory path and set the 'word-element' xsl param
217
		File projectImgDirectory = new File(projectDirectory, "img");
218
		HashMap<String, String> parameters = new HashMap<String, String>();
219
		parameters.put("image-directory", projectImgDirectory.getAbsolutePath());
220
		parameters.put("word-element", "w");
221
				System.out.println("update "+newWEditionXSLFile3+" with "+parameters);
222
				UpdateXSLParameters p = new UpdateXSLParameters(newWEditionXSLFile3);
223
		if (!p.process(parameters)) {
224
			System.out.println("Fail to patch "+newWEditionXSLFile3);
225
			return false;
226
		}
227
		parameters = new HashMap<String, String>();
228
		parameters.put("image-directory", projectImgDirectory.getAbsolutePath());
229
		parameters.put("word-element", "c");
230
				System.out.println("update "+newCEditionXSLFile3+" with "+parameters);
231
				UpdateXSLParameters p2 = new UpdateXSLParameters(newCEditionXSLFile3);
232
		if (!p2.process(parameters)) {
233
			System.out.println("Fail to patch "+newCEditionXSLFile3);
234
			return false;
235
		}
236

  
237
		// Copy js and images directories
238
		File wCSSDirectory =  new File(wDirectory, cssDirectory.getName());
239
		wCSSDirectory.mkdir();
240
		File wJsDirectory =  new File(wDirectory, jsDirectory.getName());
241
		wJsDirectory.mkdir();
242
		File wImagesDirectory =  new File(wDirectory, imagesDirectory.getName());
243
		wImagesDirectory.mkdir();
244
		File cCSSDirectory =  new File(cDirectory, cssDirectory.getName());
245
		cCSSDirectory.mkdir();
246
		File cJsDirectory =  new File(cDirectory, jsDirectory.getName());
247
		cJsDirectory.mkdir();
248
		File cImagesDirectory =  new File(cDirectory, imagesDirectory.getName());
249
		cImagesDirectory.mkdir();
250
		FileCopy.copyFiles(cssDirectory, wCSSDirectory);
251
		FileCopy.copyFiles(jsDirectory, wJsDirectory);
252
		FileCopy.copyFiles(imagesDirectory, wImagesDirectory);
253
		FileCopy.copyFiles(cssDirectory, cCSSDirectory);
254
		FileCopy.copyFiles(jsDirectory, cJsDirectory);
255
		FileCopy.copyFiles(imagesDirectory, cImagesDirectory);
256

  
257
		// Prepare import.xml files
258
		File wImportXMLFile = new File(wDirectory, "import.xml");
259
		File cImportXMLFile = new File(cDirectory, "import.xml");
260

  
261
		BaseParameters.createEmptyParams(wImportXMLFile, AsciiUtils.buildId(wDirectory.getName()).toUpperCase());
262
		BaseParameters wParams = new BaseParameters(wImportXMLFile);
263
		wParams.load();
264
		wParams.setSkipTokenization(true);
265
		wParams.setWordElement("w");
266
		wParams.setDoAnnotation(false);
267
		wParams.setAnnotationLang("fr");
268
		wParams.setWordsPerPage(9999999);
269
		wParams.setTextualPlans("", "note", "teiHeader,facsimile","pb,cb,lb");
270
		wParams.getCorpusElement().setAttribute("font", "Junicode");
271
		wParams.getEditionsElement(wParams.getCorpusElement()).setAttribute("default", "default,facsimile");
272
		wParams.getCorpusElement().setAttribute("name", AsciiUtils.buildId(wDirectory.getName()).toUpperCase());
273
		
274

  
275
		BaseParameters.createEmptyParams(cImportXMLFile, AsciiUtils.buildId(cDirectory.getName()).toUpperCase());
276
		BaseParameters cParams = new BaseParameters(cImportXMLFile);
277
		cParams.load();
278
		cParams.setSkipTokenization(true);
279
		cParams.setWordElement("c");
280
		cParams.setDoAnnotation(false);
281
		cParams.setAnnotationLang("fr");
282
		cParams.setWordsPerPage(9999999);
283
		cParams.setTextualPlans("", "note", "teiHeader,facsimile","pb,cb,lb");
284
		cParams.getCorpusElement().setAttribute("font", "Junicode");
285
		cParams.getEditionsElement(cParams.getCorpusElement()).setAttribute("default", "default,facsimile");
286
		cParams.getCorpusElement().setAttribute("name", AsciiUtils.buildId(cDirectory.getName()).toUpperCase());
287
		
288
		return cParams.save() && wParams.save();
289
	}
290
}
0 291

  
tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/OntologiesProjection.java (revision 476)
1
package org.txm.oriflamms.functions;
2

  
3
import java.io.File;
4
import java.io.IOException;
5
import java.io.Serializable;
6
import java.util.ArrayList;
7
import java.util.Arrays;
8
import java.util.HashMap;
9
import java.util.List;
10
import java.util.regex.Pattern;
11

  
12
import javax.xml.stream.XMLStreamException;
13

  
14
import org.txm.importer.StaxIdentityParser;
15
import org.txm.importer.StaxParser;
16

  
17
class OntologiesProjection extends StaxIdentityParser {
18

  
19
	File xmlFile;
20

  
21
	String wordTag;
22
	String textname;
23
	String milestone;
24
	String group;
25

  
26
	HashMap<String, String[]> links = new HashMap<String, String[]>();
27
	HashMap<String, List<Serializable>> prefixDefsPatterns = new HashMap<String, List<Serializable>>();
28
	HashMap<String, HashMap<String, HashMap<String, String>>> ggly_ontologies = new HashMap<String, HashMap<String, HashMap<String, String>>>();
29
	HashMap<String, HashMap<String, String>> ggly_ontologies_unicodechars = new HashMap<String, HashMap<String, String>>();
30
	HashMap<String, HashMap> lgly_ontologies = new HashMap<String, HashMap>();
31

  
32
	String current_ontology_link_file_name = "";
33

  
34
	File ontologies_links_directory;
35

  
36
	public OntologiesProjection(File xmlFile, File corpusDirectory) throws IOException, XMLStreamException {
37
		super(xmlFile);
38

  
39
		this.xmlFile = xmlFile;
40
		this.ontologies_links_directory = new File(corpusDirectory, "ontologies_links");
41

  
42
		textname = xmlFile.getName();
43
		int idx = textname.lastIndexOf(".xml");
44
		if (idx > 0) textname = textname.substring(0, idx);
45
		textname = textname.replaceAll("-c", "");
46

  
47
		this.wordTag = "c";
48
	}
49

  
50
	public boolean buildGGlyOntology(String prefix) {
51
		String path = (String)prefixDefsPatterns.get(prefix).get(1);
52
		int idx = path.indexOf("#");
53
		if (idx > 0) path = path.substring(0, idx);
54

  
55
		File ggly_ontology_file = new File(xmlFile.getParentFile(), "../"+path);
56
		if (!ggly_ontology_file.exists()) {
57
			System.out.println("WARNING: cannot found global ontology file: "+ggly_ontology_file);
58
			return false;
59
		}
60
		final HashMap<String, HashMap<String, String>> global_ontologies = new HashMap<String, HashMap<String, String>>();
61
		final HashMap<String, String> unicode_global_ontologies = new HashMap<String, String>();
62
		StaxParser pontologies = new StaxParser(ggly_ontology_file) {
63
			boolean startChar = false, startLocalName = false, startValue = false, startMapping = false;
64
			String unicodeChar, standardizedChar, subtype, type;
65
			String id, charLocalName, charValue;
66
			StringBuilder c = new StringBuilder();
67

  
68
			public void processStartElement() {
69
				if (localname.equals("char")) {
70
					// get id
71
					for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
72
						if (parser.getAttributeLocalName(i).equals("id")) {
73
							id = parser.getAttributeValue(i);
74
							break;
75
						}
76
					}
77
					startChar = true;
78
					c.setLength(0);
79
				} else if (localname.equals("mapping")) {
80
					subtype = "";
81
					type = "";
82
					for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
83
						if (parser.getAttributeLocalName(i).equals("subtype")) {
84
							subtype = parser.getAttributeValue(i);
85
						} else if (parser.getAttributeLocalName(i).equals("type")) {
86
							type = parser.getAttributeValue(i);
87
						}
88
					}
89
					startMapping = true;
90
					c.setLength(0);
91
				} else if (localname.equals("localName")) {
92
					startLocalName = true;
93
					c.setLength(0);
94
				} else if (localname.equals("value")) {
95
					startLocalName = true;
96
					c.setLength(0);
97
				}
98
			}
99

  
100
			public void processCharacters() {
101
				if (startMapping) c.append(parser.getText());
102
				else if (startLocalName) c.append(parser.getText());
103
				else if (startValue) c.append(parser.getText());
104
			}
105

  
106
			public void processEndElement() {
107
				if (localname.equals("char")) {
108
					startChar = false;
109
					HashMap<String, String> h = new HashMap<String, String>();
110
					h.put("standard",standardizedChar);
111
					h.put("unicode", unicodeChar);
112
					h.put("value",charValue);
113
					h.put("localname",charLocalName);
114
					global_ontologies.put(id, h);
115
					unicode_global_ontologies.put(unicodeChar, standardizedChar);
116
				} else if (localname.equals("mapping")) {
117
					if (subtype.equals("Unicode")) {
118
						unicodeChar = c.toString().trim();
119
					} else if (type.equals("standardized")) {
120
						standardizedChar = c.toString().trim();
121
					}
122
					startMapping = false;
123
				} else if (localname.equals("localName")) {
124
					charLocalName = c.toString().trim();
125
					startLocalName = false;
126
				} else if (localname.equals("value")) {
127
					charValue = c.toString().trim();
128
					startValue = false;
129
				}
130
			}
131
				};
132
		pontologies.process();
133
		ggly_ontologies.put(prefix, global_ontologies);
134
		ggly_ontologies_unicodechars.put(prefix, unicode_global_ontologies);
135
		//System.out.println(ggly_ontologies
136
		return true;
137
	}
138

  
139
	public boolean buildLGlyOntology(String prefix) {
140
		String path = (String)prefixDefsPatterns.get(prefix).get(1);
141
		int idx = path.indexOf("#");
142
		if (idx > 0) path = path.substring(0, idx);
143

  
144
		File lgly_ontology_file = new File(ontologies_links_directory, textname+"-ontolinks.xml"); // add "../" because we are in txm/<corpus>-c directory
145
		if (!lgly_ontology_file.exists()) {
146
			System.out.println("WARNING: cannot find Local ontology file "+lgly_ontology_file);
147
			return false;
148
		}
149

  
150
		final HashMap<String, HashMap> local_ontologies = new HashMap<String, HashMap>();
151
		StaxParser pontologies = new StaxParser(lgly_ontology_file) {
152
					boolean startNote = false;
153
					String id, change, parent;
154
					StringBuilder c = new StringBuilder();
155
					HashMap<String, String> glyph = new HashMap<String, String>();
156

  
157
					public void processStartElement() {
158
						if (localname.equals("glyph")) {
159
							// get id
160
							for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
161
								change = "";
162
								if (parser.getAttributeLocalName(i).equals("id")) {
163
									id = parser.getAttributeValue(i);
164
								} else if (parser.getAttributeLocalName(i).equals("change")) {
165
									change = parser.getAttributeValue(i);
166
								}
167
							}
168
							glyph = new HashMap<String, String>();
169
							glyph.put("change",change);
170
							glyph.put("id", id); // new glyph
171
							parent = null;
172
						} else if (localname.equals("note")) {
173
							startNote = true;
174
							c.setLength(0);
175
						}
176
					}
177

  
178
					public void processCharacters() {
179
						if (startNote) c.append(parser.getText());
180
					}
181

  
182
					public void processEndElement() {
183
						if (localname.equals("char")) {
184
							if (parent != null)
185
								glyph.put("parent", glyph.get(parent));
186
							local_ontologies.put(id, glyph);
187
						} else if (localname.equals("note")) {
188
							parent = c.toString().trim();
189
							startNote = false;
190
						}
191
					}
192
				};
193
		pontologies.process();
194
		lgly_ontologies.put(prefix, local_ontologies);
195

  
196
		return true;
197
	}
198

  
199
	public void loadOntologyLinkFile(String name) {
200
		links = new HashMap<>();
201
		prefixDefsPatterns = new HashMap<>();
202
		prefixDefsPatterns.put("ggly", Arrays.asList(Pattern.compile("([a-z]+)"), "../../charDecl.xml#$1"));
203
		prefixDefsPatterns.put("lgly", Arrays.asList(Pattern.compile("([a-z]+)"), "../ontologies/"+textname+".xml#$1"));
204
		prefixDefsPatterns.put("txt", Arrays.asList(Pattern.compile("([a-z]+)"), "../texts/"+textname+".xml#$1"));
205
			
206
		lgly_ontologies = new HashMap();
207
		ggly_ontologies = new HashMap();
208
		File ontology_link_file = new File(ontologies_links_directory, name);
209
		if (!ontology_link_file.exists()) {
210
			System.out.println("WARNING: no ontology link file: "+ontology_link_file);
211
			return;
212
		}
213

  
214
		StaxParser pLinks = new StaxParser(ontology_link_file) {
215
					public void processStartElement() {
216
						if (localname.equals("linkGrp")) {
217
							for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
218
								if (parser.getAttributeLocalName(i).equals("type")) {
219
									group = parser.getAttributeValue(i);
220
									break;
221
								}
222
							}
223
						} else if (localname.equals("prefixDef")) {
224
							String ident = null, matchPattern = null, replacementPattern = null;
225
							for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
226
								if (parser.getAttributeLocalName(i).equals("ident")) {
227
									ident = parser.getAttributeValue(i);
228
								} else if (parser.getAttributeLocalName(i).equals("matchPattern")) {
229
									matchPattern = parser.getAttributeValue(i);
230
								} else if (parser.getAttributeLocalName(i).equals("replacementPattern")) {
231
									replacementPattern = parser.getAttributeValue(i);
232
								}
233
							}
234
							if (ident != null && matchPattern != null && replacementPattern != null && !ident.equals("txt")) {
235
								prefixDefsPatterns.put(ident, Arrays.asList(Pattern.compile(matchPattern), replacementPattern));
236
								OntologiesProjection.this.getOntology(ident);
237
							}
238
						} else if (localname.equals("link")) {
239
							String target = "";
240

  
241
							for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
242
								if (parser.getAttributeLocalName(i).equals("target")) {
243
									target = parser.getAttributeValue(i);
244
									break;
245
								}
246
							}
247

  
248
							String[] split = target.split(" ", 2); // first part word id next part are the ontologies id
249
							links.put(split[0].substring(4), split[1].split(" "));
250
						}
251
					}
252
				};
253
		pLinks.process();
254
		//		System.out.println("links size: "+links.size()
255
		//		System.out.println("ggly_ontologies size: "+ggly_ontologies.size()
256
		//		System.out.println("lgly_ontologies size: "+lgly_ontologies.size()
257
	}
258

  
259
	public HashMap<String, HashMap<String, String>> getOntology(String prefix) {
260
		if (prefix.startsWith("ggly")) {
261
			if (!ggly_ontologies.containsKey(prefix)) buildGGlyOntology(prefix);
262
			return ggly_ontologies.get(prefix);
263
		} else if (prefix.startsWith("lgly")) {
264
			if (!lgly_ontologies.containsKey(prefix)) buildLGlyOntology(prefix);
265
			return lgly_ontologies.get(prefix);
266
		}
267
		return null;
268
	}
269

  
270
	public void processStartElement() throws XMLStreamException, IOException {
271
		super.processStartElement();
272
		if (localname.equals("milestone")) {
273
			String id = "";
274
			String unit= "";
275
			for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
276
				if (parser.getAttributeLocalName(i).equals("id")) {
277
					id = parser.getAttributeValue(i);
278
				} else if (parser.getAttributeLocalName(i).equals("unit")) {
279
					unit = parser.getAttributeValue(i);
280
				}
281
			}
282

  
283
			if (unit.equals("surface")) {
284
				milestone = id;
285
			}
286
		} else if (localname.equals(wordTag)) {
287
			String id = "";
288
			String characters = "";
289
			for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
290
				if (parser.getAttributeLocalName(i).equals("id")) {
291
					id = parser.getAttributeValue(i);
292
				} else if (parser.getAttributeLocalName(i).equals("characters")) {
293
					characters = parser.getAttributeValue(i);
294
				}
295
			}
296

  
297
			String ontology_link_file_name = textname+"-ontolinks.xml";
298
			if (!current_ontology_link_file_name.equals(ontology_link_file_name)) { // rebuild hashmaps
299
				current_ontology_link_file_name = ontology_link_file_name;
300
				loadOntologyLinkFile(ontology_link_file_name);
301
				getOntology("ggly");
302
			}
303

  
304
			String sign = null, allographExpert = null, allographAutomatic = null; // default value is attribute characters
305

  
306
			//AUTO ALLOGRAPH
307
			if (links.containsKey(id))
308
				for (String link : links.get(id)) { // automatic allograph loop
309
					if (link.startsWith("lgly")) {
310
						int idx = link.indexOf(":");
311
						link = link.substring(idx+1);
312
						if (link.startsWith("auto_")) { // automatic lgly
313
							if (allographAutomatic == null) allographAutomatic = link.substring(5);
314
							else if (allographAutomatic.length()+5 < link.length()) allographAutomatic = link.substring(5);
315
						} else { // manual lgly
316

  
317
						}
318
					}
319
				}
320
			if (allographAutomatic == null) allographAutomatic = characters;
321

  
322
			//EXPERT ALLOGRAPH
323
			if (links.containsKey(id))
324
				for (String link : links.get(id)) { // expert allograph loop, try to find a ggly entity
325
					//getOntology("ggly")
326
					if (link.startsWith("ggly")) {
327
						int idx = link.indexOf(":");
328
						String prefix = link.substring(0, idx);
329
						link = link.substring(idx+1);
330

  
331
						HashMap<String, HashMap<String, String>> onto = getOntology(prefix);
332
						if (onto != null) {
333
							HashMap<String, String> charOnto = onto.get(link);
334
							if (charOnto != null) {
335
								String localname = charOnto.get("localname");
336
								String value =  charOnto.get("value");
337
								if ("entity".equals(localname)) {
338
									allographExpert = value;
339
								}
340
							}
341
						}
342
					}
343
				}
344
			if (allographExpert == null)
345
				if (links.containsKey(id))
346
					for (String link : links.get(id)) { // expert allograph loop, try to find the longest non-autolgly entity
347
						if (link.startsWith("lgly")) {
348
							int idx = link.indexOf(":");
349
							link = link.substring(idx+1);
350
							if (!link.startsWith("auto_")) { // non automatic lgly
351
								//System.out.println("link= "+link
352
								if (allographExpert == null) allographExpert = link;
353
								else if (allographExpert.length()+5 < link.length()) allographExpert = link;
354
							}
355
						}
356
					}
357
			if (allographExpert == null) allographExpert = allographAutomatic;
358

  
359
			//SIGN
360
			if (sign == null)
361
				if (links.containsKey(id))
362
					for (String link : links.get(id)) { // expert allograph loop, try to find the shortest ggly entity
363
						//getOntology("ggly")
364
						if (link.startsWith("ggly")) {
365
							int idx = link.indexOf(":");
366
							String prefix = link.substring(0, idx);
367
							link = link.substring(idx+1);
368

  
369
							HashMap<String, HashMap<String, String>> onto = getOntology(prefix);
370
							if (onto != null) {
371
								HashMap<String, String> charOnto = onto.get(link);
372
								if (charOnto != null) {
373
									sign = charOnto.get("standard");
374
								}
375
							}
376
						}
377
					}
378
			if (sign == null)
379
				if (links.containsKey(id))
380
					for (String link : links.get(id)) { // sign loop, try to find the shortest non-autolgly entity
381
						if (link.startsWith("lgly")) {
382
							int idx = link.indexOf(":");
383
							link = link.substring(idx+1);
384
							if (!link.startsWith("auto_")) { // non automatic lgly
385
								if (sign == null) sign = link;
386
								else if (sign.length()+5 > link.length()) sign = link;
387
							}
388
						}
389
					}
390
			if (sign == null) {
391
				for (HashMap<String, String> chars : ggly_ontologies_unicodechars.values()) {
392
					//HashMap<String, String> chars = (HashMap<String, String>)ggly.get(1);
393
					if (chars.containsKey(characters)) sign = chars.get(characters);
394
				}
395
			}
396
			if (sign == null) sign = characters.toLowerCase();
397

  
398
			try {
399
				writer.writeAttribute("sign", sign);
400
				writer.writeAttribute("allograph-expert", allographExpert);
401
				writer.writeAttribute("allograph-auto", allographAutomatic);
402
			} catch (XMLStreamException e) {
403
				// TODO Auto-generated catch block
404
				e.printStackTrace();
405
			}
406
		}
407
	}
408

  
409
	public static void main(String[] args) {
410
		File corpusDirectory = new File("/home/mdecorde/TEMP/testori/qgraal_cmTest");
411
		File xmlFile = new File(corpusDirectory, "txm/qgraal_cmTest-c/qgraal_cmTest-c_surf_qgraal_cmTest_lyonbm_pa77-160.xml");
412
		File outputFile = new File(corpusDirectory, "txm/qgraal_cmTest-c/out2.xml");
413
		
414
		try {
415
			OntologiesProjection cp = new OntologiesProjection(xmlFile, corpusDirectory);
416
			System.out.println(cp.process(outputFile));
417
		} catch (XMLStreamException e) {
418
			// TODO Auto-generated catch block
419
			e.printStackTrace();
420
		} catch (IOException e) {
421
			// TODO Auto-generated catch block
422
			e.printStackTrace();
423
		}
424
	}
425
}
0 426

  
tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/OriflammsFunction.java (revision 476)
1
// STANDARD DECLARATIONS
2
package org.txm.oriflamms.functions;
3

  
4
import java.io.BufferedWriter;
5
import java.io.File;
6
import java.io.FileOutputStream;
7
import java.io.IOException;
8
import java.io.OutputStreamWriter;
9
import java.io.PrintWriter;
10
import java.util.List;
11

  
12
import org.txm.Toolbox;
13
import org.txm.searchengine.cqp.AbstractCqiClient;
14
import org.txm.searchengine.cqp.CQPEngine;
15
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
16
import org.txm.searchengine.cqp.clientExceptions.UnexpectedAnswerException;
17
import org.txm.searchengine.cqp.corpus.Corpus;
18
import org.txm.searchengine.cqp.corpus.Part;
19
import org.txm.searchengine.cqp.corpus.Partition;
20
import org.txm.searchengine.cqp.corpus.Property;
21
import org.txm.searchengine.cqp.corpus.QueryResult;
22
import org.txm.searchengine.cqp.corpus.StructuralUnit;
23
import org.txm.searchengine.cqp.corpus.StructuralUnitProperty;
24
import org.txm.searchengine.cqp.corpus.query.Match;
25
import org.txm.searchengine.cqp.corpus.query.Query;
26
import org.txm.searchengine.cqp.serverException.CqiServerError;
27
import org.txm.stat.utils.ConsoleProgressBar;
28

  
29
public abstract class OriflammsFunction {
30

  
31
	protected Corpus corpus;
32
	protected File tsvFile;
33

  
34
	protected AbstractCqiClient CQI = CQPEngine.getCqiClient();
35

  
36
	protected StructuralUnit text_su;
37
	protected StructuralUnitProperty text_id;
38
	protected Property pb_id;
39
	protected Property cb_id;
40
	protected Property lb_id;
41
	protected Property lettersAll;
42
	protected Property lettersAlignable;
43
	protected Property charactersP;
44
	protected Property abbrn;
45
	protected Property form;
46
	protected PrintWriter writer;
47
	protected boolean wordCorpus;
48

  
49
	/**
	 * Stores the corpus to analyse and the file that will receive the TSV result.
	 * No query is run until {@code process()} is called.
	 *
	 * @param corpus the corpus to analyse
	 * @param tsvFile the result file
	 */
	public OriflammsFunction(Corpus corpus, File tsvFile) throws CqiClientException, IOException, CqiServerError {
		this.corpus = corpus;
		this.tsvFile = tsvFile;
	}
53

  
54
	boolean process() throws CqiClientException, IOException, CqiServerError {
55
		
56
		StructuralUnit w = corpus.getStructuralUnit("w");
57

  
58
		wordCorpus = (w == null);
59
		if (wordCorpus) {
60
			System.out.println("Corpus de mots");
61
		} else {
62
			System.out.println("Corpus de lettres");
63
		}
64
		
65
		String[] props = {"pbid", "pbstart", "pbend","cbid", 
66
				"cbstart","cbend","lbid", "lbstart", "lbend", "letters-all",
67
				"letters-alignable", "characters","abbr-n"};
68
		for (String prop : props) {
69
			if (corpus.getProperty(prop) == null) {
70
				System.out.println("Le corpus '"+corpus+"' n'a pas de propriété de mot '"+prop+"'. Abandon.");
71
				return false;
72
			}
73
		}
74
		
75
		text_su = corpus.getStructuralUnit("text");
76
		text_id = text_su.getProperty("id");
77
		pb_id = corpus.getProperty("pbid");
78
		cb_id = corpus.getProperty("cbid");
79
		lb_id = corpus.getProperty("lbid");
80
		lettersAll = corpus.getProperty("letters-all");
81
		lettersAlignable = corpus.getProperty("letters-alignable");
82
		charactersP = corpus.getProperty("characters");
83
		abbrn = corpus.getProperty("abbr-n");
84
		form = corpus.getProperty("word");
85
		
86
		writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(tsvFile), "UTF-8")));
87
		writer.println("text_id\tpb_id\tcb_id\tlb_id\tline part\tNabbr\tNcharAbbr\ttotal\t%=NcharAbbr/Ntotal");
88
		Partition p = corpus.createPartition("tmp", text_su,text_id);
89
		for (Part part : p.getParts()) {
90
			processText(part, part.getName());
91
		}
92
		writer.close();
93
		p.delete();
94
		System.out.println("Result saved in "+tsvFile.getAbsolutePath());
95
		return true;
96
	}
97
	
98
	boolean processText(Corpus corpus, String text_id) throws CqiClientException, IOException, CqiServerError {
99

  
100
		List<Match> matches = corpus.getMatches();
101
		Match last_match = matches.get(matches.size()-1);
102
		int end = last_match.getEnd();
103

  
104
		
105
		QueryResult r = corpus.query(new Query("[pbstart=\"0\"]"), "ABBRORI1", false);
106
		int[] pb_pos = r.getStarts();
107
		r.drop();
108
		r = corpus.query(new Query("[cbstart=\"0\"]"), "ABBRORI2", false);
109
		int[] cb_pos = r.getStarts();
110
		r.drop();
111
		r = corpus.query(new Query("[lbstart=\"0\"]"), "ABBRORI3", false);
112
		int[] lb_pos = r.getStarts();
113
		r.drop();
114

  
115
		System.out.println("N pb = "+pb_pos.length);
116
		System.out.println("N cb = "+cb_pos.length);
117
		System.out.println("N lb = "+lb_pos.length);
118

  
119
		int[] pb_idx = CQI.cpos2Id(pb_id.getQualifiedName(), pb_pos);
120
		int[] cb_idx = CQI.cpos2Id(cb_id.getQualifiedName(), cb_pos);
121
		int[] lb_idx = CQI.cpos2Id(lb_id.getQualifiedName(), lb_pos);
122

  
123
		String[] pb_idx_str = CQI.id2Str(pb_id.getQualifiedName(), pb_idx);
124
		String[] cb_idx_str = CQI.id2Str(cb_id.getQualifiedName(), cb_idx);
125
		String[] lb_idx_str = CQI.id2Str(lb_id.getQualifiedName(), lb_idx);
126

  
127
		ConsoleProgressBar cpb = new ConsoleProgressBar(lb_pos.length);
128
		int p = 0 ;
129
		int c = 0 ;
130

  
131
		for (int l = 0 ; l < lb_pos.length ; l++) {
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff