Révision 476
tmp/org.txm.oriflamms.rcp/.classpath (revision 476) | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<classpath> |
|
3 |
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7"/> |
|
4 |
<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"> |
|
5 |
<accessrules> |
|
6 |
<accessrule kind="accessible" pattern="**"/> |
|
7 |
</accessrules> |
|
8 |
</classpathentry> |
|
9 |
<classpathentry kind="src" path="src"/> |
|
10 |
<classpathentry kind="src" path="res"/> |
|
11 |
<classpathentry kind="output" path="bin"/> |
|
12 |
</classpath> |
|
0 | 13 |
tmp/org.txm.oriflamms.rcp/META-INF/MANIFEST.MF (revision 476) | ||
---|---|---|
1 |
Manifest-Version: 1.0 |
|
2 |
Bundle-ManifestVersion: 2 |
|
3 |
Bundle-Name: Oriflamms |
|
4 |
Bundle-SymbolicName: Oriflamms;singleton:=true |
|
5 |
Bundle-Version: 1.0.0.qualifier |
|
6 |
Bundle-Activator: oriflamms.Activator |
|
7 |
Require-Bundle: org.txm.core;bundle-version="0.7.0", |
|
8 |
org.txm.rcp, |
|
9 |
org.eclipse.ui, |
|
10 |
org.eclipse.core.runtime, |
|
11 |
org.txm.searchengine.cqp.core, |
|
12 |
org.txm.utils |
|
13 |
Bundle-RequiredExecutionEnvironment: JavaSE-1.6 |
|
14 |
Bundle-ActivationPolicy: lazy |
|
15 |
Bundle-Vendor: Textometrie.org |
|
0 | 16 |
tmp/org.txm.oriflamms.rcp/.project (revision 476) | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<projectDescription> |
|
3 |
<name>OriflammsRCP</name> |
|
4 |
<comment></comment> |
|
5 |
<projects> |
|
6 |
</projects> |
|
7 |
<buildSpec> |
|
8 |
<buildCommand> |
|
9 |
<name>org.eclipse.jdt.core.javabuilder</name> |
|
10 |
<arguments> |
|
11 |
</arguments> |
|
12 |
</buildCommand> |
|
13 |
<buildCommand> |
|
14 |
<name>org.eclipse.pde.ManifestBuilder</name> |
|
15 |
<arguments> |
|
16 |
</arguments> |
|
17 |
</buildCommand> |
|
18 |
<buildCommand> |
|
19 |
<name>org.eclipse.pde.SchemaBuilder</name> |
|
20 |
<arguments> |
|
21 |
</arguments> |
|
22 |
</buildCommand> |
|
23 |
</buildSpec> |
|
24 |
<natures> |
|
25 |
<nature>org.eclipse.pde.PluginNature</nature> |
|
26 |
<nature>org.eclipse.jdt.core.javanature</nature> |
|
27 |
</natures> |
|
28 |
</projectDescription> |
|
0 | 29 |
tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/UpdateCorpusImagePaths.java (revision 476) | ||
---|---|---|
1 |
// STANDARD DECLARATIONS |
|
2 |
package org.txm.oriflamms.functions; |
|
3 |
|
|
4 |
import java.io.File; |
|
5 |
import java.io.FileFilter; |
|
6 |
import java.io.IOException; |
|
7 |
|
|
8 |
import javax.xml.stream.XMLStreamException; |
|
9 |
|
|
10 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
|
11 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
12 |
import org.txm.searchengine.cqp.serverException.CqiServerError; |
|
13 |
|
|
14 |
public class UpdateCorpusImagePaths { |
|
15 |
|
|
16 |
public boolean process(MainCorpus corpus, String newpathprefix) throws CqiClientException, IOException, CqiServerError, XMLStreamException { |
|
17 |
|
|
18 |
File binDir = corpus.getBaseDirectory(); |
|
19 |
File htmlDir = new File(binDir, "HTML"); |
|
20 |
if (!htmlDir.exists()) { |
|
21 |
System.out.println("Error: no html directory: "+htmlDir); |
|
22 |
return false; |
|
23 |
} |
|
24 |
File corpushtmlDir = new File(htmlDir, corpus.getName()); |
|
25 |
if (!corpushtmlDir.exists()) { |
|
26 |
System.out.println("Error: no html corpus directory: "+corpushtmlDir); |
|
27 |
return false; |
|
28 |
} |
|
29 |
File corpusdefaulthtmlDir = new File(corpushtmlDir, "facsimile"); |
|
30 |
if (!corpusdefaulthtmlDir.exists()) { |
|
31 |
System.out.println("Error: no html 'facsimile' corpus directory: "+corpusdefaulthtmlDir); |
|
32 |
return false; |
|
33 |
} |
|
34 |
if (newpathprefix == null) { |
|
35 |
newpathprefix = corpusdefaulthtmlDir.getAbsolutePath()+"/images/"; |
|
36 |
System.out.println("No image path prefix specified, using corpus 'facsimile' HTML directory path: "+newpathprefix); |
|
37 |
} |
|
38 |
|
|
39 |
File[] files = corpusdefaulthtmlDir.listFiles(new FileFilter() { |
|
40 |
@Override |
|
41 |
public boolean accept(File f) { |
|
42 |
return f.isFile() && f.getName().endsWith(".html") && !f.isHidden(); |
|
43 |
} |
|
44 |
}); |
|
45 |
|
|
46 |
if (files == null || files.length == 0) { |
|
47 |
System.out.println("Error: no html files in HTML default corpus directory: "+corpusdefaulthtmlDir); |
|
48 |
return false; |
|
49 |
} |
|
50 |
|
|
51 |
for (File htmlFile : files) { |
|
52 |
UpdateHTMLFileImagePaths uhfip = new UpdateHTMLFileImagePaths(htmlFile, newpathprefix); |
|
53 |
File outhtmlFile = new File(htmlFile.getAbsolutePath()+".tmp"); |
|
54 |
if (!uhfip.process(outhtmlFile)) { |
|
55 |
System.out.println("Fail to process HTML file: "+htmlFile); |
|
56 |
return false; |
|
57 |
} |
|
58 |
|
|
59 |
if (htmlFile.delete() && outhtmlFile.renameTo(htmlFile)) { |
|
60 |
//ok |
|
61 |
} else { |
|
62 |
System.out.println("Fail to replace HTML file: "+htmlFile+" with "+outhtmlFile); |
|
63 |
return false; |
|
64 |
} |
|
65 |
} |
|
66 |
return true; |
|
67 |
} |
|
68 |
} |
|
0 | 69 |
tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/BuildAllProjects.java (revision 476) | ||
---|---|---|
1 |
// STANDARD DECLARATIONS |
|
2 |
package org.txm.oriflamms.functions; |
|
3 |
|
|
4 |
import java.io.File; |
|
5 |
import java.util.HashMap; |
|
6 |
|
|
7 |
/**
 * Batch driver meant to build several Oriflamms projects in a row.
 *
 * The actual build call is not wired yet: the constructor currently only
 * announces each registered project on standard output.
 */
public class BuildAllProjects {

    /**
     * Registers the projects to build and prints one "*** BUILD name ***"
     * line per project.
     *
     * @param projectsDirectory directory holding the projects' source files
     * @param xslDirectory directory holding the XSL stylesheets
     */
    public BuildAllProjects(File projectsDirectory, File xslDirectory) {

        // project name -> build settings
        HashMap<String, HashMap<String, Object>> todo = new HashMap<String, HashMap<String, Object>>();

        // Intended settings for each project (not filled in yet):
        //   xmlFile         = new File(projectsDirectory, "Charrette_Ms_A.xml")
        //   xslFile         = new File(projectsDirectory, "oriflamms-convert-mss-dates-oriflammsxml.xsl")
        //   imageDirectory  = new File(projectsDirectory, "images")
        //   createArchive   = false
        todo.put("CHARETTE", new HashMap<String, Object>());
        todo.put("CHARETTEBIS", new HashMap<String, Object>());

        for (String projectName : todo.keySet()) {
            System.out.println("*** BUILD "+projectName+" ***");
            // TODO build the project with TEI2Project using the settings in todo.get(projectName);
            // report failures per project so that one failure does not stop the batch.
        }
    }

    /**
     * Runs the batch.
     *
     * @param args optional: args[0] = projects directory, args[1] = XSL
     *        directory; the historical hard-coded paths are used otherwise
     */
    public static void main(String[] args) {
        File projectsDirectory = new File("/home/mdecorde/TEMP/testori/corpus/");
        File xslDirectory = new File("/home/mdecorde/TXM/scripts/macro/org/txm/macro/oriflamms/prepare");
        if (args != null && args.length >= 2) {
            projectsDirectory = new File(args[0]);
            xslDirectory = new File(args[1]);
        }
        new BuildAllProjects(projectsDirectory, xslDirectory);
    }
}
|
37 |
|
|
0 | 38 |
tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/AbbreviationsAndLines.java (revision 476) | ||
---|---|---|
1 |
// STANDARD DECLARATIONS |
|
2 |
package org.txm.oriflamms.functions; |
|
3 |
|
|
4 |
import java.io.File; |
|
5 |
import java.io.IOException; |
|
6 |
|
|
7 |
import org.txm.Toolbox; |
|
8 |
import org.txm.searchengine.cqp.AbstractCqiClient; |
|
9 |
import org.txm.searchengine.cqp.CQPEngine; |
|
10 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
|
11 |
import org.txm.searchengine.cqp.clientExceptions.UnexpectedAnswerException; |
|
12 |
import org.txm.searchengine.cqp.corpus.Corpus; |
|
13 |
import org.txm.searchengine.cqp.serverException.CqiServerError; |
|
14 |
|
|
15 |
public class AbbreviationsAndLines extends OriflammsFunction { |
|
16 |
|
|
17 |
protected int dist_start; |
|
18 |
protected int dist_end; |
|
19 |
|
|
20 |
public AbbreviationsAndLines(Corpus corpus, File tsvFile, int dist_start, int dist_end) throws CqiClientException, IOException, CqiServerError { |
|
21 |
super(corpus, tsvFile); |
|
22 |
} |
|
23 |
|
|
24 |
public boolean process() throws CqiClientException, IOException, CqiServerError { |
|
25 |
|
|
26 |
dist_start = Math.abs(dist_start); |
|
27 |
dist_end = Math.abs(dist_end); |
|
28 |
|
|
29 |
System.out.println("Dénombrement des abbréviations de "+corpus+" en '"+(wordCorpus?"mots":"lettres")+"' pour des distances au début de "+dist_start+" et à la fin "+dist_end+" de la ligne"); |
|
30 |
return super.process(); |
|
31 |
} |
|
32 |
|
|
33 |
boolean processLine(String text_id, String pb_id, String cb_id, String lb_id, int length, Object[] infos) { |
|
34 |
|
|
35 |
String[] abbrNs = (String[]) infos[0]; |
|
36 |
String[] allLetters = (String[]) infos[1]; |
|
37 |
String[] alignableLetters = (String[]) infos[2]; |
|
38 |
String[] characters = (String[]) infos[3]; |
|
39 |
String[] words = (String[]) infos[4]; |
|
40 |
|
|
41 |
int Nabbr = 0, NsupAbbr = 0, Ntotal = 0; |
|
42 |
int p2 = length - dist_end; |
|
43 |
if (p2 < 0) p2 = length+1; |
|
44 |
if (p2 < dist_start) p2 = dist_start; |
|
45 |
|
|
46 |
for (int i = 0 ; i < length ; i++) { |
|
47 |
|
|
48 |
if (i == dist_start) { |
|
49 |
writer.println(text_id+"\t"+pb_id+"\t"+cb_id+"\t"+lb_id+"\ts\t"+Nabbr+"\t"+NsupAbbr+"\t"+Ntotal+"\t"+(100.0*(float)NsupAbbr/(float)Ntotal)); |
|
50 |
|
|
51 |
Nabbr = 0; |
|
52 |
NsupAbbr = 0; |
|
53 |
Ntotal = 0; |
|
54 |
} |
|
55 |
if (i == p2) { |
|
56 |
writer.println(text_id+"\t"+pb_id+"\t"+cb_id+"\t"+lb_id+"\tm\t"+Nabbr+"\t"+NsupAbbr+"\t"+Ntotal+"\t"+(100.0*(float)NsupAbbr/(float)Ntotal)); |
|
57 |
|
|
58 |
Nabbr = 0; |
|
59 |
NsupAbbr = 0; |
|
60 |
Ntotal = 0; |
|
61 |
} |
|
62 |
|
|
63 |
Nabbr += Integer.parseInt(abbrNs[i]); |
|
64 |
NsupAbbr += allLetters[i].length() - characters[i].length(); |
|
65 |
Ntotal += allLetters[i].length(); |
|
66 |
|
|
67 |
} |
|
68 |
writer.println(text_id+"\t"+pb_id+"\t"+cb_id+"\t"+lb_id+"\te\t"+Nabbr+"\t"+NsupAbbr+"\t"+Ntotal+"\t"+(100.0*(float)NsupAbbr/(float)Ntotal)); |
|
69 |
return true; |
|
70 |
} |
|
71 |
|
|
72 |
Object[] getInfos(int from ,int length) throws UnexpectedAnswerException, IOException, CqiServerError { |
|
73 |
|
|
74 |
int[] positions = new int[length]; |
|
75 |
for (int i = 0 ; i < length ; i++) positions[i] = from++; |
|
76 |
AbstractCqiClient CQI = CQPEngine.getCqiClient(); |
|
77 |
|
|
78 |
String[] abbrNs = CQI.cpos2Str(abbrn.getQualifiedName(), positions); |
|
79 |
String[] allLetters = CQI.cpos2Str(lettersAll.getQualifiedName(), positions); |
|
80 |
String[] alignableLetters = CQI.cpos2Str(lettersAlignable.getQualifiedName(), positions); |
|
81 |
String[] characters = CQI.cpos2Str(charactersP.getQualifiedName(), positions); |
|
82 |
String[] words = CQI.cpos2Str(form.getQualifiedName(), positions); |
|
83 |
|
|
84 |
String[][] rez = {abbrNs,allLetters,alignableLetters,characters,words}; |
|
85 |
return rez; |
|
86 |
} |
|
87 |
} |
|
0 | 88 |
tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/AbbreviationsAndSementics.java (revision 476) | ||
---|---|---|
1 |
// STANDARD DECLARATIONS |
|
2 |
package org.txm.oriflamms.functions; |
|
3 |
|
|
4 |
import java.io.File; |
|
5 |
import java.io.IOException; |
|
6 |
import java.util.ArrayList; |
|
7 |
import java.util.HashMap; |
|
8 |
|
|
9 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
|
10 |
import org.txm.searchengine.cqp.clientExceptions.UnexpectedAnswerException; |
|
11 |
import org.txm.searchengine.cqp.corpus.Corpus; |
|
12 |
import org.txm.searchengine.cqp.corpus.StructuralUnit; |
|
13 |
import org.txm.searchengine.cqp.corpus.StructuralUnitProperty; |
|
14 |
import org.txm.searchengine.cqp.serverException.CqiServerError; |
|
15 |
|
|
16 |
public class AbbreviationsAndSementics extends OriflammsFunction { |
|
17 |
|
|
18 |
protected String entities; |
|
19 |
protected String[] entitiesArray; |
|
20 |
protected ArrayList<StructuralUnitProperty> structures; |
|
21 |
|
|
22 |
public AbbreviationsAndSementics(Corpus corpus, File tsvFile, String entities) throws CqiClientException, IOException, |
|
23 |
CqiServerError { |
|
24 |
super(corpus, tsvFile); |
|
25 |
|
|
26 |
entitiesArray = entities.split(","); |
|
27 |
structures = new ArrayList<StructuralUnitProperty>(); |
|
28 |
for (String structName : entitiesArray) { |
|
29 |
StructuralUnit su = corpus.getStructuralUnit(structName); |
|
30 |
if (su != null) { |
|
31 |
StructuralUnitProperty sup = su.getProperty("n"); |
|
32 |
if (sup != null) |
|
33 |
structures.add(sup); |
|
34 |
} |
|
35 |
} |
|
36 |
|
|
37 |
} |
|
38 |
|
|
39 |
public boolean process() throws CqiClientException, IOException, CqiServerError { |
|
40 |
|
|
41 |
System.out.println("Dénombrement des abbréviations de "+corpus+" en '"+(wordCorpus?"mots":"lettres")+"' pour les entités "+entities); |
|
42 |
if (structures.size() == 0) { |
|
43 |
System.out.println("Erreur: pas de structures disponibles pour les entités suivantes "+entities); |
|
44 |
return false; |
|
45 |
} |
|
46 |
return super.process(); |
|
47 |
} |
|
48 |
|
|
49 |
Object[] getInfos(int from ,int length) throws UnexpectedAnswerException, IOException, CqiServerError { |
|
50 |
|
|
51 |
int[] positions = new int[length]; |
|
52 |
for (int i = 0 ; i < length ; i++) positions[i] = from++; |
|
53 |
|
|
54 |
String[] abbrNs = CQI.cpos2Str(abbrn.getQualifiedName(), positions); |
|
55 |
String[] allLetters = CQI.cpos2Str(lettersAll.getQualifiedName(), positions); |
|
56 |
String[] alignableLetters = CQI.cpos2Str(lettersAlignable.getQualifiedName(), positions); |
|
57 |
String[] characters = CQI.cpos2Str(charactersP.getQualifiedName(), positions); |
|
58 |
String[] words = CQI.cpos2Str(form.getQualifiedName(), positions); |
|
59 |
HashMap<String, int[]> structuresPositions = new HashMap<String, int[]>(); |
|
60 |
for (StructuralUnitProperty structProp : structures) { |
|
61 |
structuresPositions.put(structProp.getFullName(), CQI.cpos2Struc(structProp.getQualifiedName(), positions)); |
|
62 |
} |
|
63 |
|
|
64 |
Object[] rez = {abbrNs,allLetters,alignableLetters,characters,words, structuresPositions}; |
|
65 |
return rez; |
|
66 |
} |
|
67 |
|
|
68 |
boolean processLine(String text_id, String pb_id, String cb_id, String lb_id, int length, Object[] infos) { |
|
69 |
|
|
70 |
String[] abbrNs = (String[]) infos[0]; |
|
71 |
String[] allLetters = (String[]) infos[1]; |
|
72 |
String[] alignableLetters = (String[]) infos[2]; |
|
73 |
String[] characters = (String[]) infos[3]; |
|
74 |
String[] words = (String[]) infos[4]; |
|
75 |
HashMap<String, int[]> structuresPositions = (HashMap<String, int[]>) infos[5]; |
|
76 |
|
|
77 |
int NabbrTotal = 0, NsupAbbrTotal = 0, NtotalTotal = 0; |
|
78 |
for (int i = 0 ; i < length ; i++) { |
|
79 |
NabbrTotal += Integer.parseInt(abbrNs[i]); |
|
80 |
NsupAbbrTotal += allLetters[i].length() - characters[i].length(); |
|
81 |
NtotalTotal += allLetters[i].length(); |
|
82 |
} |
|
83 |
|
|
84 |
for (StructuralUnitProperty strutcProp : structures) { |
|
85 |
int Nabbr = 0, NsupAbbr = 0, Ntotal = 0; |
|
86 |
int[] structureP = structuresPositions.get(strutcProp.getFullName()); |
|
87 |
|
|
88 |
for (int i = 0 ; i < length ; i++) { |
|
89 |
if (structureP[i] >= 0) { // the position is in the structure |
|
90 |
Nabbr += Integer.parseInt(abbrNs[i]); |
|
91 |
NsupAbbr += allLetters[i].length() - characters[i].length(); |
|
92 |
Ntotal += allLetters[i].length(); |
|
93 |
} |
|
94 |
} |
|
95 |
|
|
96 |
writer.println(text_id+"\t"+pb_id+"\t"+cb_id+"\t"+lb_id+"\t"+strutcProp.getStructuralUnit()+"\t"+Nabbr+"\t"+NsupAbbr+"\t"+Ntotal+"\t"+((float)NsupAbbr/(float)Ntotal)); |
|
97 |
NabbrTotal -= Nabbr; |
|
98 |
NsupAbbrTotal -= NsupAbbr; |
|
99 |
NtotalTotal -= Ntotal; |
|
100 |
} |
|
101 |
|
|
102 |
int Nabbr = 0, NsupAbbr = 0, Ntotal = 0; |
|
103 |
writer.println(text_id+"\t"+pb_id+"\t"+cb_id+"\t"+lb_id+"\t#REST\t"+NabbrTotal+"\t"+NsupAbbrTotal+"\t"+NtotalTotal+"\t"+((float)NsupAbbrTotal/(float)NtotalTotal)); |
|
104 |
return true; |
|
105 |
} |
|
106 |
} |
|
0 | 107 |
tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/CoordsProjection.java (revision 476) | ||
---|---|---|
1 |
package org.txm.oriflamms.functions; |
|
2 |
|
|
3 |
import java.io.File; |
|
4 |
import java.io.IOException; |
|
5 |
import java.util.HashMap; |
|
6 |
|
|
7 |
import javax.xml.stream.XMLStreamException; |
|
8 |
|
|
9 |
import org.txm.importer.StaxIdentityParser; |
|
10 |
import org.txm.importer.StaxParser; |
|
11 |
|
|
12 |
class CoordsProjection extends StaxIdentityParser { |
|
13 |
|
|
14 |
File xmlFile; |
|
15 |
File img_links_directory; |
|
16 |
File zones_directory; |
|
17 |
|
|
18 |
String wordTag; |
|
19 |
String textname; |
|
20 |
String milestone; |
|
21 |
|
|
22 |
String current_img_file = ""; |
|
23 |
String current_zone_file = ""; |
|
24 |
|
|
25 |
String xmlType; |
|
26 |
String group; |
|
27 |
|
|
28 |
HashMap<String, String[]> zones = new HashMap<String, String[]>(); |
|
29 |
HashMap<String, String> links = new HashMap<String, String>(); |
|
30 |
|
|
31 |
public CoordsProjection(File xmlFile, File img_links_directory, File zones_directory, String wordTag) throws IOException, XMLStreamException { |
|
32 |
super(xmlFile); |
|
33 |
|
|
34 |
this.xmlFile = xmlFile; |
|
35 |
this.img_links_directory = img_links_directory; |
|
36 |
this.zones_directory = zones_directory; |
|
37 |
this.wordTag = wordTag; |
|
38 |
|
|
39 |
textname = xmlFile.getName(); |
|
40 |
int idx = textname.indexOf(".xml"); |
|
41 |
if (idx > 0) textname = textname.substring(0, idx); |
|
42 |
|
|
43 |
idx = textname.indexOf("-w"); |
|
44 |
if (idx > 0) { |
|
45 |
textname = textname.substring(0, idx); |
|
46 |
xmlType = "word"; |
|
47 |
} |
|
48 |
|
|
49 |
idx = textname.indexOf("-c"); |
|
50 |
if (idx > 0) { |
|
51 |
textname = textname.substring(0, idx); |
|
52 |
xmlType = "character"; |
|
53 |
} |
|
54 |
} |
|
55 |
|
|
56 |
public void processStartElement() throws XMLStreamException, IOException { |
|
57 |
super.processStartElement(); |
|
58 |
if (localname.equals("milestone")) { |
|
59 |
String id = ""; |
|
60 |
String unit= ""; |
|
61 |
for (int i = 0 ; i < parser.getAttributeCount() ; i++) { |
|
62 |
if (parser.getAttributeLocalName(i).equals("id")) { |
|
63 |
id = parser.getAttributeValue(i); |
|
64 |
} else if (parser.getAttributeLocalName(i).equals("unit")) { |
|
65 |
unit = parser.getAttributeValue(i); |
|
66 |
} |
|
67 |
} |
|
68 |
|
|
69 |
if (unit.equals("surface")) { |
|
70 |
milestone = id; |
|
71 |
} |
|
72 |
} else if (localname.equals(wordTag)) { |
|
73 |
String id = ""; |
|
74 |
for (int i = 0 ; i < parser.getAttributeCount() ; i++) { |
|
75 |
if (parser.getAttributeLocalName(i).equals("id")) { |
|
76 |
id = parser.getAttributeValue(i); |
|
77 |
break; |
|
78 |
} |
|
79 |
} |
|
80 |
|
|
81 |
// load next data if needed |
|
82 |
String img_file_name = textname+"_"+milestone+"-links.xml"; |
|
83 |
if (!current_img_file.equals(img_file_name)) { // rebuild hashmaps |
|
84 |
String zone_file_name = textname+"_"+milestone+"-zones.xml"; |
|
85 |
loadNextData(img_file_name, zone_file_name); |
|
86 |
} |
|
87 |
|
|
88 |
// println "Find coords for word_id="+id+" in "+img_file_name+" and "+zone_file_name |
|
89 |
// println "zone: "+links[id] |
|
90 |
// println "coords: "+zones[links[id]] |
|
91 |
if (zones.size() > 0 && links.size() > 0) { |
|
92 |
String[] coords = zones.get(links.get(id)); |
|
93 |
if (coords != null) { |
|
94 |
if (coords[0] == null || coords[1] == null || coords[2] == null || coords[3] == null) { |
|
95 |
System.out.println("WARNING one of coordinates is missing: "+coords); |
|
96 |
} else { |
|
97 |
try { |
|
98 |
writer.writeAttribute("x1", coords[0]); |
|
99 |
writer.writeAttribute("y1", coords[1]); |
|
100 |
writer.writeAttribute("x2", coords[2]); |
|
101 |
writer.writeAttribute("y2", coords[3]); |
|
102 |
} catch (XMLStreamException e) { |
|
103 |
// TODO Auto-generated catch block |
|
104 |
e.printStackTrace(); |
|
105 |
} |
|
106 |
} |
|
107 |
} else { |
|
108 |
System.out.println("WARNING No group for word id="+id+" and link id="+links.get(id)+" in text "+textname); |
|
109 |
} |
|
110 |
} |
|
111 |
} |
|
112 |
} |
|
113 |
|
|
114 |
protected void loadNextData(String img_file_name, String zone_file_name) { |
|
115 |
File img_link_file = new File(img_links_directory, img_file_name); |
|
116 |
File zone_file = new File(zones_directory, zone_file_name); |
|
117 |
|
|
118 |
zones.clear(); |
|
119 |
links.clear(); |
|
120 |
if (zone_file.exists()) { |
|
121 |
StaxParser pZones = new StaxParser(zone_file) { |
|
122 |
public void processStartElement() { |
|
123 |
if (localname.equals("zone")) { |
|
124 |
String type = ""; |
|
125 |
String idZone = ""; |
|
126 |
String ulx = "", uly = "", lrx = "", lry = ""; |
|
127 |
|
|
128 |
for (int i = 0 ; i < parser.getAttributeCount() ; i++) { |
|
129 |
if (parser.getAttributeLocalName(i).equals("id")) { |
|
130 |
idZone = parser.getAttributeValue(i); |
|
131 |
} else if (parser.getAttributeLocalName(i).equals("type")) { |
|
132 |
type = parser.getAttributeValue(i); |
|
133 |
} else if (parser.getAttributeLocalName(i).equals("ulx")) { |
|
134 |
ulx = parser.getAttributeValue(i); |
|
135 |
} else if (parser.getAttributeLocalName(i).equals("uly")) { |
|
136 |
uly = parser.getAttributeValue(i); |
|
137 |
} else if (parser.getAttributeLocalName(i).equals("lrx")) { |
|
138 |
lrx = parser.getAttributeValue(i); |
|
139 |
} else if (parser.getAttributeLocalName(i).equals("lry")) { |
|
140 |
lry = parser.getAttributeValue(i); |
|
141 |
} |
|
142 |
} |
|
143 |
|
|
144 |
if (type.equals(xmlType)) { |
|
145 |
zones.put(idZone, new String[]{ulx, uly, lrx, lry}); |
|
146 |
} |
|
147 |
|
|
148 |
} |
|
149 |
} |
|
150 |
}; |
|
151 |
pZones.process(); |
|
152 |
} |
|
153 |
if (img_link_file.exists()) { |
|
154 |
StaxParser pLinks = new StaxParser(img_link_file) { |
|
155 |
public void processStartElement() { |
|
156 |
if (localname.equals("linkGrp")) { |
|
157 |
for (int i = 0 ; i < parser.getAttributeCount() ; i++) { |
|
158 |
if (parser.getAttributeLocalName(i).equals("type")) { |
|
159 |
group = parser.getAttributeValue(i); |
|
160 |
break; |
|
161 |
} |
|
162 |
} |
|
163 |
} else if (localname.equals("link") && group.startsWith(xmlType)) { |
|
164 |
String target = ""; |
|
165 |
|
|
166 |
for (int i = 0 ; i < parser.getAttributeCount() ; i++) { |
|
167 |
if (parser.getAttributeLocalName(i).equals("target")) { |
|
168 |
target = parser.getAttributeValue(i); |
|
169 |
break; |
|
170 |
} |
|
171 |
} |
|
172 |
|
|
173 |
String[] split = target.split(" "); |
|
174 |
links.put(split[0].substring(4), split[1].substring(4)); |
|
175 |
} |
|
176 |
} |
|
177 |
}; |
|
178 |
pLinks.process(); |
|
179 |
} |
|
180 |
//println "zones size: "+zones.size() |
|
181 |
//println "links size: "+links.size() |
|
182 |
|
|
183 |
current_img_file = img_file_name; |
|
184 |
} |
|
185 |
|
|
186 |
public static void main(String[] args) { |
|
187 |
File corpusDirectory = new File("/home/mdecorde/TEMP/testori/FontenatTestAlignement"); |
|
188 |
File xmlFile = new File(corpusDirectory, "txm/FontenayTest-w/FontenayTest-w.xml"); |
|
189 |
File img_links_directory = new File(corpusDirectory, "img_links"); |
|
190 |
File zones_directory = new File(corpusDirectory, "zones"); |
|
191 |
|
|
192 |
File outputFile = new File(corpusDirectory, "txm/FontenayTest-w/FontenayTest-w-coords2.xml"); |
|
193 |
|
|
194 |
CoordsProjection cp; |
|
195 |
try { |
|
196 |
cp = new CoordsProjection(xmlFile, img_links_directory, zones_directory, "w"); |
|
197 |
System.out.println(cp.process(outputFile)); |
|
198 |
} catch (IOException e) { |
|
199 |
// TODO Auto-generated catch block |
|
200 |
e.printStackTrace(); |
|
201 |
} catch (XMLStreamException e) { |
|
202 |
// TODO Auto-generated catch block |
|
203 |
e.printStackTrace(); |
|
204 |
} |
|
205 |
|
|
206 |
} |
|
207 |
} |
|
0 | 208 |
tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/TEI2Project.java (revision 476) | ||
---|---|---|
1 |
// STANDARD DECLARATIONS |
|
2 |
package org.txm.oriflamms.functions; |
|
3 |
|
|
4 |
import java.io.File; |
|
5 |
import java.io.IOException; |
|
6 |
import java.io.PrintWriter; |
|
7 |
import java.util.Arrays; |
|
8 |
|
|
9 |
import javax.xml.stream.XMLStreamException; |
|
10 |
import javax.xml.transform.TransformerException; |
|
11 |
|
|
12 |
import org.eclipse.core.runtime.Platform; |
|
13 |
import org.osgi.framework.Bundle; |
|
14 |
import org.txm.Toolbox; |
|
15 |
import org.txm.functions.ProgressWatcher; |
|
16 |
import org.txm.importer.ApplyXsl2; |
|
17 |
import org.txm.importer.WriteIdAndNAttributes; |
|
18 |
import org.txm.utils.BundleUtils; |
|
19 |
import org.txm.utils.DeleteDir; |
|
20 |
import org.txm.utils.io.FileCopy; |
|
21 |
import org.txm.utils.io.IOUtils; |
|
22 |
import org.txm.utils.logger.Log; |
|
23 |
import org.txm.utils.zip.Zip; |
|
24 |
|
|
25 |
public class TEI2Project { |
|
26 |
|
|
27 |
File xmlFile; |
|
28 |
File xslFile; |
|
29 |
File imagesDirectory; |
|
30 |
boolean createArchive; |
|
31 |
|
|
32 |
public TEI2Project(File xmlFile, File xslFile, File imagesDirectory, boolean createArchive) { |
|
33 |
this.xmlFile = xmlFile; |
|
34 |
this.xslFile = xslFile; |
|
35 |
this.imagesDirectory = imagesDirectory; |
|
36 |
this.createArchive = createArchive; |
|
37 |
} |
|
38 |
|
|
39 |
public boolean process(ProgressWatcher monitor) throws IOException, XMLStreamException, TransformerException { |
|
40 |
if (!xmlFile.exists()) { |
|
41 |
System.out.println("Could not read input XML input file: "+xmlFile); |
|
42 |
return false; |
|
43 |
} |
|
44 |
|
|
45 |
File oriflammsMacroDirectory = new File(BundleUtils.getBundleFile("Oriflamms"), "res"); |
|
46 |
System.out.println("Ressources files directory: "+oriflammsMacroDirectory); |
|
47 |
if (!oriflammsMacroDirectory.exists()) { |
|
48 |
System.out.println("Oriflamms macro directory not found: "+oriflammsMacroDirectory); |
|
49 |
return false; |
|
50 |
} |
|
51 |
File xslTokenizer = new File(oriflammsMacroDirectory, "oriflamms-tokenize-words.xsl"); |
|
52 |
File xslPatchLbInWords = new File(oriflammsMacroDirectory,"oriflamms-patch-words-with-lb.xsl"); |
|
53 |
File xslMissingMilestones = new File(oriflammsMacroDirectory, "oriflamms-patch-milestones.xsl"); |
|
54 |
File xslCharactersTokenizer = new File(oriflammsMacroDirectory, "oriflamms-tokenize-chars-1-tag.xsl"); |
|
55 |
File xslCharactersIdentifier = new File(oriflammsMacroDirectory, "oriflamms-tokenize-chars-2-identify.xsl"); |
|
56 |
File xslZones = new File(oriflammsMacroDirectory, "oriflamms-convert-transcriptions-orizones.xsl"); |
|
57 |
|
|
58 |
if (!xslTokenizer.exists() || !xslPatchLbInWords.exists() || !xslMissingMilestones.exists() || |
|
59 |
!xslCharactersTokenizer.exists() || !xslCharactersIdentifier.exists() || !xslZones.exists()) { |
|
60 |
System.out.println("Could not find one of TXM's XSL file : "+ |
|
61 |
Arrays.asList(xslTokenizer, xslPatchLbInWords, xslMissingMilestones, |
|
62 |
xslCharactersTokenizer, xslCharactersIdentifier, xslZones)); |
|
63 |
|
|
64 |
System.out.println(Arrays.asList(xslTokenizer.exists(), xslPatchLbInWords.exists(), xslMissingMilestones.exists() |
|
65 |
, xslCharactersTokenizer.exists(), xslCharactersIdentifier.exists(), xslZones.exists()).toString()); |
|
66 |
return false; |
|
67 |
} |
|
68 |
|
|
69 |
File xmlFileParentDirectory = xmlFile.getParentFile(); |
|
70 |
String projectName = xmlFile.getName(); |
|
71 |
if (projectName.indexOf(".") > 0) projectName = projectName.substring(0, projectName.indexOf(".")); |
|
72 |
File projectDirectory = new File(xmlFileParentDirectory, projectName); |
|
73 |
DeleteDir.deleteDirectory(projectDirectory); |
|
74 |
if (projectDirectory.exists()) { |
|
75 |
System.out.println("Could not delete previous project directory: "+projectDirectory); |
|
76 |
return false; |
|
77 |
} |
|
78 |
|
|
79 |
projectDirectory.mkdir(); |
|
80 |
|
|
81 |
if (!projectDirectory.exists()) { |
|
82 |
System.out.println("Could not create project directory: "+projectDirectory); |
|
83 |
return false; |
|
84 |
} |
|
85 |
|
|
86 |
System.out.println("Oriflamms project directory: "+projectDirectory); |
|
87 |
|
|
88 |
File xmlFileCopy = new File(projectDirectory, xmlFile.getName()); |
|
89 |
System.out.println("Copying XML files: "+xmlFile+" to "+projectDirectory); |
|
90 |
CopyXMLFiles cdf = new CopyXMLFiles(xmlFile); |
|
91 |
projectDirectory.mkdir(); |
|
92 |
System.out.println("Files copied: "+cdf.copy(projectDirectory)); |
|
93 |
if (!xmlFileCopy.exists()) { |
|
94 |
System.out.println("Could not copy input XML input file: "+xmlFile+" to "+xmlFileCopy); |
|
95 |
return false; |
|
96 |
} |
|
97 |
|
|
98 |
if (xslFile != null) { |
|
99 |
if (xslFile.exists()) { |
|
100 |
System.out.println("Applying "+xslFile+" to "+xmlFileCopy+"..."); |
|
101 |
ApplyXsl2 builder = new ApplyXsl2(xslFile); |
|
102 |
if (!builder.process(xmlFileCopy, xmlFileCopy)) { |
|
103 |
System.out.println("Failed to process "+xmlFileCopy+" with "+xslFile); |
|
104 |
return false; |
|
105 |
} |
|
106 |
} |
|
107 |
} |
|
108 |
|
|
109 |
File textsDirectory = new File(projectDirectory, "texts"); |
|
110 |
File imgDirectory = new File(projectDirectory, "img"); |
|
111 |
File img_linksDirectory = new File(projectDirectory, "img_links"); |
|
112 |
File ontologiesDirectory = new File(projectDirectory, "ontologies"); |
|
113 |
File ontologies_linksDirectory = new File(projectDirectory, "ontologies_links"); |
|
114 |
File zonesDirectory = new File(projectDirectory, "zones"); |
|
115 |
textsDirectory.mkdir(); |
|
116 |
imgDirectory.mkdir(); |
|
117 |
img_linksDirectory.mkdir(); |
|
118 |
ontologiesDirectory.mkdir(); |
|
119 |
ontologies_linksDirectory.mkdir(); |
|
120 |
zonesDirectory.mkdir(); |
|
121 |
|
|
122 |
File xmlWFile = new File(textsDirectory, projectName+"-w.xml"); |
|
123 |
File xmlWCFile = new File(textsDirectory, projectName+"-c.xml"); |
|
124 |
|
|
125 |
try { |
|
126 |
|
|
127 |
System.out.println("Applying "+xslMissingMilestones+" to "+xmlWFile+"..."); |
|
128 |
if (monitor != null) monitor.worked(1, "Applying "+xslMissingMilestones+" to "+xmlWFile+"..."); |
|
129 |
ApplyXsl2 builder = new ApplyXsl2(xslMissingMilestones); |
|
130 |
if (!builder.process(xmlFileCopy, xmlWFile)) { |
|
131 |
System.out.println("Failed to process "+xmlWFile+" with "+xslMissingMilestones); |
|
132 |
return false; |
|
133 |
} |
|
134 |
|
|
135 |
System.out.println("Applying "+xslTokenizer+" to "+xmlWFile+"..."); |
|
136 |
if (monitor != null) monitor.worked(15, "Applying "+xslTokenizer+" to "+xmlWFile+"..."); |
|
137 |
builder = new ApplyXsl2(xslTokenizer); |
|
138 |
if (!builder.process(xmlWFile, xmlWFile)) { |
|
139 |
System.out.println("Failed to process "+xmlFileCopy+" with "+xslTokenizer); |
|
140 |
return false; |
|
141 |
} |
|
142 |
|
|
143 |
System.out.println("Merging words </w><w>"); |
|
144 |
if (monitor != null) monitor.worked(15, "Merging words </w><w>"); |
|
145 |
String content = IOUtils.getText(xmlWFile, "UTF-8"); |
|
146 |
content = content.replaceAll("</w><w[^>]*>", ""); |
|
147 |
content = content.replaceAll("</w>\\s*(<milestone[^>]*>)?\\s*(<pb[^>]*>)?\\s*(<cb[^>]*>)?\\s*(<lb[^>]*break=\"no\"[^>]*>)\\s*<w[^>]*>", "$1$2$3$4"); |
|
148 |
try { |
|
149 |
PrintWriter writer = IOUtils.getWriter(xmlWFile); |
|
150 |
writer.print(content); |
|
151 |
writer.close(); |
|
152 |
} catch (Exception e2) { |
|
153 |
System.out.println("Error while fixing words: "+e2); |
|
154 |
return false; |
|
155 |
} |
|
156 |
|
|
157 |
System.out.println("Applying "+xslPatchLbInWords+" to "+xmlWFile+"..."); |
|
158 |
if (monitor != null) monitor.worked(15, "Applying "+xslPatchLbInWords+" to "+xmlWFile+"..."); |
|
159 |
builder = new ApplyXsl2(xslPatchLbInWords); |
|
160 |
if (!builder.process(xmlWFile, xmlWFile)) { |
|
161 |
System.out.println("Failed to process "+xmlFileCopy+" with "+xslPatchLbInWords); |
|
162 |
return false; |
|
163 |
} |
|
164 |
|
|
165 |
System.out.println("Fixing 'id' and 'n' attributes in "+xmlWFile+"..."); |
|
166 |
if (monitor != null) monitor.worked(15, "Fixing 'id' and 'n' attributes in "+xmlWFile+"..."); |
|
167 |
WriteIdAndNAttributes wiana = new WriteIdAndNAttributes(xmlWFile, projectName); |
|
168 |
File tmp = new File(xmlWFile.getParentFile(), "tmp_"+xmlWFile.getName()); |
|
169 |
if (!wiana.process(tmp)) { |
|
170 |
System.out.println("Failed to fix id and n attributes with of "+xmlWFile+" file"); |
|
171 |
return false; |
|
172 |
} else { |
|
173 |
wiana = null; |
|
174 |
xmlWFile.delete(); |
|
175 |
tmp.renameTo(xmlWFile); |
|
176 |
if (tmp.exists()) { |
|
177 |
System.out.println("Failed to replace "+xmlWFile+" with result file "+tmp); |
|
178 |
return false; |
|
179 |
} |
|
180 |
} |
|
181 |
|
|
182 |
System.out.println("Applying "+xslCharactersTokenizer+" to "+xmlWFile+"..."); |
|
183 |
if (monitor != null) monitor.worked(15, "Applying "+xslCharactersTokenizer+" to "+xmlWFile+"..."); |
|
184 |
builder = new ApplyXsl2(xslCharactersTokenizer); |
|
185 |
if (!builder.process(xmlWFile, xmlWCFile)) { |
|
186 |
System.out.println("Failed to process "+xmlWFile+" with "+xslCharactersTokenizer); |
|
187 |
return false; |
|
188 |
} |
|
189 |
|
|
190 |
System.out.println("Applying "+xslCharactersIdentifier+" to "+xmlWCFile+"..."); |
|
191 |
if (monitor != null) monitor.worked(1, "Applying "+xslCharactersIdentifier+" to "+xmlWCFile+"..."); |
|
192 |
builder = new ApplyXsl2(xslCharactersIdentifier); |
|
193 |
if (!builder.process(xmlWCFile, xmlWCFile)) { |
|
194 |
System.out.println("Failed to process "+xmlWCFile+" with "+xslCharactersIdentifier); |
|
195 |
return false; |
|
196 |
} |
|
197 |
|
|
198 |
System.out.println("Applying "+xslZones+" to "+xmlWFile+"..."); |
|
199 |
if (monitor != null) monitor.worked(15, "Applying "+xslZones+" to "+xmlWFile+"..."); |
|
200 |
builder = new ApplyXsl2(xslZones); |
|
201 |
if (!builder.process(xmlWFile, null)) { |
|
202 |
System.out.println("Failed to process "+xmlFileCopy+" with "+xslZones); |
|
203 |
return false; |
|
204 |
} |
|
205 |
|
|
206 |
if (imagesDirectory.exists() && imagesDirectory.listFiles().length > 0) { |
|
207 |
System.out.println("Copying images files from "+imagesDirectory+" to "+imgDirectory+"..."); |
|
208 |
FileCopy.copyFiles(imagesDirectory, imgDirectory); |
|
209 |
File[] files = imgDirectory.listFiles(); |
|
210 |
if (files != null) System.out.println(""+files.length+" images copied."); |
|
211 |
} |
|
212 |
|
|
213 |
if (createArchive) { |
|
214 |
if (monitor != null) monitor.worked(15, "Building Oriflamms binary project... "); |
|
215 |
File zipFile = new File(xmlFileParentDirectory, projectName+".oriflamms"); |
|
216 |
zipFile.delete(); |
|
217 |
Zip.compress(projectDirectory, zipFile); |
|
218 |
|
|
219 |
if (zipFile.exists()) { |
|
220 |
System.out.println("Project oriflamms exported to "+zipFile); |
|
221 |
DeleteDir.deleteDirectory(projectDirectory); |
|
222 |
} else { |
|
223 |
System.out.println("Fail to export project "+projectDirectory); |
|
224 |
} |
|
225 |
} |
|
226 |
|
|
227 |
} catch (Exception e) { |
|
228 |
System.out.println("Error while applying a XSL file: "+e); |
|
229 |
Log.printStackTrace(e); |
|
230 |
} |
|
231 |
if (monitor != null) monitor.done(); |
|
232 |
return true; |
|
233 |
} |
|
234 |
} |
|
0 | 235 |
tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/UpdateHTMLFileImagePaths.java (revision 476) | ||
---|---|---|
1 |
// STANDARD DECLARATIONS |
|
2 |
package org.txm.oriflamms.functions; |
|
3 |
|
|
4 |
import java.io.File; |
|
5 |
import java.io.IOException; |
|
6 |
|
|
7 |
import javax.xml.stream.XMLStreamException; |
|
8 |
|
|
9 |
import org.txm.importer.StaxIdentityParser; |
|
10 |
|
|
11 |
public class UpdateHTMLFileImagePaths extends StaxIdentityParser { |
|
12 |
String newpathprefix; |
|
13 |
|
|
14 |
public UpdateHTMLFileImagePaths(File htmlFile, String newpathprefix) throws IOException, XMLStreamException { |
|
15 |
super(htmlFile); |
|
16 |
this.newpathprefix = newpathprefix; |
|
17 |
|
|
18 |
if (newpathprefix.endsWith("/")) newpathprefix = newpathprefix.substring(0, newpathprefix.length()-1); |
|
19 |
} |
|
20 |
|
|
21 |
String src = null; |
|
22 |
protected void processStartElement() throws XMLStreamException, IOException { |
|
23 |
src = null; |
|
24 |
if (localname.equals("img")) { |
|
25 |
//System.out.println("start element img"); |
|
26 |
int n = parser.getAttributeCount(); |
|
27 |
for (int i = 0 ; i < n ; i++) { |
|
28 |
if (parser.getAttributeLocalName(i).equals("src")) { |
|
29 |
src = parser.getAttributeValue(i); |
|
30 |
//System.out.println("start element img@src="+src); |
|
31 |
break; |
|
32 |
} |
|
33 |
} |
|
34 |
|
|
35 |
if (src != null) { |
|
36 |
int idx = src.lastIndexOf("/"); |
|
37 |
String name = src.substring(idx); |
|
38 |
src = newpathprefix+name; |
|
39 |
} |
|
40 |
} |
|
41 |
|
|
42 |
super.processStartElement(); |
|
43 |
} |
|
44 |
|
|
45 |
protected void writeAttributes() throws XMLStreamException { |
|
46 |
for (int i = 0 ; i < parser.getAttributeCount() ; i++) { |
|
47 |
if (src != null && "src".equals(parser.getAttributeLocalName(i))) { |
|
48 |
writeAttribute(parser.getAttributePrefix(i), parser.getAttributeLocalName(i), src); |
|
49 |
src = null; |
|
50 |
} else { |
|
51 |
writeAttribute(parser.getAttributePrefix(i), parser.getAttributeLocalName(i), parser.getAttributeValue(i)); |
|
52 |
} |
|
53 |
} |
|
54 |
} |
|
55 |
|
|
56 |
public static void main(String[] args) { |
|
57 |
File htmlFile = new File("/home/mdecorde/TXM/corpora/graal/HTML/GRAAL/ms-colonne/qgraal_cm_160a.html"); |
|
58 |
File outfile = new File("/home/mdecorde/TXM/corpora/graal/HTML/GRAAL/ms-colonne/qgraal_cm_160a-o.html"); |
|
59 |
String prefix = "AAAA"; |
|
60 |
UpdateHTMLFileImagePaths p; |
|
61 |
try { |
|
62 |
p = new UpdateHTMLFileImagePaths(htmlFile, prefix); |
|
63 |
System.out.println(p.process(outfile)); |
|
64 |
} catch (IOException e) { |
|
65 |
// TODO Auto-generated catch block |
|
66 |
e.printStackTrace(); |
|
67 |
} catch (XMLStreamException e) { |
|
68 |
// TODO Auto-generated catch block |
|
69 |
e.printStackTrace(); |
|
70 |
} |
|
71 |
|
|
72 |
} |
|
73 |
} |
|
0 | 74 |
tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/Allographs.java (revision 476) | ||
---|---|---|
1 |
// STANDARD DECLARATIONS |
|
2 |
package org.txm.oriflamms.functions; |
|
3 |
|
|
4 |
import java.io.File; |
|
5 |
import java.io.IOException; |
|
6 |
import java.util.HashMap; |
|
7 |
import java.util.regex.Pattern; |
|
8 |
|
|
9 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
|
10 |
import org.txm.searchengine.cqp.clientExceptions.UnexpectedAnswerException; |
|
11 |
import org.txm.searchengine.cqp.corpus.Corpus; |
|
12 |
import org.txm.searchengine.cqp.corpus.Property; |
|
13 |
import org.txm.searchengine.cqp.corpus.query.Query; |
|
14 |
import org.txm.searchengine.cqp.serverException.CqiServerError; |
|
15 |
|
|
16 |
public class Allographs extends OriflammsFunction { |
|
17 |
|
|
18 |
protected String sign, allograph; |
|
19 |
Query query; |
|
20 |
private Property sign_property; |
|
21 |
private Property allograph_auto_property; |
|
22 |
private Property allograph_expert_property; |
|
23 |
|
|
24 |
Pattern signRegExp, characterRegExp; |
|
25 |
|
|
26 |
public Allographs(Corpus corpus, File tsvFile, String sign, Query query, String allograph) throws CqiClientException, IOException, CqiServerError { |
|
27 |
super(corpus, tsvFile); |
|
28 |
|
|
29 |
this.query = query; |
|
30 |
this.allograph = allograph; |
|
31 |
this.sign = sign; |
|
32 |
} |
|
33 |
|
|
34 |
public boolean process() throws CqiClientException, IOException, CqiServerError { |
|
35 |
String[] props = {"sign", "allograph-expert", "allograph-auto", "characters"}; |
|
36 |
System.out.println("Dénombrement des allographes '"+allograph+"' de signe '"+sign+"' dans le contexte '"+query+"'"); |
|
37 |
|
|
38 |
for (String prop : props) { |
|
39 |
if (corpus.getProperty(prop) == null) { |
|
40 |
System.out.println("Le corpus '"+corpus+"' n'a pas de propriété de mot '"+prop+"'. Abandon."); |
|
41 |
return false; |
|
42 |
} |
|
43 |
} |
|
44 |
|
|
45 |
sign_property = corpus.getProperty("sign"); |
|
46 |
allograph_expert_property = corpus.getProperty("allograph-expert"); |
|
47 |
allograph_auto_property = corpus.getProperty("allograph-auto"); |
|
48 |
signRegExp = Pattern.compile(sign); |
|
49 |
characterRegExp = Pattern.compile(allograph); |
|
50 |
|
|
51 |
return super.process(); |
|
52 |
} |
|
53 |
|
|
54 |
Object[] getInfos(int from ,int length) throws UnexpectedAnswerException, IOException, CqiServerError { |
|
55 |
|
|
56 |
int[] positions = new int[length]; |
|
57 |
for (int i = 0 ; i < length ; i++) positions[i] = from++; |
|
58 |
|
|
59 |
String[] signs = CQI.cpos2Str(sign_property.getQualifiedName(), positions); |
|
60 |
String[] allographs_expert = CQI.cpos2Str(allograph_expert_property.getQualifiedName(), positions); |
|
61 |
String[] allographs_auto = CQI.cpos2Str(allograph_auto_property.getQualifiedName(), positions); |
|
62 |
String[] characters = CQI.cpos2Str(charactersP.getQualifiedName(), positions); |
|
63 |
|
|
64 |
Object[] rez = {signs,allographs_expert,allographs_auto, characters}; |
|
65 |
return rez; |
|
66 |
} |
|
67 |
|
|
68 |
boolean processLine(String text_id, String pb_id, String cb_id, String lb_id, int length, Object[] infos) { |
|
69 |
|
|
70 |
String[] signs = (String[]) infos[0]; |
|
71 |
String[] allographs_expert = (String[]) infos[1]; |
|
72 |
String[] allographs_auto = (String[]) infos[2]; |
|
73 |
String[] characters = (String[]) infos[3]; |
|
74 |
HashMap<String, HashMap<String, Integer>> count_signs = new HashMap<String, HashMap<String, Integer>>(); |
|
75 |
|
|
76 |
for (int i = 0 ; i < length ; i++) { |
|
77 |
String s = signs[i]; |
|
78 |
String c = characters[i]; |
|
79 |
|
|
80 |
if (signRegExp.matcher(s).find() && characterRegExp.matcher(c).find()) { |
|
81 |
if (!count_signs.containsKey(s)) count_signs.put(s, new HashMap<String, Integer>()); |
|
82 |
HashMap<String, Integer> counts = count_signs.get(s); |
|
83 |
|
|
84 |
if (!counts.containsKey(c)) counts.put(c, 0); |
|
85 |
counts.put(c, counts.get(c) + 1); |
|
86 |
} |
|
87 |
} |
|
88 |
|
|
89 |
for (String s : count_signs.keySet()) { |
|
90 |
HashMap<String, Integer> counts = count_signs.get(s); |
|
91 |
int sum = 0; |
|
92 |
for (Integer i : counts.values()) sum += i; |
|
93 |
|
|
94 |
for (String c : counts.keySet()) { |
|
95 |
writer.println(text_id+"\t"+pb_id+"\t"+cb_id+"\t"+lb_id+"\t"+s+"\t"+c+"\t"+counts.get(c)+"\t"+((float)counts.get(c)/(float)sum)); |
|
96 |
} |
|
97 |
} |
|
98 |
return true; |
|
99 |
} |
|
100 |
} |
|
0 | 101 |
tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/CopyXMLFiles.java (revision 476) | ||
---|---|---|
1 |
package org.txm.oriflamms.functions; |
|
2 |
|
|
3 |
import java.io.File; |
|
4 |
import java.io.IOException; |
|
5 |
import java.io.InputStream; |
|
6 |
import java.util.ArrayList; |
|
7 |
|
|
8 |
import javax.xml.stream.XMLInputFactory; |
|
9 |
import javax.xml.stream.XMLResolver; |
|
10 |
import javax.xml.stream.XMLStreamConstants; |
|
11 |
import javax.xml.stream.XMLStreamException; |
|
12 |
import javax.xml.stream.XMLStreamReader; |
|
13 |
|
|
14 |
import org.txm.utils.io.FileCopy; |
|
15 |
|
|
16 |
public class CopyXMLFiles { |
|
17 |
File xmlFile; |
|
18 |
File outDir; |
|
19 |
ArrayList<File> dtdFiles = new ArrayList<File>(); |
|
20 |
|
|
21 |
public CopyXMLFiles(File xmlFile) { |
|
22 |
this.xmlFile = xmlFile; |
|
23 |
} |
|
24 |
|
|
25 |
public ArrayList<File> copy(File outDir) throws IOException, XMLStreamException { |
|
26 |
XMLInputFactory factory; |
|
27 |
XMLStreamReader parser; |
|
28 |
InputStream inputData = xmlFile.toURI().toURL().openStream(); |
|
29 |
factory = XMLInputFactory.newInstance(); |
|
30 |
factory.setXMLResolver(new XMLResolver() { |
|
31 |
@Override |
|
32 |
public Object resolveEntity(String publicID, String systemID, |
|
33 |
String baseURI, String namespace) throws XMLStreamException { |
|
34 |
File srcFile = new File(xmlFile.getParentFile(), systemID); |
|
35 |
dtdFiles.add(srcFile); |
|
36 |
try { |
|
37 |
return srcFile.toURI().toURL().openStream(); |
|
38 |
} catch (IOException e) { |
|
39 |
// TODO Auto-generated catch block |
|
40 |
e.printStackTrace(); |
|
41 |
return new ArrayList<>(); |
|
42 |
} |
|
43 |
} |
|
44 |
}); |
|
45 |
|
|
46 |
parser = factory.createXMLStreamReader(inputData); |
|
47 |
|
|
48 |
for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) { |
|
49 |
|
|
50 |
} |
|
51 |
|
|
52 |
dtdFiles.add(xmlFile); |
|
53 |
for (File dtd : dtdFiles) { |
|
54 |
File cpy = new File(outDir, dtd.getName()); |
|
55 |
FileCopy.copy(dtd, cpy); |
|
56 |
} |
|
57 |
return dtdFiles; |
|
58 |
} |
|
59 |
|
|
60 |
public static void main(String[] args) throws IOException, XMLStreamException { |
|
61 |
File xmlFile = new File("/home/mdecorde/Téléchargements/Inscriptions1.xml"); |
|
62 |
File outDir = new File("/home/mdecorde/Téléchargements/test"); |
|
63 |
outDir.mkdir(); |
|
64 |
CopyXMLFiles cdf = new CopyXMLFiles(xmlFile); |
|
65 |
System.out.println(cdf.copy(outDir)); |
|
66 |
} |
|
67 |
} |
|
0 | 68 |
tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/Project2XTZ.java (revision 476) | ||
---|---|---|
1 |
package org.txm.oriflamms.functions; |
|
2 |
|
|
3 |
import java.io.File; |
|
4 |
import java.io.IOException; |
|
5 |
import java.util.HashMap; |
|
6 |
|
|
7 |
import javax.xml.parsers.ParserConfigurationException; |
|
8 |
import javax.xml.stream.XMLStreamException; |
|
9 |
import javax.xml.transform.TransformerException; |
|
10 |
|
|
11 |
import org.txm.Toolbox; |
|
12 |
import org.txm.importer.ApplyXsl2; |
|
13 |
import org.txm.objects.BaseParameters; |
|
14 |
import org.txm.utils.AsciiUtils; |
|
15 |
import org.txm.utils.BundleUtils; |
|
16 |
import org.txm.utils.DeleteDir; |
|
17 |
import org.txm.utils.io.FileCopy; |
|
18 |
import org.txm.utils.xml.UpdateXSLParameters; |
|
19 |
import org.xml.sax.SAXException; |
|
20 |
|
|
21 |
public class Project2XTZ { |
|
22 |
File projectDirectory; |
|
23 |
public Project2XTZ(File projectDirectory) { |
|
24 |
this.projectDirectory = projectDirectory; |
|
25 |
} |
|
26 |
|
|
27 |
public boolean process() throws IOException, TransformerException, ParserConfigurationException, SAXException, XMLStreamException { |
|
28 |
File oriflammsMacroDirectory = new File(BundleUtils.getBundleFile("Oriflamms"), "res"); |
|
29 |
System.out.println("Ressources files directory: "+oriflammsMacroDirectory); |
|
30 |
if (!oriflammsMacroDirectory.exists()) { |
|
31 |
System.out.println("Oriflamms macro directory not found: "+oriflammsMacroDirectory+". Aborting"); |
|
32 |
return false; |
|
33 |
} |
|
34 |
File wFrontXSLFile = new File(oriflammsMacroDirectory, "txm-front-teioriflammsw-xtz.xsl"); |
|
35 |
if (!wFrontXSLFile.exists()) { |
|
36 |
System.out.println("Oriflamms to XML-XTZ front XSL file is missing: "+wFrontXSLFile+". Aborting"); |
|
37 |
return false; |
|
38 |
} |
|
39 |
File cFrontXSLFile = new File(oriflammsMacroDirectory, "txm-front-teioriflammsc-xtz.xsl"); |
|
40 |
if (!cFrontXSLFile.exists()) { |
|
41 |
System.out.println("Oriflamms to XML-XTZ front XSL file is missing: "+cFrontXSLFile+". Aborting"); |
|
42 |
return false; |
|
43 |
} |
|
44 |
|
|
45 |
File cSplitXSLFile = new File(oriflammsMacroDirectory, "1-oriflamms-split-surfaces.xsl"); |
|
46 |
if (!cSplitXSLFile.exists()) { |
|
47 |
System.out.println("Oriflamms to XML-XTZ split XSL file is missing: "+cSplitXSLFile+". Aborting"); |
|
48 |
return false; |
|
49 |
} |
|
50 |
|
|
51 |
File editionXSLFile1 = new File(oriflammsMacroDirectory, "1-default-html.xsl"); |
|
52 |
if (!editionXSLFile1.exists()) { |
|
53 |
System.out.println("Oriflamms to XML-XTZ edition XSL file is missing: "+editionXSLFile1+"."); |
|
54 |
return false; |
|
55 |
} |
|
56 |
File editionXSLFile2 = new File(oriflammsMacroDirectory, "2-default-pager.xsl"); |
|
57 |
if (!editionXSLFile2.exists()) { |
|
58 |
System.out.println("Oriflamms to XML-XTZ edition XSL file is missing: "+editionXSLFile2+"."); |
|
59 |
return false; |
|
60 |
} |
|
61 |
File editionXSLFile3 = new File(oriflammsMacroDirectory, "3-facsimile-pager.xsl"); |
|
62 |
if (!editionXSLFile3.exists()) { |
|
63 |
System.out.println("Oriflamms to XML-XTZ edition XSL file is missing: "+editionXSLFile3+"."); |
|
64 |
return false; |
|
65 |
} |
|
66 |
File cssDirectory = new File(oriflammsMacroDirectory, "css"); |
|
67 |
if (!cssDirectory.exists()) { |
|
68 |
System.out.println("Oriflamms css directory is missing: "+cssDirectory+"."); |
|
69 |
return false; |
|
70 |
} |
|
71 |
File jsDirectory = new File(oriflammsMacroDirectory, "js"); |
|
72 |
if (!jsDirectory.exists()) { |
|
73 |
System.out.println("Oriflamms js directory is missing: "+jsDirectory+"."); |
|
74 |
return false; |
|
75 |
} |
|
76 |
File imagesDirectory = new File(oriflammsMacroDirectory, "images"); |
|
77 |
if (!imagesDirectory.exists()) { |
|
78 |
System.out.println("Oriflamms images directory is missing: "+imagesDirectory+"."); |
|
79 |
return false; |
|
80 |
} |
|
81 |
|
|
82 |
File textDirectory = new File(projectDirectory, "texts"); |
|
83 |
|
|
84 |
File txmDirectory = new File(projectDirectory, "txm"); |
|
85 |
if (txmDirectory.exists()) DeleteDir.deleteDirectory(txmDirectory); |
|
86 |
txmDirectory.mkdir(); |
|
87 |
if (!txmDirectory.exists()) { |
|
88 |
System.out.println("Error: the 'txm' directory could not be created: "+txmDirectory+". Aborting."); |
|
89 |
return false; |
|
90 |
} |
|
91 |
|
|
92 |
File wDirectory = null; |
|
93 |
File cDirectory = null; |
|
94 |
File wFile = null; |
|
95 |
File cFile = null; |
|
96 |
|
|
97 |
File[] xmlFiles = textDirectory.listFiles(); |
|
98 |
if (xmlFiles == null) return false; |
|
99 |
|
|
100 |
for (File xmlFile : xmlFiles) { |
|
101 |
if (xmlFile.getName().endsWith("-w.xml")) { |
|
102 |
String name = xmlFile.getName().substring(0, xmlFile.getName().indexOf(".xml")); |
|
103 |
wDirectory = new File(txmDirectory, name); |
|
104 |
wFile = xmlFile; |
|
105 |
} else if (xmlFile.getName().endsWith("-c.xml")) { |
|
106 |
String name = xmlFile.getName().substring(0, xmlFile.getName().indexOf(".xml")); |
|
107 |
cDirectory = new File(txmDirectory, name); |
|
108 |
cFile = xmlFile; |
|
109 |
} |
|
110 |
} |
|
111 |
|
|
112 |
if (wDirectory == null) { |
|
113 |
System.out.println("The Word corpus XML file was not found in "+textDirectory+". Aborting."); |
|
114 |
return false; |
|
115 |
} |
|
116 |
if (cDirectory == null) { |
|
117 |
System.out.println("The Letter corpus XML file was not found in "+textDirectory+". Aborting."); |
|
118 |
return false; |
|
119 |
} |
|
120 |
|
|
121 |
//Create XML-XTZ source directories |
|
122 |
wDirectory.mkdirs(); |
|
123 |
cDirectory.mkdirs(); |
|
124 |
|
|
125 |
// Copy XML files and split character XML file |
|
126 |
FileCopy.copy(wFile, new File(wDirectory, wFile.getName())); |
|
127 |
|
|
128 |
ApplyXsl2 builder = new ApplyXsl2(cSplitXSLFile); |
|
129 |
HashMap<String, String> xslParams = new HashMap<String, String>(); |
|
130 |
xslParams.put("output-directory", cDirectory.getAbsoluteFile().toURI().toString()); |
|
131 |
for (String name : xslParams.keySet()) builder.setParam(name, xslParams.get(name)); |
|
132 |
if (!builder.process(cFile, null)) { |
|
133 |
System.out.println("Error: fail to split "+cFile); |
|
134 |
return false; |
|
135 |
} |
|
136 |
if (!ApplyXsl2.processImportSources(cFrontXSLFile, ApplyXsl2.listFiles(cDirectory), new HashMap<String, Object>())) { |
|
137 |
System.out.println("Error: fail to apply front XSL with "+cDirectory+" files"); |
|
138 |
return false; |
|
139 |
} |
|
140 |
// INJECT ontologies |
|
141 |
System.out.println("Injecting ontologies..."); |
|
142 |
for (File f : cDirectory.listFiles()) { |
|
143 |
if (f.getName().startsWith(cDirectory.getName())) { |
|
144 |
OntologiesProjection cp = new OntologiesProjection(f, projectDirectory); |
|
145 |
File outputFile = new File(cDirectory, "temp.xml"); |
|
146 |
cp.process(outputFile); |
|
147 |
if (outputFile.exists() && f.delete() && outputFile.renameTo(f)) { |
|
148 |
|
|
149 |
} else { |
|
150 |
System.out.println("Failed to replace XML file "+f+" with "+outputFile); |
|
151 |
return false; |
|
152 |
} |
|
153 |
} |
|
154 |
} |
|
155 |
|
|
156 |
// INJECT word's coordinates |
|
157 |
System.out.println("Injecting coordinates..."); |
|
158 |
File xmlFile = new File(wDirectory, wFile.getName()); |
|
159 |
File img_links_directory = new File(projectDirectory, "img_links"); |
|
160 |
File zones_directory = new File(projectDirectory, "zones"); |
|
161 |
File outputFile = new File(wDirectory, "temp.xml"); |
|
162 |
CoordsProjection cp = new CoordsProjection(xmlFile, img_links_directory, zones_directory, "w"); |
|
163 |
if (cp.process(outputFile)) { |
|
164 |
if (outputFile.exists() && xmlFile.delete() && outputFile.renameTo(xmlFile)) { |
|
165 |
|
|
166 |
} else { |
|
167 |
System.out.println("Failed to replace XML file "+xmlFile+" with "+outputFile); |
|
168 |
return false; |
|
169 |
} |
|
170 |
} else { |
|
171 |
System.out.println("Coordinates injection failed. Aborting"); |
|
172 |
return false; |
|
173 |
} |
|
174 |
|
|
175 |
// Create XSL directories |
|
176 |
|
|
177 |
File wXSLDirectory = new File(wDirectory, "xsl"); |
|
178 |
File cXSLDirectory = new File(cDirectory, "xsl"); |
|
179 |
|
|
180 |
//File cSplitXSLDirectory = new File(cXSLDirectory, "1-split-merge") |
|
181 |
//cSplitXSLDirectory.mkdirs() |
|
182 |
|
|
183 |
File wFrontXSLDirectory = new File(wXSLDirectory, "2-front"); |
|
184 |
//File cFrontXSLDirectory = new File(cXSLDirectory, "2-front") |
|
185 |
wFrontXSLDirectory.mkdirs(); |
|
186 |
//cFrontXSLDirectory.mkdirs() |
|
187 |
|
|
188 |
// Copy Split XSL file |
|
189 |
//File newCSplitXSLFile = new File(cSplitXSLDirectory, cSplitXSLFile.getName()) |
|
190 |
//FileCopy.copy(cSplitXSLFile, newCSplitXSLFile); |
|
191 |
|
|
192 |
// Copy Front XSL file |
|
193 |
File newWFrontXSLFile = new File(wFrontXSLDirectory, wFrontXSLFile.getName()); |
|
194 |
//File newCFrontXSLFile = new File(cFrontXSLDirectory, cFrontXSLFile.getName()) |
|
195 |
FileCopy.copy(wFrontXSLFile, newWFrontXSLFile); |
|
196 |
//FileCopy.copy(cFrontXSLFile, newCFrontXSLFile); |
|
197 |
|
|
198 |
// Copy edition XSL file |
|
199 |
File wEditionXSLDirectory = new File(wXSLDirectory, "4-edition"); |
|
200 |
File cEditionXSLDirectory = new File(cXSLDirectory, "4-edition"); |
|
201 |
wEditionXSLDirectory.mkdirs(); |
|
202 |
cEditionXSLDirectory.mkdirs(); |
|
203 |
File newWEditionXSLFile1 = new File(wEditionXSLDirectory, editionXSLFile1.getName()); |
|
204 |
File newCEditionXSLFile1 = new File(cEditionXSLDirectory, editionXSLFile1.getName()); |
|
205 |
FileCopy.copy(editionXSLFile1, newWEditionXSLFile1); |
|
206 |
FileCopy.copy(editionXSLFile1, newCEditionXSLFile1); |
|
207 |
File newWEditionXSLFile2 = new File(wEditionXSLDirectory, editionXSLFile2.getName()); |
|
208 |
File newCEditionXSLFile2 = new File(cEditionXSLDirectory, editionXSLFile2.getName()); |
|
209 |
FileCopy.copy(editionXSLFile2, newWEditionXSLFile2); |
|
210 |
FileCopy.copy(editionXSLFile2, newCEditionXSLFile2); |
|
211 |
File newWEditionXSLFile3 = new File(wEditionXSLDirectory, editionXSLFile3.getName()); |
|
212 |
File newCEditionXSLFile3 = new File(cEditionXSLDirectory, editionXSLFile3.getName()); |
|
213 |
FileCopy.copy(editionXSLFile3, newWEditionXSLFile3); |
|
214 |
FileCopy.copy(editionXSLFile3, newCEditionXSLFile3); |
|
215 |
|
|
216 |
//patch XSL files with image directory path and set the 'word-element' xsl param |
|
217 |
File projectImgDirectory = new File(projectDirectory, "img"); |
|
218 |
HashMap<String, String> parameters = new HashMap<String, String>(); |
|
219 |
parameters.put("image-directory", projectImgDirectory.getAbsolutePath()); |
|
220 |
parameters.put("word-element", "w"); |
|
221 |
System.out.println("update "+newWEditionXSLFile3+" with "+parameters); |
|
222 |
UpdateXSLParameters p = new UpdateXSLParameters(newWEditionXSLFile3); |
|
223 |
if (!p.process(parameters)) { |
|
224 |
System.out.println("Fail to patch "+newWEditionXSLFile3); |
|
225 |
return false; |
|
226 |
} |
|
227 |
parameters = new HashMap<String, String>(); |
|
228 |
parameters.put("image-directory", projectImgDirectory.getAbsolutePath()); |
|
229 |
parameters.put("word-element", "c"); |
|
230 |
System.out.println("update "+newCEditionXSLFile3+" with "+parameters); |
|
231 |
UpdateXSLParameters p2 = new UpdateXSLParameters(newCEditionXSLFile3); |
|
232 |
if (!p2.process(parameters)) { |
|
233 |
System.out.println("Fail to patch "+newCEditionXSLFile3); |
|
234 |
return false; |
|
235 |
} |
|
236 |
|
|
237 |
// Copy js and images directories |
|
238 |
File wCSSDirectory = new File(wDirectory, cssDirectory.getName()); |
|
239 |
wCSSDirectory.mkdir(); |
|
240 |
File wJsDirectory = new File(wDirectory, jsDirectory.getName()); |
|
241 |
wJsDirectory.mkdir(); |
|
242 |
File wImagesDirectory = new File(wDirectory, imagesDirectory.getName()); |
|
243 |
wImagesDirectory.mkdir(); |
|
244 |
File cCSSDirectory = new File(cDirectory, cssDirectory.getName()); |
|
245 |
cCSSDirectory.mkdir(); |
|
246 |
File cJsDirectory = new File(cDirectory, jsDirectory.getName()); |
|
247 |
cJsDirectory.mkdir(); |
|
248 |
File cImagesDirectory = new File(cDirectory, imagesDirectory.getName()); |
|
249 |
cImagesDirectory.mkdir(); |
|
250 |
FileCopy.copyFiles(cssDirectory, wCSSDirectory); |
|
251 |
FileCopy.copyFiles(jsDirectory, wJsDirectory); |
|
252 |
FileCopy.copyFiles(imagesDirectory, wImagesDirectory); |
|
253 |
FileCopy.copyFiles(cssDirectory, cCSSDirectory); |
|
254 |
FileCopy.copyFiles(jsDirectory, cJsDirectory); |
|
255 |
FileCopy.copyFiles(imagesDirectory, cImagesDirectory); |
|
256 |
|
|
257 |
// Prepare import.xml files |
|
258 |
File wImportXMLFile = new File(wDirectory, "import.xml"); |
|
259 |
File cImportXMLFile = new File(cDirectory, "import.xml"); |
|
260 |
|
|
261 |
BaseParameters.createEmptyParams(wImportXMLFile, AsciiUtils.buildId(wDirectory.getName()).toUpperCase()); |
|
262 |
BaseParameters wParams = new BaseParameters(wImportXMLFile); |
|
263 |
wParams.load(); |
|
264 |
wParams.setSkipTokenization(true); |
|
265 |
wParams.setWordElement("w"); |
|
266 |
wParams.setDoAnnotation(false); |
|
267 |
wParams.setAnnotationLang("fr"); |
|
268 |
wParams.setWordsPerPage(9999999); |
|
269 |
wParams.setTextualPlans("", "note", "teiHeader,facsimile","pb,cb,lb"); |
|
270 |
wParams.getCorpusElement().setAttribute("font", "Junicode"); |
|
271 |
wParams.getEditionsElement(wParams.getCorpusElement()).setAttribute("default", "default,facsimile"); |
|
272 |
wParams.getCorpusElement().setAttribute("name", AsciiUtils.buildId(wDirectory.getName()).toUpperCase()); |
|
273 |
|
|
274 |
|
|
275 |
BaseParameters.createEmptyParams(cImportXMLFile, AsciiUtils.buildId(cDirectory.getName()).toUpperCase()); |
|
276 |
BaseParameters cParams = new BaseParameters(cImportXMLFile); |
|
277 |
cParams.load(); |
|
278 |
cParams.setSkipTokenization(true); |
|
279 |
cParams.setWordElement("c"); |
|
280 |
cParams.setDoAnnotation(false); |
|
281 |
cParams.setAnnotationLang("fr"); |
|
282 |
cParams.setWordsPerPage(9999999); |
|
283 |
cParams.setTextualPlans("", "note", "teiHeader,facsimile","pb,cb,lb"); |
|
284 |
cParams.getCorpusElement().setAttribute("font", "Junicode"); |
|
285 |
cParams.getEditionsElement(cParams.getCorpusElement()).setAttribute("default", "default,facsimile"); |
|
286 |
cParams.getCorpusElement().setAttribute("name", AsciiUtils.buildId(cDirectory.getName()).toUpperCase()); |
|
287 |
|
|
288 |
return cParams.save() && wParams.save(); |
|
289 |
} |
|
290 |
} |
|
0 | 291 |
tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/OntologiesProjection.java (revision 476) | ||
---|---|---|
1 |
package org.txm.oriflamms.functions; |
|
2 |
|
|
3 |
import java.io.File; |
|
4 |
import java.io.IOException; |
|
5 |
import java.io.Serializable; |
|
6 |
import java.util.ArrayList; |
|
7 |
import java.util.Arrays; |
|
8 |
import java.util.HashMap; |
|
9 |
import java.util.List; |
|
10 |
import java.util.regex.Pattern; |
|
11 |
|
|
12 |
import javax.xml.stream.XMLStreamException; |
|
13 |
|
|
14 |
import org.txm.importer.StaxIdentityParser; |
|
15 |
import org.txm.importer.StaxParser; |
|
16 |
|
|
17 |
class OntologiesProjection extends StaxIdentityParser { |
|
18 |
|
|
19 |
File xmlFile; |
|
20 |
|
|
21 |
String wordTag; |
|
22 |
String textname; |
Formats disponibles : Unified diff