Revision 2984
| tmp/org.txm.core/src/java/org/txm/scripts/importer/WriteIdAndNAttributesUltraGeneric.groovy (revision 2984) | | |
|---|---|---|
| 3 | 3 |
import org.txm.importer.StaxIdentityParser |
| 4 | 4 |
|
| 5 | 5 |
class WriteIdAndNAttributesUltraGeneric extends StaxIdentityParser {
|
| 6 |
|
|
| 6 |
|
|
| 7 | 7 |
String baseID = "" |
| 8 | 8 |
def rules = []; |
| 9 |
|
|
| 9 |
|
|
| 10 | 10 |
def idCounters = [:] |
| 11 | 11 |
def nCounters = [:] |
| 12 | 12 |
def parentIds = [:] |
| ... | ... | |
| 26 | 26 |
this.elementResets = elementResets |
| 27 | 27 |
|
| 28 | 28 |
this.baseID = xmlFile.getName(); |
| 29 |
if (baseID.indexOf(".") > 0) baseID = baseID.substring(0, baseID.indexOf("."))
|
|
| 29 |
if (baseID.indexOf(".") > 0) {
|
|
| 30 |
baseID = baseID.substring(0, baseID.indexOf("."))
|
|
| 31 |
} |
|
| 30 | 32 |
|
| 31 | 33 |
for (String element : elements) {
|
| 32 | 34 |
idCounters[element] = 1; |
| ... | ... | |
| 34 | 36 |
parentIds[element] = null; |
| 35 | 37 |
} |
| 36 | 38 |
} |
| 37 |
|
|
| 39 |
|
|
| 38 | 40 |
/** |
| 39 | 41 |
* Special rule to add @corresp=#ref to its reference element to element which @attribute=@value |
| 40 | 42 |
*/ |
| ... | ... | |
| 46 | 48 |
if (!idCounters.containsKey(localname) ) {
|
| 47 | 49 |
super.writeAttributes(); |
| 48 | 50 |
return; |
| 49 |
}
|
|
| 51 |
} |
|
| 50 | 52 |
println "write attributes $localname" |
| 51 | 53 |
boolean idFound = false |
| 52 | 54 |
boolean nFound = false |
| ... | ... | |
| 63 | 65 |
if ("id" == name) {
|
| 64 | 66 |
if (value == null) {
|
| 65 | 67 |
value = getNextID() |
| 66 |
}
|
|
| 68 |
} |
|
| 67 | 69 |
|
| 68 | 70 |
parentIds[localname] = value |
| 69 | 71 |
} else if ("n" == name) {
|
| ... | ... | |
| 116 | 118 |
value += "_"+parentIds[parent] |
| 117 | 119 |
else |
| 118 | 120 |
value += "_"+baseID |
| 119 |
|
|
| 121 |
|
|
| 120 | 122 |
value += "-"+idCounters[localname] |
| 121 | 123 |
|
| 122 | 124 |
return value |
| ... | ... | |
| 132 | 134 |
File xmlFile = new File("/home/mdecorde/TEMP/idnmissing.xml")
|
| 133 | 135 |
File outFile = new File("/home/mdecorde/TEMP/idnmissing-fix.xml")
|
| 134 | 136 |
|
| 135 |
WriteIdAndNAttributesUltraGeneric wiana = new WriteIdAndNAttributesUltraGeneric(xmlFile, |
|
| 136 |
["milestone", "pb", "cb", "lb"], |
|
| 137 |
["milestone":"unit=surface", "pb":null, "cb":null, "lb":null], |
|
| 138 |
["milestone":"surf", "pb":"page", "cb":"col", "lb":"line"], |
|
| 139 |
["milestone":null, "pb":null, "cb":"pb", "lb":"cb"], |
|
| 140 |
["milestone":[], "pb":["cb", "lb"], "cb":[ "lb"], "lb":[]]) |
|
| 137 |
WriteIdAndNAttributesUltraGeneric wiana = new WriteIdAndNAttributesUltraGeneric(xmlFile, |
|
| 138 |
[ |
|
| 139 |
"milestone", |
|
| 140 |
"pb", |
|
| 141 |
"cb", |
|
| 142 |
"lb" |
|
| 143 |
], |
|
| 144 |
["milestone":"unit=surface", "pb":null, "cb":null, "lb":null], |
|
| 145 |
["milestone":"surf", "pb":"page", "cb":"col", "lb":"line"], |
|
| 146 |
["milestone":null, "pb":null, "cb":"pb", "lb":"cb"], |
|
| 147 |
["milestone":[], "pb":["cb", "lb"], "cb":["lb"], "lb":[]]) |
|
| 141 | 148 |
println wiana.process(outFile) |
| 142 | 149 |
} |
| 143 | 150 |
} |
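
Note on the id scheme visible in getNextID() above: a missing @id is built from the id of the registered parent element when one exists, from the file's base name otherwise, followed by a per-element counter. Parts of the method are elided from this diff, so the stand-alone Java sketch below (illustrative names only, not part of the revision) shows only the parent-or-baseID fallback plus the counter suffix:

```java
import java.util.HashMap;
import java.util.Map;

// Illustrative sketch of the id composition: parent id (or baseID) + "-" + per-element counter.
public class NextIdSketch {

    static final Map<String, Integer> idCounters = new HashMap<>();

    // parentId may be null when the element has no registered parent yet.
    static String nextId(String baseID, String localname, String parentId) {
        int n = idCounters.merge(localname, 1, Integer::sum); // increment the per-element counter
        String prefix = (parentId != null) ? parentId : baseID;
        // Leading the id with the element name is an assumption; that part is elided in the diff.
        return localname + "_" + prefix + "-" + n;
    }

    public static void main(String[] args) {
        System.out.println(nextId("idnmissing", "pb", null));               // pb_idnmissing-1
        System.out.println(nextId("idnmissing", "lb", "pb_idnmissing-1"));  // lb_pb_idnmissing-1-1
    }
}
```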
| tmp/org.txm.core/src/java/org/txm/objects/Project.java (revision 2984) | | |
|---|---|---|
| 489 | 489 |
/** |
| 490 | 490 |
* Loads and creates the persisted results from the preferences service. |
| 491 | 491 |
*/ |
| 492 |
public void loadResults(Class clazz) {
|
|
| 492 |
public void loadResults(Class<? extends TXMResult> clazz) {
|
|
| 493 | 493 |
|
| 494 | 494 |
Log.finest("*** Project.loadResults(): reloading project persitent results...");
|
| 495 | 495 |
|
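
The only change here narrows the raw `Class` parameter to `Class<? extends TXMResult>`, so callers can no longer pass an unrelated class by accident. A small stand-alone illustration of the same bounded-wildcard idea (the types below are placeholders, not TXM classes):

```java
// Placeholder types standing in for TXMResult and one of its subclasses.
class Result {}
class Concordance extends Result {}

public class BoundedClassSketch {

    // Raw parameter: accepts any class, mistakes surface only at runtime.
    static void loadRaw(Class clazz) {}

    // Bounded wildcard: the compiler rejects classes outside the Result hierarchy.
    static void loadBounded(Class<? extends Result> clazz) {}

    public static void main(String[] args) {
        loadRaw(String.class);           // compiles, although String is not a Result
        loadBounded(Concordance.class);  // OK
        // loadBounded(String.class);    // does not compile: String is not a subtype of Result
    }
}
```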
| tmp/org.txm.oriflamms.rcp/groovy/org/txm/macro/oriflamms/prepare/TEI2ProjectMacro.groovy (revision 2984) | | |
|---|---|---|
| 51 | 51 |
|
| 52 | 52 |
File xmlFileParentDirectory = xmlFile.getParentFile() |
| 53 | 53 |
String projectName = xmlFile.getName() |
| 54 |
if (projectName.indexOf(".") > 0) projectName = projectName.substring(0, projectName.indexOf("."))
|
|
| 54 |
if (projectName.lastIndexOf(".") > 0) projectName = projectName.substring(0, projectName.lastIndexOf("."))
|
|
| 55 | 55 |
File projectDirectory = new File(xmlFileParentDirectory, projectName) |
| 56 | 56 |
projectDirectory.deleteDir() |
| 57 | 57 |
if (projectDirectory.exists()) {
|
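
The change above switches from `indexOf(".")` to `lastIndexOf(".")` when deriving the project name, which only matters for file names containing more than one dot. A quick JDK-only illustration (the file name is hypothetical):

```java
public class ExtensionCutSketch {
    public static void main(String[] args) {
        String name = "MyEdition.v2.xml"; // hypothetical file name with two dots

        // Old behaviour: cut at the first dot, losing part of the name.
        System.out.println(name.substring(0, name.indexOf(".")));     // MyEdition

        // New behaviour: cut at the last dot, keeping everything but the extension.
        System.out.println(name.substring(0, name.lastIndexOf("."))); // MyEdition.v2
    }
}
```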
| tmp/org.txm.oriflamms.rcp/groovy/org/txm/macro/oriflamms/prepare/OntologiesProjection.groovy (revision 2984) | | |
|---|---|---|
| 4 | 4 |
import org.codehaus.groovy.transform.trait.SuperCallTraitTransformer; |
| 5 | 5 |
import org.txm.importer.StaxIdentityParser; |
| 6 | 6 |
import org.txm.scripts.importer.StaxParser; |
| 7 |
import org.txm.utils.FileUtils |
|
| 7 | 8 |
|
| 8 | 9 |
class OntologiesProjection extends StaxIdentityParser {
|
| 9 | 10 |
|
| ... | ... | |
| 29 | 30 |
this.xmlFile = xmlFile |
| 30 | 31 |
this.ontologies_links_directory = new File(corpusDirectory, "ontologies_links") |
| 31 | 32 |
|
| 32 |
textname = xmlFile.getName() |
|
| 33 |
int idx = textname.lastIndexOf(".xml")
|
|
| 34 |
if (idx > 0) textname = textname.substring(0, idx) |
|
| 33 |
textname = FileUtils.stripExtension(xmlFile) |
|
| 35 | 34 |
textname = textname.replaceAll("-c", "")
|
| 36 | 35 |
|
| 37 | 36 |
this.wordTag = "c"; |
| tmp/org.txm.oriflamms.rcp/groovy/org/txm/macro/oriflamms/prepare/Project2XTZMacro.groovy (revision 2984) | | |
|---|---|---|
| 93 | 93 |
|
| 94 | 94 |
for (File xmlFile : xmlFiles) {
|
| 95 | 95 |
if (xmlFile.getName().endsWith("-w.xml")) {
|
| 96 |
String name = xmlFile.getName().substring(0, xmlFile.getName().lastIndexOf(".xml"))
|
|
| 96 |
String name = FileUtils.stripExtension(xmlFile);
|
|
| 97 | 97 |
wDirectory = new File(txmDirectory, name) |
| 98 | 98 |
wFile = xmlFile |
| 99 | 99 |
} else if (xmlFile.getName().endsWith("-c.xml")) {
|
| 100 |
String name = xmlFile.getName().substring(0, xmlFile.getName().lastIndexOf(".xml"))
|
|
| 100 |
String name = FileUtils.stripExtension(xmlFile);
|
|
| 101 | 101 |
cDirectory = new File(txmDirectory, name) |
| 102 | 102 |
cFile = xmlFile |
| 103 | 103 |
} |
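
Both branches above now derive the per-text directory name with `FileUtils.stripExtension(xmlFile)`, which keeps the `-w` / `-c` marker as long as stripExtension drops only the last extension, as the previous `lastIndexOf(".xml")` code did (an assumption about that helper, not something stated in this revision). A JDK-only sketch of the equivalent dispatch:

```java
import java.io.File;

public class SuffixDispatchSketch {

    // Hypothetical stand-in for FileUtils.stripExtension: drop only the last extension.
    static String stripExtension(File f) {
        String n = f.getName();
        int idx = n.lastIndexOf('.');
        return idx > 0 ? n.substring(0, idx) : n;
    }

    public static void main(String[] args) {
        for (File f : new File[] { new File("FontenayTest-w.xml"), new File("FontenayTest-c.xml") }) {
            String name = stripExtension(f); // e.g. FontenayTest-w
            String kind = f.getName().endsWith("-w.xml") ? "word" : "character";
            System.out.println(name + " -> " + kind + " directory");
        }
    }
}
```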
| tmp/org.txm.oriflamms.rcp/groovy/org/txm/macro/oriflamms/prepare/CoordsProjection.groovy (revision 2984) | | |
|---|---|---|
| 2 | 2 |
|
| 3 | 3 |
import org.txm.importer.StaxIdentityParser; |
| 4 | 4 |
import org.txm.scripts.importer.StaxParser; |
| 5 |
import org.txm.utils.FileUtils |
|
| 5 | 6 |
|
| 6 | 7 |
class CoordsProjection extends StaxIdentityParser {
|
| 7 | 8 |
|
| ... | ... | |
| 30 | 31 |
this.zones_directory = zones_directory |
| 31 | 32 |
this.wordTag = wordTag; |
| 32 | 33 |
|
| 33 |
textname = xmlFile.getName() |
|
| 34 |
int idx = textname.lastIndexOf(".xml")
|
|
| 35 |
if (idx > 0) textname = textname.substring(0, idx) |
|
| 34 |
textname = FileUtils.stripExtension(xmlFile) |
|
| 36 | 35 |
|
| 37 |
idx = textname.indexOf("-w")
|
|
| 36 |
int idx = textname.indexOf("-w")
|
|
| 38 | 37 |
if (idx > 0) {
|
| 39 | 38 |
textname = textname.substring(0, idx) |
| 40 | 39 |
xmlType = "word" |
| tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/CoordsProjection.java (revision 2984) | | |
|---|---|---|
| 8 | 8 |
|
| 9 | 9 |
import org.txm.importer.StaxIdentityParser; |
| 10 | 10 |
import org.txm.scripts.importer.StaxParser; |
| 11 |
import org.txm.utils.FileUtils; |
|
| 11 | 12 |
|
| 12 | 13 |
class CoordsProjection extends StaxIdentityParser {
|
| 13 |
|
|
| 14 |
|
|
| 14 | 15 |
File xmlFile; |
| 16 |
|
|
| 15 | 17 |
File img_links_directory; |
| 18 |
|
|
| 16 | 19 |
File zones_directory; |
| 17 |
|
|
| 20 |
|
|
| 18 | 21 |
String wordTag; |
| 22 |
|
|
| 19 | 23 |
String textname; |
| 24 |
|
|
| 20 | 25 |
String milestone; |
| 21 |
|
|
| 26 |
|
|
| 22 | 27 |
String current_img_file = ""; |
| 28 |
|
|
| 23 | 29 |
String current_zone_file = ""; |
| 24 |
|
|
| 30 |
|
|
| 25 | 31 |
String xmlType; |
| 32 |
|
|
| 26 | 33 |
String group; |
| 27 |
|
|
| 28 |
HashMap<String, String[]> zones = new HashMap<String, String[]>(); |
|
| 29 |
HashMap<String, String> links = new HashMap<String, String>(); |
|
| 30 |
|
|
| 34 |
|
|
| 35 |
HashMap<String, String[]> zones = new HashMap<>(); |
|
| 36 |
|
|
| 37 |
HashMap<String, String> links = new HashMap<>(); |
|
| 38 |
|
|
| 31 | 39 |
public CoordsProjection(File xmlFile, File img_links_directory, File zones_directory, String wordTag) throws IOException, XMLStreamException {
|
| 32 | 40 |
super(xmlFile); |
| 33 |
|
|
| 41 |
|
|
| 34 | 42 |
this.xmlFile = xmlFile; |
| 35 | 43 |
this.img_links_directory = img_links_directory; |
| 36 | 44 |
this.zones_directory = zones_directory; |
| 37 | 45 |
this.wordTag = wordTag; |
| 38 |
|
|
| 39 |
textname = xmlFile.getName(); |
|
| 40 |
int idx = textname.lastIndexOf(".xml");
|
|
| 41 |
if (idx > 0) textname = textname.substring(0, idx); |
|
| 42 |
|
|
| 43 |
idx = textname.indexOf("-w");
|
|
| 46 |
|
|
| 47 |
if (!FileUtils.isXMLFile(xmlFile)) {
|
|
| 48 |
throw new IllegalArgumentException("File is not an XML file: " + xmlFile);
|
|
| 49 |
} |
|
| 50 |
|
|
| 51 |
textname = FileUtils.stripExtension(xmlFile); |
|
| 52 |
|
|
| 53 |
int idx = textname.indexOf("-w");
|
|
| 44 | 54 |
if (idx > 0) {
|
| 45 | 55 |
textname = textname.substring(0, idx); |
| 46 | 56 |
xmlType = "word"; |
| 47 | 57 |
} |
| 48 |
|
|
| 58 |
|
|
| 49 | 59 |
idx = textname.indexOf("-c");
|
| 50 | 60 |
if (idx > 0) {
|
| 51 | 61 |
textname = textname.substring(0, idx); |
| 52 | 62 |
xmlType = "character"; |
| 53 | 63 |
} |
| 54 | 64 |
} |
| 55 |
|
|
| 65 |
|
|
| 66 |
@Override |
|
| 56 | 67 |
public void processStartElement() throws XMLStreamException, IOException {
|
| 57 | 68 |
super.processStartElement(); |
| 58 | 69 |
if (localname.equals("milestone")) {
|
| 59 | 70 |
String id = ""; |
| 60 |
String unit= ""; |
|
| 61 |
for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
|
|
| 71 |
String unit = "";
|
|
| 72 |
for (int i = 0; i < parser.getAttributeCount(); i++) {
|
|
| 62 | 73 |
if (parser.getAttributeLocalName(i).equals("id")) {
|
| 63 | 74 |
id = parser.getAttributeValue(i); |
| 64 |
} else if (parser.getAttributeLocalName(i).equals("unit")) {
|
|
| 75 |
} |
|
| 76 |
else if (parser.getAttributeLocalName(i).equals("unit")) {
|
|
| 65 | 77 |
unit = parser.getAttributeValue(i); |
| 66 | 78 |
} |
| 67 | 79 |
} |
| 68 |
|
|
| 80 |
|
|
| 69 | 81 |
if (unit.equals("surface")) {
|
| 70 | 82 |
milestone = id; |
| 71 | 83 |
} |
| 72 |
} else if (localname.equals(wordTag)) {
|
|
| 84 |
} |
|
| 85 |
else if (localname.equals(wordTag)) {
|
|
| 73 | 86 |
String id = ""; |
| 74 |
for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
|
|
| 87 |
for (int i = 0; i < parser.getAttributeCount(); i++) {
|
|
| 75 | 88 |
if (parser.getAttributeLocalName(i).equals("id")) {
|
| 76 | 89 |
id = parser.getAttributeValue(i); |
| 77 | 90 |
break; |
| 78 | 91 |
} |
| 79 | 92 |
} |
| 80 |
|
|
| 93 |
|
|
| 81 | 94 |
// load next data if needed |
| 82 |
String img_file_name = textname+"_"+milestone+"-links.xml";
|
|
| 95 |
String img_file_name = textname + "_" + milestone + "-links.xml";
|
|
| 83 | 96 |
if (!current_img_file.equals(img_file_name)) { // rebuild hashmaps
|
| 84 |
String zone_file_name = textname+"_"+milestone+"-zones.xml";
|
|
| 97 |
String zone_file_name = textname + "_" + milestone + "-zones.xml";
|
|
| 85 | 98 |
loadNextData(img_file_name, zone_file_name); |
| 86 | 99 |
} |
| 87 |
|
|
| 88 |
// println "Find coords for word_id="+id+" in "+img_file_name+" and "+zone_file_name
|
|
| 89 |
// println "zone: "+links[id]
|
|
| 90 |
// println "coords: "+zones[links[id]]
|
|
| 100 |
|
|
| 101 |
// println "Find coords for word_id="+id+" in "+img_file_name+" and "+zone_file_name
|
|
| 102 |
// println "zone: "+links[id]
|
|
| 103 |
// println "coords: "+zones[links[id]]
|
|
| 91 | 104 |
if (zones.size() > 0 && links.size() > 0) {
|
| 92 | 105 |
String[] coords = zones.get(links.get(id)); |
| 93 | 106 |
if (coords != null) {
|
| 94 | 107 |
if (coords[0] == null || coords[1] == null || coords[2] == null || coords[3] == null) {
|
| 95 |
System.out.println("WARNING one of coordinates is missing: "+coords);
|
|
| 96 |
} else {
|
|
| 108 |
System.out.println("WARNING one of coordinates is missing: " + coords);
|
|
| 109 |
} |
|
| 110 |
else {
|
|
| 97 | 111 |
try {
|
| 98 | 112 |
writer.writeAttribute("x1", coords[0]);
|
| 99 | 113 |
writer.writeAttribute("y1", coords[1]);
|
| 100 | 114 |
writer.writeAttribute("x2", coords[2]);
|
| 101 | 115 |
writer.writeAttribute("y2", coords[3]);
|
| 102 |
} catch (XMLStreamException e) {
|
|
| 116 |
} |
|
| 117 |
catch (XMLStreamException e) {
|
|
| 103 | 118 |
// TODO Auto-generated catch block |
| 104 | 119 |
e.printStackTrace(); |
| 105 | 120 |
} |
| 106 | 121 |
} |
| 107 |
} else {
|
|
| 108 |
System.out.println("WARNING No group for word id="+id+" and link id="+links.get(id)+" in text "+textname);
|
|
| 109 | 122 |
} |
| 123 |
else {
|
|
| 124 |
System.out.println("WARNING No group for word id=" + id + " and link id=" + links.get(id) + " in text " + textname);
|
|
| 125 |
} |
|
| 110 | 126 |
} |
| 111 | 127 |
} |
| 112 | 128 |
} |
| 113 |
|
|
| 129 |
|
|
| 114 | 130 |
protected void loadNextData(String img_file_name, String zone_file_name) {
|
| 115 | 131 |
File img_link_file = new File(img_links_directory, img_file_name); |
| 116 | 132 |
File zone_file = new File(zones_directory, zone_file_name); |
| 117 |
|
|
| 133 |
|
|
| 118 | 134 |
zones.clear(); |
| 119 | 135 |
links.clear(); |
| 120 | 136 |
if (zone_file.exists()) {
|
| 121 | 137 |
StaxParser pZones = new StaxParser(zone_file) {
|
| 122 |
public void processStartElement() {
|
|
| 123 |
if (localname.equals("zone")) {
|
|
| 124 |
String type = ""; |
|
| 125 |
String idZone = ""; |
|
| 126 |
String ulx = "", uly = "", lrx = "", lry = ""; |
|
| 127 |
|
|
| 128 |
for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
|
|
| 129 |
if (parser.getAttributeLocalName(i).equals("id")) {
|
|
| 130 |
idZone = parser.getAttributeValue(i); |
|
| 131 |
} else if (parser.getAttributeLocalName(i).equals("type")) {
|
|
| 132 |
type = parser.getAttributeValue(i); |
|
| 133 |
} else if (parser.getAttributeLocalName(i).equals("ulx")) {
|
|
| 134 |
ulx = parser.getAttributeValue(i); |
|
| 135 |
} else if (parser.getAttributeLocalName(i).equals("uly")) {
|
|
| 136 |
uly = parser.getAttributeValue(i); |
|
| 137 |
} else if (parser.getAttributeLocalName(i).equals("lrx")) {
|
|
| 138 |
lrx = parser.getAttributeValue(i); |
|
| 139 |
} else if (parser.getAttributeLocalName(i).equals("lry")) {
|
|
| 140 |
lry = parser.getAttributeValue(i); |
|
| 141 |
} |
|
| 142 |
} |
|
| 143 |
|
|
| 144 |
if (type.equals(xmlType)) {
|
|
| 145 |
zones.put(idZone, new String[]{ulx, uly, lrx, lry});
|
|
| 146 |
} |
|
| 147 |
|
|
| 138 |
|
|
| 139 |
@Override |
|
| 140 |
public void processStartElement() {
|
|
| 141 |
if (localname.equals("zone")) {
|
|
| 142 |
String type = ""; |
|
| 143 |
String idZone = ""; |
|
| 144 |
String ulx = "", uly = "", lrx = "", lry = ""; |
|
| 145 |
|
|
| 146 |
for (int i = 0; i < parser.getAttributeCount(); i++) {
|
|
| 147 |
if (parser.getAttributeLocalName(i).equals("id")) {
|
|
| 148 |
idZone = parser.getAttributeValue(i); |
|
| 148 | 149 |
} |
| 150 |
else if (parser.getAttributeLocalName(i).equals("type")) {
|
|
| 151 |
type = parser.getAttributeValue(i); |
|
| 152 |
} |
|
| 153 |
else if (parser.getAttributeLocalName(i).equals("ulx")) {
|
|
| 154 |
ulx = parser.getAttributeValue(i); |
|
| 155 |
} |
|
| 156 |
else if (parser.getAttributeLocalName(i).equals("uly")) {
|
|
| 157 |
uly = parser.getAttributeValue(i); |
|
| 158 |
} |
|
| 159 |
else if (parser.getAttributeLocalName(i).equals("lrx")) {
|
|
| 160 |
lrx = parser.getAttributeValue(i); |
|
| 161 |
} |
|
| 162 |
else if (parser.getAttributeLocalName(i).equals("lry")) {
|
|
| 163 |
lry = parser.getAttributeValue(i); |
|
| 164 |
} |
|
| 149 | 165 |
} |
| 150 |
}; |
|
| 166 |
|
|
| 167 |
if (type.equals(xmlType)) {
|
|
| 168 |
zones.put(idZone, new String[] { ulx, uly, lrx, lry });
|
|
| 169 |
} |
|
| 170 |
|
|
| 171 |
} |
|
| 172 |
} |
|
| 173 |
}; |
|
| 151 | 174 |
pZones.process(); |
| 152 | 175 |
} |
| 153 | 176 |
if (img_link_file.exists()) {
|
| 154 | 177 |
StaxParser pLinks = new StaxParser(img_link_file) {
|
| 155 |
public void processStartElement() {
|
|
| 156 |
if (localname.equals("linkGrp")) {
|
|
| 157 |
for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
|
|
| 158 |
if (parser.getAttributeLocalName(i).equals("type")) {
|
|
| 159 |
group = parser.getAttributeValue(i); |
|
| 160 |
break; |
|
| 161 |
} |
|
| 162 |
} |
|
| 163 |
} else if (localname.equals("link") && group.startsWith(xmlType)) {
|
|
| 164 |
String target = ""; |
|
| 165 |
|
|
| 166 |
for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
|
|
| 167 |
if (parser.getAttributeLocalName(i).equals("target")) {
|
|
| 168 |
target = parser.getAttributeValue(i); |
|
| 169 |
break; |
|
| 170 |
} |
|
| 171 |
} |
|
| 172 |
|
|
| 173 |
String[] split = target.split(" ");
|
|
| 174 |
links.put(split[0].substring(4), split[1].substring(4)); |
|
| 178 |
|
|
| 179 |
@Override |
|
| 180 |
public void processStartElement() {
|
|
| 181 |
if (localname.equals("linkGrp")) {
|
|
| 182 |
for (int i = 0; i < parser.getAttributeCount(); i++) {
|
|
| 183 |
if (parser.getAttributeLocalName(i).equals("type")) {
|
|
| 184 |
group = parser.getAttributeValue(i); |
|
| 185 |
break; |
|
| 175 | 186 |
} |
| 176 | 187 |
} |
| 177 |
}; |
|
| 188 |
} |
|
| 189 |
else if (localname.equals("link") && group.startsWith(xmlType)) {
|
|
| 190 |
String target = ""; |
|
| 191 |
|
|
| 192 |
for (int i = 0; i < parser.getAttributeCount(); i++) {
|
|
| 193 |
if (parser.getAttributeLocalName(i).equals("target")) {
|
|
| 194 |
target = parser.getAttributeValue(i); |
|
| 195 |
break; |
|
| 196 |
} |
|
| 197 |
} |
|
| 198 |
|
|
| 199 |
String[] split = target.split(" ");
|
|
| 200 |
links.put(split[0].substring(4), split[1].substring(4)); |
|
| 201 |
} |
|
| 202 |
} |
|
| 203 |
}; |
|
| 178 | 204 |
pLinks.process(); |
| 179 | 205 |
} |
| 180 |
//println "zones size: "+zones.size() |
|
| 181 |
//println "links size: "+links.size() |
|
| 182 |
|
|
| 206 |
// println "zones size: "+zones.size()
|
|
| 207 |
// println "links size: "+links.size()
|
|
| 208 |
|
|
| 183 | 209 |
current_img_file = img_file_name; |
| 184 | 210 |
} |
| 185 |
|
|
| 211 |
|
|
| 186 | 212 |
public static void main(String[] args) {
|
| 187 | 213 |
File corpusDirectory = new File("/home/mdecorde/TEMP/testori/FontenatTestAlignement");
|
| 188 | 214 |
File xmlFile = new File(corpusDirectory, "txm/FontenayTest-w/FontenayTest-w.xml"); |
| 189 | 215 |
File img_links_directory = new File(corpusDirectory, "img_links"); |
| 190 | 216 |
File zones_directory = new File(corpusDirectory, "zones"); |
| 191 |
|
|
| 217 |
|
|
| 192 | 218 |
File outputFile = new File(corpusDirectory, "txm/FontenayTest-w/FontenayTest-w-coords2.xml"); |
| 193 |
|
|
| 219 |
|
|
| 194 | 220 |
CoordsProjection cp; |
| 195 | 221 |
try {
|
| 196 | 222 |
cp = new CoordsProjection(xmlFile, img_links_directory, zones_directory, "w"); |
| 197 | 223 |
System.out.println(cp.process(outputFile)); |
| 198 |
} catch (IOException e) {
|
|
| 224 |
} |
|
| 225 |
catch (IOException e) {
|
|
| 199 | 226 |
// TODO Auto-generated catch block |
| 200 | 227 |
e.printStackTrace(); |
| 201 |
} catch (XMLStreamException e) {
|
|
| 228 |
} |
|
| 229 |
catch (XMLStreamException e) {
|
|
| 202 | 230 |
// TODO Auto-generated catch block |
| 203 | 231 |
e.printStackTrace(); |
| 204 | 232 |
} |
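
In processStartElement() above, a word id is resolved in two steps: `links` maps the word id to a zone id, and `zones` maps the zone id to its four coordinates. A compact JDK-only sketch of that double lookup, including the missing-coordinate warning (ids and values are made up for the example; note that concatenating the array itself, as the revision does, prints the array reference, whereas Arrays.toString prints the values):

```java
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

public class CoordsLookupSketch {

    public static void main(String[] args) {
        Map<String, String> links = new HashMap<>();   // word id -> zone id
        Map<String, String[]> zones = new HashMap<>(); // zone id -> {x1, y1, x2, y2}

        links.put("w_1", "z_1");
        zones.put("z_1", new String[] { "10", "20", "110", "45" });

        String wordId = "w_1";
        String[] coords = zones.get(links.get(wordId)); // two-step lookup
        if (coords == null) {
            System.out.println("WARNING No zone for word id=" + wordId);
        }
        else if (coords[0] == null || coords[1] == null || coords[2] == null || coords[3] == null) {
            // Arrays.toString shows the coordinate values instead of the array reference.
            System.out.println("WARNING one of coordinates is missing: " + Arrays.toString(coords));
        }
        else {
            System.out.println("x1=" + coords[0] + " y1=" + coords[1]
                    + " x2=" + coords[2] + " y2=" + coords[3]);
        }
    }
}
```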
| tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/Project2XTZ.java (revision 2984) | | |
|---|---|---|
| 14 | 14 |
import org.txm.utils.AsciiUtils; |
| 15 | 15 |
import org.txm.utils.BundleUtils; |
| 16 | 16 |
import org.txm.utils.DeleteDir; |
| 17 |
import org.txm.utils.FileUtils; |
|
| 17 | 18 |
import org.txm.utils.io.FileCopy; |
| 18 | 19 |
import org.txm.utils.io.IOUtils; |
| 19 | 20 |
import org.txm.utils.xml.UpdateXSLParameters; |
| 20 | 21 |
import org.xml.sax.SAXException; |
| 21 | 22 |
|
| 22 | 23 |
public class Project2XTZ {
|
| 24 |
|
|
| 23 | 25 |
File projectDirectory; |
| 26 |
|
|
| 24 | 27 |
public Project2XTZ(File projectDirectory) {
|
| 25 | 28 |
this.projectDirectory = projectDirectory; |
| 26 | 29 |
} |
| 27 |
|
|
| 30 |
|
|
| 28 | 31 |
public boolean process() throws IOException, TransformerException, ParserConfigurationException, SAXException, XMLStreamException {
|
| 29 | 32 |
File oriflammsMacroDirectory = new File(BundleUtils.getBundleFile("org.txm.oriflamms.rcp"), "res");
|
| 30 |
System.out.println("Ressources files directory: "+oriflammsMacroDirectory);
|
|
| 33 |
System.out.println("Ressources files directory: " + oriflammsMacroDirectory);
|
|
| 31 | 34 |
if (!oriflammsMacroDirectory.exists()) {
|
| 32 |
System.out.println("Oriflamms macro directory not found: "+oriflammsMacroDirectory+". Aborting");
|
|
| 35 |
System.out.println("Oriflamms macro directory not found: " + oriflammsMacroDirectory + ". Aborting");
|
|
| 33 | 36 |
return false; |
| 34 | 37 |
} |
| 35 | 38 |
File wFrontXSLFile = new File(oriflammsMacroDirectory, "txm-front-teioriflammsw-xtz.xsl"); |
| 36 | 39 |
if (!wFrontXSLFile.exists()) {
|
| 37 |
System.out.println("Oriflamms to XML-XTZ front XSL file is missing: "+wFrontXSLFile+". Aborting");
|
|
| 40 |
System.out.println("Oriflamms to XML-XTZ front XSL file is missing: " + wFrontXSLFile + ". Aborting");
|
|
| 38 | 41 |
return false; |
| 39 | 42 |
} |
| 40 | 43 |
File cFrontXSLFile = new File(oriflammsMacroDirectory, "txm-front-teioriflammsc-xtz.xsl"); |
| 41 | 44 |
if (!cFrontXSLFile.exists()) {
|
| 42 |
System.out.println("Oriflamms to XML-XTZ front XSL file is missing: "+cFrontXSLFile+". Aborting");
|
|
| 45 |
System.out.println("Oriflamms to XML-XTZ front XSL file is missing: " + cFrontXSLFile + ". Aborting");
|
|
| 43 | 46 |
return false; |
| 44 | 47 |
} |
| 45 |
|
|
| 48 |
|
|
| 46 | 49 |
File cSplitXSLFile = new File(oriflammsMacroDirectory, "1-oriflamms-split-surfaces.xsl"); |
| 47 | 50 |
if (!cSplitXSLFile.exists()) {
|
| 48 |
System.out.println("Oriflamms to XML-XTZ split XSL file is missing: "+cSplitXSLFile+". Aborting");
|
|
| 51 |
System.out.println("Oriflamms to XML-XTZ split XSL file is missing: " + cSplitXSLFile + ". Aborting");
|
|
| 49 | 52 |
return false; |
| 50 | 53 |
} |
| 51 |
|
|
| 54 |
|
|
| 52 | 55 |
File editionXSLFile1 = new File(oriflammsMacroDirectory, "1-default-html.xsl"); |
| 53 | 56 |
if (!editionXSLFile1.exists()) {
|
| 54 |
System.out.println("Oriflamms to XML-XTZ edition XSL file is missing: "+editionXSLFile1+".");
|
|
| 57 |
System.out.println("Oriflamms to XML-XTZ edition XSL file is missing: " + editionXSLFile1 + ".");
|
|
| 55 | 58 |
return false; |
| 56 | 59 |
} |
| 57 | 60 |
File editionXSLFile2 = new File(oriflammsMacroDirectory, "2-default-pager.xsl"); |
| 58 | 61 |
if (!editionXSLFile2.exists()) {
|
| 59 |
System.out.println("Oriflamms to XML-XTZ edition XSL file is missing: "+editionXSLFile2+".");
|
|
| 62 |
System.out.println("Oriflamms to XML-XTZ edition XSL file is missing: " + editionXSLFile2 + ".");
|
|
| 60 | 63 |
return false; |
| 61 | 64 |
} |
| 62 | 65 |
File editionXSLFile3 = new File(oriflammsMacroDirectory, "3-facsimile-pager.xsl"); |
| 63 | 66 |
if (!editionXSLFile3.exists()) {
|
| 64 |
System.out.println("Oriflamms to XML-XTZ edition XSL file is missing: "+editionXSLFile3+".");
|
|
| 67 |
System.out.println("Oriflamms to XML-XTZ edition XSL file is missing: " + editionXSLFile3 + ".");
|
|
| 65 | 68 |
return false; |
| 66 | 69 |
} |
| 67 | 70 |
File cssDirectory = new File(oriflammsMacroDirectory, "css"); |
| 68 | 71 |
if (!cssDirectory.exists()) {
|
| 69 |
System.out.println("Oriflamms css directory is missing: "+cssDirectory+".");
|
|
| 72 |
System.out.println("Oriflamms css directory is missing: " + cssDirectory + ".");
|
|
| 70 | 73 |
return false; |
| 71 | 74 |
} |
| 72 | 75 |
File jsDirectory = new File(oriflammsMacroDirectory, "js"); |
| 73 | 76 |
if (!jsDirectory.exists()) {
|
| 74 |
System.out.println("Oriflamms js directory is missing: "+jsDirectory+".");
|
|
| 77 |
System.out.println("Oriflamms js directory is missing: " + jsDirectory + ".");
|
|
| 75 | 78 |
return false; |
| 76 | 79 |
} |
| 77 | 80 |
File imagesDirectory = new File(oriflammsMacroDirectory, "images"); |
| 78 | 81 |
if (!imagesDirectory.exists()) {
|
| 79 |
System.out.println("Oriflamms images directory is missing: "+imagesDirectory+".");
|
|
| 82 |
System.out.println("Oriflamms images directory is missing: " + imagesDirectory + ".");
|
|
| 80 | 83 |
return false; |
| 81 | 84 |
} |
| 82 |
|
|
| 85 |
|
|
| 83 | 86 |
File textDirectory = new File(projectDirectory, "texts"); |
| 84 |
|
|
| 87 |
|
|
| 85 | 88 |
File txmDirectory = new File(projectDirectory, "txm"); |
| 86 | 89 |
if (txmDirectory.exists()) DeleteDir.deleteDirectory(txmDirectory); |
| 87 | 90 |
txmDirectory.mkdir(); |
| 88 | 91 |
if (!txmDirectory.exists()) {
|
| 89 |
System.out.println("Error: the 'txm' directory could not be created: "+txmDirectory+". Aborting.");
|
|
| 92 |
System.out.println("Error: the 'txm' directory could not be created: " + txmDirectory + ". Aborting.");
|
|
| 90 | 93 |
return false; |
| 91 | 94 |
} |
| 92 |
|
|
| 95 |
|
|
| 93 | 96 |
File wDirectory = null; |
| 94 | 97 |
File cDirectory = null; |
| 95 | 98 |
File wFile = null; |
| 96 | 99 |
File cFile = null; |
| 97 |
|
|
| 100 |
|
|
| 98 | 101 |
File[] xmlFiles = textDirectory.listFiles(IOUtils.HIDDENFILE_FILTER); |
| 99 | 102 |
if (xmlFiles == null) return false; |
| 100 |
|
|
| 103 |
|
|
| 101 | 104 |
for (File xmlFile : xmlFiles) {
|
| 102 | 105 |
if (xmlFile.getName().endsWith("-w.xml")) {
|
| 103 |
String name = xmlFile.getName().substring(0, xmlFile.getName().lastIndexOf(".xml"));
|
|
| 106 |
String name = FileUtils.stripExtension(xmlFile);
|
|
| 104 | 107 |
wDirectory = new File(txmDirectory, name); |
| 105 | 108 |
wFile = xmlFile; |
| 106 |
} else if (xmlFile.getName().endsWith("-c.xml")) {
|
|
| 107 |
String name = xmlFile.getName().substring(0, xmlFile.getName().lastIndexOf(".xml"));
|
|
| 109 |
} |
|
| 110 |
else if (xmlFile.getName().endsWith("-c.xml")) {
|
|
| 111 |
String name = FileUtils.stripExtension(xmlFile); |
|
| 108 | 112 |
cDirectory = new File(txmDirectory, name); |
| 109 | 113 |
cFile = xmlFile; |
| 110 | 114 |
} |
| 111 | 115 |
} |
| 112 |
|
|
| 116 |
|
|
| 113 | 117 |
if (wDirectory == null) {
|
| 114 |
System.out.println("The Word corpus XML file was not found in "+textDirectory+". Aborting.");
|
|
| 118 |
System.out.println("The Word corpus XML file was not found in " + textDirectory + ". Aborting.");
|
|
| 115 | 119 |
return false; |
| 116 | 120 |
} |
| 117 | 121 |
if (cDirectory == null) {
|
| 118 |
System.out.println("The Letter corpus XML file was not found in "+textDirectory+". Aborting.");
|
|
| 122 |
System.out.println("The Letter corpus XML file was not found in " + textDirectory + ". Aborting.");
|
|
| 119 | 123 |
return false; |
| 120 | 124 |
} |
| 121 |
|
|
| 122 |
//Create XML-XTZ source directories |
|
| 125 |
|
|
| 126 |
// Create XML-XTZ source directories
|
|
| 123 | 127 |
wDirectory.mkdirs(); |
| 124 | 128 |
cDirectory.mkdirs(); |
| 125 |
|
|
| 129 |
|
|
| 126 | 130 |
// Copy XML files and split character XML file |
| 127 | 131 |
FileCopy.copy(wFile, new File(wDirectory, wFile.getName())); |
| 128 |
|
|
| 132 |
|
|
| 129 | 133 |
ApplyXsl2 builder = new ApplyXsl2(cSplitXSLFile); |
| 130 |
HashMap<String, String> xslParams = new HashMap<String, String>();
|
|
| 134 |
HashMap<String, String> xslParams = new HashMap<>(); |
|
| 131 | 135 |
xslParams.put("output-directory", cDirectory.getAbsoluteFile().toURI().toString());
|
| 132 |
for (String name : xslParams.keySet()) builder.setParam(name, xslParams.get(name)); |
|
| 136 |
for (String name : xslParams.keySet()) |
|
| 137 |
builder.setParam(name, xslParams.get(name)); |
|
| 133 | 138 |
if (!builder.process(cFile, null)) {
|
| 134 |
System.out.println("Error: fail to split "+cFile);
|
|
| 139 |
System.out.println("Error: fail to split " + cFile);
|
|
| 135 | 140 |
return false; |
| 136 | 141 |
} |
| 137 | 142 |
if (!ApplyXsl2.processImportSources(cFrontXSLFile, ApplyXsl2.listFiles(cDirectory), new HashMap<String, Object>())) {
|
| 138 |
System.out.println("Error: fail to apply front XSL with "+cDirectory+" files");
|
|
| 143 |
System.out.println("Error: fail to apply front XSL with " + cDirectory + " files");
|
|
| 139 | 144 |
return false; |
| 140 | 145 |
} |
| 141 |
// INJECT ontologies
|
|
| 146 |
// INJECT ontologies |
|
| 142 | 147 |
System.out.println("Injecting ontologies...");
|
| 143 | 148 |
for (File f : cDirectory.listFiles(IOUtils.HIDDENFILE_FILTER)) {
|
| 144 | 149 |
if (f.getName().startsWith(cDirectory.getName())) {
|
| ... | ... | |
| 146 | 151 |
File outputFile = new File(cDirectory, "temp.xml"); |
| 147 | 152 |
cp.process(outputFile); |
| 148 | 153 |
if (outputFile.exists() && f.delete() && outputFile.renameTo(f)) {
|
| 149 |
|
|
| 150 |
} else {
|
|
| 151 |
System.out.println("Failed to replace XML file "+f+" with "+outputFile);
|
|
| 154 |
|
|
| 155 |
} |
|
| 156 |
else {
|
|
| 157 |
System.out.println("Failed to replace XML file " + f + " with " + outputFile);
|
|
| 152 | 158 |
return false; |
| 153 | 159 |
} |
| 154 | 160 |
} |
| 155 | 161 |
} |
| 156 |
|
|
| 162 |
|
|
| 157 | 163 |
// INJECT word's coordinates |
| 158 | 164 |
System.out.println("Injecting coordinates...");
|
| 159 | 165 |
File xmlFile = new File(wDirectory, wFile.getName()); |
| ... | ... | |
| 163 | 169 |
CoordsProjection cp = new CoordsProjection(xmlFile, img_links_directory, zones_directory, "w"); |
| 164 | 170 |
if (cp.process(outputFile)) {
|
| 165 | 171 |
if (outputFile.exists() && xmlFile.delete() && outputFile.renameTo(xmlFile)) {
|
| 166 |
|
|
| 167 |
} else {
|
|
| 168 |
System.out.println("Failed to replace XML file "+xmlFile+" with "+outputFile);
|
|
| 172 |
|
|
| 173 |
} |
|
| 174 |
else {
|
|
| 175 |
System.out.println("Failed to replace XML file " + xmlFile + " with " + outputFile);
|
|
| 169 | 176 |
return false; |
| 170 | 177 |
} |
| 171 |
} else {
|
|
| 178 |
} |
|
| 179 |
else {
|
|
| 172 | 180 |
System.out.println("Coordinates injection failed. Aborting");
|
| 173 | 181 |
return false; |
| 174 | 182 |
} |
| 175 |
|
|
| 183 |
|
|
| 176 | 184 |
// Create XSL directories |
| 177 |
|
|
| 185 |
|
|
| 178 | 186 |
File wXSLDirectory = new File(wDirectory, "xsl"); |
| 179 | 187 |
File cXSLDirectory = new File(cDirectory, "xsl"); |
| 180 |
|
|
| 181 |
//File cSplitXSLDirectory = new File(cXSLDirectory, "1-split-merge") |
|
| 182 |
//cSplitXSLDirectory.mkdirs() |
|
| 183 |
|
|
| 188 |
|
|
| 189 |
// File cSplitXSLDirectory = new File(cXSLDirectory, "1-split-merge")
|
|
| 190 |
// cSplitXSLDirectory.mkdirs()
|
|
| 191 |
|
|
| 184 | 192 |
File wFrontXSLDirectory = new File(wXSLDirectory, "2-front"); |
| 185 |
//File cFrontXSLDirectory = new File(cXSLDirectory, "2-front") |
|
| 193 |
// File cFrontXSLDirectory = new File(cXSLDirectory, "2-front")
|
|
| 186 | 194 |
wFrontXSLDirectory.mkdirs(); |
| 187 |
//cFrontXSLDirectory.mkdirs() |
|
| 188 |
|
|
| 195 |
// cFrontXSLDirectory.mkdirs()
|
|
| 196 |
|
|
| 189 | 197 |
// Copy Split XSL file |
| 190 |
//File newCSplitXSLFile = new File(cSplitXSLDirectory, cSplitXSLFile.getName()) |
|
| 191 |
//FileCopy.copy(cSplitXSLFile, newCSplitXSLFile); |
|
| 192 |
|
|
| 198 |
// File newCSplitXSLFile = new File(cSplitXSLDirectory, cSplitXSLFile.getName())
|
|
| 199 |
// FileCopy.copy(cSplitXSLFile, newCSplitXSLFile);
|
|
| 200 |
|
|
| 193 | 201 |
// Copy Front XSL file |
| 194 | 202 |
File newWFrontXSLFile = new File(wFrontXSLDirectory, wFrontXSLFile.getName()); |
| 195 |
//File newCFrontXSLFile = new File(cFrontXSLDirectory, cFrontXSLFile.getName()) |
|
| 203 |
// File newCFrontXSLFile = new File(cFrontXSLDirectory, cFrontXSLFile.getName())
|
|
| 196 | 204 |
FileCopy.copy(wFrontXSLFile, newWFrontXSLFile); |
| 197 |
//FileCopy.copy(cFrontXSLFile, newCFrontXSLFile); |
|
| 198 |
|
|
| 205 |
// FileCopy.copy(cFrontXSLFile, newCFrontXSLFile);
|
|
| 206 |
|
|
| 199 | 207 |
// Copy edition XSL file |
| 200 | 208 |
File wEditionXSLDirectory = new File(wXSLDirectory, "4-edition"); |
| 201 | 209 |
File cEditionXSLDirectory = new File(cXSLDirectory, "4-edition"); |
| ... | ... | |
| 213 | 221 |
File newCEditionXSLFile3 = new File(cEditionXSLDirectory, editionXSLFile3.getName()); |
| 214 | 222 |
FileCopy.copy(editionXSLFile3, newWEditionXSLFile3); |
| 215 | 223 |
FileCopy.copy(editionXSLFile3, newCEditionXSLFile3); |
| 216 |
|
|
| 217 |
//patch XSL files with image directory path and set the 'word-element' xsl param |
|
| 224 |
|
|
| 225 |
// patch XSL files with image directory path and set the 'word-element' xsl param
|
|
| 218 | 226 |
File projectImgDirectory = new File(projectDirectory, "img"); |
| 219 |
HashMap<String, String> parameters = new HashMap<String, String>();
|
|
| 227 |
HashMap<String, String> parameters = new HashMap<>(); |
|
| 220 | 228 |
parameters.put("image-directory", projectImgDirectory.getAbsolutePath());
|
| 221 | 229 |
parameters.put("word-element", "w");
|
| 222 |
System.out.println("update "+newWEditionXSLFile3+" with "+parameters);
|
|
| 230 |
System.out.println("update " + newWEditionXSLFile3 + " with " + parameters);
|
|
| 223 | 231 |
UpdateXSLParameters p = new UpdateXSLParameters(newWEditionXSLFile3); |
| 224 | 232 |
if (!p.process(parameters)) {
|
| 225 |
System.out.println("Fail to patch "+newWEditionXSLFile3);
|
|
| 233 |
System.out.println("Fail to patch " + newWEditionXSLFile3);
|
|
| 226 | 234 |
return false; |
| 227 | 235 |
} |
| 228 |
parameters = new HashMap<String, String>();
|
|
| 236 |
parameters = new HashMap<>(); |
|
| 229 | 237 |
parameters.put("image-directory", projectImgDirectory.getAbsolutePath());
|
| 230 | 238 |
parameters.put("word-element", "c");
|
| 231 |
System.out.println("update "+newCEditionXSLFile3+" with "+parameters);
|
|
| 239 |
System.out.println("update " + newCEditionXSLFile3 + " with " + parameters);
|
|
| 232 | 240 |
UpdateXSLParameters p2 = new UpdateXSLParameters(newCEditionXSLFile3); |
| 233 | 241 |
if (!p2.process(parameters)) {
|
| 234 |
System.out.println("Fail to patch "+newCEditionXSLFile3);
|
|
| 242 |
System.out.println("Fail to patch " + newCEditionXSLFile3);
|
|
| 235 | 243 |
return false; |
| 236 | 244 |
} |
| 237 |
|
|
| 245 |
|
|
| 238 | 246 |
// Copy js and images directories |
| 239 |
File wCSSDirectory = new File(wDirectory, cssDirectory.getName());
|
|
| 247 |
File wCSSDirectory = new File(wDirectory, cssDirectory.getName()); |
|
| 240 | 248 |
wCSSDirectory.mkdir(); |
| 241 |
File wJsDirectory = new File(wDirectory, jsDirectory.getName());
|
|
| 249 |
File wJsDirectory = new File(wDirectory, jsDirectory.getName()); |
|
| 242 | 250 |
wJsDirectory.mkdir(); |
| 243 |
File wImagesDirectory = new File(wDirectory, imagesDirectory.getName());
|
|
| 251 |
File wImagesDirectory = new File(wDirectory, imagesDirectory.getName()); |
|
| 244 | 252 |
wImagesDirectory.mkdir(); |
| 245 |
File cCSSDirectory = new File(cDirectory, cssDirectory.getName());
|
|
| 253 |
File cCSSDirectory = new File(cDirectory, cssDirectory.getName()); |
|
| 246 | 254 |
cCSSDirectory.mkdir(); |
| 247 |
File cJsDirectory = new File(cDirectory, jsDirectory.getName());
|
|
| 255 |
File cJsDirectory = new File(cDirectory, jsDirectory.getName()); |
|
| 248 | 256 |
cJsDirectory.mkdir(); |
| 249 |
File cImagesDirectory = new File(cDirectory, imagesDirectory.getName());
|
|
| 257 |
File cImagesDirectory = new File(cDirectory, imagesDirectory.getName()); |
|
| 250 | 258 |
cImagesDirectory.mkdir(); |
| 251 | 259 |
FileCopy.copyFiles(cssDirectory, wCSSDirectory); |
| 252 | 260 |
FileCopy.copyFiles(jsDirectory, wJsDirectory); |
| ... | ... | |
| 254 | 262 |
FileCopy.copyFiles(cssDirectory, cCSSDirectory); |
| 255 | 263 |
FileCopy.copyFiles(jsDirectory, cJsDirectory); |
| 256 | 264 |
FileCopy.copyFiles(imagesDirectory, cImagesDirectory); |
| 257 |
|
|
| 265 |
|
|
| 258 | 266 |
// Prepare import.xml files |
| 259 | 267 |
File wImportXMLFile = new File(wDirectory, "import.xml"); |
| 260 | 268 |
File cImportXMLFile = new File(cDirectory, "import.xml"); |
| 261 |
|
|
| 269 |
|
|
| 262 | 270 |
BaseOldParameters.createEmptyParams(wImportXMLFile, AsciiUtils.buildId(wDirectory.getName()).toUpperCase()); |
| 263 | 271 |
BaseOldParameters wParams = new BaseOldParameters(wImportXMLFile); |
| 264 | 272 |
wParams.load(); |
| ... | ... | |
| 267 | 275 |
wParams.setDoAnnotation(false); |
| 268 | 276 |
wParams.setAnnotationLang("fr");
|
| 269 | 277 |
wParams.setWordsPerPage(9999999); |
| 270 |
wParams.setTextualPlans("", "note", "teiHeader,facsimile","pb,cb,lb");
|
|
| 278 |
wParams.setTextualPlans("", "note", "teiHeader,facsimile", "pb,cb,lb");
|
|
| 271 | 279 |
wParams.getCorpusElement().setAttribute("font", "Junicode");
|
| 272 | 280 |
wParams.getEditionsElement(wParams.getCorpusElement()).setAttribute("default", "default,facsimile");
|
| 273 | 281 |
wParams.getCorpusElement().setAttribute("name", AsciiUtils.buildId(wDirectory.getName()).toUpperCase());
|
| 274 |
|
|
| 275 |
|
|
| 282 |
|
|
| 283 |
|
|
| 276 | 284 |
BaseOldParameters.createEmptyParams(cImportXMLFile, AsciiUtils.buildId(cDirectory.getName()).toUpperCase()); |
| 277 | 285 |
BaseOldParameters cParams = new BaseOldParameters(cImportXMLFile); |
| 278 | 286 |
cParams.load(); |
| ... | ... | |
| 281 | 289 |
cParams.setDoAnnotation(false); |
| 282 | 290 |
cParams.setAnnotationLang("fr");
|
| 283 | 291 |
cParams.setWordsPerPage(9999999); |
| 284 |
cParams.setTextualPlans("", "note", "teiHeader,facsimile","pb,cb,lb");
|
|
| 292 |
cParams.setTextualPlans("", "note", "teiHeader,facsimile", "pb,cb,lb");
|
|
| 285 | 293 |
cParams.getCorpusElement().setAttribute("font", "Junicode");
|
| 286 | 294 |
cParams.getEditionsElement(cParams.getCorpusElement()).setAttribute("default", "default,facsimile");
|
| 287 | 295 |
cParams.getCorpusElement().setAttribute("name", AsciiUtils.buildId(cDirectory.getName()).toUpperCase());
|
| 288 |
|
|
| 296 |
|
|
| 289 | 297 |
return cParams.save() && wParams.save(); |
| 290 | 298 |
} |
| 291 |
} |
|
| 299 |
} |
|
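
One pattern repeated in process() above is copying a parameter map into the XSL builder with `for (String name : xslParams.keySet()) builder.setParam(name, xslParams.get(name));`. Iterating over entrySet avoids the second lookup per key; a JDK-only sketch where the `setParam` consumer merely stands in for the ApplyXsl2 call:

```java
import java.util.HashMap;
import java.util.Map;
import java.util.function.BiConsumer;

public class ParamCopySketch {

    public static void main(String[] args) {
        Map<String, String> xslParams = new HashMap<>();
        xslParams.put("output-directory", "file:/tmp/out/");
        xslParams.put("word-element", "w");

        // Stand-in for builder.setParam(name, value).
        BiConsumer<String, String> setParam = (name, value) ->
                System.out.println("setParam(" + name + ", " + value + ")");

        // entrySet iteration: one traversal, no extra get() per key.
        for (Map.Entry<String, String> e : xslParams.entrySet()) {
            setParam.accept(e.getKey(), e.getValue());
        }
    }
}
```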
| tmp/org.txm.oriflamms.rcp/src/org/txm/oriflamms/functions/OntologiesProjection.java (revision 2984) | | |
|---|---|---|
| 13 | 13 |
|
| 14 | 14 |
import org.txm.importer.StaxIdentityParser; |
| 15 | 15 |
import org.txm.scripts.importer.StaxParser; |
| 16 |
import org.txm.utils.FileUtils; |
|
| 16 | 17 |
|
| 17 | 18 |
class OntologiesProjection extends StaxIdentityParser {
|
| 18 |
|
|
| 19 |
|
|
| 19 | 20 |
File xmlFile; |
| 20 |
|
|
| 21 |
|
|
| 21 | 22 |
String wordTag; |
| 23 |
|
|
| 22 | 24 |
String textname; |
| 25 |
|
|
| 23 | 26 |
String milestone; |
| 27 |
|
|
| 24 | 28 |
String group; |
| 25 |
|
|
| 26 |
HashMap<String, String[]> links = new HashMap<String, String[]>(); |
|
| 27 |
HashMap<String, List<Serializable>> prefixDefsPatterns = new HashMap<String, List<Serializable>>(); |
|
| 28 |
HashMap<String, HashMap<String, HashMap<String, String>>> ggly_ontologies = new HashMap<String, HashMap<String, HashMap<String, String>>>(); |
|
| 29 |
HashMap<String, HashMap<String, String>> ggly_ontologies_unicodechars = new HashMap<String, HashMap<String, String>>(); |
|
| 30 |
HashMap<String, HashMap> lgly_ontologies = new HashMap<String, HashMap>(); |
|
| 31 |
|
|
| 29 |
|
|
| 30 |
HashMap<String, String[]> links = new HashMap<>(); |
|
| 31 |
|
|
| 32 |
HashMap<String, List<Serializable>> prefixDefsPatterns = new HashMap<>(); |
|
| 33 |
|
|
| 34 |
HashMap<String, HashMap<String, HashMap<String, String>>> ggly_ontologies = new HashMap<>(); |
|
| 35 |
|
|
| 36 |
HashMap<String, HashMap<String, String>> ggly_ontologies_unicodechars = new HashMap<>(); |
|
| 37 |
|
|
| 38 |
HashMap<String, HashMap> lgly_ontologies = new HashMap<>(); |
|
| 39 |
|
|
| 32 | 40 |
String current_ontology_link_file_name = ""; |
| 33 |
|
|
| 41 |
|
|
| 34 | 42 |
File ontologies_links_directory; |
| 35 |
|
|
| 43 |
|
|
| 36 | 44 |
public OntologiesProjection(File xmlFile, File corpusDirectory) throws IOException, XMLStreamException {
|
| 37 | 45 |
super(xmlFile); |
| 38 |
|
|
| 46 |
|
|
| 39 | 47 |
this.xmlFile = xmlFile; |
| 40 | 48 |
this.ontologies_links_directory = new File(corpusDirectory, "ontologies_links"); |
| 41 |
|
|
| 42 |
textname = xmlFile.getName(); |
|
| 43 |
int idx = textname.lastIndexOf(".xml");
|
|
| 44 |
if (idx > 0) textname = textname.substring(0, idx); |
|
| 49 |
|
|
| 50 |
textname = FileUtils.stripExtension(xmlFile); |
|
| 45 | 51 |
textname = textname.replaceAll("-c", "");
|
| 46 |
|
|
| 52 |
|
|
| 47 | 53 |
this.wordTag = "c"; |
| 48 | 54 |
} |
| 49 |
|
|
| 55 |
|
|
| 50 | 56 |
public boolean buildGGlyOntology(String prefix) {
|
| 51 |
String path = (String)prefixDefsPatterns.get(prefix).get(1); |
|
| 57 |
String path = (String) prefixDefsPatterns.get(prefix).get(1);
|
|
| 52 | 58 |
int idx = path.indexOf("#");
|
| 53 | 59 |
if (idx > 0) path = path.substring(0, idx); |
| 54 |
|
|
| 55 |
File ggly_ontology_file = new File(xmlFile.getParentFile(), "../"+path);
|
|
| 60 |
|
|
| 61 |
File ggly_ontology_file = new File(xmlFile.getParentFile(), "../" + path);
|
|
| 56 | 62 |
if (!ggly_ontology_file.exists()) {
|
| 57 |
System.out.println("WARNING: cannot found global ontology file: "+ggly_ontology_file);
|
|
| 63 |
System.out.println("WARNING: cannot found global ontology file: " + ggly_ontology_file);
|
|
| 58 | 64 |
return false; |
| 59 | 65 |
} |
| 60 |
final HashMap<String, HashMap<String, String>> global_ontologies = new HashMap<String, HashMap<String, String>>();
|
|
| 61 |
final HashMap<String, String> unicode_global_ontologies = new HashMap<String, String>();
|
|
| 66 |
final HashMap<String, HashMap<String, String>> global_ontologies = new HashMap<>(); |
|
| 67 |
final HashMap<String, String> unicode_global_ontologies = new HashMap<>(); |
|
| 62 | 68 |
StaxParser pontologies = new StaxParser(ggly_ontology_file) {
|
| 69 |
|
|
| 63 | 70 |
boolean startChar = false, startLocalName = false, startValue = false, startMapping = false; |
| 71 |
|
|
| 64 | 72 |
String unicodeChar, standardizedChar, subtype, type; |
| 73 |
|
|
| 65 | 74 |
String id, charLocalName, charValue; |
| 75 |
|
|
| 66 | 76 |
StringBuilder c = new StringBuilder(); |
| 67 |
|
|
| 77 |
|
|
| 78 |
@Override |
|
| 68 | 79 |
public void processStartElement() {
|
| 69 | 80 |
if (localname.equals("char")) {
|
| 70 | 81 |
// get id |
| 71 |
for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
|
|
| 82 |
for (int i = 0; i < parser.getAttributeCount(); i++) {
|
|
| 72 | 83 |
if (parser.getAttributeLocalName(i).equals("id")) {
|
| 73 | 84 |
id = parser.getAttributeValue(i); |
| 74 | 85 |
break; |
| ... | ... | |
| 76 | 87 |
} |
| 77 | 88 |
startChar = true; |
| 78 | 89 |
c.setLength(0); |
| 79 |
} else if (localname.equals("mapping")) {
|
|
| 90 |
} |
|
| 91 |
else if (localname.equals("mapping")) {
|
|
| 80 | 92 |
subtype = ""; |
| 81 | 93 |
type = ""; |
| 82 |
for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
|
|
| 94 |
for (int i = 0; i < parser.getAttributeCount(); i++) {
|
|
| 83 | 95 |
if (parser.getAttributeLocalName(i).equals("subtype")) {
|
| 84 | 96 |
subtype = parser.getAttributeValue(i); |
| 85 |
} else if (parser.getAttributeLocalName(i).equals("type")) {
|
|
| 97 |
} |
|
| 98 |
else if (parser.getAttributeLocalName(i).equals("type")) {
|
|
| 86 | 99 |
type = parser.getAttributeValue(i); |
| 87 | 100 |
} |
| 88 | 101 |
} |
| 89 | 102 |
startMapping = true; |
| 90 | 103 |
c.setLength(0); |
| 91 |
} else if (localname.equals("localName")) {
|
|
| 104 |
} |
|
| 105 |
else if (localname.equals("localName")) {
|
|
| 92 | 106 |
startLocalName = true; |
| 93 | 107 |
c.setLength(0); |
| 94 |
} else if (localname.equals("value")) {
|
|
| 108 |
} |
|
| 109 |
else if (localname.equals("value")) {
|
|
| 95 | 110 |
startLocalName = true; |
| 96 | 111 |
c.setLength(0); |
| 97 | 112 |
} |
| 98 | 113 |
} |
| 99 |
|
|
| 114 |
|
|
| 115 |
@Override |
|
| 100 | 116 |
public void processCharacters() {
|
| 101 | 117 |
if (startMapping) c.append(parser.getText()); |
| 102 | 118 |
else if (startLocalName) c.append(parser.getText()); |
| 103 | 119 |
else if (startValue) c.append(parser.getText()); |
| 104 | 120 |
} |
| 105 |
|
|
| 121 |
|
|
| 122 |
@Override |
|
| 106 | 123 |
public void processEndElement() {
|
| 107 | 124 |
if (localname.equals("char")) {
|
| 108 | 125 |
startChar = false; |
| 109 |
HashMap<String, String> h = new HashMap<String, String>();
|
|
| 110 |
h.put("standard",standardizedChar);
|
|
| 126 |
HashMap<String, String> h = new HashMap<>(); |
|
| 127 |
h.put("standard", standardizedChar);
|
|
| 111 | 128 |
h.put("unicode", unicodeChar);
|
| 112 |
h.put("value",charValue);
|
|
| 113 |
h.put("localname",charLocalName);
|
|
| 129 |
h.put("value", charValue);
|
|
| 130 |
h.put("localname", charLocalName);
|
|
| 114 | 131 |
global_ontologies.put(id, h); |
| 115 | 132 |
unicode_global_ontologies.put(unicodeChar, standardizedChar); |
| 116 |
} else if (localname.equals("mapping")) {
|
|
| 133 |
} |
|
| 134 |
else if (localname.equals("mapping")) {
|
|
| 117 | 135 |
if (subtype.equals("Unicode")) {
|
| 118 | 136 |
unicodeChar = c.toString().trim(); |
| 119 |
} else if (type.equals("standardized")) {
|
|
| 137 |
} |
|
| 138 |
else if (type.equals("standardized")) {
|
|
| 120 | 139 |
standardizedChar = c.toString().trim(); |
| 121 | 140 |
} |
| 122 | 141 |
startMapping = false; |
| 123 |
} else if (localname.equals("localName")) {
|
|
| 142 |
} |
|
| 143 |
else if (localname.equals("localName")) {
|
|
| 124 | 144 |
charLocalName = c.toString().trim(); |
| 125 | 145 |
startLocalName = false; |
| 126 |
} else if (localname.equals("value")) {
|
|
| 146 |
} |
|
| 147 |
else if (localname.equals("value")) {
|
|
| 127 | 148 |
charValue = c.toString().trim(); |
| 128 | 149 |
startValue = false; |
| 129 | 150 |
} |
| 130 | 151 |
} |
| 131 |
};
|
|
| 152 |
}; |
|
| 132 | 153 |
pontologies.process(); |
| 133 | 154 |
ggly_ontologies.put(prefix, global_ontologies); |
| 134 | 155 |
ggly_ontologies_unicodechars.put(prefix, unicode_global_ontologies); |
| 135 |
//System.out.println(ggly_ontologies |
|
| 156 |
// System.out.println(ggly_ontologies
|
|
| 136 | 157 |
return true; |
| 137 | 158 |
} |
| 138 |
|
|
| 159 |
|
|
| 139 | 160 |
public boolean buildLGlyOntology(String prefix) {
|
| 140 |
String path = (String)prefixDefsPatterns.get(prefix).get(1); |
|
| 161 |
String path = (String) prefixDefsPatterns.get(prefix).get(1);
|
|
| 141 | 162 |
int idx = path.indexOf("#");
|
| 142 | 163 |
if (idx > 0) path = path.substring(0, idx); |
| 143 |
|
|
| 144 |
File lgly_ontology_file = new File(ontologies_links_directory, textname+"-ontolinks.xml"); // add "../" because we are in txm/<corpus>-c directory
|
|
| 164 |
|
|
| 165 |
File lgly_ontology_file = new File(ontologies_links_directory, textname + "-ontolinks.xml"); // add "../" because we are in txm/<corpus>-c directory
|
|
| 145 | 166 |
if (!lgly_ontology_file.exists()) {
|
| 146 |
System.out.println("WARNING: cannot find Local ontology file "+lgly_ontology_file);
|
|
| 167 |
System.out.println("WARNING: cannot find Local ontology file " + lgly_ontology_file);
|
|
| 147 | 168 |
return false; |
| 148 | 169 |
} |
| 149 |
|
|
| 150 |
final HashMap<String, HashMap> local_ontologies = new HashMap<String, HashMap>();
|
|
| 170 |
|
|
| 171 |
final HashMap<String, HashMap> local_ontologies = new HashMap<>(); |
|
| 151 | 172 |
StaxParser pontologies = new StaxParser(lgly_ontology_file) {
|
| 152 |
boolean startNote = false; |
|
| 153 |
String id, change, parent; |
|
| 154 |
StringBuilder c = new StringBuilder(); |
|
| 155 |
HashMap<String, String> glyph = new HashMap<String, String>(); |
|
| 156 |
|
|
| 157 |
public void processStartElement() {
|
|
| 158 |
if (localname.equals("glyph")) {
|
|
| 159 |
// get id |
|
| 160 |
for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
|
|
| 161 |
change = ""; |
|
| 162 |
if (parser.getAttributeLocalName(i).equals("id")) {
|
|
| 163 |
id = parser.getAttributeValue(i); |
|
| 164 |
} else if (parser.getAttributeLocalName(i).equals("change")) {
|
|
| 165 |
change = parser.getAttributeValue(i); |
|
| 166 |
} |
|
| 167 |
} |
|
| 168 |
glyph = new HashMap<String, String>(); |
|
| 169 |
glyph.put("change",change);
|
|
| 170 |
glyph.put("id", id); // new glyph
|
|
| 171 |
parent = null; |
|
| 172 |
} else if (localname.equals("note")) {
|
|
| 173 |
startNote = true; |
|
| 174 |
c.setLength(0); |
|
| 173 |
|
|
| 174 |
boolean startNote = false; |
|
| 175 |
|
|
| 176 |
String id, change, parent; |
|
| 177 |
|
|
| 178 |
StringBuilder c = new StringBuilder(); |
|
| 179 |
|
|
| 180 |
HashMap<String, String> glyph = new HashMap<>(); |
|
| 181 |
|
|
| 182 |
@Override |
|
| 183 |
public void processStartElement() {
|
|
| 184 |
if (localname.equals("glyph")) {
|
|
| 185 |
// get id |
|
| 186 |
for (int i = 0; i < parser.getAttributeCount(); i++) {
|
|
| 187 |
change = ""; |
|
| 188 |
if (parser.getAttributeLocalName(i).equals("id")) {
|
|
| 189 |
id = parser.getAttributeValue(i); |
|
| 175 | 190 |
} |
| 176 |
} |
|
| 177 |
|
|
| 178 |
public void processCharacters() {
|
|
| 179 |
if (startNote) c.append(parser.getText()); |
|
| 180 |
} |
|
| 181 |
|
|
| 182 |
public void processEndElement() {
|
|
| 183 |
if (localname.equals("char")) {
|
|
| 184 |
if (parent != null) |
|
| 185 |
glyph.put("parent", glyph.get(parent));
|
|
| 186 |
local_ontologies.put(id, glyph); |
|
| 187 |
} else if (localname.equals("note")) {
|
|
| 188 |
parent = c.toString().trim(); |
|
| 189 |
startNote = false; |
|
| 191 |
else if (parser.getAttributeLocalName(i).equals("change")) {
|
|
| 192 |
change = parser.getAttributeValue(i); |
|
| 190 | 193 |
} |
| 191 | 194 |
} |
| 192 |
}; |
|
| 195 |
glyph = new HashMap<>(); |
|
| 196 |
glyph.put("change", change);
|
|
| 197 |
glyph.put("id", id); // new glyph
|
|
| 198 |
parent = null; |
|
| 199 |
} |
|
| 200 |
else if (localname.equals("note")) {
|
|
| 201 |
startNote = true; |
|
| 202 |
c.setLength(0); |
|
| 203 |
} |
|
| 204 |
} |
|
| 205 |
|
|
| 206 |
@Override |
|
| 207 |
public void processCharacters() {
|
|
| 208 |
if (startNote) c.append(parser.getText()); |
|
| 209 |
} |
|
| 210 |
|
|
| 211 |
@Override |
|
| 212 |
public void processEndElement() {
|
|
| 213 |
if (localname.equals("char")) {
|
|
| 214 |
if (parent != null) |
|
| 215 |
glyph.put("parent", glyph.get(parent));
|
|
| 216 |
local_ontologies.put(id, glyph); |
|
| 217 |
} |
|
| 218 |
else if (localname.equals("note")) {
|
|
| 219 |
parent = c.toString().trim(); |
|
| 220 |
startNote = false; |
|
| 221 |
} |
|
| 222 |
} |
|
| 223 |
}; |
|
| 193 | 224 |
pontologies.process(); |
| 194 | 225 |
lgly_ontologies.put(prefix, local_ontologies); |
| 195 |
|
|
| 226 |
|
|
| 196 | 227 |
return true; |
| 197 | 228 |
} |
| 198 |
|
|
| 229 |
|
|
| 199 | 230 |
public void loadOntologyLinkFile(String name) {
|
| 200 | 231 |
links = new HashMap(); |
| 201 | 232 |
prefixDefsPatterns = new HashMap(); |
| 202 | 233 |
prefixDefsPatterns.put("ggly", Arrays.asList(Pattern.compile("([a-z]+)"), "../../charDecl.xml#$1"));
|
| 203 |
prefixDefsPatterns.put("lgly", Arrays.asList(Pattern.compile("([a-z]+)"), "../ontologies/"+textname+".xml#$1"));
|
|
| 204 |
prefixDefsPatterns.put("txt", Arrays.asList(Pattern.compile("([a-z]+)"), "../texts/"+textname+".xml#$1"));
|
|
| 205 |
|
|
| 234 |
prefixDefsPatterns.put("lgly", Arrays.asList(Pattern.compile("([a-z]+)"), "../ontologies/" + textname + ".xml#$1"));
|
|
| 235 |
prefixDefsPatterns.put("txt", Arrays.asList(Pattern.compile("([a-z]+)"), "../texts/" + textname + ".xml#$1"));
|
|
| 236 |
|
|
| 206 | 237 |
lgly_ontologies = new HashMap(); |
| 207 | 238 |
ggly_ontologies = new HashMap(); |
| 208 | 239 |
File ontology_link_file = new File(ontologies_links_directory, name); |
| 209 | 240 |
if (!ontology_link_file.exists()) {
|
| 210 |
System.out.println("WARNING: no ontology link file: "+ontology_link_file);
|
|
| 241 |
System.out.println("WARNING: no ontology link file: " + ontology_link_file);
|
|
| 211 | 242 |
return; |
| 212 | 243 |
} |
| 213 |
|
|
| 244 |
|
|
| 214 | 245 |
StaxParser pLinks = new StaxParser(ontology_link_file) {
|
| 215 |
public void processStartElement() {
|
|
| 216 |
if (localname.equals("linkGrp")) {
|
|
| 217 |
for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
|
|
| 218 |
if (parser.getAttributeLocalName(i).equals("type")) {
|
|
| 219 |
group = parser.getAttributeValue(i); |
|
| 220 |
break; |
|
| 221 |
} |
|
| 222 |
} |
|
| 223 |
} else if (localname.equals("prefixDef")) {
|
|
| 224 |
String ident = null, matchPattern = null, replacementPattern = null; |
|
| 225 |
for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
|
|
| 226 |
if (parser.getAttributeLocalName(i).equals("ident")) {
|
|
| 227 |
ident = parser.getAttributeValue(i); |
|
| 228 |
} else if (parser.getAttributeLocalName(i).equals("matchPattern")) {
|
|
| 229 |
matchPattern = parser.getAttributeValue(i); |
|
| 230 |
} else if (parser.getAttributeLocalName(i).equals("replacementPattern")) {
|
|
| 231 |
replacementPattern = parser.getAttributeValue(i); |
|
| 232 |
} |
|
| 233 |
} |
|
| 234 |
if (ident != null && matchPattern != null && replacementPattern != null && !ident.equals("txt")) {
|
|
| 235 |
prefixDefsPatterns.put(ident, Arrays.asList(Pattern.compile(matchPattern), replacementPattern)); |
|
| 236 |
OntologiesProjection.this.getOntology(ident); |
|
| 237 |
} |
|
| 238 |
} else if (localname.equals("link")) {
|
|
| 239 |
String target = ""; |
|
| 240 |
|
|
| 241 |
for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
|
|
| 242 |
if (parser.getAttributeLocalName(i).equals("target")) {
|
|
| 243 |
target = parser.getAttributeValue(i); |
|
| 244 |
break; |
|
| 245 |
} |
|
| 246 |
} |
|
| 247 |
|
|
| 248 |
String[] split = target.split(" ", 2); // first part word id next part are the ontologies id
|
|
| 249 |
links.put(split[0].substring(4), split[1].split(" "));
|
|
| 246 |
|
|
| 247 |
@Override |
|
| 248 |
public void processStartElement() {
|
|
| 249 |
if (localname.equals("linkGrp")) {
|
|
| 250 |
for (int i = 0; i < parser.getAttributeCount(); i++) {
|
|
| 251 |
if (parser.getAttributeLocalName(i).equals("type")) {
|
|
| 252 |
group = parser.getAttributeValue(i); |
|
| 253 |
break; |
|
| 250 | 254 |
} |
| 251 | 255 |
} |
| 252 |
}; |
|
| 256 |
} |
|
| 257 |
else if (localname.equals("prefixDef")) {
|
|
| 258 |
String ident = null, matchPattern = null, replacementPattern = null; |
|
| 259 |
for (int i = 0; i < parser.getAttributeCount(); i++) {
|
|
| 260 |
if (parser.getAttributeLocalName(i).equals("ident")) {
|
|
| 261 |
ident = parser.getAttributeValue(i); |
|
| 262 |
} |
|
| 263 |
else if (parser.getAttributeLocalName(i).equals("matchPattern")) {
|
|
| 264 |
matchPattern = parser.getAttributeValue(i); |
|
| 265 |
} |
|
| 266 |
else if (parser.getAttributeLocalName(i).equals("replacementPattern")) {
|
|
| 267 |
replacementPattern = parser.getAttributeValue(i); |
|
| 268 |
} |
|
| 269 |
} |
|
| 270 |
if (ident != null && matchPattern != null && replacementPattern != null && !ident.equals("txt")) {
|
|
| 271 |
prefixDefsPatterns.put(ident, Arrays.asList(Pattern.compile(matchPattern), replacementPattern)); |
|
| 272 |
OntologiesProjection.this.getOntology(ident); |
|
| 273 |
} |
|
| 274 |
} |
|
| 275 |
else if (localname.equals("link")) {
|
|
| 276 |
String target = ""; |
|
| 277 |
|
|
| 278 |
for (int i = 0; i < parser.getAttributeCount(); i++) {
|
|
| 279 |
if (parser.getAttributeLocalName(i).equals("target")) {
|
|
| 280 |
target = parser.getAttributeValue(i); |
|
| 281 |
break; |
|
| 282 |
} |
|
| 283 |
} |
|
| 284 |
|
|
| 285 |
String[] split = target.split(" ", 2); // first part word id next part are the ontologies id
|
|
| 286 |
links.put(split[0].substring(4), split[1].split(" "));
|
|
| 287 |
} |
|
| 288 |
} |
|
| 289 |
}; |
|
| 253 | 290 |
pLinks.process(); |
| 254 |
// System.out.println("links size: "+links.size()
|
|
| 255 |
// System.out.println("ggly_ontologies size: "+ggly_ontologies.size()
|
|
| 256 |
// System.out.println("lgly_ontologies size: "+lgly_ontologies.size()
|
|
| 291 |
// System.out.println("links size: "+links.size()
|
|
| 292 |
// System.out.println("ggly_ontologies size: "+ggly_ontologies.size()
|
|
| 293 |
// System.out.println("lgly_ontologies size: "+lgly_ontologies.size()
|
|
| 257 | 294 |
} |
| 258 |
|
|
| 295 |
|
|
| 259 | 296 |
public HashMap<String, HashMap<String, String>> getOntology(String prefix) {
|
| 260 | 297 |
if (prefix.startsWith("ggly")) {
|
| 261 | 298 |
if (!ggly_ontologies.containsKey(prefix)) buildGGlyOntology(prefix); |
| 262 | 299 |
return ggly_ontologies.get(prefix); |
| 263 |
} else if (prefix.startsWith("lgly")) {
|
|
| 300 |
} |
|
| 301 |
else if (prefix.startsWith("lgly")) {
|
|
| 264 | 302 |
if (!lgly_ontologies.containsKey(prefix)) buildLGlyOntology(prefix); |
| 265 | 303 |
return lgly_ontologies.get(prefix); |
| 266 | 304 |
} |
| 267 | 305 |
return null; |
| 268 | 306 |
} |
| 269 |
|
|
| 307 |
|
|
| 308 |
@Override |
|
| 270 | 309 |
public void processStartElement() throws XMLStreamException, IOException {
|
| 271 | 310 |
super.processStartElement(); |
| 272 | 311 |
if (localname.equals("milestone")) {
|
| 273 | 312 |
String id = ""; |
| 274 |
String unit= ""; |
|
| 275 |
for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
|
|
| 313 |
String unit = "";
|
|
| 314 |
for (int i = 0; i < parser.getAttributeCount(); i++) {
|
|
| 276 | 315 |
if (parser.getAttributeLocalName(i).equals("id")) {
|
| 277 | 316 |
id = parser.getAttributeValue(i); |
| 278 |
} else if (parser.getAttributeLocalName(i).equals("unit")) {
|
|
| 317 |
} |
|
| 318 |
else if (parser.getAttributeLocalName(i).equals("unit")) {
|
|
| 279 | 319 |
unit = parser.getAttributeValue(i); |
| 280 | 320 |
} |
| 281 | 321 |
} |
| 282 |
|
|
| 322 |
|
|
| 283 | 323 |
if (unit.equals("surface")) {
|
| 284 | 324 |
milestone = id; |
| 285 | 325 |
} |
| 286 |
} else if (localname.equals(wordTag)) {
|
|
| 326 |
} |
|
| 327 |
else if (localname.equals(wordTag)) {
|
|
| 287 | 328 |
String id = ""; |
| 288 | 329 |
String characters = ""; |
| 289 |
for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
|
|
| 330 |
for (int i = 0; i < parser.getAttributeCount(); i++) {
|
|
| 290 | 331 |
if (parser.getAttributeLocalName(i).equals("id")) {
|
| 291 | 332 |
id = parser.getAttributeValue(i); |
| 292 |
} else if (parser.getAttributeLocalName(i).equals("characters")) {
|
|
| 333 |
} |
|
| 334 |
else if (parser.getAttributeLocalName(i).equals("characters")) {
|
|
| 293 | 335 |
characters = parser.getAttributeValue(i); |
| 294 | 336 |
} |
| 295 | 337 |
} |
| 296 |
|
|
| 297 |
String ontology_link_file_name = textname+"-ontolinks.xml";
|
|
| 338 |
|
|
| 339 |
String ontology_link_file_name = textname + "-ontolinks.xml";
|
|
| 298 | 340 |
if (!current_ontology_link_file_name.equals(ontology_link_file_name)) { // rebuild hashmaps
|
| 299 | 341 |
current_ontology_link_file_name = ontology_link_file_name; |
| 300 | 342 |
loadOntologyLinkFile(ontology_link_file_name); |
| 301 | 343 |
getOntology("ggly");
|
| 302 | 344 |
} |
| 303 |
|
|
| 345 |
|
|
| 304 | 346 |
String sign = null, allographExpert = null, allographAutomatic = null; // default value is attribute characters |
| 305 |
|
|
| 306 |
//AUTO ALLOGRAPH |
|
| 347 |
|
|
| 348 |
// AUTO ALLOGRAPH
|
|
| 307 | 349 |
if (links.containsKey(id)) |
| 308 | 350 |
for (String link : links.get(id)) { // automatic allograph loop
|
| 309 |
if (link.startsWith("lgly")) {
|
|
| 310 |
int idx = link.indexOf(":");
|
|
| 311 |
link = link.substring(idx+1); |
|
| 312 |
if (link.startsWith("auto_")) { // automatic lgly
|
|
| 313 |
if (allographAutomatic == null) allographAutomatic = link.substring(5); |
|
| 314 |
else if (allographAutomatic.length()+5 < link.length()) allographAutomatic = link.substring(5); |
|
| 315 |
} else { // manual lgly
|
|
| 316 |
|
|
| 317 |
} |
|
| 318 |
} |
|
| 351 |
if (link.startsWith("lgly")) {
|
|
| 352 |
int idx = link.indexOf(":");
|
|
| 353 |
link = link.substring(idx + 1); |
|
| 354 |
if (link.startsWith("auto_")) { // automatic lgly
|
|
| 355 |
if (allographAutomatic == null) allographAutomatic = link.substring(5); |
|
| 356 |
else if (allographAutomatic.length() + 5 < link.length()) allographAutomatic = link.substring(5); |
|
| 319 | 357 |
} |
| 358 |
else { // manual lgly
|
|
| 359 |
|
|
| 360 |
} |
|
| 361 |
} |
|
| 362 |
} |
|
| 320 | 363 |
if (allographAutomatic == null) allographAutomatic = characters; |
| 321 |
|
|
| 322 |
//EXPERT ALLOGRAPH |
|
| 364 |
|
|
| 365 |
// EXPERT ALLOGRAPH
|
|
| 323 | 366 |
if (links.containsKey(id)) |
| 324 | 367 |
for (String link : links.get(id)) { // expert allograph loop, try to find a ggly entity
|
| 325 |
//getOntology("ggly")
|
|
| 326 |
if (link.startsWith("ggly")) {
|
|
| 327 |
int idx = link.indexOf(":");
|
|
| 328 |
String prefix = link.substring(0, idx); |
|
| 329 |
link = link.substring(idx+1); |
|
| 330 |
|
|
| 331 |
HashMap<String, HashMap<String, String>> onto = getOntology(prefix); |
|
| 332 |
if (onto != null) {
|
|
| 333 |
HashMap<String, String> charOnto = onto.get(link); |
|
| 334 |
if (charOnto != null) {
|
|
| 335 |
String localname = charOnto.get("localname");
|
|
| 336 |
String value = charOnto.get("value");
|
|
| 337 |
if ("entity".equals(localname)) {
|
|
| 338 |
allographExpert = value; |
|
| 339 |
} |
|
| 340 |
} |
|
| 341 |
} |
|
| 342 |
} |
|
| 368 |
// getOntology("ggly")
|
|
| 369 |
if (link.startsWith("ggly")) {
|
|
| 370 |
int idx = link.indexOf(":");
|
|
| 371 |
String prefix = link.substring(0, idx); |
|
| 372 |
link = link.substring(idx + 1); |
|
| 373 |
|
|
| 374 |
HashMap<String, HashMap<String, String>> onto = getOntology(prefix); |
|
| 375 |
if (onto != null) {
|
|
| 376 |
HashMap<String, String> charOnto = onto.get(link); |
|
| 377 |
if (charOnto != null) {
|
|
| 378 |
String localname = charOnto.get("localname");
|
|
| 379 |
String value = charOnto.get("value");
|
|
| 380 |
if ("entity".equals(localname)) {
|
|
| 381 |
allographExpert = value; |
|
| 343 | 382 |
} |
| 383 |
} |
|
| 384 |
} |
|
| 385 |
} |
|
| 386 |
} |
|
| 344 | 387 |
if (allographExpert == null) |
| 345 | 388 |
if (links.containsKey(id)) |
| 346 | 389 |
for (String link : links.get(id)) { // expert allograph loop, try to find the longest non-autolgly entity
|
| 347 |
if (link.startsWith("lgly")) {
|
|
| 348 |
int idx = link.indexOf(":");
|
|
| 349 |
link = link.substring(idx+1); |
|
| 350 |
if (!link.startsWith("auto_")) { // non automatic lgly
|
|
| 351 |
//System.out.println("link= "+link
|
|
| 352 |
if (allographExpert == null) allographExpert = link; |
|
| 353 |
else if (allographExpert.length()+5 < link.length()) allographExpert = link; |
|
| 354 |
} |
|
| 355 |
} |
|
| 390 |
if (link.startsWith("lgly")) {
|
|
| 391 |
int idx = link.indexOf(":");
|
|
| 392 |
link = link.substring(idx + 1); |
|
| 393 |
if (!link.startsWith("auto_")) { // non automatic lgly
|
|
| 394 |
// System.out.println("link= "+link
|
|
| 395 |
if (allographExpert == null) allographExpert = link; |
|
| 396 |
else if (allographExpert.length() + 5 < link.length()) allographExpert = link; |
|
| 356 | 397 |
} |
| 398 |
} |
|
| 399 |
} |
|
| 357 | 400 |
if (allographExpert == null) allographExpert = allographAutomatic; |
| 358 |
|
|
| 359 |
//SIGN |
|
| 401 |
|
|
| 402 |
// SIGN
|
|
| 360 | 403 |
if (sign == null) |
| 361 | 404 |
if (links.containsKey(id)) |
| 362 | 405 |
for (String link : links.get(id)) { // expert allograph loop, try to find the shortest ggly entity
|
| 363 |
//getOntology("ggly")
|
|
| 364 |
if (link.startsWith("ggly")) {
|
|
| 365 |
int idx = link.indexOf(":");
|
|
| 366 |
String prefix = link.substring(0, idx); |
|
| 367 |
link = link.substring(idx+1); |
|
| 368 |
|
|
| 369 |
HashMap<String, HashMap<String, String>> onto = getOntology(prefix); |
|
| 370 |
if (onto != null) {
|
|
| 371 |
HashMap<String, String> charOnto = onto.get(link); |
|
| 372 |
if (charOnto != null) {
|
|
| 373 |
sign = charOnto.get("standard");
|
|
| 374 |
} |
|
| 375 |
} |
|
| 376 |
} |
|
| 406 |
// getOntology("ggly")
|
|
| 407 |
if (link.startsWith("ggly")) {
|
|
| 408 |
int idx = link.indexOf(":");
|
|
| 409 |
String prefix = link.substring(0, idx); |
|
| 410 |
link = link.substring(idx + 1); |
|
| 411 |
|
|
| 412 |
HashMap<String, HashMap<String, String>> onto = getOntology(prefix); |
|
| 413 |
if (onto != null) {
|
|
| 414 |
HashMap<String, String> charOnto = onto.get(link); |
|
| 415 |
if (charOnto != null) {
|
|
| 416 |
sign = charOnto.get("standard");
|
|
| 377 | 417 |
} |
| 418 |
} |
|
| 419 |
} |
|
| 420 |
} |
|
| 378 | 421 |
if (sign == null) |
| 379 | 422 |
if (links.containsKey(id)) |
| 380 | 423 |
for (String link : links.get(id)) { // sign loop, try to find the shortest non-autolgly entity
|
| 381 |
if (link.startsWith("lgly")) {
|
|
| 382 |
int idx = link.indexOf(":");
|
|
| 383 |
link = link.substring(idx+1); |
|
| 384 |
if (!link.startsWith("auto_")) { // non automatic lgly
|
|
| 385 |
if (sign == null) sign = link; |
|
| 386 |
else if (sign.length()+5 > link.length()) sign = link; |
|
| 387 |
} |
|
| 388 |
} |
|
| 424 |
if (link.startsWith("lgly")) {
|
|
| 425 |
int idx = link.indexOf(":");
|
|
| 426 |
link = link.substring(idx + 1); |
|
| 427 |
if (!link.startsWith("auto_")) { // non automatic lgly
|
|
| 428 |
if (sign == null) sign = link; |
|
| 429 |
else if (sign.length() + 5 > link.length()) sign = link; |
|
| 389 | 430 |
} |
| 431 |
} |
|
| 432 |
} |
|
| 390 | 433 |
if (sign == null) {
|
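
The added `prefixDef` handling in this revision compiles each `matchPattern` and keeps the corresponding `replacementPattern` so that prefixed pointers (`ggly:…`, `lgly:…`) can later be expanded. As a rough, self-contained sketch of that TEI prefixDef mechanism — the class name and sample values below are invented for illustration and are not part of the TXM sources:

```java
import java.util.HashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

// Illustrative only: expands "prefix:local" pointers using the patterns
// declared by TEI <prefixDef ident=".." matchPattern=".." replacementPattern=".."/>.
public class PrefixDefResolver {

    // ident -> { compiled matchPattern, replacementPattern }
    private final HashMap<String, Object[]> defs = new HashMap<>();

    public void addPrefixDef(String ident, String matchPattern, String replacementPattern) {
        defs.put(ident, new Object[] { Pattern.compile(matchPattern), replacementPattern });
    }

    /** Returns the expanded pointer, or the pointer unchanged if no prefixDef applies. */
    public String resolve(String pointer) {
        int idx = pointer.indexOf(':');
        if (idx <= 0) return pointer;
        Object[] def = defs.get(pointer.substring(0, idx));
        if (def == null) return pointer;
        Matcher m = ((Pattern) def[0]).matcher(pointer.substring(idx + 1));
        return m.matches() ? m.replaceAll((String) def[1]) : pointer;
    }

    public static void main(String[] args) {
        PrefixDefResolver r = new PrefixDefResolver();
        // sample patterns, invented for the example
        r.addPrefixDef("ggly", "(.+)", "ontologies/ggly.xml#$1");
        System.out.println(r.resolve("ggly:entity_12")); // -> ontologies/ggly.xml#entity_12
    }
}
```

The revision itself stores the pair as `Arrays.asList(Pattern.compile(matchPattern), replacementPattern)` keyed by `ident`; the sketch above only restates the same idea with an explicit resolver API.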
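The `link` handler added in the same hunk splits each `@target` value into a word pointer followed by ontology pointers, dropping the first four characters of the word pointer (presumably a `txt:` prefix, given the `!ident.equals("txt")` test on the prefixDef side — that prefix is an assumption here). A minimal illustration of that split, with a made-up target value:

```java
import java.util.Arrays;
import java.util.HashMap;

// Illustrative only: mirrors the target-splitting logic of the new link handler.
public class LinkTargetSplit {
    public static void main(String[] args) {
        HashMap<String, String[]> links = new HashMap<>();
        // Example value, invented: the first token is the word pointer,
        // the remaining tokens point into the ontologies.
        String target = "txt:w_42 ggly:entity_12 lgly:auto_a_round";
        String[] split = target.split(" ", 2);   // [word pointer, remaining pointers]
        links.put(split[0].substring(4),         // drop the assumed 4-char "txt:" prefix -> "w_42"
                  split[1].split(" "));          // ["ggly:entity_12", "lgly:auto_a_round"]
        System.out.println(Arrays.toString(links.get("w_42")));
    }
}
```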