Revision 36
SRC/src/fr/triangle/hyperalign/io/input/HyperalignBuilder.java (revision 36) | ||
---|---|---|
54 | 54 |
import fr.triangle.hyperalign.kernel.DataManager.EquivalenceManager; |
55 | 55 |
import fr.triangle.hyperalign.kernel.DataManager.TerminologyManager; |
56 | 56 |
import fr.triangle.hyperalign.kernel.annot.Metadata; |
57 |
import fr.triangle.hyperalign.kernel.bd.DatabaseManager; |
|
57 |
import fr.triangle.hyperalign.kernel.bd.AnnotationDatabaseManager;
|
|
58 | 58 |
import fr.triangle.hyperalign.kernel.corpus.Corpus; |
59 | 59 |
import fr.triangle.hyperalign.kernel.corpus.ParallelCorpus; |
60 | 60 |
import fr.triangle.hyperalign.kernel.corpus.ParallelCorpusParameters; |
... | ... | |
105 | 105 |
|
106 | 106 |
public void buildProject(String directory, Document document, boolean dropAnnotTable){ |
107 | 107 |
System.out.println("HyperalignBuilder.buildProject() - DIR = "+directory); |
108 |
DatabaseManager dbManager = new DatabaseManager(manager.getAnnotationManager());
|
|
109 |
DatabaseManager.createAnnotationsTable(dropAnnotTable); |
|
108 |
AnnotationDatabaseManager dbManager = new AnnotationDatabaseManager(manager.getAnnotationManager());
|
|
109 |
AnnotationDatabaseManager.createAnnotationsTable(dropAnnotTable);
|
|
110 | 110 |
if (document!=null){ |
111 | 111 |
//ANALYSE PROJECT-NAME |
112 | 112 |
Element root = document.getRootElement(); |
... | ... | |
355 | 355 |
if(corpus.getUrl()!=null){ |
356 | 356 |
builder.buildMetadata(corpus.getUrl()); |
357 | 357 |
System.out.println("HyperalignBuilder.buildParallelCorpus() PATH corpus /"+corpus.getUrl().getPath()); |
358 |
|
|
358 |
|
|
359 | 359 |
try { |
360 | 360 |
corpusDoc = FileManager.openXmlJDOMFile(corpus.getUrl()); |
361 | 361 |
if (corpusDoc!=null){ |
... | ... | |
576 | 576 |
*/ |
577 | 577 |
@SuppressWarnings("unchecked") |
578 | 578 |
public void build(int nbOcc, Element textNode){ |
579 |
System.out.println("HyperTextReader.buildText => "+text.getId());
|
|
579 |
System.out.println("TextBuilder.buildText => "+text.getId());
|
|
580 | 580 |
//debug = new Debug(text.getId()); |
581 | 581 |
//equivalences = new HashMap<String, OccurrenceSet>(); |
582 | 582 |
List<Element> children = textNode.getChildren(); |
... | ... | |
599 | 599 |
xmlId, "text", "", text, "", ""+nb, manager.getAnnotationManager()); |
600 | 600 |
ParallelTextElement father = corpusBuilder.createParallelDivisions(text, oeuvre, true, null); |
601 | 601 |
|
602 |
System.out.println("buildText =>>>>>> ROOT ["+father.getName()+"]"); |
|
602 |
System.out.println(" buildText =>>>>>> ROOT ["+father.getName()+"]");
|
|
603 | 603 |
List<Element> children2 = currentElement.getChildren(); |
604 | 604 |
if(children2!=null){ |
605 | 605 |
++divisionLevel; |
... | ... | |
607 | 607 |
Iterator<Element> it2 = children2.iterator(); |
608 | 608 |
while(it2.hasNext()){ |
609 | 609 |
Element currentElement2 = it2.next(); |
610 |
//System.out.println("buildDivision after ROOT : "+currentElement2.getName() +"with FATHER "+father.getId());
|
|
610 |
System.out.println("buildDivision after ROOT : "+currentElement2.getName() +"with FATHER "+father.getId()); |
|
611 | 611 |
|
612 | 612 |
buildDivision(currentElement2, father); |
613 | 613 |
|
... | ... | |
711 | 711 |
} |
712 | 712 |
Division divFather = father.getDivision(text); |
713 | 713 |
Division div = new Division(divFather, node.getName(), nameDiv, id, xmlId, type, content, text, "", order, manager.getAnnotationManager()); |
714 |
//debug.addToFileForDebug("\t\tTextBuilder.buildDivision() - ID : "+id+" | NAME "+div.getName()+" (xml="+div.getXmlName()+") - TYPE "+div.getType()+" N "+div.getOrder());
|
|
714 |
//System.out.println("DIVISION (ID : "+id+" | NAME "+div.getName()+" (xml="+div.getXmlName()+") - TYPE "+div.getType()+" N "+div.getOrder());
|
|
715 | 715 |
|
716 | 716 |
if(node.getName().equals(TEITags.TEI_SEG)||node.getName().equals(TEITags.TEI_HEAD)){ |
717 | 717 |
if(!isSegForAnnotation){ |
... | ... | |
1310 | 1310 |
} |
1311 | 1311 |
return ref; |
1312 | 1312 |
} |
1313 |
|
|
1314 |
|
|
1313 |
|
|
1315 | 1314 |
public String getText(){ |
1316 | 1315 |
return textMessage; |
1317 | 1316 |
} |
... | ... | |
1426 | 1425 |
try { |
1427 | 1426 |
currentTextURL = new URL(corpusFile.getPath()); |
1428 | 1427 |
System.out.println("HyperalignBuilder.buildParallelCorpus : PATH = "+corpusFile.getAbsolutePath()); |
1429 |
manager.getAnnotationManager().setNbAnnots(new Integer(nbAnnots).intValue()); |
|
1428 |
if(!nbAnnots.equals("")){ |
|
1429 |
manager.getAnnotationManager().setNbAnnots(new Integer(nbAnnots).intValue()); |
|
1430 |
} |
|
1430 | 1431 |
manager.initCorpusManager(currentTextURL, corpusName, new Boolean(isToIndex).booleanValue(), new Boolean(isParallelCorpus).booleanValue(), |
1431 | 1432 |
new Boolean(isHyperalignModel).booleanValue(), new Boolean(isTreeTagger).booleanValue(), idPath, langPath, textPath); |
1432 | 1433 |
manager.getCorpusManager().buildCorpus(); |
... | ... | |
1440 | 1441 |
if(root!=null){ |
1441 | 1442 |
System.out.println("CorpusBuilder.buildParallelElements () /"+root.getId()+"/"); |
1442 | 1443 |
manager.getCorpusManager().buildAnnotationsForDivision(root); |
1443 |
Vector<ParallelTextElement> children = root.children(); |
|
1444 |
Vector<ParallelTextElement> children = root.childrenVect();
|
|
1444 | 1445 |
if(children.size()!=0) { |
1445 | 1446 |
for(int i = 0 ; i < children.size() ; ++i){ |
1446 | 1447 |
ParallelTextElement newRoot = (ParallelTextElement) children.get(i); |
... | ... | |
1503 | 1504 |
|
1504 | 1505 |
|
1505 | 1506 |
/** |
1506 |
* |
|
1507 |
* @param textURL
|
|
1507 |
* Creates TEI headers for all texts in the corpus.
|
|
1508 |
* @param urlCorpus the URL of the corpus for which metadata is built for each TEI text
|
|
1508 | 1509 |
* @return |
1509 | 1510 |
*/ |
1510 |
protected Vector<HyperalignText> buildTextReferences(URL textURL){
|
|
1511 |
protected Vector<HyperalignText> buildTextReferences(URL urlCorpus){
|
|
1511 | 1512 |
Vector<HyperalignText> texts = new Vector<HyperalignText>(); |
1512 |
if(textURL!=null){
|
|
1513 |
if(urlCorpus!=null){
|
|
1513 | 1514 |
Document textDoc; |
1514 | 1515 |
try { |
1515 |
textDoc = FileManager.openXmlJDOMFile(textURL);
|
|
1516 |
textDoc = FileManager.openXmlJDOMFile(urlCorpus);
|
|
1516 | 1517 |
if (textDoc!=null){ |
1517 | 1518 |
//BUILD METADATA of each TEXT in the CORPUS |
1518 | 1519 |
Element root = textDoc.getRootElement(); |
... | ... | |
1812 | 1813 |
if(loc!=null){ |
1813 | 1814 |
|
1814 | 1815 |
String idEqui = currentEl.getAttributeValue(HyperalignTags.HM_EQUIVALENCE_ID_ATT); |
1815 |
Vector<OccurrenceSet> occsInEqui = new Vector<OccurrenceSet>();
|
|
1816 |
HashMap<OccurrenceSet, String> occsInEqui = new HashMap<OccurrenceSet, String>();
|
|
1816 | 1817 |
List<Element> children2 = currentEl.getChildren(); |
1817 | 1818 |
String comment = ""; |
1819 |
String occComment = ""; |
|
1818 | 1820 |
EquivalenceElement currentEquivalence = null; |
1819 | 1821 |
for(int j = 0 ; j < children2.size() ; ++j){ |
1820 | 1822 |
Element currentElement2 = children2.get(j); |
1821 | 1823 |
if(currentElement2.getName().equals(HyperalignTags.HM_EQUIVALENCE_WORD)){ |
1822 | 1824 |
String attrId = currentElement2.getAttributeValue(HyperalignTags.HM_EQUIVALENCE_ID_ATT); |
1823 | 1825 |
String attrTextId = currentElement2.getAttributeValue(HyperalignTags.HM_EQUIVALENCE_REFTEXT_ATT); |
1824 |
|
|
1826 |
|
|
1827 |
List<Element> children3 = currentElement2.getChildren(); |
|
1828 |
for(int k = 0 ; k < children3.size() ; ++k){ |
|
1829 |
Element currentElement3 = children3.get(k); |
|
1830 |
if(currentElement3.getName().equals(HyperalignTags.HM_EQUIVALENCE_WORD_COMMENT)){ |
|
1831 |
if(currentElement3.getText()!=null){ |
|
1832 |
occComment = currentElement3.getText(); |
|
1833 |
} |
|
1834 |
} |
|
1835 |
} |
|
1836 |
|
|
1825 | 1837 |
HyperalignText text = manager.getCorpusManager().getCorpus().getTextById(attrTextId); |
1826 | 1838 |
OccurrenceSet occ = manager.getAnnotationManager().findEquivalenceAnnotation(div, attrId, text); |
1827 | 1839 |
if(text!=null){ |
... | ... | |
1832 | 1844 |
System.out.println("\t\t WARNING : text is null"); |
1833 | 1845 |
} |
1834 | 1846 |
if(occ!=null){ |
1835 |
occsInEqui.add(occ);
|
|
1847 |
occsInEqui.put(occ, occComment);
|
|
1836 | 1848 |
}else{ |
1837 | 1849 |
System.out.println("\t\t WARNING : occ "+attrId+" is null"); |
1838 | 1850 |
} |
... | ... | |
1893 | 1905 |
ParallelTextElement textEl = manager.getCorpusManager().findTextElementById(loc); |
1894 | 1906 |
|
1895 | 1907 |
String idEqui = currentElement.getAttributeValue(HyperalignTags.HM_EQUIVALENCE_ID_ATT); |
1896 |
Vector<OccurrenceSet> occsInEqui = new Vector<OccurrenceSet>();
|
|
1908 |
HashMap<OccurrenceSet, String> occsInEqui = new HashMap<OccurrenceSet,String>();
|
|
1897 | 1909 |
List<Element> children2 = currentElement.getChildren(); |
1898 | 1910 |
String comment = ""; |
1911 |
String occComment = ""; |
|
1899 | 1912 |
EquivalenceElement currentEquivalence = null; |
1900 | 1913 |
for(int j = 0 ; j < children2.size() ; ++j){ |
1901 | 1914 |
Element currentElement2 = children2.get(j); |
... | ... | |
1905 | 1918 |
String attrId = currentElement2.getAttributeValue(HyperalignTags.HM_EQUIVALENCE_ID_ATT); |
1906 | 1919 |
String attrLang = currentElement2.getAttributeValue(HyperalignTags.HM_EQUIVALENCE_LANG_ATT); |
1907 | 1920 |
String attrIdText = currentElement2.getAttributeValue(HyperalignTags.HM_EQUIVALENCE_REFTEXT_ATT); |
1921 |
|
|
1922 |
List<Element> children3 = currentElement2.getChildren(); |
|
1923 |
for(int k = 0 ; k < children3.size() ; ++k){ |
|
1924 |
Element currentElement3 = children3.get(k); |
|
1925 |
if(currentElement3.getName().equals(HyperalignTags.HM_EQUIVALENCE_WORD_COMMENT)){ |
|
1926 |
if(currentElement3.getText()!=null){ |
|
1927 |
occComment = currentElement3.getText(); |
|
1928 |
} |
|
1929 |
} |
|
1930 |
} |
|
1908 | 1931 |
|
1909 |
|
|
1910 | 1932 |
HyperalignText text = manager.getCorpusManager().getCorpus().getTextById(attrIdText); |
1911 | 1933 |
System.out.println("buildEquivalences() "+attrId+" - TEXT ["+attrIdText+"] => "+text+" in LOCALISATION "+textEl+" ("+loc+")"); |
1912 | 1934 |
//System.out.println("\t => "+text.getName()+" and xml:id = "+text.getXmlId()); |
... | ... | |
1919 | 1941 |
System.out.println("EquivalenceManager.buildEquivalences() - WARNING : text is null"); |
1920 | 1942 |
} |
1921 | 1943 |
if(occ!=null){ |
1922 |
occsInEqui.add(occ);
|
|
1944 |
occsInEqui.put(occ, occComment);
|
|
1923 | 1945 |
}else{ |
1924 | 1946 |
System.out.println("EquivalenceManager.buildEquivalences() - WARNING : occ "+attrId+" is null"); |
1925 | 1947 |
} |
... | ... | |
2868 | 2890 |
} |
2869 | 2891 |
|
2870 | 2892 |
|
2871 |
|
|
2872 | 2893 |
} |
2894 |
|
|
2895 |
public Object buildParallelDivisionsForNewText(ParallelCorpus corpus, HyperalignText newText) { |
|
2896 |
// TODO: not implemented yet - this stub always returns null |
|
2897 |
return null; |
|
2898 |
} |
|
2873 | 2899 |
|
2900 |
|
|
2901 |
|
|
2874 | 2902 |
} |
Also available in: Unified diff