Révision 2897
| tmp/org.txm.analec.rcp/src/org/txm/annotation/urs/export/ExportToGlozzMacro.java (revision 2897) | ||
|---|---|---|
| 21 | 21 |
import java.util.ArrayList; |
| 22 | 22 |
import java.util.HashSet; |
| 23 | 23 |
|
| 24 |
import org.eclipse.osgi.util.NLS; |
|
| 24 | 25 |
import org.txm.annotation.urs.URSCorpora; |
| 25 | 26 |
import org.txm.searchengine.cqp.AbstractCqiClient; |
| 26 | 27 |
import org.txm.searchengine.cqp.CQPSearchEngine; |
| ... | ... | |
| 33 | 34 |
import visuAnalec.donnees.Corpus; |
| 34 | 35 |
import visuAnalec.donnees.Structure; |
| 35 | 36 |
import visuAnalec.elements.Unite; |
| 37 |
import visuAnalec.fichiers.FichiersGlozz.ModelGlozzPrinter; |
|
| 36 | 38 |
|
| 37 | 39 |
public class ExportToGlozzMacro {
|
| 38 | 40 |
|
| 39 | 41 |
public void doExport(MainCorpus corpus, String unit_type, String filename) throws NumberFormatException, IOException, CqiServerError, CqiClientException {
|
| 40 | 42 |
|
| 41 |
|
|
| 43 |
// export the annotation structure as a glozz model |
|
| 44 |
Corpus analecCorpus = URSCorpora.getCorpus(corpus); |
|
| 45 |
if (analecCorpus == null) {
|
|
| 46 |
System.out.println("No Analec corpus associated with $corpus");
|
|
| 47 |
return; |
|
| 48 |
} |
|
| 49 |
PrintWriter model = null; |
|
| 50 |
File fichierModel = new File(filename + ".aam"); |
|
| 51 |
try {
|
|
| 52 |
Structure structure = analecCorpus.getStructure(); |
|
| 53 |
model = new PrintWriter(fichierModel, "UTF-8"); |
|
| 54 |
ModelGlozzPrinter gp = new ModelGlozzPrinter(structure, model); |
|
| 55 |
gp.ecrireModel(); |
|
| 56 |
} catch (Exception ex) {
|
|
| 57 |
System.out.println("Erreur d'écriture de fichier: "+ex);
|
|
| 58 |
} finally {
|
|
| 59 |
if (model!=null) model.close(); |
|
| 60 |
System.out.println(NLS.bind("Corpus written to ''{0}''.", fichierModel));
|
|
| 61 |
} |
|
| 62 |
|
|
| 42 | 63 |
int size = corpus.getSize(); // you may also use: |
| 43 |
// corpus.getTextEndLimits() (= index of
|
|
| 44 |
// last token = size-1)
|
|
| 64 |
// corpus.getTextEndLimits() (= index of |
|
| 65 |
// last token = size-1) |
|
| 45 | 66 |
AbstractCqiClient CQI = CQPSearchEngine.getCqiClient(); |
| 46 | 67 |
Property word = corpus.getWordProperty(); |
| 47 | 68 |
// note: using "lbn" seems to work better than "pn" (some imported |
| ... | ... | |
| 54 | 75 |
|
| 55 | 76 |
String rawText = ""; // the corpus for the .ac file |
| 56 | 77 |
ArrayList<int[]> positions = new ArrayList<int[]>(); // each element is an array |
| 57 |
// [start, end] indicating the
|
|
| 58 |
// position in the rawText
|
|
| 78 |
// [start, end] indicating the |
|
| 79 |
// position in the rawText |
|
| 59 | 80 |
int pnCount = 0; // the par counter, used for indexing the pns array |
| 60 | 81 |
int lastPn = -1; // the last paragraph number |
| 61 | 82 |
ArrayList<int[]> pns = new ArrayList<int[]>(); // each element is an array [start, |
| 62 |
// end] representing the start and
|
|
| 63 |
// end of the paragraph in the
|
|
| 64 |
// rawText
|
|
| 83 |
// end] representing the start and |
|
| 84 |
// end of the paragraph in the |
|
| 85 |
// rawText |
|
| 65 | 86 |
int end = -1; |
| 66 | 87 |
for (int i = 0; i < size; i++) {
|
| 67 | 88 |
String f = CQI.cpos2Str(word.getQualifiedName(), new int[] { i })[0];
|
| ... | ... | |
| 93 | 114 |
|
| 94 | 115 |
// note that unit_type has been defined with an option of the dialog at |
| 95 | 116 |
// the beginning |
| 96 |
Corpus analecCorpus = URSCorpora.getCorpus(corpus); |
|
| 97 | 117 |
|
| 118 |
|
|
| 98 | 119 |
// list of properties |
| 99 | 120 |
|
| 100 | 121 |
Structure struct = analecCorpus.getStructure(); |
Formats disponibles : Unified diff