Statistics
| Revision:

root / tmp / org.txm.tigersearch.rcp / groovy / org / txm / scripts / importer / tigersearch / TSImporter.groovy @ 2080

History | View | Annotate | Download (3.9 kB)

1
package org.txm.scripts.importer.tigersearch
2

    
3
import org.txm.Toolbox
4
import org.txm.importer.xtz.ImportModule;
5
import org.txm.metadatas.Metadatas
6
import org.txm.utils.io.FileCopy
7
import org.txm.utils.io.IOUtils
8
import org.txm.importer.xtz.*
9
import org.txm.scripts.importer.xtz.*
10
import org.txm.importer.ApplyXsl2;
11

    
12
/**
 * Importer step for TIGER XML (TIGERSearch) sources.
 *
 * The {@link #process()} method:
 * <ol>
 * <li>loads the optional metadata file of the source directory into a Metadatas object,</li>
 * <li>locates the TIGER XML file(s): the sub-corpus files declared in "main.xml" when that
 * file exists, otherwise the single XML file found in the source directory,</li>
 * <li>copies them to the "src" directory of the binary directory,</li>
 * <li>applies the "ts.xsl" stylesheet to them, writing the result in the "tokenized" directory,</li>
 * <li>runs the doToXMLTXMStep() and doInjectMetadataStep() steps (not defined in this class).</li>
 * </ol>
 *
 * The outcome is reported through the isSuccessFul field.
 *
 * NOTE(review): TBXPreferences and BundleUtils are used below but no explicit import for them
 * is visible in this file - confirm they are resolved by the wildcard imports.
 *
 * @author mdecorde
 */
class TSImporter extends XTZImporter {

	/**
	 * @param module the import module this importer works for; forwarded to the parent constructor
	 */
	public TSImporter(ImportModule module) {
		super(module);
	}

	/**
	 * Runs the TIGER XML import steps. On any error a message is printed,
	 * isSuccessFul is set to false and the method returns early.
	 */
	@Override
	public void process() {

		// prepare metadata if any
		File allMetadataFile = Metadatas.findMetadataFile(inputDirectory);
		if (allMetadataFile != null && allMetadataFile.exists()) {
			File copy = new File(module.getBinaryDirectory(), allMetadataFile.getName())
			if (!FileCopy.copy(allMetadataFile, copy)) {
				println "Error: could not create a copy of the metadata file "+allMetadataFile.getAbsoluteFile();
				// flag the failure like every other error path of this method
				isSuccessFul = false;
				return;
			}
			metadata = new Metadatas(copy,
					Toolbox.getPreference(TBXPreferences.METADATA_ENCODING),
					Toolbox.getPreference(TBXPreferences.METADATA_COLSEPARATOR),
					Toolbox.getPreference(TBXPreferences.METADATA_TXTSEPARATOR), 1)
		}

		File sourceDirectory = inputDirectory
		File binaryDirectory = module.getBinaryDirectory()

		File master = new File(sourceDirectory, "main.xml")

		def xmlFiles = [] // the TIGER XML files

		if (!master.exists()) {
			// no master file: the source directory must contain exactly one candidate XML file
			File[] candidates = sourceDirectory.listFiles(new FileFilter() {
						boolean accept(File file) {
							if (file.isDirectory()) return false;
							if (file.isHidden()) return false;
							String filename = file.getName()
							if (filename.equals("import.xml")) return false; // skipped: not a TIGER XML source
							return filename.endsWith(".xml");
						}
					});

			// listFiles() returns null when the directory cannot be listed: handle it as "no file found"
			xmlFiles = (candidates != null) ? candidates : []

			if (xmlFiles.size() > 1) {
				println "Error, the source directory contains more than one TIGER XML file ?"
				isSuccessFul = false;
				return;
			}
			if (xmlFiles.size() == 0) {
				println "Error no XML file found in $sourceDirectory directory"
				isSuccessFul = false;
				return;
			}
			println "No TIGER XML 'main.xml' file found. Using $xmlFiles as TIGER XML source file "

		} else { // parse the master file
			for (def s : new XmlSlurper().parse(master).body.subcorpus) {
				String name = ""+s.@external
				// only "file:" references are kept, resolved against the source directory
				if (name.startsWith("file:")) {
					xmlFiles << new File(sourceDirectory, name.substring(5))
				}
			}

			if (xmlFiles.size() == 0) {
				println "Error no XML file found in $master file"
				isSuccessFul = false;
				return;
			}
		}

		// get the last version of the TIGER XML -> XML-TXM XSL
		File tsXSLFile = new File(Toolbox.getTxmHomePath(), "xsl/ts.xsl");
		BundleUtils.copyFiles("org.txm.tigersearch.rcp", "groovy", "org/txm/scripts/importer/tigersearch", "ts.xsl", tsXSLFile.getParentFile());

		File xmltxmSrcDir = new File(binaryDirectory, "src"); // output directory of the TS XSL transformation
		xmltxmSrcDir.mkdirs();
		println "Main.xml files: "+xmlFiles
		println "N="+xmlFiles.size()
		for (File xmlTigerFile : xmlFiles) {
			if (!FileCopy.copy(xmlTigerFile, new File(xmltxmSrcDir, xmlTigerFile.getName()))) {
				// the import goes on with the remaining files, but the failure is no longer silent
				println "Warning: could not copy the TIGER XML file $xmlTigerFile to $xmltxmSrcDir"
			}
		}

		File tokenizedDir = new File(binaryDirectory, "tokenized");

		if (!ApplyXsl2.processImportSources(tsXSLFile, xmltxmSrcDir, tokenizedDir)) {
			println "Error while applying TS XSL file to $xmltxmSrcDir"
			isSuccessFul = false;
			return;
		}

		File[] files = tokenizedDir.listFiles(IOUtils.HIDDENFILE_FILTER);
		if (files == null || files.length == 0) {
			println "Error: the $tokenizedDir directory is empty after applying the TS XSL file to $xmltxmSrcDir"
			isSuccessFul = false;
			return;
		}

		if (!doToXMLTXMStep()) return;
		if (!doInjectMetadataStep()) return;

		// the import succeeded if the "txm" directory holds at least one file accepted by the filter;
		// listFiles() returns null when the directory is missing, which is a failure, not a crash
		File txmdDir = new File(binaryDirectory, "txm");
		File[] txmFiles = txmdDir.listFiles(IOUtils.HIDDENFILE_FILTER);
		isSuccessFul = txmFiles != null && txmFiles.length > 0
	}
}