Révision 3896

TXM/trunk/bundles/org.txm.core/src/java/org/txm/importer/ConvertDocument.java (revision 3896)
1
package org.txm.importer;
2
import java.io.File;
3
import java.util.Arrays;
4
import java.util.List;
5

  
6
import org.jodconverter.core.office.OfficeException;
7
import org.jodconverter.core.office.OfficeManager;
8
import org.jodconverter.local.JodConverter;
9
import org.jodconverter.local.office.LocalOfficeManager;
10

  
11
public class ConvertDocument {
12

  
13
	boolean DEBUG = false;
14
	List<String> supportedInput = Arrays.asList("odt", "doc", "docx", "html", "pdf"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$
15
	List<String> supportedOutput = Arrays.asList("odt", "doc", "pdf"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
16
	OfficeManager officeManager = null;
17

  
18
	public ConvertDocument(String officeHome) throws OfficeException {
19
	    //officeManager.setConnectionProtocol(OfficeConnectionProtocol.PIPE);
20
	    //officeManager.setPipeNames("office1", "office2");
21
	    //officeManager.setTaskExecutionTimeout(30000L);
22
		officeManager = LocalOfficeManager.builder().build();
23
		
24
		try {
25
			officeManager.start();
26
		} catch(Exception e) {
27
			officeManager.stop();
28
			org.txm.utils.logger.Log.printStackTrace(e);
29
		}
30
	}
31
	
32
	public ConvertDocument() throws OfficeException {
33
		
34
		officeManager = LocalOfficeManager.builder().build();
35
		try {
36
			officeManager.start();
37
		} catch(Exception e) {
38
			officeManager.stop();
39
			org.txm.utils.logger.Log.printStackTrace(e);
40
		}
41
	}
42

  
43
	public void setDebug(boolean b) {
44
		DEBUG = b;
45
	}
46
	
47
	public void stop() throws OfficeException {
48
		officeManager.stop();
49
	}
50

  
51
	public File toODT(File document, File outdir) throws Exception {
52
		return auto(document, outdir, "odt"); //$NON-NLS-1$
53
	}
54

  
55
	public File toDOC(File document, File outdir) throws Exception {
56
		return auto(document, outdir, "doc"); //$NON-NLS-1$
57
	}
58

  
59
	public File toPDF(File document, File outdir) throws Exception {
60
		return auto(document, outdir, "pdf"); //$NON-NLS-1$
61
	}
62

  
63
	public File auto(File document, File outdir, String ext) throws Exception {
64
		
65
		//get filename without ext
66
		int idx = document.getName().lastIndexOf("."); //$NON-NLS-1$
67
		if (idx == -1) return null;
68
		String name = document.getName().substring(0, idx-1);
69
		
70
		File outfile = new File(outdir, name+".odt"); //$NON-NLS-1$
71
		try {
72
			return autoFile(document, outfile, ext);
73
		} catch(Exception e) {
74
			if (e instanceof OfficeException) {
75
				OfficeException oe = (OfficeException) e;
76
				if (oe.getMessage().contains("officeHome not set")) {
77
					throw new OfficeException("Could not find LibreOffice or OpenOffice 3.x installation. Reason="+e.getMessage());
78
				} else if (oe.getMessage().contains("is already running")) {
79
					String pid = oe.getMessage();
80
					if (pid.indexOf("pid") > 0) {
81
						pid = pid.substring(pid.indexOf("pid"));
82
					}
83
					throw new OfficeException("Could not run LibreOffice or OpenOffice because a 'soffice' process is already running (process id="+pid+"). Please kill the process and restart. Reason="+e.getMessage());
84
				} else
85
					throw e;
86
			} else
87
				throw e;
88
		}
89
	}
90

  
91
	public File autoFile(File document, File outdir, String ext) throws Exception {
92
		
93
		ConvertDocument converter = new ConvertDocument();
94
		if ("doc".equals(ext.toLowerCase())) {
95
			return converter.toDOC(document, outdir);
96
		} else if ("odt".equals(ext.toLowerCase())) {
97
			return converter.toODT(document, outdir);
98
		} if ("pdf".equals(ext.toLowerCase())) {
99
			return converter.toPDF(document, outdir);
100
		} else {
101
			return null;
102
		}
103
	}
104

  
105
	/**
106
	 * @param args
107
	 * @throws OfficeException 
108
	 */
109
	public static void main(String[] args) throws OfficeException {
110
		//converter.convert(infile, outfile, new )
111
		//		String[] srcexts = { "doc" };
112
		//		String[] exts = { "odt" };//, "odt", "doc", "docx", "html", "pdf"}; // html pdf
113
		//		for(String srcext : srcexts) {
114
		//			File infile = new File("/home/mdecorde/Bureau/matrice/témoignages/CONVERSIONS/orig."+srcext);
115
		//			for(String ext : exts) {
116
		//				System.out.println("Convert from "+srcext+" to "+ext);
117
		//				OfficeManager officeManager = null;
118
		//				try {
119
		//					officeManager = new DefaultOfficeManagerConfiguration().buildOfficeManager();
120
		//					officeManager.start();
121
		//
122
		//					OfficeDocumentConverter converter = new OfficeDocumentConverter(officeManager);
123
		//					System.out.println(converter.getFormatRegistry().getClass());
124
		//					File outfile = new File("/home/mdecorde/Bureau/matrice/témoignages/CONVERSIONS/jod/"+srcext+"."+ext);
125
		//					converter.convert(infile, outfile, converter.getFormatRegistry().getFormatByExtension(ext));
126
		//
127
		//				} catch(Exception e) {
128
		//					System.out.println(e.getLocalizedMessage());
129
		//				} finally {
130
		//					officeManager.stop();
131
		//				}
132
		//			}
133
		//		}
134

  
135
		File infile = new File("/home/mdecorde/TEMP/Corpus_Riverains_TXM/EP27_txm.doc"); //$NON-NLS-1$
136
		File outdir = new File("/home/mdecorde/TEMP"); //$NON-NLS-1$
137
		JodConverter.convert(infile).to(new File(outdir, "test.odt"));
138
		//System.out.println("result: "+ConvertDocument.toODT(infile, outdir));
139
		ConvertDocument convert = new ConvertDocument();
140
		try {
141
			System.out.println("result: "+convert.toODT(infile, outdir)); //$NON-NLS-1$
142
		} catch(Exception e) { org.txm.utils.logger.Log.printStackTrace(e);}
143
		convert.stop();
144
	}
145

  
146
}
TXM/trunk/bundles/org.txm.libs.jodconverter/src/org/txm/libs/jodconverter/ConvertDocument.java (revision 3896)
7 7
import org.jodconverter.core.office.OfficeUtils;
8 8
import org.jodconverter.local.JodConverter;
9 9
import org.jodconverter.local.office.LocalOfficeManager;
10
import org.jodconverter.local.office.LocalOfficeManager.Builder;
10 11

  
11 12
public class ConvertDocument {
12 13
	
13
	public static void test2(File inputFile, File outputFile) throws OfficeException {
14
		// Create an office manager using the default configuration.
15
		// The default port is 2002. Note that when an office manager
16
		// is installed, it will be the one used by default when
17
		// a converter is created.
18
		final LocalOfficeManager officeManager = LocalOfficeManager.install(); 
14
	public static void convert(File inputFile, File outputFile) throws OfficeException {
15
		convert(inputFile, outputFile, null);
16
	}
17
	
18
	public static void convert(File inputFile, File outputFile, String officePath) throws OfficeException {
19
		
20
		Builder builder = LocalOfficeManager.builder().install();
21
		if (officePath != null && officePath.length() > 0) {
22
			builder.officeHome(officePath);
23
		}
24
		OfficeManager officeManager = builder.build();
19 25
		try {
20

  
21
		    // Start an office process and connect to the started instance (on port 2002).
22
		    officeManager.start();
23

  
24
		    // Convert
25
		    JodConverter
26
		             .convert(inputFile)
27
		             .to(outputFile)
28
		             .execute();
26
			// Start an office process and connect to the started instance (on port 2002).
27
			officeManager.start();
28
			// Convert
29
			JodConverter
30
			.convert(inputFile)
31
			.to(outputFile)
32
			.execute();
29 33
		} finally {
30
		    // Stop the office process
31
		    OfficeUtils.stopQuietly(officeManager);
34
			// Stop the office process
35
			OfficeUtils.stopQuietly(officeManager);
32 36
		}
33 37
	}
34 38
	
35
	public static void test1(File inputFile, File outputFile) throws OfficeException {
39
	public static void setOfficeHome(String officeHome) {
36 40
		
37
		 OfficeManager officeManager = LocalOfficeManager.builder()
38
	                .install()
39
	                .officeHome("/opt/libreoffice7.5")
40
	                .build();
41
	        try {
42
	            // Start an office process and connect to the started instance (on port 2002).
43
	            officeManager.start();
44
	            // Convert
45
	            JodConverter
46
	                    .convert(inputFile)
47
	                    .to(outputFile)
48
	                    .execute();
49
	        } finally {
50
	            // Stop the office process
51
	            OfficeUtils.stopQuietly(officeManager);
52
	        }
41
		if (officeHome == null) {
42
			System.setProperty("office.home", "");
43
		} else {
44
			System.setProperty("office.home", officeHome);
45
			
46
			Builder builder = LocalOfficeManager.builder().install();
47
			builder.officeHome(officeHome);
48
		}
53 49
	}
54 50
	
55 51
	public static void main(String[] args) throws OfficeException {
56 52
		
57
		File inputFile = new File("/home/mdecorde/Documents/FICHE d'inscription à une formation ENS - DECORDE.doc");
58
		File outputFile = new File("/home/mdecorde/Documents/FICHE d'inscription à une formation ENS - DECORDE.odt");
53
		File inputFile = new File("/home/mdecorde/xml/doc/Formulaire_SFT 2022 - Decorde.docx");
54
		File outputFile = new File("/home/mdecorde/xml/doc/Formulaire_SFT 2022 - Decorde.odt");
59 55
		
60
		test2(inputFile, outputFile);
56
		convert(inputFile, outputFile);
61 57
	}
62 58
}
TXM/trunk/bundles/org.txm.libs.jodconverter/META-INF/MANIFEST.MF (revision 3896)
15 15
 org.jodconverter.local.office,
16 16
 org.jodconverter.local.office.utils,
17 17
 org.jodconverter.local.process,
18
 org.jodconverter.local.task
18
 org.jodconverter.local.task,
19
 org.txm.libs.jodconverter
19 20
Require-Bundle: org.txm.libs.gson;bundle-version="2.8.6"
20 21
Bundle-Vendor: JodConverter
21 22
Automatic-Module-Name: org.txm.libs.jodconverter
TXM/trunk/bundles/org.txm.groovy.core/src/groovy/org/txm/macro/text/Text2TXTMacro.groovy (revision 3896)
1 1
package org.txm.macro.text;
2 2
// STANDARD DECLARATIONS
3 3

  
4
import org.kohsuke.args4j.*
5 4
import groovy.transform.Field
6
import org.txm.rcp.swt.widget.parameters.*
7
import org.txm.doc.*;
8
import org.txm.importer.*;
5
import org.txm.libs.jodconverter.ConvertDocument
9 6

  
10 7
// BEGINNING OF PARAMETERS
11 8
@Field @Option(name="inputDirectory", usage="the directory containing the DOC/ODT/RTF files to convert", widget="Folder", required=true, def="")
......
19 16

  
20 17
boolean debug = false;
21 18

  
22
ConvertDocument converter;
23 19
def files = []
24 20
try {
25
	converter = new ConvertDocument();
26 21
	inputDirectory.eachFileMatch(~/.+\.$extension/) { docFile ->
27 22
		String name = docFile.getName()
28 23
		name = name.substring(0, name.lastIndexOf("."))
29 24
		def txtFile = new File(docFile.getParentFile(), name+".txt")
30
		converter.autoFile(docFile, txtFile, "txt")
25
		ConvertDocument.convert(docFile, txtFile)
31 26
		files << docFile
32 27
	}
33 28
} catch(Exception e) {
34 29
	println "Error while processing directory: "+e;
35 30
	if (debug) e.printStackTrace();
36 31
}
37
if (converter != null) converter.stop();
38 32

  
39 33
println "Processed directory: $inputDirectory"
40 34
println "files: "+files
TXM/trunk/bundles/org.txm.groovy.core/src/groovy/org/txm/macro/text/SetOfficeMacro.groovy (revision 3896)
1
// STANDARD DECLARATIONS
// Macro: registers the LibreOffice/OpenOffice installation directory used by
// the document converter (ConvertDocument.setOfficeHome).
// The package matches the directory of this script (org/txm/macro/text), like
// the other macros of this directory.
package org.txm.macro.text

import org.kohsuke.args4j.*
import groovy.transform.Field
import org.txm.rcp.swt.widget.parameters.*
import org.txm.libs.jodconverter.ConvertDocument

// BEGINNING OF PARAMETERS

@Field @Option(name="office_path", usage="Path to LibreOffice or OpenOffice installation directory", widget="Folder", required=false, def="libreoffice or openoffice install directory")
def office_path

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return;

// END OF PARAMETERS

// Validate the selected directory before touching the configuration
if (office_path == null) {
	println "No path to office directory given."
	return
}

if (!office_path.exists()) {
	println "'$office_path' directory not found."
	return
}

if (!office_path.isDirectory()) {
	println "'$office_path' exists but is not a directory."
	return
}

if (!office_path.canExecute()) {
	println "'$office_path' exists but has not sufficent rights to be used."
	return
}

// Remember the previous value so it can be reported after the change
def old = System.getProperty("office.home")
ConvertDocument.setOfficeHome(office_path.getAbsolutePath())
println "Office path set to '"+System.getProperty("office.home")+"'."
if (old != null) {
	println "	Previous path was '${old}'."
}
TXM/trunk/bundles/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/export/office/SetOfficeMacro.groovy (revision 3896)
1
// STANDARD DECLARATIONS
// Macro: stores the LibreOffice/OpenOffice installation directory in the
// "office.home" system property.
package org.txm.macro.office

import org.kohsuke.args4j.*
import groovy.transform.Field
import org.txm.rcp.swt.widget.parameters.*

// BEGINNING OF PARAMETERS

@Field @Option(name="office_path", usage="Path to LibreOffice or OpenOffice installation directory", widget="Folder", required=false, def="libreoffice or openoffice install directory")
def office_path

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return;

// END OF PARAMETERS

// Find the first reason to reject the selected path, if any
String rejection = null
if (office_path == null) {
	rejection = "No path to office directory given."
} else if (!office_path.exists()) {
	rejection = "'$office_path' directory not found."
} else if (!office_path.isDirectory()) {
	rejection = "'$office_path' exists but is not a directory."
} else if (!office_path.canExecute()) {
	rejection = "'$office_path' exists but has not sufficent rights to be used."
}

if (rejection != null) {
	println rejection
	return
}

// Swap the property and report both the new and the previous values
def previous = System.getProperty("office.home")
System.setProperty("office.home", office_path.getAbsolutePath())
println "Office path set to '"+System.getProperty("office.home")+"'."
if (previous != null) {
	println "	Previous path was '${previous}'."
}
TXM/trunk/bundles/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/Transana2TRS.groovy (revision 3896)
58 58
import java.text.ParseException;
59 59
import java.text.SimpleDateFormat;
60 60
import java.util.Date;
61
import org.txm.libs.jodconverter.ConvertDocument
61 62

  
62 63
class Transana2TRS {
64
	
63 65
	boolean debug = false;
64 66
	boolean isDirectory = false;
65 67
	File outDir;
66 68
	File dtd;
67
	ConvertDocument converter;
68 69
	def monitor
69 70

  
70 71
	Transana2TRS(File outDir, File dtd, boolean debug, def monitor) {
......
92 93
		boolean ret = true
93 94
		try {
94 95
			println "* Processing $dir directory"
95
			converter = new ConvertDocument();
96 96
			def files = dir.listFiles()
97 97
			def okfiles = [];
98 98
			if (files != null)
......
126 126
			println "Error while processing directory: "+e;
127 127
			if (debug) e.printStackTrace();
128 128
		}
129
		if (converter != null) converter.stop();
130 129
		return ret;
131 130
	}
132 131

  
......
202 201
	boolean DOCtoHTML(File docFile, File htmlFile) {
203 202
		println "*** ODT -> HTML"
204 203
		try {
205
			if (!isDirectory) converter = new ConvertDocument();
206
			converter.setDebug(debug)
207
			converter.autoFile(docFile, htmlFile, "html")
204
			ConvertDocument.convert(docFile, htmlFile)
208 205
		} catch(Exception e) {
209 206
			println "Error while converting $docFile : $e"
210 207
			if (debug) e.printStackTrace()
211 208
		} finally {
212
			if (!isDirectory && converter != null) converter.stop()
213 209
		}
214 210
		return htmlFile.exists() && htmlFile.length() > 0
215 211
	}
TXM/trunk/bundles/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/TextTranscription2TRS.groovy (revision 3896)
36 36

  
37 37
import org.txm.scripts.importer.*;
38 38
import org.xml.sax.Attributes;
39
import org.txm.importer.ConvertDocument;
40 39
import org.txm.importer.scripts.filters.*;
41 40

  
42 41
import java.io.File;
......
56 55
import java.text.ParseException;
57 56
import java.text.SimpleDateFormat;
58 57
import java.util.Date;
58
import org.txm.libs.jodconverter.ConvertDocument
59

  
59 60
/**
60 61
 * BUGS:
61 62
 * - n'a pas repéré les timings autour des commentaires
......
68 69
	boolean debug = false;
69 70
	boolean isDirectory = false;
70 71
	File outDir;
71
	ConvertDocument converter;
72 72
	def monitor
73 73

  
74 74
	TextTranscription2TRS(File outDir, boolean debug, def monitor) {
......
95 95
		boolean ret = true
96 96
		try {
97 97
			println "* Processing $dir directory"
98
			converter = new ConvertDocument();
99 98
			def files = dir.listFiles()
100 99
			def okfiles = [];
101 100
			if (files != null)
......
129 128
			println "Error while processing directory: "+e;
130 129
			if (debug) e.printStackTrace();
131 130
		}
132
		if (converter != null) converter.stop();
133 131
		return ret;
134 132
	}
135 133

  
......
198 196
	boolean DOCtoTXT(File docFile, File txtFile) {
199 197
		println "*** ODT -> TXT"
200 198
		try {
201
			if (!isDirectory) converter = new ConvertDocument();
202
			converter.setDebug(debug)
203
			converter.autoFile(docFile, txtFile, "txt")
199
			ConvertDocument.convert(docFile, txtFile)
204 200
		} catch(Exception e) {
205 201
			println "Error while converting $docFile : $e"
206 202
			if (debug) e.printStackTrace()
207
		} finally {
208
			if (!isDirectory && converter != null) converter.stop()
209 203
		}
210 204
		return txtFile.exists() && txtFile.length() > 0
211 205
	}
212 206

  
213
//	boolean HTMLtoHTMLforTidy(File htmlFile, File htmlFile2) {
214
//		println "*** HTML -> HTML for tidy"
215
//		try {
216
//			println "replace TABS with 4 spaces"
217
//			String text2 = htmlFile.getText("UTF-8")
218
//			text2 = text2.replaceAll("&nbsp;", " ")
219
//			text2 = text2.replaceAll("’", "'")
220
//			text2 = text2.replaceAll("&rsquo;", "'")
221
//			text2 = text2.replaceAll("\t", "&nbsp;&nbsp;&nbsp;&nbsp;")
222
//			text2 = text2.replaceAll("\n", '&nbsp;')
223
//			text2 = text2.replaceAll("\r\n", '&nbsp;')
224
//			htmlFile2.withWriter("UTF-8") { writer ->
225
//				writer.write(text2);
226
//			}
227
//		} catch(Exception e) {
228
//			println "Error while preparing HTML of $htmlFile : $e"
229
//			if (debug) e.printStackTrace()
230
//		}
231
//		return htmlFile2.exists() && htmlFile2.length() > 0
232
//	}
233
//
234
//	boolean HTMLtoXHTML(File htmlFile2, File xhtmlFile) {
235
//		println "*** HTML for tidy -> XHTML"
236
//		try {
237
//			Tidy tidy = new Tidy(); // obtain a new Tidy instance
238
//			tidy.setXHTML(true); // set desired config options using tidy setters
239
//			tidy.setInputEncoding("UTF-8")
240
//			tidy.setOutputEncoding("UTF-8")
241
//			tidy.setShowErrors(100)
242
//			tidy.setShowWarnings(debug)
243
//			tidy.setTabsize(10)
244
//			tidy.setWraplen(9999)
245
//			tidy.setForceOutput(true) // Tidy won't stop if error are found
246
//			xhtmlFile.withWriter("UTF-8") { out ->
247
//				def input = new InputStreamReader(htmlFile2.toURI().toURL().newInputStream(), "UTF-8")
248
//				tidy.parse(input, out); // run tidy, providing an input and output stream
249
//			}
250
//			if (xhtmlFile.exists()) {
251
//				// JTidy produced a "0x0" char. removing them
252
//				// fix separated < and / ???
253
//				def c = Character.toChars(0)[0]
254
//				String txttmp = xhtmlFile.getText("UTF-8");
255
//				xhtmlFile.withWriter("UTF-8") { out ->
256
//					out.write(txttmp.replace("<\n/", "</").replace("<\r\n/", "</"))
257
//				}
258
//			}
259
//		} catch(Exception e) {
260
//			println "Error while applying JTidy: "+e
261
//			if (debug) e.printStackTrace()
262
//		}
263
//		return xhtmlFile.exists() && xhtmlFile.length() > 0
264
//	}
265

  
266 207
	boolean TXTtoTRS(File txtFile, File trsFile) {
267 208
		println "*** TXT -> TRS"
268 209
		try {
TXM/trunk/bundles/org.txm.groovy.core/src/groovy/org/txm/scripts/clix/treetagger-wrapper-definition.xml (revision 3896)
1
<?xml version="1.0">
1
<?xml version="1.0"?>
2 2
<application name="TreeTagger" version="0.0.0" desc="Tag files">
3 3
	<progs>
4 4
		<prog exec="tree-tagger" version="0.4.6" desc="Tag">
......
10 10
				<arg state="optional" type="none" name="prob" desc="Print tag probabilities"/>
11 11
				<arg state="optional" type="none" name="ignore-prefix" desc=" Ignore prefix when guessing pos for unknown words"/>
12 12
				<arg state="optional" type="none" name="no-unknown" desc="Print the token rather than [unknown] for unknown lemma"/>
13
				<arg state="optional" type="none" name="cap-heuristics" desc="Look up unknown capitalized words in the list of lower-case words"/>
14 13
				<arg state="optional" type="none" name="hyphen-heuristics" desc="Turn on the heuristics fur guessing the parts of speech of unknown hyphenated words"/>
15 14
				<arg state="optional" type="none" name="quiet" desc="quiet mode"/>
16 15
				<arg state="optional" type="none" name="pt-with-prob" desc="pretagging with probabilities"/>
TXM/trunk/bundles/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/doc/docLoader.groovy (revision 3896)
133 133
	if (infile.isDirectory()) {
134 134
		File unzipDir = new File(docfiles, infile.getName())
135 135
		infile.renameTo(unzipDir)
136

  
136
		
137 137
		//println "zipdir "+unzipDir
138 138
		StylesToCSS converter = new StylesToCSS(unzipDir);
139 139
		if (!converter.process(new File(unzipDir, "style.css"))) {
140 140
			println "WARNING: Failed to build css file of $unzipDir"
141 141
		}
142 142
		// and get the soft page breaks and styles parents
143
		def parentStyles = converter.parentStyles;
143
		def parentStyles = converter.parentStyles
144 144
		def beforebreaks = converter.beforebreaks
145 145
		def afterbreaks = converter.afterbreaks
146

  
146
		
147 147
		//println "BEFORES: "+beforebreaks
148 148
		//println "AFTERS: "+afterbreaks
149 149
		//println "PARENTS: "+parentStyles
150

  
150
		
151 151
		// se servir de ça pour insérer <pb/> et remplacer styles automatiques
152 152
		File xmlFile = new File(txmDir, unzipDir.getName().substring(6))
153 153
		//println "PATCH : $xmlFile"
TXM/trunk/bundles/org.txm.groovy.core/src/groovy/org/txm/scripts/doc/DocumentToTei.groovy (revision 3896)
30 30
import java.io.File;
31 31

  
32 32
import org.txm.importer.ApplyXsl2;
33
import org.txm.importer.ConvertDocument;
34 33
import org.txm.utils.zip.Zip
35 34
import org.txm.utils.FileUtils
35
import org.txm.libs.jodconverter.ConvertDocument
36 36

  
37 37
/**
38 38
 * The Class DocumentToTei.
......
49 49
		if (!xsldir.exists()) { println "XslDir does not exists: "+xsldir; return false;}
50 50
		if (!xslOdtTei.exists()) { println "xslOdtTei file does not exists: "+xslOdtTei; return false;}
51 51

  
52
		ConvertDocument converter;
53 52
		for (File file : files) {
54 53
			print "."
55 54
			String name = file.getName();
......
69 68
				File odtFile = File.createTempFile("workflowdocx", "sfsdf.odt", outdir);
70 69
				
71 70
				try {
72
					converter = new ConvertDocument();
73
					converter.setDebug(DEBUG)
74
					converter.autoFile(file, odtFile, "odt")
71
					ConvertDocument.convert(file, odtFile)
75 72
					if (!new DocumentToTei().run(odtFile, xslOdtTei, teifile)) {
76 73
						println "Docx to Odt to Tei failed: "+file
77 74
						odtFile.delete();
......
79 76
				} catch(Exception e) { println "DOCX to ODT to TEI failed: $file: $e"; }
80 77
				finally { 
81 78
					odtFile.delete();
82
					if (converter != null) converter.stop();
83 79
				}
84 80
				
85 81
			} else if (FileUtils.isExtension(file, "doc")) {
......
88 84
				File odtFile = File.createTempFile("workflowdoc", "sfsdf.odt", outdir);
89 85
				
90 86
				try {
91
					converter = new ConvertDocument();
92
					converter.setDebug(DEBUG)
93
					converter.autoFile(file, odtFile, "odt")
87
					ConvertDocument.convert(file, odtFile)
94 88
					if (!new DocumentToTei().run(odtFile, xslOdtTei, teifile)) {
95 89
						println "DOC to ODT to TEI failed: "+file
96 90
						odtFile.delete();
......
98 92
				} catch(Exception e) { println "DOC to ODT to TEI failed: $file: $e"; }
99 93
				finally {
100 94
					odtFile.delete();
101
					if (converter != null) converter.stop();
102 95
				}
103
				
104 96
			}
105 97
		}
106 98

  
TXM/trunk/bundles/org.txm.groovy.core/src/groovy/org/txm/scripts/doc/Workflow.groovy (revision 3896)
32 32
//
33 33
package org.txm.scripts.doc
34 34

  
35
import org.txm.importer.ConvertDocument;
35
import org.txm.libs.jodconverter.ConvertDocument
36 36

  
37 37
// TODO: Auto-generated Javadoc
38 38
/* (non-Javadoc)
......
55 55

  
56 56
println "start"
57 57
for (File file : srcdir.listFiles()) {
58
	ConvertDocument converter = new ConvertDocument();
59 58
	try {
60 59
		File teifile = new File(outdir, file.getName()+".xml");
61 60
		if (file.getName().endsWith(".odt")) {
......
66 65
		} else if (file.getName().endsWith(".docx")) {
67 66
			File odtFile = File.createTempFile("workflowodt", "sfsdf.odt", srcdir);
68 67
			// convert doc to odt
69
			converter.autoFile(file, odtFile, "odt")
68
			ConvertDocument.convert(file, odtFile)
70 69
			if (!new DocumentToTei().run(odtFile, xslOdtTei, teifile)) {
71 70
				println "Docx to Odt to Tei failed: "+file
72 71
				odtFile.delete();
......
76 75
		} else if (file.getName().endsWith(".doc")) {
77 76
			File odtFile = File.createTempFile("workflowodt", "sfsdf.odt", srcdir);
78 77
			// convert doc to odt
79
			converter.autoFile(file, odtFile, "odt")
78
			ConvertDocument.convert(file, odtFile)
80 79
			if (!new DocumentToTei().run(odtFile, xslOdtTei, teifile)) {
81 80
				println "Doc to Odt to Tei failed: "+file
82 81
				odtFile.delete();
......
86 85
		}
87 86
	}
88 87
	catch(Exception e) { e.printStackTrace() }
89
	finally { converter.stop(); }
88
	finally { }
90 89
}
91 90

  
92 91
println "--Done"
TXM/trunk/bundles/org.txm.treetagger.rcp/src/org/txm/treetagger/rcp/preferences/TreeTaggerPreferencePage.java (revision 3896)
71 71
		runOptions.setLayout(new GridLayout(3, false));
72 72
		
73 73
		this.addField(new BooleanFieldEditor(TreeTaggerPreferences.OPTIONS_UNKNOWN, "Print the token rather than <unknown> for unknown lemma", runOptions));
74
		this.addField(new BooleanFieldEditor(TreeTaggerPreferences.OPTIONS_CAPHEURISTIC, "Look up unknown capitalized words in the list of lower-case words", runOptions));
75 74
		this.addField(new BooleanFieldEditor(TreeTaggerPreferences.OPTIONS_HYPHENHEURISTIC, "Turn on the heuristics fur guessing the parts of speech of unknown hyphenated words", runOptions));
76 75
		this.addField(new BooleanFieldEditor(TreeTaggerPreferences.OPTIONS_PROB, "Print tag probabilities", runOptions));
77 76
		this.addField(new FileFieldEditor(TreeTaggerPreferences.OPTIONS_LEX, "Read auxiliary lexicon entries from a file", runOptions));
TXM/trunk/bundles/org.txm.rcp/src/main/java/org/txm/rcp/corpuswizard/ImportWizard.java (revision 3896)
105 105
					project.setAnnotate(e.isRunning());
106 106
				}
107 107
				
108
				if (project.getAnnotate()) {
109
					Log.info(TXMUIMessages.TheAnnotateImportParameterHasBeenActivatedSinceTreeTaggerIsInstalled);
110
				}
111
				else {
112
					Log.info(TXMUIMessages.TheAnnotateImportParameterWasNotActivatedSinceTreeTaggerIsNotInstalled);
113
				}
108
//				if (project.getAnnotate()) {
109
//					Log.info(TXMUIMessages.TheAnnotateImportParameterHasBeenActivatedSinceTreeTaggerIsInstalled);
110
//				}
111
//				else {
112
//					Log.info(TXMUIMessages.TheAnnotateImportParameterWasNotActivatedSinceTreeTaggerIsNotInstalled);
113
//				}
114 114
				
115 115
				File importxml = new File(path, "import.xml"); //$NON-NLS-1$
116 116
				if (importxml.exists()) {
......
132 132
//					Log.info(TXMUIMessages.abort);
133 133
//					return false;
134 134
//				}
135
				
136
				
137 135
			}
138 136
			
139 137
			// if (!project.hasEditionDefinition("default")) {
TXM/trunk/bundles/org.txm.utils.core/src/org/txm/utils/treetagger/TreeTagger.java (revision 3896)
217 217
		this.isnounknown = false;
218 218
	}
219 219

  
220
	// Look up unknown capitalized words in the list of lower-case words
221
	/** The iscapheuristics. */
222
	private Boolean iscapheuristics = false;
223

  
224
	/**
225
	 * Setcapheuristics.
226
	 */
227
	public void setcapheuristics() {
228
		this.iscapheuristics = true;
229
	}
230

  
231
	/**
232
	 * Unsetcapheuristics.
233
	 */
234
	public void unsetcapheuristics() {
235
		this.iscapheuristics = false;
236
	}
237

  
238 220
	// Turn on the heuristics for guessing the parts of speech of unknown
239 221
	// hyphenated words
240 222
	/** The ishyphenheuristics. */
......
481 463
			args.add("-ignore-prefix"); //$NON-NLS-1$
482 464
		if (isnounknown)
483 465
			args.add("-no-unknown"); //$NON-NLS-1$
484
		if (iscapheuristics)
485
			args.add("-cap-heuristics"); //$NON-NLS-1$
486 466
		if (ishyphenheuristics)
487 467
			args.add("-hyphen-heuristics"); //$NON-NLS-1$
488 468
		if (isquiet)
TXM/trunk/features/org.txm.core.feature/feature.xml (revision 3896)
192 192
         version="0.0.0"
193 193
         unpack="false"/>
194 194

  
195
   <plugin
196
         id="org.txm.libs.jodconverter"
197
         download-size="0"
198
         install-size="0"
199
         version="0.0.0"
200
         unpack="false"/>
201

  
195 202
</feature>

Formats disponibles : Unified diff