Révision 3536

TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/pdf/PDF2PNGMacro.groovy (revision 3536)
1
package org.txm.macro.pdf
2
// STANDARD DECLARATIONS
3

  
4
import org.kohsuke.args4j.*
5
import groovy.transform.Field
6
import org.txm.rcp.swt.widget.parameters.*
7
import org.apache.pdfbox.Loader
8
import org.apache.pdfbox.rendering.PDFRenderer
9
import org.apache.pdfbox.tools.imageio.ImageIOUtil
10
import org.apache.pdfbox.rendering.ImageType
11

  
12
// BEGINNING OF PARAMETERS

@Field @Option(name="input_file", usage=".pdf input file", widget="File", required=false, def="")
def input_file

@Field @Option(name="input_dir", usage="The directory containing the .pdf files to read", widget="Folder", required=false, def="")
def input_dir

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return

// END OF PARAMETERS

// Resolution passed both to the renderer and to the PNG writer.
def dpi = 300

// Renders every page of one PDF file to "<base name>-<page number>.png" files
// written next to the PDF (page numbers start at 1).
// Returns true when the whole document was rendered, false when the file is
// not a .pdf file or when an error occurred (the stack trace is printed).
def pdfToPngs = { File pdf ->
	if (!pdf.getName().toUpperCase().endsWith(".PDF")) return false

	// Base name = file name without its last extension
	def baseName = pdf.getName()
	def dot = baseName.lastIndexOf(".")
	if (dot > 0) baseName = baseName.substring(0, dot)
	def outDir = pdf.getAbsoluteFile().getParentFile()
	println "Processing "+baseName+"..."

	try {
		def doc = Loader.loadPDF(pdf)
		try {
			def renderer = new PDFRenderer(doc)
			for (int page = 0; page < doc.getNumberOfPages(); page++) {
				def image = renderer.renderImageWithDPI(page, dpi, ImageType.RGB)
				// withOutputStream closes the stream of each page, even on error;
				// closing a single stream after the loop would leak all but the last one
				new File(outDir, baseName + "-" + (page + 1) + ".png").withOutputStream { out ->
					ImageIOUtil.writeImage(image, "png", out, dpi)
				}
			}
		} finally {
			// always release the document, including when a page fails to render
			doc.close()
		}
		return true
	} catch (Exception e) {
		e.printStackTrace()
		return false
	}
}

if (input_dir != null && input_dir.exists() && input_dir.isDirectory()) {
	// Directory mode: convert every *.pdf file (any letter case) of the directory
	def nFiles = 0
	input_dir.eachFileMatch(~/(?i).*\.pdf/) { f ->
		if (pdfToPngs(f)) nFiles++
	}
	println "Processed "+nFiles+" files."
} else if (input_file != null && input_file.exists()) {
	// Single file mode
	pdfToPngs(input_file)
} else {
	// Both parameters are optional: avoid failing on a missing input_file
	println "No existing input directory or input file to process."
}
79

  
80

  
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/pdf/PDFPropertiesMacro.groovy (revision 3536)
1
package org.txm.macro.pdf
2
// STANDARD DECLARATIONS
3

  
4
import org.kohsuke.args4j.*
5
import groovy.transform.Field
6
import org.txm.rcp.swt.widget.parameters.*
7
import org.apache.pdfbox.Loader
8
import org.apache.pdfbox.text.PDFTextStripper
9

  
10
// BEGINNING OF PARAMETERS

@Field @Option(name="input_file", usage=".pdf input file", widget="File", required=false, def="")
def input_file

@Field @Option(name="input_dir", usage="The directory containing the .pdf files to read", widget="Folder", required=false, def="")
def input_dir

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return

// END OF PARAMETERS

// Properties printed between double quotes in the directory table
def textProperties = ["Author", "Title", "Subject", "Creator", "Producer", "Keywords"]

// Null-safe conversions: a missing property is printed as an empty string
def text = { value -> (value == null) ? "" : value }
def date = { calendar -> (calendar == null) ? "" : calendar.toInstant() }

// Reads the document information of one PDF file and returns it as an ordered
// map (label -> value). The document is always closed, even on error.
// Exceptions raised while loading or reading are left to the caller.
def readProperties = { File pdf ->
	def doc = Loader.loadPDF(pdf)
	try {
		def info = doc.getDocumentInformation()
		return [
			"Author": text(info.getAuthor()),
			"Title": text(info.getTitle()),
			"Subject": text(info.getSubject()),
			"Creator": text(info.getCreator()),
			"Producer": text(info.getProducer()),
			"Creation date": date(info.getCreationDate()),
			"Modification date": date(info.getModificationDate()),
			"Keywords": text(info.getKeywords()),
			"Pages": doc.getNumberOfPages()
		]
	} finally {
		doc.close()
	}
}

if (input_dir != null && input_dir.exists() && input_dir.isDirectory()) {
	// Directory mode: one tab-separated row per *.pdf file (any letter case)
	def nFiles = 0
	def failures = []
	println "File\tAuthor\tTitle\tSubject\tCreator\tProducer\tCreation date\tModification date\tKeywords\tPages"
	input_dir.eachFileMatch(~/(?i).*\.pdf/) { f ->
		try {
			def properties = readProperties(f)
			// The row is built first and printed at once, so that a failure
			// never leaves a truncated row in the table. The file name fills
			// the "File" column announced by the header.
			def cells = ["\"" + f.getName() + "\""]
			properties.each { label, value ->
				cells << ((label in textProperties) ? "\"" + value + "\"" : "" + value)
			}
			println cells.join("\t")
			nFiles++
		} catch (Exception e) {
			// reported after the table to keep the tabular output clean
			failures << f.getName() + ": " + e
		}
	}
	println "Processed "+nFiles+" files."
	failures.each { println "Could not read " + it }
} else if (input_file != null && input_file.exists()) {
	// Single file mode: one "label: value" line per property
	if (input_file.getName().toUpperCase().endsWith(".PDF")) {
		try {
			readProperties(input_file).each { label, value ->
				println label + ": " + value
			}
		} catch (Exception e) {
			println "Could not read " + input_file + ": " + e
		}
	}
} else {
	// Both parameters are optional: avoid failing on a missing input_file
	println "No existing input directory or input file to process."
}
76

  
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/pdf/PDF2TXTMacro.groovy (revision 3536)
7 7
import org.apache.pdfbox.Loader
8 8
import org.apache.pdfbox.text.PDFTextStripper
9 9

  
10
// WARNING THIS MACRO NEEDS THE INSTALLATION OF THE PDFBOX (https://pdfbox.apache.org) Java library: https://dlcdn.apache.org/pdfbox/3.0.0-alpha3/pdfbox-app-3.0.0-alpha3.jar (standalone library)
11

  
12 10
// BEGINNING OF PARAMETERS
13 11

  
14
@Field @Option(name="input_file", usage=".pdf input file", widget="File", required=true, def="")
15
		def input_file
12
@Field @Option(name="input_file", usage=".pdf input file", widget="File", required=false, def="")
13
def input_file
16 14

  
17 15
@Field @Option(name="input_dir", usage="The directory containing the .pdf files to read", widget="Folder", required=false, def="")
18
		def input_dir
16
def input_dir
19 17

  
18

  
20 19
// Open the parameters input dialog box
21 20
if (!ParametersDialog.open(this)) return
22 21

  
23
	// END OF PARAMETERS
22
// END OF PARAMETERS
24 23

  
25
	if (input_dir != null && input_dir.exists() && input_dir.isDirectory()) {
26
		
27
		nFiles = 0
28
		input_dir.eachFileMatch(~/.*.pdf/) { f ->
29
			name = f.getName()
30
			println "Processing $name..."
31
			idx = name.lastIndexOf(".")
32
			if (idx > 0) name = name.substring(0, idx)
33
			outputFile = new File(f.getParentFile(), name + ".txt")
34
			
35
			pdfFile = f.getAbsolutePath()
36
			if (pdfFile.toUpperCase().endsWith(".PDF")) {
37
				textFile = pdfFile.substring(0, pdfFile.length() - 3) + "txt"
38
				try {
39
					outputFile.withWriter("UTF-8") { writer ->
40
						doc = Loader.loadPDF(f)
41
						strip = new PDFTextStripper()
42
						writer.print strip.getText(doc)
43
						doc.close()
44
						writer.close()
45
						nFiles++
46
					}
47
				} catch (Exception e) {
48
					e.printStackTrace()
49
				}
50
			}
51
		}
52
		println "Done processed $nFiles files."
53
	} else if (input_file != null && input_file.exists() && input_file.isFile()) {
54
		
55
		name = input_file.getName()
24
if (input_dir != null &&  input_dir.exists()) {
25
	nFiles = 0
26
	input_dir.eachFileMatch(~/.*.pdf/) { f ->
27
		name = f.getName()
28
		println "Processing "+name+"..."
56 29
		idx = name.lastIndexOf(".")
57 30
		if (idx > 0) name = name.substring(0, idx)
58
		outputFile = new File(input_file.getParentFile(), name + ".txt")
59
		
60
		pdfFile = input_file.getAbsolutePath()
31
		outputFile = new File(f.getParentFile(), name + ".txt")
32

  
33
		pdfFile = f.getAbsolutePath()
61 34
		if (pdfFile.toUpperCase().endsWith(".PDF")) {
62 35
			textFile = pdfFile.substring(0, pdfFile.length() - 3) + "txt"
63 36
			try {
64 37
				outputFile.withWriter("UTF-8") { writer ->
65
					doc = Loader.loadPDF(input_file)
66
					strip = new PDFTextStripper()
67
					writer.print strip.getText(doc)
68
					doc.close()
69
					writer.close()
38
				doc = Loader.loadPDF(f)
39
				strip = new PDFTextStripper()
40
				strip.setSortByPosition(true)
41
				strip.setPageStart("<pb/>")
42
				// strip.setParagraphStart("\n<p>")
43
				// strip.setParagraphEnd("</p>")
44
				writer.print strip.getText(doc)
45
				doc.close()
46
				writer.close()
47
				nFiles++
70 48
				}
71 49
			} catch (Exception e) {
72
				e.printStackTrace()
50
			e.printStackTrace()
73 51
			}
74 52
		}
75 53
	}
54
	println "Processed "+nFiles+" files."
55
} else {
56
	name = input_file.getName()
57
	idx = name.lastIndexOf(".")
58
	if (idx > 0) name = name.substring(0, idx)
59
	outputFile = new File(input_file.getParentFile(), name + ".txt")
60

  
61
	pdfFile = input_file.getAbsolutePath()
62
	if (pdfFile.toUpperCase().endsWith(".PDF")) {
63
		textFile = pdfFile.substring(0, pdfFile.length() - 3) + "txt"
64
		try {
65
			outputFile.withWriter("UTF-8") { writer ->
66
			doc = Loader.loadPDF(input_file)
67
			strip = new PDFTextStripper()
68
			println "SpacingTolerance = "+strip.getSpacingTolerance()
69
			// Set the space width-based tolerance value that is used to estimate where spaces in text should be added. Note that the default value for this has been determined from trial and error. Setting this value larger will reduce the number of spaces added.
70
			println "AverageCharTolerance = "+strip.getAverageCharTolerance()
71
			// Set the character width-based tolerance value that is used to estimate where spaces in text should be added. Note that the default value for this has been determined from trial and error. Setting this value larger will reduce the number of spaces added.
72
			println "IndentThreshold = "+strip.getIndentThreshold()
73
			// sets the multiple of whitespace character widths for the current text which the current line start can be indented from the previous line start beyond which the current line start is considered to be a paragraph start. The default value is 2.0.
74
			println "DropThreshold = "+strip.getDropThreshold()
75
			// sets the minimum whitespace, as a multiple of the max height of the current characters beyond which the current line start is considered to be a paragraph start. The default value is 2.5.
76
			strip.setSortByPosition(true)
77
			strip.setPageStart("<pb/>")
78
			// strip.setParagraphStart("\n<p>")
79
			// strip.setParagraphEnd("</p>")
80
			writer.print strip.getText(doc)
81
			doc.close()
82
			writer.close()
83
			}
84
		} catch (Exception e) {
85
			e.printStackTrace()
86
		}
87
	}
88
}
89

  
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/AddMacroMacro.groovy (revision 3536)
1
// Copyright © 2022 MY_INSTITUTION
2
// Licensed under the terms of the GNU General Public License version 3 (http://www.gnu.org/licenses/gpl-3.0.html)
3
// @author mdecorde
4

  
5
// STANDARD DECLARATIONS
6
package org.txm.macro
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.rcp.swt.widget.parameters.*
11
import org.txm.Toolbox;
12
import org.txm.utils.io.FileCopy
13
import org.txm.rcp.views.fileexplorer.MacroExplorer
14

  
15
// BEGINNING OF PARAMETERS

@Field @Option(name="files", usage="an example file", widget="FilesOpen", metaVar="*.groovy;*.jar", required=true, def="")
def files

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return

// END OF PARAMETERS

File rootDir = new File(Toolbox.getTxmHomePath(), "scripts/groovy/")

// Copies src to dest, replacing dest when it already exists.
// The destination directory is created when missing.
def install = { File src, File dest, String updateMessage, String newMessage ->
	if (dest.exists()) {
		println updateMessage
		dest.delete()
	} else {
		println newMessage
	}
	dest.getParentFile().mkdirs()
	FileCopy.copy(src, dest)
}

for (File f : files) {

	if (!f.isFile()) continue;

	if (f.getName().endsWith(".jar")) {

		// Libraries go to the "lib" directory
		File f2 = new File(rootDir, "lib/"+f.getName())
		install(f, f2, "Updating library $f2", "New library: $f2")

	} else if (f.getName().endsWith(".groovy")) {

		// Macros go to "user/<package path>/"; the package is read from the
		// first "package org.txm.macro..." declaration of the script and
		// defaults to "org.txm.macro" when there is none.
		String ppackage = "org.txm.macro"
		for (def line : f.readLines("UTF-8")) {
			// Capture only the package name: a trailing ';', spaces or a
			// comment must not end up in the target directory path.
			def declaration = (line =~ /^\s*package\s+(org\.txm\.macro[\w.]*)/)
			if (declaration.find()) {
				ppackage = declaration.group(1)
				break
			}
		}

		File f2 = new File(rootDir, "user/"+ppackage.replace(".", "/")+ "/"+ f.getName())
		install(f, f2, "Update macro: $f2", "New macro: $f2")

	} else {
		println "Ignoring $f"
	}
}

// update macros view
monitor.syncExec(new Runnable() {
		public void run() {
			MacroExplorer.refresh();
		}
	});
TXM/trunk/org.txm.groovy.core/src/java/org/txm/groovy/core/GSERunner.java (revision 3536)
8 8
import java.util.HashSet;
9 9
import java.util.Map;
10 10

  
11
import org.apache.commons.lang.StringUtils;
12 11
import org.codehaus.groovy.control.CompilerConfiguration;
13 12
import org.codehaus.groovy.control.customizers.ImportCustomizer;
14 13
import org.eclipse.core.internal.runtime.InternalPlatform;

Formats disponibles : Unified diff