Révision 3536
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/pdf/PDF2PNGMacro.groovy (revision 3536) | ||
---|---|---|
1 |
package org.txm.macro.pdf |
|
2 |
// STANDARD DECLARATIONS |
|
3 |
|
|
4 |
import org.kohsuke.args4j.* |
|
5 |
import groovy.transform.Field |
|
6 |
import org.txm.rcp.swt.widget.parameters.* |
|
7 |
import org.apache.pdfbox.Loader |
|
8 |
import org.apache.pdfbox.rendering.PDFRenderer |
|
9 |
import org.apache.pdfbox.tools.imageio.ImageIOUtil |
|
10 |
import org.apache.pdfbox.rendering.ImageType |
|
11 |
|
|
12 |
// BEGINNING OF PARAMETERS |
|
13 |
|
|
14 |
@Field @Option(name="input_file", usage=".pdf input file", widget="File", required=false, def="")
def input_file

@Field @Option(name="input_dir", usage="The directory containing the .pdf files to read", widget="Folder", required=false, def="")
def input_dir

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return

// END OF PARAMETERS

/**
 * Returns the name of the given file without its last extension.
 * A name whose only dot is the first character (e.g. ".pdf") is returned unchanged.
 *
 * @param file the file to name
 * @return the file name up to (excluding) its last dot
 */
String baseName(File file) {
	String name = file.getName()
	int idx = name.lastIndexOf(".")
	return (idx > 0) ? name.substring(0, idx) : name
}

/**
 * Renders each page of a PDF file to a 300 DPI RGB PNG image written next to the
 * source file as "<base name>-<page number>.png" (page numbers start at 1).
 *
 * Each output stream is closed as soon as its page is written and the loaded
 * document is closed even when rendering fails.
 *
 * @param pdf the PDF file to render; files not ending with ".pdf" (any case) are ignored
 * @return true when all pages were written, false when the file was ignored or an error occurred
 */
boolean renderPdfToPng(File pdf) {
	if (!pdf.getName().toUpperCase().endsWith(".PDF")) return false

	String name = baseName(pdf)
	File dir = pdf.getParentFile()
	def doc = null
	try {
		doc = Loader.loadPDF(pdf)
		def renderer = new PDFRenderer(doc)
		int nPages = doc.getNumberOfPages()
		for (int page = 0; page < nPages; page++) {
			def image = renderer.renderImageWithDPI(page, 300, ImageType.RGB)
			// withOutputStream closes the stream after the closure returns or throws
			new File(dir, name + "-" + (page + 1) + ".png").withOutputStream { out ->
				ImageIOUtil.writeImage(image, "png", out, 300)
			}
		}
		return true
	} catch (Exception e) {
		e.printStackTrace()
		return false
	} finally {
		if (doc != null) doc.close()
	}
}

if (input_dir != null && input_dir.exists() && input_dir.isDirectory()) {
	// Directory mode: convert every regular file whose name ends with ".pdf" (any case)
	int nFiles = 0
	input_dir.eachFileMatch(~/(?i).*\.pdf/) { f ->
		if (!f.isFile()) return // skip directories that happen to be named *.pdf
		println "Processing "+baseName(f)+"..."
		if (renderPdfToPng(f)) nFiles++
	}
	println "Processed "+nFiles+" files."
} else if (input_file != null && input_file.isFile()) {
	// Single file mode
	renderPdfToPng(input_file)
} else {
	println "Nothing to do: select an existing .pdf file or a directory containing .pdf files."
}
|
79 |
|
|
80 |
|
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/pdf/PDFPropertiesMacro.groovy (revision 3536) | ||
---|---|---|
1 |
package org.txm.macro.pdf |
|
2 |
// STANDARD DECLARATIONS |
|
3 |
|
|
4 |
import org.kohsuke.args4j.* |
|
5 |
import groovy.transform.Field |
|
6 |
import org.txm.rcp.swt.widget.parameters.* |
|
7 |
import org.apache.pdfbox.Loader |
|
8 |
import org.apache.pdfbox.text.PDFTextStripper |
|
9 |
|
|
10 |
// BEGINNING OF PARAMETERS |
|
11 |
|
|
12 |
@Field @Option(name="input_file", usage=".pdf input file", widget="File", required=false, def="")
def input_file

@Field @Option(name="input_dir", usage="The directory containing the .pdf files to read", widget="Folder", required=false, def="")
def input_dir

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return

// END OF PARAMETERS

/**
 * Reads the document information and page count of a PDF file.
 *
 * The returned map keeps insertion order: Author, Title, Subject, Creator, Producer,
 * Creation date, Modification date, Keywords, Pages. Missing text or date values are
 * replaced by an empty string. The loaded document is always closed before returning.
 *
 * @param pdf the PDF file to read
 * @return an ordered map of property label to value
 * @throws Exception whatever the loader or the accessors throw; callers decide how to report it
 */
Map readPdfProperties(File pdf) {
	def doc = Loader.loadPDF(pdf)
	try {
		def info = doc.getDocumentInformation()
		def text = { value -> (value == null) ? "" : value }
		def date = { cal -> (cal == null) ? "" : cal.toZonedDateTime().toInstant() }
		return [
			"Author": text(info.getAuthor()),
			"Title": text(info.getTitle()),
			"Subject": text(info.getSubject()),
			"Creator": text(info.getCreator()),
			"Producer": text(info.getProducer()),
			"Creation date": date(info.getCreationDate()),
			"Modification date": date(info.getModificationDate()),
			"Keywords": text(info.getKeywords()),
			"Pages": doc.getNumberOfPages()
		]
	} finally {
		doc.close()
	}
}

if (input_dir != null && input_dir.exists() && input_dir.isDirectory()) {
	// Directory mode: one tab-separated row per PDF file
	int nFiles = 0
	// Text properties are wrapped in double quotes; dates and the page count are not
	def quotedColumns = ["Author", "Title", "Subject", "Creator", "Producer", "Keywords"]
	println "File\tAuthor\tTitle\tSubject\tCreator\tProducer\tCreation date\tModification date\tKeywords\tPages"
	input_dir.eachFileMatch(~/(?i).*\.pdf/) { f ->
		if (!f.isFile()) return // skip directories that happen to be named *.pdf
		try {
			def props = readPdfProperties(f)
			// The first cell is the file name announced by the "File" header column
			def cells = [f.getName()]
			props.each { label, value ->
				cells << (quotedColumns.contains(label) ? "\""+value+"\"" : value)
			}
			// The row is printed in one go so a failure never leaves a truncated line
			println cells.join("\t")
			nFiles++
		} catch (Exception e) {
			// Best effort: report the unreadable file and continue with the next one
			System.err.println "Could not read properties of "+f+": "+e
		}
	}
	println "Processed "+nFiles+" files."
} else if (input_file != null && input_file.isFile()) {
	// Single file mode: one "label: value" line per property
	if (input_file.getName().toUpperCase().endsWith(".PDF")) {
		try {
			readPdfProperties(input_file).each { label, value ->
				println label+": "+value
			}
		} catch (Exception e) {
			System.err.println "Could not read properties of "+input_file+": "+e
		}
	}
} else {
	println "Nothing to do: select an existing .pdf file or a directory containing .pdf files."
}
|
76 |
|
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/pdf/PDF2TXTMacro.groovy (revision 3536) | ||
---|---|---|
7 | 7 |
import org.apache.pdfbox.Loader |
8 | 8 |
import org.apache.pdfbox.text.PDFTextStripper |
9 | 9 |
|
10 |
// WARNING THIS MACRO NEEDS THE INSTALLATION OF THE PDFBOX (https://pdfbox.apache.org) Java library: https://dlcdn.apache.org/pdfbox/3.0.0-alpha3/pdfbox-app-3.0.0-alpha3.jar (standalone library) |
|
11 |
|
|
12 | 10 |
// BEGINNING OF PARAMETERS |
13 | 11 |
|
14 |
@Field @Option(name="input_file", usage=".pdf input file", widget="File", required=true, def="")
|
|
15 |
def input_file
|
|
12 |
@Field @Option(name="input_file", usage=".pdf input file", widget="File", required=false, def="")
|
|
13 |
def input_file |
|
16 | 14 |
|
17 | 15 |
@Field @Option(name="input_dir", usage="The directory containing the .pdf files to read", widget="Folder", required=false, def="") |
18 |
def input_dir
|
|
16 |
def input_dir |
|
19 | 17 |
|
18 |
|
|
20 | 19 |
// Open the parameters input dialog box |
21 | 20 |
if (!ParametersDialog.open(this)) return |
22 | 21 |
|
23 |
// END OF PARAMETERS
|
|
22 |
// END OF PARAMETERS |
|
24 | 23 |
|
25 |
if (input_dir != null && input_dir.exists() && input_dir.isDirectory()) { |
|
26 |
|
|
27 |
nFiles = 0 |
|
28 |
input_dir.eachFileMatch(~/.*.pdf/) { f -> |
|
29 |
name = f.getName() |
|
30 |
println "Processing $name..." |
|
31 |
idx = name.lastIndexOf(".") |
|
32 |
if (idx > 0) name = name.substring(0, idx) |
|
33 |
outputFile = new File(f.getParentFile(), name + ".txt") |
|
34 |
|
|
35 |
pdfFile = f.getAbsolutePath() |
|
36 |
if (pdfFile.toUpperCase().endsWith(".PDF")) { |
|
37 |
textFile = pdfFile.substring(0, pdfFile.length() - 3) + "txt" |
|
38 |
try { |
|
39 |
outputFile.withWriter("UTF-8") { writer -> |
|
40 |
doc = Loader.loadPDF(f) |
|
41 |
strip = new PDFTextStripper() |
|
42 |
writer.print strip.getText(doc) |
|
43 |
doc.close() |
|
44 |
writer.close() |
|
45 |
nFiles++ |
|
46 |
} |
|
47 |
} catch (Exception e) { |
|
48 |
e.printStackTrace() |
|
49 |
} |
|
50 |
} |
|
51 |
} |
|
52 |
println "Done processed $nFiles files." |
|
53 |
} else if (input_file != null && input_file.exists() && input_file.isFile()) { |
|
54 |
|
|
55 |
name = input_file.getName() |
|
24 |
if (input_dir != null && input_dir.exists()) { |
|
25 |
nFiles = 0 |
|
26 |
input_dir.eachFileMatch(~/.*.pdf/) { f -> |
|
27 |
name = f.getName() |
|
28 |
println "Processing "+name+"..." |
|
56 | 29 |
idx = name.lastIndexOf(".") |
57 | 30 |
if (idx > 0) name = name.substring(0, idx) |
58 |
outputFile = new File(input_file.getParentFile(), name + ".txt")
|
|
59 |
|
|
60 |
pdfFile = input_file.getAbsolutePath()
|
|
31 |
outputFile = new File(f.getParentFile(), name + ".txt")
|
|
32 |
|
|
33 |
pdfFile = f.getAbsolutePath()
|
|
61 | 34 |
if (pdfFile.toUpperCase().endsWith(".PDF")) { |
62 | 35 |
textFile = pdfFile.substring(0, pdfFile.length() - 3) + "txt" |
63 | 36 |
try { |
64 | 37 |
outputFile.withWriter("UTF-8") { writer -> |
65 |
doc = Loader.loadPDF(input_file) |
|
66 |
strip = new PDFTextStripper() |
|
67 |
writer.print strip.getText(doc) |
|
68 |
doc.close() |
|
69 |
writer.close() |
|
38 |
doc = Loader.loadPDF(f) |
|
39 |
strip = new PDFTextStripper() |
|
40 |
strip.setSortByPosition(true) |
|
41 |
strip.setPageStart("<pb/>") |
|
42 |
// strip.setParagraphStart("\n<p>") |
|
43 |
// strip.setParagraphEnd("</p>") |
|
44 |
writer.print strip.getText(doc) |
|
45 |
doc.close() |
|
46 |
writer.close() |
|
47 |
nFiles++ |
|
70 | 48 |
} |
71 | 49 |
} catch (Exception e) { |
72 |
e.printStackTrace()
|
|
50 |
e.printStackTrace() |
|
73 | 51 |
} |
74 | 52 |
} |
75 | 53 |
} |
54 |
println "Processed "+nFiles+" files." |
|
55 |
} else { |
|
56 |
name = input_file.getName() |
|
57 |
idx = name.lastIndexOf(".") |
|
58 |
if (idx > 0) name = name.substring(0, idx) |
|
59 |
outputFile = new File(input_file.getParentFile(), name + ".txt") |
|
60 |
|
|
61 |
pdfFile = input_file.getAbsolutePath() |
|
62 |
if (pdfFile.toUpperCase().endsWith(".PDF")) { |
|
63 |
textFile = pdfFile.substring(0, pdfFile.length() - 3) + "txt" |
|
64 |
try { |
|
65 |
outputFile.withWriter("UTF-8") { writer -> |
|
66 |
doc = Loader.loadPDF(input_file) |
|
67 |
strip = new PDFTextStripper() |
|
68 |
println "SpacingTolerance = "+strip.getSpacingTolerance() |
|
69 |
// Set the space width-based tolerance value that is used to estimate where spaces in text should be added. Note that the default value for this has been determined from trial and error. Setting this value larger will reduce the number of spaces added. |
|
70 |
println "AverageCharTolerance = "+strip.getAverageCharTolerance() |
|
71 |
// Set the character width-based tolerance value that is used to estimate where spaces in text should be added. Note that the default value for this has been determined from trial and error. Setting this value larger will reduce the number of spaces added. |
|
72 |
println "IndentThreshold = "+strip.getIndentThreshold() |
|
73 |
// sets the multiple of whitespace character widths for the current text which the current line start can be indented from the previous line start beyond which the current line start is considered to be a paragraph start. The default value is 2.0. |
|
74 |
println "DropThreshold = "+strip.getDropThreshold() |
|
75 |
// sets the minimum whitespace, as a multiple of the max height of the current characters beyond which the current line start is considered to be a paragraph start. The default value is 2.5. |
|
76 |
strip.setSortByPosition(true) |
|
77 |
strip.setPageStart("<pb/>") |
|
78 |
// strip.setParagraphStart("\n<p>") |
|
79 |
// strip.setParagraphEnd("</p>") |
|
80 |
writer.print strip.getText(doc) |
|
81 |
doc.close() |
|
82 |
writer.close() |
|
83 |
} |
|
84 |
} catch (Exception e) { |
|
85 |
e.printStackTrace() |
|
86 |
} |
|
87 |
} |
|
88 |
} |
|
89 |
|
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/AddMacroMacro.groovy (revision 3536) | ||
---|---|---|
1 |
// Copyright © 2022 MY_INSTITUTION |
|
2 |
// Licensed under the terms of the GNU General Public License version 3 (http://www.gnu.org/licenses/gpl-3.0.html) |
|
3 |
// @author mdecorde |
|
4 |
|
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.rcp.swt.widget.parameters.* |
|
11 |
import org.txm.Toolbox; |
|
12 |
import org.txm.utils.io.FileCopy |
|
13 |
import org.txm.rcp.views.fileexplorer.MacroExplorer |
|
14 |
|
|
15 |
// BEGINNING OF PARAMETERS |
|
16 |
|
|
17 |
@Field @Option(name="files", usage="an example file", widget="FilesOpen", metaVar="*.groovy;*.jar", required=true, def="")
def files

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return

// END OF PARAMETERS

// Root directory under which libraries ("lib/") and user macros ("user/") are copied
File rootDir = new File(Toolbox.getTxmHomePath(), "scripts/groovy/")

for (File f : files) {

	if (!f.isFile()) continue;

	if (f.getName().endsWith(".jar")) {

		// Libraries are copied to <rootDir>/lib/
		File f2 = new File(rootDir, "lib/"+f.getName())
		if (f2.exists()) {
			println "Updating library $f2"
			f2.delete()
		} else {
			println "New library: $f2"
		}
		// Make sure the lib directory exists, as done for macros below
		f2.getParentFile().mkdirs()

		FileCopy.copy(f, f2)

	} else if (f.getName().endsWith(".groovy")) {

		// Macros are copied to <rootDir>/user/<package path>/; the package is read from
		// the first "package org.txm.macro..." declaration, defaulting to "org.txm.macro"
		String ppackage = "org.txm.macro"
		for (def line : f.readLines("UTF-8")) {
			// Capture only the dotted package name so a trailing ';', whitespace or
			// comment on the declaration line never ends up in the destination path
			def m = (line =~ /^\s*package\s+(org\.txm\.macro[\w.]*)/)
			if (m.find()) {
				ppackage = m.group(1)
				break
			}
		}

		File f2 = new File(rootDir, "user/"+ppackage.replace(".", "/")+ "/"+ f.getName())
		if (f2.exists()) {
			println "Update macro: $f2"
			f2.delete()
		} else {
			println "New macro: $f2"
		}
		f2.getParentFile().mkdirs()

		FileCopy.copy(f, f2)

	} else {
		println "Ignoring $f"
	}
}

// update macros view
monitor.syncExec(new Runnable() {
	public void run() {
		MacroExplorer.refresh();
	}
});
TXM/trunk/org.txm.groovy.core/src/java/org/txm/groovy/core/GSERunner.java (revision 3536) | ||
---|---|---|
8 | 8 |
import java.util.HashSet; |
9 | 9 |
import java.util.Map; |
10 | 10 |
|
11 |
import org.apache.commons.lang.StringUtils; |
|
12 | 11 |
import org.codehaus.groovy.control.CompilerConfiguration; |
13 | 12 |
import org.codehaus.groovy.control.customizers.ImportCustomizer; |
14 | 13 |
import org.eclipse.core.internal.runtime.InternalPlatform; |
Formats disponibles : Unified diff