using Leadtools;
using Leadtools.Codecs;
using Leadtools.Ocr;
using Leadtools.Document.Writer;
using Leadtools.Forms.Common;
using Leadtools.WinForms;
/// <summary>
/// Converts every TIF file in the images directory to a PDF by running one
/// auto-recognize job per file on the thread pool, then blocks until all of
/// them have finished.
/// </summary>
public class RunJobExample
{
    // Number of documents that are still pending
    private int _documentsPending;
    // Event signaled once the last pending document has finished
    private AutoResetEvent _allDocumentsFinishedEvent;

    /// <summary>
    /// Queues one OCR job per TIF file found in <c>LEAD_VARS.ImagesDir</c> and waits
    /// for all of them. Result PDFs are written to the "RunJobExample" sub-directory.
    /// </summary>
    public void Start()
    {
        string imagesDirectory = LEAD_VARS.ImagesDir;
        string documentsDirectory = Path.Combine(LEAD_VARS.ImagesDir, "RunJobExample");

        // Create the output (documents) directory; this is a no-op when it already exists
        Directory.CreateDirectory(documentsDirectory);

        // Get all TIF files in input (images) directory
        string[] imageFileNames = Directory.GetFiles(imagesDirectory, "*.tif");
        if (imageFileNames.Length == 0)
        {
            Console.WriteLine("No images to OCR");
            return;
        }

        // Create a new OCR engine instance
        OcrEngineType engineType = OcrEngineType.LEAD;
        Console.WriteLine("Starting up {0} engine", engineType);
        using (IOcrEngine ocrEngine = OcrEngineManager.CreateEngine(engineType))
        {
            ocrEngine.Startup(null, null, null, LEAD_VARS.OcrLEADRuntimeDir);

            // Setup document PDF save options: Image/Text with CCITT G4 encoding for B/W.
            // A direct cast fails loudly if the options are not PDF options, instead of
            // surfacing later as a NullReferenceException.
            DocumentWriter docWriter = ocrEngine.DocumentWriterInstance;
            PdfDocumentOptions pdfOptions = (PdfDocumentOptions)docWriter.GetOptions(DocumentFormat.Pdf);
            pdfOptions.ImageOverText = true;
            pdfOptions.DocumentType = PdfDocumentType.Pdf;
            pdfOptions.FontEmbedMode = DocumentFontEmbedMode.None;
            pdfOptions.OneBitImageCompression = OneBitImageCompressionType.FaxG4;
            docWriter.SetOptions(DocumentFormat.Pdf, pdfOptions);

            // We are going to use multiple threads ourselves, so disable threading in
            // IOcrAutoRecognizeManager
            IOcrAutoRecognizeManager autoRecognizeManager = ocrEngine.AutoRecognizeManager;
            autoRecognizeManager.MaximumThreadsPerJob = 1;
            // Tell the recognize manager to continue on errors
            autoRecognizeManager.JobErrorMode = OcrAutoRecognizeManagerJobErrorMode.Continue;

            // JobStarted/JobCompleted are used for progress and error reporting only.
            // Completion is counted by the worker itself (see RunJob) so the count is
            // decremented exactly once per queued document, whatever happens to the job.
            autoRecognizeManager.JobStarted += autoRecognizeManager_JobStarted;
            autoRecognizeManager.JobCompleted += autoRecognizeManager_JobCompleted;
            try
            {
                int count = imageFileNames.Length;
                _documentsPending = count;
                using (AutoResetEvent allFinished = new AutoResetEvent(false))
                {
                    _allDocumentsFinishedEvent = allFinished;
                    for (int i = 0; i < count; i++)
                    {
                        // Create the job data
                        string imageFileName = imageFileNames[i];
                        string name = "Document " + (i + 1).ToString();
                        Console.WriteLine("Queuing {0} file {1}", name, imageFileName);

                        JobData data = new JobData();
                        data.AutoRecognizeManager = autoRecognizeManager;
                        data.ImageFileName = imageFileName;
                        data.DocumentFileName = Path.Combine(documentsDirectory, Path.GetFileNameWithoutExtension(imageFileName) + ".pdf");
                        data.JobName = name;

                        // Queue this job
                        ThreadPool.QueueUserWorkItem(new WaitCallback(RunJob), data);
                    }

                    // Wait for all documents to finish
                    allFinished.WaitOne();
                }
            }
            finally
            {
                // Always detach the handlers, even when queuing or waiting failed
                autoRecognizeManager.JobStarted -= autoRecognizeManager_JobStarted;
                autoRecognizeManager.JobCompleted -= autoRecognizeManager_JobCompleted;
            }

            Console.WriteLine("All documents finished, check the result files in {0}", documentsDirectory);
        }
    }

    /// <summary>
    /// Reports that a job has started and aborts all jobs when <see cref="AbortJobs"/> asks for it.
    /// </summary>
    private void autoRecognizeManager_JobStarted(object sender, OcrAutoRecognizeRunJobEventArgs e)
    {
        // This is not strictly needed in this example, we will
        // use it to show information
        Console.WriteLine("{0} started...", e.Job.JobData.JobName);

        // Check if we need to abort
        if (AbortJobs(e.Job))
        {
            // Yes, abort all jobs
            e.Job.AutoRecognizeManager.AbortAllJobs();
        }
    }

    /// <summary>
    /// Reports the outcome of a job. When the job collected errors, they are also written
    /// to "&lt;document name&gt;_errors.txt" next to the output document.
    /// </summary>
    private void autoRecognizeManager_JobCompleted(object sender, OcrAutoRecognizeRunJobEventArgs e)
    {
        IOcrAutoRecognizeJob job = e.Job;
        string message = string.Format("{0} completed ", job.JobData.JobName);

        // Show any errors
        if (job.Errors.Count == 0)
        {
            message += "successfully...";
        }
        else
        {
            message += "with errors, first error is " + job.Errors[0].Exception.Message;

            // And save the errors to a text file in the document directory
            string documentFileName = job.JobData.DocumentFileName;
            string textPathName = Path.Combine(Path.GetDirectoryName(documentFileName), Path.GetFileNameWithoutExtension(documentFileName) + "_errors.txt");
            using (StreamWriter writer = File.CreateText(textPathName))
            {
                writer.WriteLine(job.JobData.JobName);
                writer.WriteLine("Data:");
                writer.WriteLine(" Image file name: " + job.JobData.ImageFileName);
                writer.WriteLine(" First page number: " + job.JobData.FirstPageNumber);
                writer.WriteLine(" Last page number: " + job.JobData.LastPageNumber);
                writer.WriteLine(" Format:" + job.JobData.Format);
                writer.WriteLine(" Document file name: " + job.JobData.DocumentFileName);
                writer.WriteLine("Errors:");
                foreach (OcrAutoRecognizeManagerJobError error in job.Errors)
                {
                    writer.WriteLine(" Page: {0} during {1}. Error: {2}", error.ImagePageNumber, error.Operation, error.Exception.Message);
                }
            }
        }

        Console.WriteLine(message);
    }

    // Everything a worker thread needs to run a single document
    private class JobData
    {
        public IOcrAutoRecognizeManager AutoRecognizeManager;
        public string ImageFileName;
        public string DocumentFileName;
        public string JobName;
    }

    /// <summary>
    /// Thread pool callback: runs one document and then marks it as finished.
    /// </summary>
    /// <param name="state">The <see cref="JobData"/> describing the document to process.</param>
    private void RunJob(object state)
    {
        JobData data = (JobData)state;
        try
        {
            Console.WriteLine("Running {0}", data.JobName);

            // Run it
            OcrAutoRecognizeJobData jobData = new OcrAutoRecognizeJobData(data.ImageFileName, DocumentFormat.Pdf, data.DocumentFileName);
            jobData.JobName = data.JobName;
            IOcrAutoRecognizeJob job = data.AutoRecognizeManager.CreateJob(jobData);
            data.AutoRecognizeManager.RunJob(job);
        }
        catch (Exception ex)
        {
            // An exception escaping a thread pool callback would take the whole
            // process down; report it and let the remaining documents continue.
            Console.WriteLine("{0} failed: {1}", data.JobName, ex.Message);
        }
        finally
        {
            // Decrement only after the job call has fully returned, so the main thread
            // cannot tear the engine down while this job is still unwinding, and do it
            // in finally so a failed job can never leave Start() waiting forever.
            // Called from multiple threads, hence the interlocked operation.
            if (Interlocked.Decrement(ref _documentsPending) == 0)
            {
                // We were the last document, wake up the main thread
                _allDocumentsFinishedEvent.Set();
            }
        }
    }

    /// <summary>
    /// Decides whether all jobs should be aborted; this example never aborts.
    /// </summary>
    private bool AbortJobs(IOcrAutoRecognizeJob ocrJob)
    {
        // In your application, you can check if abortion is required, for example, if the user
        // has pressed the Cancel button on a progress bar or if your service is shutting down.
        // In this example, we will never abort, but you can change this code to return true
        // upon any condition (or when a specific job is about to start)
        // and the engine will abort all current and pending jobs
        return false;
    }
}
/// <summary>
/// Fixed file-system locations used by the example.
/// </summary>
static class LEAD_VARS
{
    // Common root shared by both locations below
    private const string InstallDir = @"C:\LEADTOOLS23";

    // Directory that is scanned for input images
    public const string ImagesDir = InstallDir + @"\Resources\Images";

    // Directory passed to the OCR engine at startup
    public const string OcrLEADRuntimeDir = InstallDir + @"\Bin\Common\OcrLEADRuntime";
}
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.FilenameFilter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicInteger;
import org.junit.*;
import org.junit.runner.JUnitCore;
import org.junit.runner.Result;
import org.junit.runner.notification.Failure;
import static org.junit.Assert.*;
import leadtools.*;
import leadtools.document.writer.*;
import leadtools.internal.AutoResetEvent;
import leadtools.ocr.*;
// Number of documents to process; assigned from the image count before the jobs are queued
private int _documentsPending;
// Event the main thread waits on; set when the last pending document has completed
private AutoResetEvent _allDocumentsFinishedEvent;
// Thread-safe countdown of pending documents, decremented by the job-completed listener
private final static AtomicInteger at = new AtomicInteger();
/**
 * Converts every TIF file in the images directory to a PDF by submitting one
 * auto-recognize job per file to an executor, then blocks until the job-completed
 * listener reports that all of them are done.
 *
 * @throws IOException if a directory or an output placeholder file cannot be created
 */
public void OcrAutoRecognizeManagerRunJobExample() throws IOException {
    String LEAD_VARS_ImagesDir = "C:\\LEADTOOLS23\\Resources\\Images";
    String LEAD_VARS_OcrLEADRuntimeDir = "C:\\LEADTOOLS23\\Bin\\Common\\OcrLEADRuntime";
    String docsDir = combine(LEAD_VARS_ImagesDir, "RunJobExample");
    String imageDir = LEAD_VARS_ImagesDir;

    // Create the output (documents) directory
    Files.createDirectories(Paths.get(docsDir));

    // Get all TIF files in input (images) directory
    Files.createDirectories(Paths.get(imageDir));
    // Locale.ROOT keeps the extension check independent of the default locale
    FilenameFilter tifFileFilter = (d, s) -> s.toLowerCase(java.util.Locale.ROOT).endsWith(".tif");
    String[] imageFileNames = new File(imageDir).list(tifFileFilter);
    // File.list() returns null (not an empty array) when the directory cannot be read
    if (imageFileNames == null || imageFileNames.length == 0) {
        System.out.println("No images to OCR");
        return;
    }

    // Create a new OCR engine instance
    OcrEngineType engineType = OcrEngineType.LEAD;
    System.out.println("Starting up " + engineType + " engine");
    OcrEngine ocrEngine = OcrEngineManager.createEngine(engineType);
    try {
        ocrEngine.startup(null, null, null, LEAD_VARS_OcrLEADRuntimeDir);

        // Setup document PDF save options: Image/Text with CCITT G4 encoding for B/W
        DocumentWriter docWriter = ocrEngine.getDocumentWriterInstance();
        PdfDocumentOptions pdfOptions = (PdfDocumentOptions) docWriter.getOptions(DocumentFormat.PDF);
        pdfOptions.setImageOverText(true);
        pdfOptions.setDocumentType(PdfDocumentType.PDF);
        pdfOptions.setFontEmbedMode(DocumentFontEmbedMode.NONE);
        pdfOptions.setOneBitImageCompression(OneBitImageCompressionType.FAX_G4);
        docWriter.setOptions(DocumentFormat.PDF, pdfOptions);

        // We are going to use our own worker threads, so disable threading in the manager
        OcrAutoRecognizeManager autoRecognizeManager = ocrEngine.getAutoRecognizeManager();
        autoRecognizeManager.setMaximumThreadsPerJob(1);
        // Tell the recognize manager to continue on errors
        autoRecognizeManager.setJobErrorMode(OcrAutoRecognizeManagerJobErrorMode.CONTINUE);

        // Instead of using one event per document, the job-completed listener
        // decrements a counter and triggers a single event when it reaches 0
        autoRecognizeManager.addJobStartedListener(autoRecognizeManager_JobStarted);
        autoRecognizeManager.addJobCompletedListener(autoRecognizeManager_JobCompleted);

        // Prepare every job before any of them is submitted, so an I/O failure here
        // cannot leave half of the jobs running against an engine that is being disposed
        int count = imageFileNames.length;
        ArrayList<JobData> jobs = new ArrayList<>(count);
        for (int i = 0; i < count; i++) {
            String imageFileName = imageFileNames[i];
            // The filter guarantees a ".tif" suffix; cut at the LAST dot so that
            // names such as "scan.page1.tif" keep their full base name
            String baseName = imageFileName.substring(0, imageFileName.lastIndexOf('.'));

            JobData data = new JobData();
            data.AutoRecognizeManager = autoRecognizeManager;
            data.ImageFileName = combine(imageDir, imageFileName);
            data.DocumentFileName = combine(docsDir, baseName + ".pdf");
            data.JobName = "Document " + (i + 1);

            // Make sure the output file exists; createNewFile() does nothing when it already does
            new File(data.DocumentFileName).createNewFile();
            jobs.add(data);
        }

        _documentsPending = count;
        at.set(count);
        _allDocumentsFinishedEvent = new AutoResetEvent();

        ExecutorService executorService = Executors.newFixedThreadPool(1);
        System.out.println("Starting the threads and waiting...");
        try {
            for (int i = 0; i < count; i++) {
                final JobData data = jobs.get(i);
                System.out.println("Queuing " + data.JobName + " file " + imageFileNames[i]);
                executorService.submit(() -> RunJob(data));
            }
        } finally {
            // Already-submitted jobs still run to completion; without this call the
            // pool's worker thread would keep the JVM alive after the example ends
            executorService.shutdown();
        }

        // Wait for all documents to finish.
        // NOTE(review): the counter is only decremented by the job-completed listener, so a
        // job that fails before that listener is invoked would leave this wait blocked.
        _allDocumentsFinishedEvent.waitOne();
        _allDocumentsFinishedEvent.close();
        System.out.println("All documents finished, check the result files in " + docsDir);
    } finally {
        // Release the engine on every path, including failures during setup
        ocrEngine.dispose();
    }
}
// Listener invoked when a job starts: prints the job name and aborts all jobs
// when AbortJobs() requests it.
OcrAutoRecognizeRunJobListener autoRecognizeManager_JobStarted = new OcrAutoRecognizeRunJobListener() {
    @Override
    public void onJob(OcrAutoRecognizeRunJobEvent e) {
        OcrAutoRecognizeJob startedJob = e.getJob();

        // Purely informational; the example would work without it
        System.out.println(startedJob.getJobData().getJobName() + " started...");

        // Nothing more to do unless an abort was requested
        if (!AbortJobs(startedJob)) {
            return;
        }

        // Abort requested: stop every job
        startedJob.getAutoRecognizeManager().abortAllJobs();
    }
};
// Listener invoked when a job completes: reports the outcome, writes the job's errors
// (if any) to "<document name>_errors.txt" next to the output document, then counts the
// document as finished and wakes the main thread after the last one.
OcrAutoRecognizeRunJobListener autoRecognizeManager_JobCompleted = new OcrAutoRecognizeRunJobListener() {
    @Override
    public void onJob(OcrAutoRecognizeRunJobEvent e) {
        try {
            OcrAutoRecognizeJob job = e.getJob();
            String message = job.getJobData().getJobName() + " completed ";
            // Show any errors
            if (job.getErrors().size() == 0) {
                message += "successfully...";
            } else {
                message += " with errors, first error is " + job.getErrors().get(0).getException().getMessage();

                // And save the errors to a text file in the document directory.
                // Build the name from the document's file NAME only: the document file
                // name is a full path, and appending a full path to its own parent
                // directory does not yield a usable location.
                File doc = new File(job.getJobData().getDocumentFileName());
                String docName = doc.getName();
                int extensionStart = docName.lastIndexOf('.');
                String baseName = extensionStart >= 0 ? docName.substring(0, extensionStart) : docName;
                File errorsFile = new File(doc.getParentFile(), baseName + "_errors.txt");

                // try-with-resources closes the writer; no explicit close() is needed
                try (FileWriter writer = new FileWriter(errorsFile)) {
                    writer.write(job.getJobData().getJobName() + "\n");
                    writer.write("Data:" + "\n");
                    writer.write(" Image file name: " + job.getJobData().getImageFileName() + "\n");
                    writer.write(" First page number: " + job.getJobData().getFirstPageNumber() + "\n");
                    writer.write(" Last page number: " + job.getJobData().getLastPageNumber() + "\n");
                    writer.write(" Format:" + job.getJobData().getFormat() + "\n");
                    writer.write(" Document file name: " + job.getJobData().getDocumentFileName() + "\n");
                    writer.write("Errors:" + "\n");
                    for (OcrAutoRecognizeManagerJobError error : job.getErrors()) {
                        writer.write(" Page: " + error.getImagePageNumber() + " during " + error.getOperation() + ". Error: " + error.getException().getMessage() + "\n");
                    }
                } catch (IOException e1) {
                    // Failing to write the report must not prevent the job from being counted
                    e1.printStackTrace();
                }
            }
            System.out.println(message);
        } finally {
            // Decrement the documents count, when we reach 0, we are done.
            // This runs in finally so that a failure while reporting can never leave the
            // main thread waiting forever. It is called from worker threads, so the
            // decrement goes through the atomic counter.
            int pending = at.decrementAndGet();
            System.out.println(pending);
            // If we are the last document, wake up the main thread
            if (pending == 0) {
                _allDocumentsFinishedEvent.set();
            }
        }
    }
};
/**
 * Plain holder for what a worker needs to run one document.
 */
class JobData {
    /** Name given to the job (printed in the console messages). */
    public String JobName;

    /** Input image to recognize. */
    public String ImageFileName;

    /** Output document to produce. */
    public String DocumentFileName;

    /** Manager used to create and run the job. */
    public OcrAutoRecognizeManager AutoRecognizeManager;
}
/**
 * Creates a PDF auto-recognize job from the given data and runs it.
 *
 * @param state image file, output document, job name and the manager to run the job with
 */
private void RunJob(JobData state) {
    System.out.println("Running " + state.JobName);

    // Describe the job: input image, PDF output, target document
    OcrAutoRecognizeJobData jobData =
            new OcrAutoRecognizeJobData(state.ImageFileName, DocumentFormat.PDF, state.DocumentFileName);
    jobData.setJobName(state.JobName);

    // Create the job and run it with the manager supplied by the caller
    OcrAutoRecognizeManager manager = state.AutoRecognizeManager;
    manager.runJob(manager.createJob(jobData));
}
/**
 * Decides whether all jobs should be aborted. This example never aborts.
 *
 * @param ocrJob the job that is about to start (not inspected here)
 * @return always {@code false}
 */
private boolean AbortJobs(OcrAutoRecognizeJob ocrJob) {
    // A real application would check here whether it has to stop, e.g. because the user
    // pressed Cancel on a progress bar or the service is shutting down. Returning true
    // (for any condition, or when a specific job is about to start) makes the caller
    // abort all current and pending jobs.
    final boolean abortRequested = false;
    return abortRequested;
}
/**
 * Joins a parent path and a child path into one path string.
 *
 * @param path1 the parent path
 * @param path2 the child path appended to {@code path1}
 * @return the path of {@code new File(path1, path2)}
 */
public String combine(String path1, String path2) {
    return new File(path1, path2).getPath();
}