[SerializableAttribute()]
public class OcrAutoRecognizeRunJobEventArgs : EventArgs
IOcrAutoRecognizeManager.JobStarted and IOcrAutoRecognizeManager.JobCompleted will trigger when Run, RunJob or RunJobAsync is called.
The JobStarted event occurs when a job is about to start. You can use this event to keep track of the number of jobs that are still pending. Once a job has completed, you can also examine OcrAutoRecognizeRunJobEventArgs.Job to find out whether the job finished successfully and to get its errors, if any. The RunJob example below shows complete source code for accomplishing these tasks in your application.
The JobCompleted event will always occur when a job is run whether the job is aborted or completed.
To abort pending jobs in a multi-threaded application using IOcrAutoRecognizeManager, you subscribe to this event and set the value of OcrAutoRecognizeRunJobEventArgs.Status to OcrAutoRecognizeManagerJobStatus.Abort. You can also examine OcrAutoRecognizeRunJobEventArgs.Job and only abort certain jobs depending on your application logic.
IOcrAutoRecognizeManager allows you to modify the raster image, OCR page or OCR document during some parts of the operation. Refer to OcrAutoRecognizeJobOperationEventArgs.PageImage for more information and an example.
using System;
using System.IO;
using System.Threading;
using Leadtools;
using Leadtools.Codecs;
using Leadtools.Document.Writer;
using Leadtools.Forms.Common;
using Leadtools.Ocr;
using Leadtools.WinForms;
/// <summary>
/// Example that converts every TIF file in the images directory to PDF by
/// queuing one OCR job per file on the thread pool and blocking until all
/// of them have been accounted for.
/// </summary>
public class RunJobExample
{
    // Number of documents that are pending
    private int _documentsPending;

    // Event to trigger when all documents are finished
    private AutoResetEvent _allDocumentsFinishedEvent;

    /// <summary>
    /// Queues one job per TIF file found in <c>LEAD_VARS.ImagesDir</c> and waits
    /// until every queued document has been reported as finished.
    /// </summary>
    public void Start()
    {
        string imagesDirectory = LEAD_VARS.ImagesDir;
        string documentsDirectory = Path.Combine(LEAD_VARS.ImagesDir, "RunJobExample");

        // Create the output (documents) directory; CreateDirectory does nothing
        // when the directory already exists, so no Exists check is needed
        Directory.CreateDirectory(documentsDirectory);

        // Get all TIF files in input (images) directory
        string[] imageFileNames = Directory.GetFiles(imagesDirectory, "*.tif");
        if (imageFileNames.Length == 0)
        {
            Console.WriteLine("No images to OCR");
            return;
        }

        // Create a new OCR engine instance
        OcrEngineType engineType = OcrEngineType.LEAD;
        Console.WriteLine("Starting up {0} engine", engineType);
        using (IOcrEngine ocrEngine = OcrEngineManager.CreateEngine(engineType))
        {
            ocrEngine.Startup(null, null, null, LEAD_VARS.OcrLEADRuntimeDir);

            // Setup document PDF save options: Image/Text with CCITT G4 encoding for B/W.
            // A direct cast is used so an unexpected options type fails here with a clear
            // InvalidCastException instead of a NullReferenceException on the next line.
            DocumentWriter docWriter = ocrEngine.DocumentWriterInstance;
            PdfDocumentOptions pdfOptions = (PdfDocumentOptions)docWriter.GetOptions(DocumentFormat.Pdf);
            pdfOptions.ImageOverText = true;
            pdfOptions.DocumentType = PdfDocumentType.Pdf;
            pdfOptions.FontEmbedMode = DocumentFontEmbedMode.None;
            pdfOptions.OneBitImageCompression = OneBitImageCompressionType.FaxG4;
            docWriter.SetOptions(DocumentFormat.Pdf, pdfOptions);

            // We are going to use multiple-threads, so disable threading in
            // IOcrAutoRecognizeManager
            IOcrAutoRecognizeManager autoRecognizeManager = ocrEngine.AutoRecognizeManager;
            autoRecognizeManager.MaximumThreadsPerJob = 1;

            // Tell the recognize manager to continue on errors
            autoRecognizeManager.JobErrorMode = OcrAutoRecognizeManagerJobErrorMode.Continue;

            // Instead of using one event per document to signal completion,
            // we will use the JobCompleted event of IOcrAutoRecognizeManager
            // to decrement a counter and trigger one event when the counter reaches 0
            autoRecognizeManager.JobStarted += autoRecognizeManager_JobStarted;
            autoRecognizeManager.JobCompleted += autoRecognizeManager_JobCompleted;

            int count = imageFileNames.Length;
            _documentsPending = count;
            _allDocumentsFinishedEvent = new AutoResetEvent(false);

            for (int i = 0; i < count; i++)
            {
                // Create the job data
                string imageFileName = imageFileNames[i];
                string name = "Document " + (i + 1).ToString();
                Console.WriteLine("Queuing {0} file {1}", name, imageFileName);

                JobData data = new JobData();
                data.AutoRecognizeManager = autoRecognizeManager;
                data.ImageFileName = imageFileName;
                data.DocumentFileName = Path.Combine(documentsDirectory, Path.GetFileNameWithoutExtension(imageFileName) + ".pdf");
                data.JobName = name;

                // Queue this job
                ThreadPool.QueueUserWorkItem(new WaitCallback(RunJob), data);
            }

            // Wait for all documents to finish
            _allDocumentsFinishedEvent.WaitOne();
            _allDocumentsFinishedEvent.Close();

            autoRecognizeManager.JobStarted -= autoRecognizeManager_JobStarted;
            autoRecognizeManager.JobCompleted -= autoRecognizeManager_JobCompleted;

            Console.WriteLine("All documents finished, check the result files in {0}", documentsDirectory);
        }
    }

    /// <summary>
    /// Reports that a job is starting and aborts all jobs when
    /// <see cref="AbortJobs"/> asks for it.
    /// </summary>
    private void autoRecognizeManager_JobStarted(object sender, OcrAutoRecognizeRunJobEventArgs e)
    {
        // This is not strictly needed in this example, we will
        // use it to show information
        Console.WriteLine("{0} started...", e.Job.JobData.JobName);

        // Check if we need to abort
        if (AbortJobs(e.Job))
        {
            // Yes, abort all jobs
            e.Job.AutoRecognizeManager.AbortAllJobs();
        }
    }

    /// <summary>
    /// Reports the outcome of a job, saves its errors (if any) to a text file next
    /// to the output document, and marks the document as finished.
    /// </summary>
    private void autoRecognizeManager_JobCompleted(object sender, OcrAutoRecognizeRunJobEventArgs e)
    {
        try
        {
            IOcrAutoRecognizeJob job = e.Job;
            string message = string.Format("{0} completed ", job.JobData.JobName);

            // Show any errors
            if (job.Errors.Count == 0)
            {
                message += "successfully...";
            }
            else
            {
                message += "with errors, first error is " + job.Errors[0].Exception.Message;

                // And save the errors to a text file in the document directory
                WriteErrorReport(job);
            }

            Console.WriteLine(message);
        }
        finally
        {
            // Always account for this document, even if reporting failed;
            // otherwise Start() would wait forever for the counter to reach 0
            SignalDocumentFinished();
        }
    }

    /// <summary>
    /// Writes the job data and every recorded error of <paramref name="job"/> to
    /// "&lt;document name&gt;_errors.txt" in the document directory. Writing the report
    /// is best-effort: file-system failures are reported on the console only.
    /// </summary>
    private static void WriteErrorReport(IOcrAutoRecognizeJob job)
    {
        string documentFileName = job.JobData.DocumentFileName;
        string textPathName = Path.Combine(Path.GetDirectoryName(documentFileName), Path.GetFileNameWithoutExtension(documentFileName) + "_errors.txt");

        try
        {
            using (StreamWriter writer = File.CreateText(textPathName))
            {
                writer.WriteLine(job.JobData.JobName);
                writer.WriteLine("Data:");
                writer.WriteLine(" Image file name: " + job.JobData.ImageFileName);
                writer.WriteLine(" First page number: " + job.JobData.FirstPageNumber);
                writer.WriteLine(" Last page number: " + job.JobData.LastPageNumber);
                writer.WriteLine(" Format:" + job.JobData.Format);
                writer.WriteLine(" Document file name: " + job.JobData.DocumentFileName);
                writer.WriteLine("Errors:");
                foreach (OcrAutoRecognizeManagerJobError error in job.Errors)
                {
                    writer.WriteLine(" Page: {0} during {1}. Error: {2}", error.ImagePageNumber, error.Operation, error.Exception.Message);
                }
            }
        }
        catch (IOException ex)
        {
            Console.WriteLine("Could not write error report {0}: {1}", textPathName, ex.Message);
        }
        catch (UnauthorizedAccessException ex)
        {
            Console.WriteLine("Could not write error report {0}: {1}", textPathName, ex.Message);
        }
    }

    /// <summary>
    /// Decrements the pending documents count and wakes up the thread waiting in
    /// <see cref="Start"/> when the count reaches 0.
    /// </summary>
    private void SignalDocumentFinished()
    {
        // Since this will be called from multiple threads, we need
        // to use a thread-safe decrement
        int pending = Interlocked.Decrement(ref _documentsPending);

        // If we are the last document, wake up main thread
        if (pending == 0)
        {
            _allDocumentsFinishedEvent.Set();
        }
    }

    /// <summary>
    /// Data handed to each thread pool work item.
    /// </summary>
    private class JobData
    {
        public IOcrAutoRecognizeManager AutoRecognizeManager;
        public string ImageFileName;
        public string DocumentFileName;
        public string JobName;
    }

    /// <summary>
    /// Thread pool callback: creates the OCR job described by <paramref name="state"/>
    /// (a <see cref="JobData"/>) and runs it.
    /// </summary>
    private void RunJob(object state)
    {
        // Direct cast: a wrong state type is a programming error and should fail loudly
        JobData data = (JobData)state;
        Console.WriteLine("Running {0}", data.JobName);

        IOcrAutoRecognizeJob job;
        try
        {
            OcrAutoRecognizeJobData jobData = new OcrAutoRecognizeJobData(data.ImageFileName, DocumentFormat.Pdf, data.DocumentFileName);
            jobData.JobName = data.JobName;
            job = data.AutoRecognizeManager.CreateJob(jobData);
        }
        catch (Exception ex)
        {
            // The job never ran, so the JobCompleted handler will not count this
            // document; count it here so Start() does not wait forever
            Console.WriteLine("{0} could not be created: {1}", data.JobName, ex.Message);
            SignalDocumentFinished();
            return;
        }

        // Run it; completion is accounted for in the JobCompleted handler
        data.AutoRecognizeManager.RunJob(job);
    }

    /// <summary>
    /// Decides whether all jobs should be aborted. This example never aborts.
    /// </summary>
    private bool AbortJobs(IOcrAutoRecognizeJob ocrJob)
    {
        // In your application, you can check if abortion is required, for example, if the user
        // has pressed the Cancel button on a progress bar or if your service is shutting down.
        // In this example, we will never abort, but you can change this code to return true
        // upon any condition (or when a specific job is about to start)
        // and the engine will abort all current and pending jobs
        return false;
    }
}
/// <summary>
/// Fixed installation paths used by the example.
/// </summary>
static class LEAD_VARS
{
    /// <summary>Directory passed to the OCR engine at startup as its runtime location.</summary>
    public const string OcrLEADRuntimeDir = @"C:\LEADTOOLS23\Bin\Common\OcrLEADRuntime";

    /// <summary>Directory that is searched for the input images.</summary>
    public const string ImagesDir = @"C:\LEADTOOLS23\Resources\Images";
}