OcrEngineManager Class

Summary

Provides methods to create OCR engine instances.

Syntax

Objective-C

C++/CLI

Java

Python

public static class OcrEngineManager

@interface LTOcrEngineManager : NSObject // STATIC CLASS

public class OcrEngineManager

public ref class OcrEngineManager abstract sealed

class OcrEngineManager:

Remarks

The OcrEngineManager class and its methods are your entry point to using the Leadtools.Ocr class library.

This class provides the methods you need to create an object of the IOcrEngine interface. Afterwards, you can use the properties and methods of this interface to perform your OCR tasks.

Based on the engine type passed to the CreateEngine methods, OcrEngineManager will load the OCR engine defined in one of the supporting assemblies and return an interface to IOcrEngine. Use this interface and its included types to start using the Leadtools.Ocr class library. For more information about the engine types, refer to OcrEngineType.

The CreateEngine method lets you create an instance of IOcrEngine, loading the corresponding Leadtools.Ocr.[EngineName].dll assembly using the .NET System.Reflection.Assembly.Load(string assemblyString) method. You cannot unload this assembly once it has been successfully loaded.

The LEADTOOLS OCR Module - LEAD Engine is now thread-safe and so the "THUNK" mechanism is no longer required.

Example

This example shows how to OCR documents using multiple threads.

C#

using Leadtools; 
using Leadtools.Ocr; 
using Leadtools.Document.Writer; 
using Leadtools.Codecs; 
 
/// <summary>
/// OCRs several TIFF images in parallel, one dedicated thread per image, and
/// blocks until every worker thread has signaled that it is finished.
/// </summary>
public void MultiThreadedOcrExample()
{
   // The image file names we are going to OCR and convert to PDF
   string[] tifFileNames =
   {
      Path.Combine(LEAD_VARS.ImagesDir, "Ocr1.tif"),
      Path.Combine(LEAD_VARS.ImagesDir, "Ocr2.tif"),
      Path.Combine(LEAD_VARS.ImagesDir, "Ocr3.tif"),
      Path.Combine(LEAD_VARS.ImagesDir, "Ocr4.tif")
   };

   int threadCount = tifFileNames.Length;

   // Create one thread and one completion event per image
   Thread[] threads = new Thread[threadCount];
   AutoResetEvent[] waitHandles = new AutoResetEvent[threadCount];
   for (int i = 0; i < threadCount; i++)
   {
      threads[i] = new Thread(new ParameterizedThreadStart(MyThreadProc));
      threads[i].Name = "OCR thread + " + i.ToString();

      // Created non-signaled; MyThreadProc sets it when its work is done
      waitHandles[i] = new AutoResetEvent(false);
   }

   Console.WriteLine("Starting the threads and waiting...");

   // Start the threads, handing each one its image and its completion event
   for (int i = 0; i < threadCount; i++)
   {
      MyThreadData threadData = new MyThreadData();
      threadData.ImageFileName = tifFileNames[i];
      threadData.WaitHandle = waitHandles[i];
      threads[i].Start(threadData);
   }

   // Wait till all threads are done.
   // WaitAll is required here: WaitAny returns as soon as the FIRST handle is
   // signaled, which would report completion while other threads are still running.
   WaitHandle.WaitAll(waitHandles);
   Console.WriteLine("All threads finished");
}
// Per-thread work item passed (boxed as object) to MyThreadProc through Thread.Start.
private struct MyThreadData 
{ 
   // Path of the image file this thread will OCR
   public string ImageFileName; 
   // Event the worker sets when it has finished, so the starting thread can wait on it
   public AutoResetEvent WaitHandle; 
} 
 
/// <summary>
/// Thread entry point: OCRs one image with its own LEAD OCR engine instance,
/// saves the result as a PDF, and then signals the caller's wait handle.
/// </summary>
/// <param name="data">A boxed <c>MyThreadData</c> holding the image path and the completion event.</param>
private void MyThreadProc(object data)
{
   // Unbox the work item handed over by Thread.Start
   MyThreadData workItem = (MyThreadData)data;
   string sourcePath = workItem.ImageFileName;

   // Show a status message
   Console.WriteLine("Begin: {0}", sourcePath);

   // Each thread creates and owns its own instance of the LEAD OCR engine
   using (IOcrEngine engine = OcrEngineManager.CreateEngine(OcrEngineType.LEAD))
   {
      // Start the engine using default parameters
      engine.Startup(null, null, null, LEAD_VARS.OcrLEADRuntimeDir);

      // The PDF is written alongside the image, same name with a "pdf" extension
      string targetPath = Path.ChangeExtension(sourcePath, "pdf");

      // Create an OCR document
      using (IOcrDocument document = engine.DocumentManager.CreateDocument())
      {
         // Add the image as a page of the document
         IOcrPage page = document.Pages.AddPage(sourcePath, null);

         // Recognize the page.
         // Note: Recognize can be called without calling AutoZone or manually adding zones;
         // the engine will check and automatically auto-zone the page
         page.Recognize(null);

         // Save the document we have as PDF
         document.Save(targetPath, DocumentFormat.Pdf, null);
      }

      // Shutdown the engine
      // Note: calling Dispose will also automatically shutdown the engine if it has been started
      engine.Shutdown();
   }

   Console.WriteLine("End: {0}", sourcePath);

   // Signal the main thread that this work item is complete
   workItem.WaitHandle.Set();
}
 
// Hard-coded locations used by this example; adjust them to match the local installation.
static class LEAD_VARS 
{ 
   // Folder holding the sample images (Ocr1.tif .. Ocr4.tif) that the example reads
   public const string ImagesDir = @"C:\LEADTOOLS23\Resources\Images"; 
   // Folder passed as the last argument to IOcrEngine.Startup for the LEAD engine
   public const string OcrLEADRuntimeDir = @"C:\LEADTOOLS23\Bin\Common\OcrLEADRuntime"; 
}

 
import static org.junit.Assert.assertTrue; 
 
import java.io.File; 
import java.io.IOException; 
 
import java.util.ArrayList; 
import java.util.List; 
import java.util.concurrent.ExecutionException; 
import java.util.concurrent.ExecutorService; 
import java.util.concurrent.Executors; 
import java.util.concurrent.Future; 
 
import org.junit.*; 
import org.junit.runner.JUnitCore; 
import org.junit.runner.Result; 
import org.junit.runner.notification.Failure; 
 
import leadtools.*; 
import leadtools.document.writer.DocumentFormat; 
import leadtools.ocr.*; 
 
 
/**
 * OCRs several TIFF images in parallel on a fixed-size thread pool (one thread
 * per image), waits for every task to finish, and then asserts that a PDF was
 * produced for each image.
 *
 * <p>The executor is always shut down, even when a task fails or an assertion
 * throws, so no pool threads are left running after the example ends.
 */
public void OcrEngineManagerMultiThreadedOcrExample() {
   final String LEAD_VARS_IMAGES_DIR = "C:\\LEADTOOLS23\\Resources\\Images";

   // The image file names we are going to OCR and convert to PDF
   String[] tifFileNames = {
         combine(LEAD_VARS_IMAGES_DIR, "Ocr1.tif"),
         combine(LEAD_VARS_IMAGES_DIR, "Ocr2.tif"),
         combine(LEAD_VARS_IMAGES_DIR, "Ocr3.tif"),
         combine(LEAD_VARS_IMAGES_DIR, "Ocr4.tif")
   };

   ExecutorService executorService = Executors.newFixedThreadPool(tifFileNames.length);

   try {
      List<Future<Void>> futures = new ArrayList<>();

      for (String tifFileName : tifFileNames) {
         Future<Void> future = executorService.submit(() -> {
            threadProc(tifFileName);
            return null;
         });

         futures.add(future);
      }

      System.out.println("Starting the threads and waiting...");

      for (Future<Void> future : futures) {
         try {
            future.get(); // wait for each task to complete
         } catch (InterruptedException e) {
            // Restore the interrupt flag for our caller and stop waiting:
            // every further get() would throw immediately anyway
            Thread.currentThread().interrupt();
            System.out.println("An error occurred during the execution of a task.");
            e.printStackTrace();
            break;
         } catch (ExecutionException e) {
            System.out.println("An error occurred during the execution of a task.");
            e.printStackTrace();
         }
      }

      for (String tifFileName : tifFileNames) {
         // Same derivation as threadProc: everything before the first '.' plus ".pdf"
         String pdfFileName = tifFileName.substring(0, tifFileName.indexOf(".")) + ".pdf";
         System.out.println(pdfFileName);
         assertTrue("Check that file exists", new File(pdfFileName).exists());
      }

      System.out.println("All files exist");
      System.out.println("All threads finished");
   } finally {
      // Always shut the executor down, including when an assertion above fails;
      // otherwise the pool's non-daemon threads would outlive the example
      executorService.shutdown();
   }
}
 
/**
 * OCRs a single image with its own LEAD OCR engine instance and saves the
 * result as a PDF whose path is the image path with its extension replaced
 * by {@code .pdf}.
 *
 * <p>Exceptions are caught and only their message is printed. A failed
 * {@code assertTrue} throws an {@link AssertionError}, which is not an
 * {@link Exception} and therefore propagates to the caller.
 *
 * @param fileName path of the image file to recognize
 */
private void threadProc(String fileName) {
   final String OCR_LEAD_RUNTIME_DIR = "C:\\LEADTOOLS23\\Bin\\Common\\OcrLEADRuntime";

   try {
      // Show a status message
      System.out.println("Begin: " + fileName);

      // Each task creates and owns its own instance of the LEAD OCR engine
      OcrEngine ocrEngine = OcrEngineManager.createEngine(OcrEngineType.LEAD);
      boolean engineStarted = false;

      try {
         // Start the engine using default parameters
         ocrEngine.startup(null, null, null, OCR_LEAD_RUNTIME_DIR);
         engineStarted = true;

         // Get the PDF file name (assumes the only '.' in the path is the extension separator)
         String pdfFileName = fileName.substring(0, fileName.indexOf(".")) + ".pdf";

         // Open the image this task was asked to process, not a fixed sample file,
         // so that each task recognizes its own input
         ILeadStream leadStream = LeadStreamFactory.create(fileName);

         // Create an OCR document
         OcrDocument ocrDocument = ocrEngine.getDocumentManager().createDocument();

         // Add a page to the document
         OcrPage ocrPage = ocrDocument.getPages().addPage(leadStream, null);

         // Recognize the page
         // Note, recognize can be called without calling AutoZone or manually adding
         // zones. The engine will check and automatically auto-zone the page
         ocrPage.recognize(null);

         // Save the document we have as PDF
         ocrDocument.save(pdfFileName, DocumentFormat.PDF, null);
         assertTrue("Doc saved", new File(pdfFileName).exists());
         System.out.println("Command run, document saved to " + pdfFileName);
      } finally {
         // Shut the engine down even when recognition or saving fails,
         // but only if startup actually succeeded
         if (engineStarted) {
            ocrEngine.shutdown();
         }
      }

      System.out.println("End: " + fileName);
   } catch (Exception ex) {
      System.out.println(ex.getMessage());
   }
}