Leadtools.Forms.Ocr Namespace : OcrEngineManager Class |
public static class OcrEngineManager
'Declaration Public MustInherit NotInheritable Class OcrEngineManager
'Usage Dim instance As OcrEngineManager
public sealed static class OcrEngineManager
function Leadtools.Forms.Ocr.OcrEngineManager()
public ref class OcrEngineManager abstract sealed
The OcrEngineManager class and its methods are your entry point to using the Leadtools.Forms.Ocr class library.
This class provides the methods you need to create an object of the IOcrEngine interface. Afterwards, you can use the properties and methods of this interface to perform your OCR tasks.
Based on the engine type passed to the CreateEngine methods, OcrEngineManager will load the OCR engine defined in one of the supporting assemblies and return it as an IOcrEngine interface. Use this interface and its included types to start using the Leadtools.Forms.Ocr class library. For more information about the engine types, refer to OcrEngineType.
The CreateEngine method lets you create an instance of IOcrEngine, loading the corresponding Leadtools.Forms.Ocr.[EngineName].dll assembly using the .NET System.Reflection.Assembly.Load(string assemblyString) method. You cannot unload this assembly once it has been successfully loaded.
Depending on your application requirements, platform, and OCR engine type, a "thunk" mechanism might be required. Refer to Multi-Threading with LEADTOOLS OCR for more information, including how to use the useThunkServer parameter of OcrEngineManager.CreateEngine.
' Data handed to each worker thread: the image to process and the event
' the worker sets once it has finished (successfully or not).
Private Structure MyThreadData
   Public ImageFileName As String
   Public WaitHandle As AutoResetEvent
End Structure

''' <summary>
''' Converts four TIF images to PDF, running the OCR for each image on its own thread,
''' and blocks until every thread has reported completion.
''' </summary>
Public Sub MultiThreadedOcrExample()
   ' The image file names we are going to OCR and convert to PDF
   Dim tifFileNames() As String = _
   { _
      Path.Combine(LEAD_VARS.ImagesDir, "Ocr1.tif"), _
      Path.Combine(LEAD_VARS.ImagesDir, "Ocr2.tif"), _
      Path.Combine(LEAD_VARS.ImagesDir, "Ocr3.tif"), _
      Path.Combine(LEAD_VARS.ImagesDir, "Ocr4.tif") _
   }

   Dim threadCount As Integer = tifFileNames.Length

   ' Create one thread and one completion event per image
   Dim threads(threadCount - 1) As Thread
   Dim waitHandles(threadCount - 1) As AutoResetEvent

   For i As Integer = 0 To threadCount - 1
      threads(i) = New Thread(AddressOf MyThreadProc)
      threads(i).Name = "OCR thread + " + i.ToString()
      waitHandles(i) = New AutoResetEvent(False)
   Next

   Console.WriteLine("Starting the threads and waiting...")

   ' Start the threads, giving each its image and its completion event
   For i As Integer = 0 To threadCount - 1
      Dim threadData As New MyThreadData()
      threadData.ImageFileName = tifFileNames(i)
      threadData.WaitHandle = waitHandles(i)
      threads(i).Start(threadData)
   Next

   ' Wait till all threads are done
   WaitHandle.WaitAll(waitHandles)

   ' Every worker has signalled, so the events are no longer needed; release them
   For Each doneEvent As AutoResetEvent In waitHandles
      doneEvent.Close()
   Next

   Console.WriteLine("All threads finished")
End Sub

''' <summary>
''' Thread procedure: recognizes one image and saves the result as a PDF next to it.
''' </summary>
''' <param name="data">A boxed MyThreadData describing the image and the event to signal.</param>
Private Sub MyThreadProc(ByVal data As Object)
   ' Grab the data
   Dim threadData As MyThreadData = CType(data, MyThreadData)
   Dim imageFileName As String = threadData.ImageFileName

   Try
      ' Show a status message
      Console.WriteLine("Begin: {0}", imageFileName)

      ' Create an instance of the OCR engine using the LEADTOOLS Thunk Server
      Using ocrEngine As IOcrEngine = OcrEngineManager.CreateEngine(OcrEngineType.Plus, True)
         ' Start the engine using default parameters
         ocrEngine.Startup(Nothing, Nothing, Nothing, Nothing)

         ' Get the PDF file name
         Dim pdfFileName As String = Path.ChangeExtension(imageFileName, "pdf")

         ' Create an OCR document
         Using ocrDocument As IOcrDocument = ocrEngine.DocumentManager.CreateDocument()
            ' Add a page to the document
            Dim ocrPage As IOcrPage = ocrDocument.Pages.AddPage(imageFileName, Nothing)

            ' Recognize the page
            ' Note, Recognize can be called without calling AutoZone or manually adding zones. The engine will
            ' check and automatically auto-zone the page
            ocrPage.Recognize(Nothing)

            ' Save the document we have as PDF
            ocrDocument.Save(pdfFileName, DocumentFormat.Pdf, Nothing)
         End Using

         ' Shutdown the engine
         ' Note: calling Dispose will also automatically shutdown the engine if it has been started
         ocrEngine.Shutdown()
      End Using

      Console.WriteLine("End: {0}", imageFileName)
   Catch ex As Exception
      ' This is the top of the worker thread: report the failure here so one bad image
      ' does not surface as an unhandled thread exception
      Console.WriteLine("Error: {0} - {1}", imageFileName, ex.Message)
   Finally
      ' Signal the main thread on every path, otherwise WaitAll would never return
      threadData.WaitHandle.Set()
   End Try
End Sub

Public NotInheritable Class LEAD_VARS
   Public Const ImagesDir As String = "C:\Users\Public\Documents\LEADTOOLS Images"
End Class
// Data handed to each worker thread: the image to process and the event
// the worker sets once it has finished (successfully or not).
private struct MyThreadData
{
    public string ImageFileName;
    public AutoResetEvent WaitHandle;
}

/// <summary>
/// Converts four TIF images to PDF, running the OCR for each image on its own thread,
/// and blocks until every thread has reported completion.
/// </summary>
public void MultiThreadedOcrExample()
{
    // The image file names we are going to OCR and convert to PDF
    string[] tifFileNames =
    {
        Path.Combine(LEAD_VARS.ImagesDir, "Ocr1.tif"),
        Path.Combine(LEAD_VARS.ImagesDir, "Ocr2.tif"),
        Path.Combine(LEAD_VARS.ImagesDir, "Ocr3.tif"),
        Path.Combine(LEAD_VARS.ImagesDir, "Ocr4.tif")
    };

    int threadCount = tifFileNames.Length;

    // Create one thread and one completion event per image
    Thread[] threads = new Thread[threadCount];
    AutoResetEvent[] waitHandles = new AutoResetEvent[threadCount];

    for (int i = 0; i < threadCount; i++)
    {
        threads[i] = new Thread(new ParameterizedThreadStart(MyThreadProc));
        threads[i].Name = "OCR thread + " + i.ToString();
        waitHandles[i] = new AutoResetEvent(false);
    }

    Console.WriteLine("Starting the threads and waiting...");

    // Start the threads, giving each its image and its completion event
    for (int i = 0; i < threadCount; i++)
    {
        MyThreadData threadData = new MyThreadData();
        threadData.ImageFileName = tifFileNames[i];
        threadData.WaitHandle = waitHandles[i];
        threads[i].Start(threadData);
    }

    // Wait till all threads are done
    WaitHandle.WaitAll(waitHandles);

    // Every worker has signalled, so the events are no longer needed; release them
    foreach (AutoResetEvent doneEvent in waitHandles)
    {
        doneEvent.Close();
    }

    Console.WriteLine("All threads finished");
}

/// <summary>
/// Thread procedure: recognizes one image and saves the result as a PDF next to it.
/// </summary>
/// <param name="data">A boxed <c>MyThreadData</c> describing the image and the event to signal.</param>
private void MyThreadProc(object data)
{
    // Grab the data
    MyThreadData threadData = (MyThreadData)data;
    string imageFileName = threadData.ImageFileName;

    try
    {
        // Show a status message
        Console.WriteLine("Begin: {0}", imageFileName);

        // Create an instance of the OCR engine using the LEADTOOLS Thunk Server
        using (IOcrEngine ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.Plus, true))
        {
            // Start the engine using default parameters
            ocrEngine.Startup(null, null, null, null);

            // Get the PDF file name
            string pdfFileName = Path.ChangeExtension(imageFileName, "pdf");

            // Create an OCR document
            using (IOcrDocument ocrDocument = ocrEngine.DocumentManager.CreateDocument())
            {
                // Add a page to the document
                IOcrPage ocrPage = ocrDocument.Pages.AddPage(imageFileName, null);

                // Recognize the page
                // Note, Recognize can be called without calling AutoZone or manually adding zones. The engine will
                // check and automatically auto-zone the page
                ocrPage.Recognize(null);

                // Save the document we have as PDF
                ocrDocument.Save(pdfFileName, DocumentFormat.Pdf, null);
            }

            // Shutdown the engine
            // Note: calling Dispose will also automatically shutdown the engine if it has been started
            ocrEngine.Shutdown();
        }

        Console.WriteLine("End: {0}", imageFileName);
    }
    catch (Exception ex)
    {
        // This is the top of the worker thread: report the failure here so one bad image
        // does not surface as an unhandled thread exception
        Console.WriteLine("Error: {0} - {1}", imageFileName, ex.Message);
    }
    finally
    {
        // Signal the main thread on every path, otherwise WaitAll would never return
        threadData.WaitHandle.Set();
    }
}

static class LEAD_VARS
{
    public const string ImagesDir = @"C:\Users\Public\Documents\LEADTOOLS Images";
}
/// <summary>
/// Recognizes every page of a multi-page TIF concurrently (one task per page) using a single
/// shared OCR engine, and saves each page as its own PDF or text file in the app's local folder.
/// </summary>
/// <returns>A task that completes once every page task has finished.</returns>
[TestMethod]
public async Task MultiThreadedOcrExample()
{
    try
    {
        string imageFileName = @"Multipage.tif";
        bool usePdf = true;

        // Pass the file name as an argument rather than as the format string itself
        System.Diagnostics.Debug.WriteLine(string.Format("Image: {0}", imageFileName));

        // First get the number of pages in the document, then do each page in its own task
        int pageCount;
        StorageFile imageFile = await Tools.AppInstallFolder.GetFileAsync(@"Assets\" + imageFileName);
        ILeadStream leadStream = LeadStreamFactory.Create(imageFile);
        using (IDisposable leadStreamDisposable = leadStream as IDisposable)
        {
            using (RasterCodecs codecs = new RasterCodecs())
            {
                System.Diagnostics.Debug.WriteLine("RasterCodecs.GetInformationAsync");
                using (CodecsImageInfo imageInfo = await codecs.GetInformationAsync(leadStream, true, 1))
                {
                    pageCount = imageInfo.TotalPages;
                    System.Diagnostics.Debug.WriteLine(string.Format("{0} pages in the file", pageCount));
                }
            }
        }

        // Create a LEADTOOLS IOcrEngine
        string engineDirectory = Path.Combine(Tools.AppInstallFolder.Path, @"OcrRuntime");
        IOcrEngine ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.Advantage, false);
        using (IDisposable ocrEngineDisposable = ocrEngine as IDisposable)
        {
            System.Diagnostics.Debug.WriteLine("IOcrEngine.Startup");
            ocrEngine.Startup(null, null, String.Empty, engineDirectory);

            // One task per page. Awaiting them all (instead of blocking on an event) keeps this
            // async method non-blocking and completes immediately when the file has no pages.
            Task[] pageTasks = new Task[pageCount];

            for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++)
            {
                // Copy the loop variable so each task sees its own page number
                int workPageNumber = pageNumber;

                pageTasks[pageNumber - 1] = Task.Run(async () =>
                {
                    System.Diagnostics.Debug.WriteLine(string.Format("Thread working on page {0} started", workPageNumber));

                    try
                    {
                        // Create the document
                        IOcrDocument ocrDocument = ocrEngine.DocumentManager.CreateDocument();
                        using (IDisposable ocrDocumentDisposable = ocrDocument as IDisposable)
                        {
                            IOcrPage ocrPage = null;

                            // Load the page
                            RasterCodecs codecs = ocrDocument.RasterCodecsInstance;
                            ILeadStream pageStream = LeadStreamFactory.Create(imageFile);
                            using (IDisposable pageStreamDisposable = pageStream as IDisposable)
                            {
                                System.Diagnostics.Debug.WriteLine(string.Format("Thread loading page {0}", workPageNumber));
                                using (RasterImage pageImage = await codecs.LoadAsync(pageStream, 0, CodecsLoadByteOrder.BgrOrGray, workPageNumber, workPageNumber))
                                {
                                    // Add it to the document
                                    System.Diagnostics.Debug.WriteLine(string.Format("Adding page {0}", workPageNumber));
                                    ocrPage = ocrDocument.Pages.AddPage(pageImage, null);
                                }
                            }

                            System.Diagnostics.Debug.WriteLine(string.Format("Recognize page {0}", workPageNumber));
                            ocrPage.Recognize(null);

                            System.Diagnostics.Debug.WriteLine(string.Format("Saving page {0}", workPageNumber));

                            string outputFileName;
                            DocumentFormat format;
                            if (usePdf)
                            {
                                outputFileName = string.Format("Page{0}.pdf", workPageNumber);
                                format = DocumentFormat.Pdf;
                            }
                            else
                            {
                                outputFileName = string.Format("Page{0}.txt", workPageNumber);
                                format = DocumentFormat.Text;
                            }

                            StorageFile outputFile = await Tools.AppLocalFolder.CreateFileAsync(outputFileName, CreationCollisionOption.ReplaceExisting);

                            // Use a stream local to this task. Assigning to the captured outer
                            // leadStream would let concurrent page tasks overwrite each other's
                            // stream between creating it and saving to it.
                            // NOTE(review): the original sample remarked that saving hung after the
                            // first file; presumably related to that shared variable and/or the
                            // blocking wait that used to follow the loop - confirm on a device.
                            ILeadStream outputStream = LeadStreamFactory.Create(outputFile);
                            using (IDisposable outputStreamDisposable = outputStream as IDisposable)
                            {
                                System.Diagnostics.Debug.WriteLine("IOcrDocument.SaveAsync");
                                System.Diagnostics.Debug.WriteLine(outputFile.Path);
                                await ocrDocument.SaveAsync(outputStream, format, null);
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        // Report the failure for this page; the other pages keep going
                        OutputError(ex);
                    }
                    finally
                    {
                        System.Diagnostics.Debug.WriteLine(string.Format("Thread working on page {0} completed", workPageNumber));
                    }
                });
            }

            // Wait on all work to finish before the engine is disposed
            await Task.WhenAll(pageTasks);
        }
    }
    catch (Exception ex)
    {
        OutputError(ex);
    }
}

/// <summary>
/// Writes an error to the debug output, reported as a LEADTOOLS error when
/// <c>RasterException.FromHResult</c> returns an exception for the error's HRESULT.
/// </summary>
/// <param name="error">The exception to report.</param>
private void OutputError(Exception error)
{
    string message;

    RasterException rasterException = RasterException.FromHResult(error.HResult);
    if (rasterException != null)
    {
        message = string.Format("ERROR LEADTOOLS: {0} - {1}", rasterException.Code, rasterException.Message);
    }
    else
    {
        message = string.Format("ERROR: {0} - {1}", error.HResult, error.Message);
    }

    // Write the text as-is: passing an extra argument would treat the message as a
    // composite format string, which throws if the message contains braces.
    System.Diagnostics.Debug.WriteLine(message);
}
Target Platforms: Windows 7, Windows Vista SP1 or later, Windows XP SP3, Windows Server 2008 (Server Core not supported), Windows Server 2008 R2 (Server Core supported with SP1 or later), Windows Server 2003 SP2
OcrEngineManager Members
Leadtools.Forms.Ocr Namespace
OcrEngineType Enumeration
IOcrEngine Interface
Programming with the LEADTOOLS .NET OCR
Creating an OCR Engine Instance
Starting and Shutting Down the OCR Engine
Multi-Threading with LEADTOOLS OCR
LEADTOOLS OCR Thunk Server
Files to be Included with Your Application