OcrEngineName Property

Summary

Name of the OCR engine to use with the conversion.

Syntax

C++/CLI

Python

public string OcrEngineName { get; set; }

public:  
   property String^ OcrEngineName 
   { 
      String^ get() 
      void set(String^ value) 
   }

OcrEngineName # get and set (StatusJobData)

Property Value

The name of the OCR engine to use with the conversion. The default value is null.

Remarks

If this property is set, its value must be the name of one of the OcrEngineType enumeration members, as a string (for example, "LEAD"). The runner creates a new IOcrEngine instance of this type and uses it during the conversion.

Example

using Leadtools; 
using Leadtools.Codecs; 
using Leadtools.Document.Writer; 
using Leadtools.Svg; 
using LeadtoolsExamples.Common; 
using Leadtools.Document; 
using Leadtools.Caching; 
using Leadtools.Annotations.Engine; 
using Leadtools.Ocr; 
using Leadtools.Document.Converter; 
using Leadtools.Annotations.Rendering; 
 
// This is Server.exe: it gets the path to a StatusJobData saved as JSON from the command line, loads it and runs the job
class Server 
{ 
   // Entry point of Server.exe.
   // args[0] must be the path to a file containing a StatusJobData serialized as JSON.
   // The job is deserialized, prepared and run with a StatusJobDataRunner.
   public static void Main(string[] args)
   {
      Console.WriteLine("Server running the job");

      // The command line must contain the path to a StatusJobData saved as JSON.
      // Without it there is nothing to run, so report the problem instead of failing on args[0].
      if (args == null || args.Length == 0 || string.IsNullOrEmpty(args[0]))
      {
         Console.Error.WriteLine("Usage: Server.exe <path to StatusJobData JSON file>");
         return;
      }

      // Load the job data
      StatusJobData jobData = null;
      string jsonFile = args[0];
      using (var stream = File.OpenRead(jsonFile))
      {
         var jsonSerializer = new DataContractJsonSerializer(typeof(StatusJobData));
         jobData = jsonSerializer.ReadObject(stream) as StatusJobData;
      }

      // The 'as' cast yields null when the file does not hold a StatusJobData (for example a JSON "null");
      // stop here rather than handing a null job to the runner
      if (jobData == null)
      {
         Console.Error.WriteLine("The file '{0}' does not contain a StatusJobData", jsonFile);
         return;
      }

      // Run it
      using (var runner = new StatusJobDataRunner())
      {
         runner.Prepare(jobData);
         runner.Run();
      }
   }
 
   // This is Client.exe 
   class Client 
   { 
      // Entry point of Client.exe.
      // Loads a TIFF into the cache, saves a StatusJobData describing its conversion to PDF as a
      // temporary JSON file, starts Server.exe on that file, polls the job status until the job
      // completes (or the server process exits), then downloads and shows the resulting document.
      public static void Main(string[] args)
      {
         // We will be converting this TIFF file to PDF
         string imageUrl = "https://demo.leadtools.com/images/tiff/ocr.tif";
         // Download the final document to this file
         string outputFile = @"c:\temp\output.pdf";

         // Setup the cache
         FileCache cache = new FileCache();
         // This should be changed to a network location if the service and client are not on the same machine
         // Or use something like the Redis cache that supports access from multiple processes and machines
         cache.CacheDirectory = @"c:\temp\cache";

         string documentId = LoadDocumentIntoCache(cache, imageUrl);

         // Setup the StatusJobData we will use for conversion
         var jobData = SetupJobData(cache, documentId);

         string tempFile = null;
         StatusJobData statusJobData = null;
         try
         {
            // We are ready, save the statusJobData as a JSON file and call the service
            using (var ms = new MemoryStream())
            {
               var jsonSerializer = new DataContractJsonSerializer(typeof(StatusJobData));
               jsonSerializer.WriteObject(ms, jobData);

               // Save it to a temp file
               string json = Encoding.UTF8.GetString(ms.ToArray());
               tempFile = Path.GetTempFileName();
               File.WriteAllText(tempFile, json);
            }

            // Call the Server to perform the conversion and monitor the status of the job.
            // The path is quoted because the temp directory may contain spaces, which would
            // otherwise split it into several command line arguments on the server side.
            using (Process serverProcess = Process.Start("Server.exe", "\"" + tempFile + "\""))
            {
               bool isDone = false;
               while (!isDone)
               {
                  // Sample the process state BEFORE querying, so that a status written by the
                  // server right before it exited is still picked up by the query below
                  bool serverExited = serverProcess != null && serverProcess.HasExited;

                  // We could abort at any time by calling this
                  // StatusJobDataRunner.AbortJob(cache, jobData.UserToken, jobData.JobToken);

                  // Get the status of the job
                  statusJobData = StatusJobDataRunner.QueryJobStatus(cache, jobData.UserToken, jobData.JobToken);
                  if (statusJobData != null)
                  {
                     // Print the status message
                     Console.WriteLine("Status {0} IsCompleted {1} Abort {2} started at {3} jobStatus at {4} query at {5} - {6}",
                        statusJobData.JobStatus, statusJobData.IsCompleted, statusJobData.Abort,
                        ToLocalTime(statusJobData.JobStartedTimestamp),
                        ToLocalTime(statusJobData.JobStatusTimestamp),
                        ToLocalTime(statusJobData.QueryJobStatusTimestamp),
                        statusJobData.JobStatusMessage);
                     if (statusJobData.IsCompleted)
                     {
                        Console.WriteLine("Completed");
                        // The job has been completed, check the error messages (if any)
                        var errorMessages = statusJobData.ErrorMessages;
                        if (errorMessages != null && errorMessages.Length > 0)
                        {
                           foreach (var errorMessage in errorMessages)
                              Console.WriteLine("{0}", errorMessage);
                        }
                     }

                     isDone = statusJobData.IsCompleted;
                  }
                  else
                  {
                     Console.WriteLine("Did not start yet");
                  }

                  // The server is gone and the job never reached completion: nobody is left
                  // to update the status, so stop waiting instead of polling forever
                  if (!isDone && serverExited)
                  {
                     Console.WriteLine("Server exited before the job was completed");
                     break;
                  }

                  // Breathe for a little bit
                  if (!isDone)
                     Thread.Sleep(100);
               }
            }
         }
         finally
         {
            // The server has either finished with the JSON file or is no longer running;
            // remove the file even when something above threw
            if (tempFile != null)
               File.Delete(tempFile);
         }

         // Delete the job
         StatusJobDataRunner.DeleteJob(cache, jobData.UserToken, jobData.JobToken);

         // If successful, download the document.
         // There is nothing to download when no status was ever reported, the job was aborted,
         // or no output document URI was produced.
         bool downloaded = false;
         if (statusJobData != null &&
            statusJobData.JobStatus != DocumentConverterJobStatus.Aborted &&
            statusJobData.OutputDocumentUri != null)
         {
            Console.WriteLine("Downloading the document");
            using (var stream = File.Create(outputFile))
            {
               var downloadDocumentOptions = new DownloadDocumentOptions();
               downloadDocumentOptions.Cache = cache;
               downloadDocumentOptions.DocumentId = statusJobData.OutputDocumentUri.ToString();
               downloadDocumentOptions.Offset = 0;
               downloadDocumentOptions.Length = -1;
               downloadDocumentOptions.Stream = stream;
               DocumentFactory.DownloadDocument(downloadDocumentOptions);
            }

            downloaded = true;
         }

         // Finally, delete the document from the cache since we finished it
         var deleteFromCacheOptions = new LoadFromCacheOptions();
         deleteFromCacheOptions.Cache = cache;
         deleteFromCacheOptions.DocumentId = documentId;
         DocumentFactory.DeleteFromCache(deleteFromCacheOptions);

         // Show the final document, but only if one was actually downloaded
         if (downloaded)
         {
            Process.Start(outputFile);
         }
      }
 
      // Loads the document at the given URL using the supplied cache, saves it to that cache
      // and returns its document ID so it can be referenced later.
      private static string LoadDocumentIntoCache(ObjectCache cache, string url)
      {
         var options = new LoadDocumentOptions { Cache = cache };

         Console.WriteLine("Client loading and saving document into the cache");

         using (LEADDocument loaded = DocumentFactory.LoadFromUri(new Uri(url), options))
         {
            // Remember the ID before touching the cache settings
            string id = loaded.DocumentId;

            // Turn off both automatic behaviors and save explicitly,
            // so the document persists in the cache after it is disposed
            loaded.AutoSaveToCache = false;
            loaded.AutoDeleteFromCache = false;
            loaded.SaveToCache();

            return id;
         }
      }
 
      // Builds the StatusJobData describing the conversion of the cached document identified by
      // documentId to PDF. The same cache is used for status, input and output; all option sets
      // (converter, writer, OCR) are stored as strings rather than as live objects.
      private static StatusJobData SetupJobData(ObjectCache cache, string documentId)
      {
         var job = new StatusJobData();

         // ---------------- Status ----------------

         // The job is identified by a unique token (a fresh GUID without dashes) and a user token (ID)
         job.JobToken = Guid.NewGuid().ToString("N");
         job.UserToken = "TestUser";

         // A single cache serves every operation. It is passed along as its configuration string
         // together with a policy string, which makes the cache objects themselves unnecessary.
         job.StatusCacheConfiguration = cache.GetConfigurationString();
         job.StatusCachePolicy = new CacheItemPolicy().ToParsableString();
         job.StatusCache = null;
         job.StatusCacheItemPolicy = null;

         // Optional user data, a simple string in this case
         job.UserData = "MyUserData";

         // Not set here, since they are used when querying the job:
         // JobStatus, JobStatusPageNumber, JobStatusMessage, IsCompleted, Abort, JobStartedTimestamp,
         // JobCompletedTimestamp, JobStatusTimestamp, QueryJobStatusTimestamp and ErrorMessages

         // ---------------- Options ----------------

         // Document converter options: configure a temporary converter and keep only its options string.
         // Leave this null to use the defaults or when the settings are set on the server.
         using (var converter = new DocumentConverter())
         {
            converter.Options.EnableSvgConversion = true;
            converter.Options.JobErrorMode = DocumentConverterJobErrorMode.Continue;
            job.DocumentConverterOptions = converter.Options.SaveToString();
            // The object form is not used
            job.DocumentConverter = null;
         }

         // Document writer options, handled the same way.
         // Leave this null to use the defaults or when the settings are set on the server.
         var writer = new DocumentWriter();
         var pdfSettings = writer.GetOptions(DocumentFormat.Pdf) as PdfDocumentOptions;
         pdfSettings.ImageOverText = true;
         pdfSettings.DocumentType = PdfDocumentType.PdfA;
         writer.SetOptions(DocumentFormat.Pdf, pdfSettings);

         using (var writerOptionsStream = new MemoryStream())
         {
            writer.SaveOptions(writerOptionsStream);
            byte[] writerOptionsBytes = writerOptionsStream.ToArray();
            job.DocumentWriterOptions = Encoding.UTF8.GetString(writerOptionsBytes);
         }

         // OCR engine name and settings.
         // Leave these null to use the defaults or when the settings are set on the server.
         using (var engine = OcrEngineManager.CreateEngine(OcrEngineType.LEAD))
         {
            engine.Startup(null, null, null, null);

            var xmlOptions = new OcrWriteXmlOptions();
            xmlOptions.Formatted = false;

            using (var settingsStream = new MemoryStream())
            {
               engine.SettingManager.Save(settingsStream, xmlOptions);
               // The engine name is the engine type as a string
               job.OcrEngineName = engine.EngineType.ToString();
               job.OcrEngineSettings = Encoding.UTF8.GetString(settingsStream.ToArray());
            }
         }

         // ---------------- Input ----------------

         // Same cache again: only the document ID is needed,
         // InputCacheConfiguration and InputCache stay null
         job.InputCacheConfiguration = null;
         job.InputCache = null;
         job.InputDocumentId = documentId;
         // All pages are converted, so both page numbers are left at 0
         job.InputDocumentFirstPageNumber = 0;
         job.InputDocumentLastPageNumber = 0;

         // ---------------- Output ----------------

         // Same cache again: no output cache or policies are set
         job.OutputCacheConfiguration = null;
         job.OutputCachePolicy = null;
         job.OutputCache = null;
         job.OutputCacheItemPolicy = null;

         // A specific ID could be requested for the output document; null lets the service create one
         job.OutputDocumentId = null;
         // OutputDocumentUri is an output value, so it is not touched here
         // A null name means the default is used (the name part of the input URL)
         job.OutputDocumentName = null;

         // ---------------- Conversion ----------------

         job.DocumentFormat = DocumentFormat.Pdf;
         job.RasterImageFormat = RasterImageFormat.Unknown;
         job.RasterImageBitsPerPixel = 24;
         job.JobName = "MyJob"; // Optional
         job.AnnotationsMode = DocumentConverterAnnotationsMode.None;

         return job;
      }
 
      // Formats a timestamp string for display in the status line.
      // Returns "not set" for a null or empty timestamp, the local long-time ("T") representation
      // when the timestamp can be parsed, and the raw text unchanged when it cannot.
      private static string ToLocalTime(string timestamp)
      {
         if (string.IsNullOrEmpty(timestamp))
            return "not set";

         // This helper only decorates a status message, so a malformed timestamp must not
         // throw and abort the monitoring loop: fall back to showing the text as received
         DateTime date;
         if (!DateTime.TryParse(timestamp, out date))
            return timestamp;

         date = date.ToLocalTime();
         return date.ToString("T");
      }
   }

Requirements

Target Platforms

Reference

StatusJobData Class

StatusJobData Members

Leadtools.Document.Converter Namespace

Download our FREE evaluation

Help Version 22.0.2023.5.10

Leadtools.Document.Converter Assembly

Introduction

Getting Started

Namespaces

Leadtools.Document.Converter Namespace

Assemblies