Name of the OCR engine to use with the conversion.
public string OcrEngineName { get; set; }
The name of the OCR engine to use with the conversion. The default value is null.
If this property is set, its value should be the name of one of the OcrEngineType enumeration members, as a string. The runner will create a new IOcrEngine instance of that type and use it during the conversion.
using System;
using System.Diagnostics;
using System.IO;
using System.Runtime.Serialization.Json;
using System.Text;
using System.Threading;
using Leadtools;
using Leadtools.Annotations.Engine;
using Leadtools.Annotations.Rendering;
using Leadtools.Caching;
using Leadtools.Codecs;
using Leadtools.Document;
using Leadtools.Document.Converter;
using Leadtools.Document.Writer;
using Leadtools.Ocr;
using Leadtools.Svg;
using LeadtoolsExamples.Common;
// This is Server.exe: it receives the path to a StatusJobData JSON file on the command line and runs that job
class Server
{
    // Entry point.
    // args[0] must be the path to a file containing a StatusJobData serialized as JSON
    // (Client writes this file with DataContractJsonSerializer before launching the server).
    // On a missing argument or an unreadable job the problem is reported on stderr and the
    // process exit code is set to 1 instead of crashing with an unhandled exception.
    public static void Main(string[] args)
    {
        if (args == null || args.Length == 0 || string.IsNullOrEmpty(args[0]))
        {
            Console.Error.WriteLine("Usage: Server.exe [path to StatusJobData JSON file]");
            Environment.ExitCode = 1;
            return;
        }

        Console.WriteLine("Server running the job");

        // The command line contains the path to a StatusJobData saved as JSON, load it
        string jsonFile = args[0];
        StatusJobData jobData;
        using (var stream = File.OpenRead(jsonFile))
        {
            var jsonSerializer = new DataContractJsonSerializer(typeof(StatusJobData));
            // "as" yields null when the file does not hold a StatusJobData (for example a JSON null)
            jobData = jsonSerializer.ReadObject(stream) as StatusJobData;
        }

        if (jobData == null)
        {
            Console.Error.WriteLine("The file does not contain a StatusJobData: " + jsonFile);
            Environment.ExitCode = 1;
            return;
        }

        // Run it
        using (var runner = new StatusJobDataRunner())
        {
            runner.Prepare(jobData);
            runner.Run();
        }
    }
}
// This is Client.exe
class Client
{
// Client entry point.
// Loads a TIFF into the cache, writes a StatusJobData describing a PDF conversion to a temporary
// JSON file, launches Server.exe on that file, polls the job status until the job completes (or the
// server process exits), then downloads and opens the converted document.
// args is not used.
public static void Main(string[] args)
{
    // We will be converting this TIFF file to PDF
    string imageUrl = "https://demo.leadtools.com/images/tiff/ocr.tif";
    // Download the final document to this file
    string outputFile = @"c:\temp\output.pdf";
    // Setup the cache
    FileCache cache = new FileCache();
    // This should be changed to a network location if the service and client are not on the same machine
    // Or use something like the Redis cache that supports access from multiple processes and machines
    cache.CacheDirectory = @"c:\temp\cache";
    string documentId = LoadDocumentIntoCache(cache, imageUrl);
    // Setup the StatusJobData we will use for conversion
    var jobData = SetupJobData(cache, documentId);

    string tempFile = Path.GetTempFileName();
    StatusJobData statusJobData = null;
    bool isDone = false;
    bool hasOutput = false;
    try
    {
        // We are ready, save the statusJobData as a JSON file and call the service
        using (var jsonStream = File.Create(tempFile))
        {
            var jsonSerializer = new DataContractJsonSerializer(typeof(StatusJobData));
            jsonSerializer.WriteObject(jsonStream, jobData);
        }

        // Call the Server to perform the conversion and monitor the status of the job.
        // The path is quoted because the temp directory may contain spaces, which would otherwise
        // split it into several command line arguments.
        using (Process serverProcess = Process.Start("Server.exe", "\"" + tempFile + "\""))
        {
            bool serverExited = false;
            while (!isDone)
            {
                // We could abort at any time by calling this
                // StatusJobDataRunner.AbortJob(cache, jobData.UserToken, jobData.JobToken);
                // Get the status of the job
                statusJobData = StatusJobDataRunner.QueryJobStatus(cache, jobData.UserToken, jobData.JobToken);
                if (statusJobData != null)
                {
                    // Print the status message
                    Console.WriteLine("Status {0} IsCompleted {1} Abort {2} started at {3} jobStatus at {4} query at {5} - {6}",
                        statusJobData.JobStatus, statusJobData.IsCompleted, statusJobData.Abort,
                        ToLocalTime(statusJobData.JobStartedTimestamp),
                        ToLocalTime(statusJobData.JobStatusTimestamp),
                        ToLocalTime(statusJobData.QueryJobStatusTimestamp),
                        statusJobData.JobStatusMessage);
                    if (statusJobData.IsCompleted)
                    {
                        Console.WriteLine("Completed");
                        // The job has been completed, check the error messages (if any)
                        var errorMessages = statusJobData.ErrorMessages;
                        if (errorMessages != null && errorMessages.Length > 0)
                        {
                            foreach (var errorMessage in errorMessages)
                            {
                                Console.WriteLine("{0}", errorMessage);
                            }
                        }
                    }
                    isDone = statusJobData.IsCompleted;
                }
                else
                {
                    Console.WriteLine("Did not start yet");
                }

                if (isDone)
                {
                    break;
                }

                // The exit flag is only acted upon one iteration after it was observed, so the
                // status is queried once more after the server is known to be gone. Without this
                // check a server that crashes before completing the job would be polled forever.
                if (serverExited)
                {
                    Console.WriteLine("Server exited before the job completed");
                    break;
                }
                serverExited = serverProcess != null && serverProcess.HasExited;

                // Breathe for a little bit
                Thread.Sleep(100);
            }
        }

        // Delete the job (only if the server ever reported a status for it)
        if (statusJobData != null)
        {
            StatusJobDataRunner.DeleteJob(cache, jobData.UserToken, jobData.JobToken);
        }

        // Only download when the job completed, was not aborted and actually reported an output
        // document; OutputDocumentUri is dereferenced below, so it must not be null.
        hasOutput = isDone
            && statusJobData.JobStatus != DocumentConverterJobStatus.Aborted
            && statusJobData.OutputDocumentUri != null;
        if (hasOutput)
        {
            Console.WriteLine("Downloading the document");
            using (var stream = File.Create(outputFile))
            {
                var downloadDocumentOptions = new DownloadDocumentOptions();
                downloadDocumentOptions.Cache = cache;
                downloadDocumentOptions.DocumentId = statusJobData.OutputDocumentUri.ToString();
                downloadDocumentOptions.Offset = 0;
                downloadDocumentOptions.Length = -1;
                downloadDocumentOptions.Stream = stream;
                DocumentFactory.DownloadDocument(downloadDocumentOptions);
            }
        }
    }
    finally
    {
        // Clean up even when something above threw
        File.Delete(tempFile);
        // Finally, delete the document from the cache since we finished it
        var deleteFromCacheOptions = new LoadFromCacheOptions();
        deleteFromCacheOptions.Cache = cache;
        deleteFromCacheOptions.DocumentId = documentId;
        DocumentFactory.DeleteFromCache(deleteFromCacheOptions);
    }

    // Show the final document
    if (hasOutput)
    {
        Process.Start(outputFile);
    }
}
// Loads the document at the given URL with the supplied cache, saves it into that cache and
// returns its document ID so it can be referenced later.
private static string LoadDocumentIntoCache(ObjectCache cache, string url)
{
    var options = new LoadDocumentOptions { Cache = cache };
    Console.WriteLine("Client loading and saving document into the cache");

    using (LEADDocument loaded = DocumentFactory.LoadFromUri(new Uri(url), options))
    {
        // Remember the ID before the document object goes away
        string id = loaded.DocumentId;

        // Turn off the automatic save/delete behaviors and save explicitly, so the cached
        // document is still there after this object is disposed
        loaded.AutoSaveToCache = false;
        loaded.AutoDeleteFromCache = false;
        loaded.SaveToCache();

        return id;
    }
}
// Builds the StatusJobData for converting the cached document identified by documentId to PDF.
// The same cache is used for status, input and output, so only its configuration string is stored
// and all cache object members are left null.
private static StatusJobData SetupJobData(ObjectCache cache, string documentId)
{
    //
    // Status section
    //
    // The job token is a fresh GUID without dashes; the user token and user data are fixed sample values.
    // The cache is passed as a configuration string plus a parsable policy string, so the cache
    // object members themselves are not needed.
    var job = new StatusJobData
    {
        JobToken = Guid.NewGuid().ToString("N"),
        UserToken = "TestUser",
        StatusCacheConfiguration = cache.GetConfigurationString(),
        StatusCachePolicy = new CacheItemPolicy().ToParsableString(),
        StatusCache = null,
        StatusCacheItemPolicy = null,
        UserData = "MyUserData"
    };
    // JobStatus, JobStatusPageNumber, JobStatusMessage, IsCompleted, Abort, JobStartedTimestamp,
    // JobCompletedTimestamp, JobStatusTimestamp, QueryJobStatusTimestamp and ErrorMessages are
    // used when querying the job, so they are not set here.

    //
    // Options section
    //
    // Document converter options: configure a throwaway converter and keep only its options string.
    // Leave this null to use the defaults or settings configured on the server.
    using (var converter = new DocumentConverter())
    {
        var converterOptions = converter.Options;
        converterOptions.EnableSvgConversion = true;
        converterOptions.JobErrorMode = DocumentConverterJobErrorMode.Continue;
        job.DocumentConverterOptions = converterOptions.SaveToString();
        // Do not use the object
        job.DocumentConverter = null;
    }

    // Document writer options, handled the same way: PDF/A output with the image over the text.
    // Leave this null to use the defaults or settings configured on the server.
    var writer = new DocumentWriter();
    var pdfSettings = writer.GetOptions(DocumentFormat.Pdf) as PdfDocumentOptions;
    pdfSettings.ImageOverText = true;
    pdfSettings.DocumentType = PdfDocumentType.PdfA;
    writer.SetOptions(DocumentFormat.Pdf, pdfSettings);
    using (var writerStream = new MemoryStream())
    {
        writer.SaveOptions(writerStream);
        byte[] writerBytes = writerStream.ToArray();
        job.DocumentWriterOptions = Encoding.UTF8.GetString(writerBytes);
    }

    // OCR options: start a LEAD engine, save its settings as unformatted XML and record the engine name.
    // Leave this null to use the defaults or settings configured on the server.
    using (var engine = OcrEngineManager.CreateEngine(OcrEngineType.LEAD))
    {
        engine.Startup(null, null, null, null);
        using (var settingsStream = new MemoryStream())
        {
            var xmlOptions = new OcrWriteXmlOptions { Formatted = false };
            engine.SettingManager.Save(settingsStream, xmlOptions);
            // Set the OCR engine name and settings
            job.OcrEngineName = engine.EngineType.ToString();
            job.OcrEngineSettings = Encoding.UTF8.GetString(settingsStream.ToArray());
        }
    }

    //
    // Input section
    //
    // Same cache as the status, so only the document ID is needed
    job.InputCacheConfiguration = null;
    job.InputCache = null;
    job.InputDocumentId = documentId;
    // Convert all pages: first and last page numbers are both set to 0
    job.InputDocumentFirstPageNumber = 0;
    job.InputDocumentLastPageNumber = 0;

    //
    // Output section
    //
    // Same cache again, so no output cache or policies
    job.OutputCacheConfiguration = null;
    job.OutputCachePolicy = null;
    job.OutputCache = null;
    job.OutputCacheItemPolicy = null;
    // Null lets the service create the output document ID; set it to choose the ID yourself
    job.OutputDocumentId = null;
    // OutputDocumentUri is an output value, so it is not set here
    // Null uses the default name (the name part of the input URL)
    job.OutputDocumentName = null;

    //
    // Conversion options
    //
    job.DocumentFormat = DocumentFormat.Pdf;
    job.RasterImageFormat = RasterImageFormat.Unknown;
    job.RasterImageBitsPerPixel = 24;
    job.JobName = "MyJob"; // Optional
    job.AnnotationsMode = DocumentConverterAnnotationsMode.None;

    return job;
}
// Formats a timestamp string for display in the status line.
// Returns "not set" for a null or empty timestamp, the long time pattern ("T") of the parsed
// value converted to local time when the text parses as a date/time, and the raw text unchanged
// when it does not parse. The value is only used for console output, so a malformed timestamp
// must not throw and take the polling loop down with it.
private static string ToLocalTime(string timestamp)
{
    if (string.IsNullOrEmpty(timestamp))
    {
        return "not set";
    }

    // Same parsing rules as DateTime.Parse(string), without the FormatException
    DateTime date;
    if (!DateTime.TryParse(timestamp, out date))
    {
        return timestamp;
    }

    return date.ToLocalTime().ToString("T");
}
}