public char MissingSymbol { get; set; }
The character used as a symbol for the missing characters in the final output document.
The missing symbol is a special character that replaces another character that was recognized by the engine but could not be represented in the final output document, since the character does not exist in the current code page.
During the conversion to the final output document the IOcrDocument.Save or IOcrDocument.SaveXml methods will try to find a replacement character with a similar shape for one not supported by the current code page. Only if this attempt was not successful the missing symbol will be sent into the final output document.
The default missing symbol is "^" (the circumflex character)
For more information on recognition and rejected and missing symbols, refer to IOcrPage.Recognize.
Note: This property is not supported in the LEADTOOLS OCR Module - LEAD Engine
using Leadtools;
using Leadtools.Codecs;
using Leadtools.Ocr;
using Leadtools.Forms.Common;
using Leadtools.Document.Writer;
using Leadtools.WinForms;
public void OcrDocumentManagerExample()
{
string tifFileName1 = Path.Combine(LEAD_VARS.ImagesDir, "Ocr1.tif");
string tifFileName2 = Path.Combine(LEAD_VARS.ImagesDir, "Ocr2.tif");
string outputDirectory = Path.Combine(LEAD_VARS.ImagesDir, "OutputDirectory");
// Create the output directory
if (Directory.Exists(outputDirectory))
Directory.Delete(outputDirectory, true);
Directory.CreateDirectory(outputDirectory);
// Create an instance of the engine
using (IOcrEngine ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.LEAD))
{
// Start the engine using default parameters
Console.WriteLine("Starting up the engine...");
ocrEngine.Startup(null, null, null, LEAD_VARS.OcrLEADRuntimeDir);
// Create the OCR document
Console.WriteLine("Creating the OCR document...");
IOcrDocumentManager ocrDocumentManager = ocrEngine.DocumentManager;
using (IOcrDocument ocrDocument = ocrDocumentManager.CreateDocument())
{
// Add the pages to the document
Console.WriteLine("Adding the pages...");
ocrDocument.Pages.AddPage(tifFileName1, null);
ocrDocument.Pages.AddPage(tifFileName2, null);
// Recognize the pages to this document. Note, we did not call AutoZone, it will explicitly be called by Recognize
Console.WriteLine("Recognizing all the pages...");
ocrDocument.Pages.Recognize(null);
// Save to all the formats supported by this OCR engine
Array formats = Enum.GetValues(typeof(DocumentFormat));
foreach (DocumentFormat format in formats)
{
string friendlyName = DocumentWriter.GetFormatFriendlyName(format);
Console.WriteLine("Saving (using default options) to {0}...", friendlyName);
// Construct the output file name (output_directory + document_format_name + . + extension)
string extension = DocumentWriter.GetFormatFileExtension(format);
string outputFileName = Path.Combine(outputDirectory, format.ToString() + "." + extension);
// Save the document
ocrDocument.Save(outputFileName, format, null);
// If this is the LTD format, convert it to PDF
if (format == DocumentFormat.Ltd)
{
Console.WriteLine("Converting the LTD file to PDF...");
string pdfFileName = Path.Combine(outputDirectory, format.ToString() + "_pdf.pdf");
DocumentWriter docWriter = ocrEngine.DocumentWriterInstance;
docWriter.Convert(outputFileName, pdfFileName, DocumentFormat.Pdf);
}
}
// Now save to all the engine native formats (if any) supported by the engine
string[] engineFormats = ocrDocumentManager.GetSupportedEngineFormats();
foreach (string engineFormat in engineFormats)
{
string friendlyName = ocrDocumentManager.GetEngineFormatFriendlyName(engineFormat);
Console.WriteLine("Saving to engine native format {0}...", friendlyName);
// Construct the output file name (output_directory + "engine" + engine_format_name + . + extension)
string extension = ocrDocumentManager.GetEngineFormatFileExtension(engineFormat);
string outputFileName = Path.Combine(outputDirectory, "engine_" + engineFormat + "." + extension);
// To use this format, set it in the IOcrDocumentManager.EngineFormat and do a normal save using DocumentFormat.User
// Save the document
ocrDocumentManager.EngineFormat = engineFormat;
ocrDocument.Save(outputFileName, DocumentFormat.User, null);
}
}
// Shutdown the engine
// Note: calling Dispose will also automatically shutdown the engine if it has been started
Console.WriteLine("Shutting down...");
ocrEngine.Shutdown();
}
}
static class LEAD_VARS
{
public const string ImagesDir = @"C:\LEADTOOLS23\Resources\Images";
public const string OcrLEADRuntimeDir = @"C:\LEADTOOLS23\Bin\Common\OcrLEADRuntime";
}