public void Save(
string fileName,
DocumentFormat format,
OcrProgressCallback callback
)
fileName
The name of the file to save the final output document to.
format
The document format to use. If this parameter is DocumentFormat.User, then the document is saved using the native engine format set in IOcrDocumentManager.EngineFormat if the engine used supports native formats; otherwise, an exception will be thrown. Note that saving the OCR results using the native engine formats may produce more accurate results in table and cell positions since the engine has access to extra data that is saved internally.
callback
Optional callback to show operation progress.
To save the output document to a .NET stream, use IOcrDocument.Save(Stream stream, DocumentFormat format, OcrProgressCallback callback).
To get the extension used commonly with the document format specified in format, use DocumentWriter.GetFormatFileExtension.
Each IOcrPage object in the Pages collection of this IOcrDocument object holds its recognition data internally. This data is used by this method to generate the final output document.
A typical OCR operation using the IOcrEngine involves starting up the engine, then creating a new IOcrDocument object using the CreateDocument method before adding the pages into it and performing either automatic or manual zoning. Once this is done, you can use the IOcrPage.Recognize method of each page to collect the recognition data and store it internally in the page. After the recognition data is collected, you use the various IOcrDocument.Save methods to save the document to its final format as well as IOcrDocument.SaveXml to save as XML.
You can also use the IOcrPage.GetText method to return the recognition data as a simple String object.
You can use IOcrDocument.Save as many times as required to save the document to multiple formats. You can also continue to add and recognize pages (through the IOcrPage.Recognize method) after you save the document.
For each IOcrPage that is not recognized (the user did not call Recognize and the value of the page's IOcrPage.IsRecognized property is still false), the IOcrDocument will insert an empty page into the final document.
To get the low level recognition data including the recognized characters and their confidence, use IOcrPage.GetRecognizedCharacters instead.
The IOcrDocument interface implements IDisposable, hence you must dispose the IOcrDocument object as soon as you are finished using it. Disposing an IOcrDocument object will free all the pages stored inside its IOcrDocument.Pages collection.
You can use the OcrProgressCallback to show the operation progress or to abort it. For more information and an example, refer to OcrProgressCallback.
using Leadtools;
using Leadtools.Codecs;
using Leadtools.Ocr;
using Leadtools.Forms.Common;
using Leadtools.Document.Writer;
using Leadtools.WinForms;
/// <summary>
/// Recognizes two TIF images with the LEAD OCR engine and saves the result to every
/// <c>DocumentFormat</c> value (except <c>DocumentFormat.User</c>), then to every
/// engine native format reported by the document manager.
/// </summary>
/// <remarks>
/// The output directory under <c>LEAD_VARS.ImagesDir</c> is deleted and recreated on every run.
/// </remarks>
public void OcrDocumentManagerExample()
{
    string tifFileName1 = Path.Combine(LEAD_VARS.ImagesDir, "Ocr1.tif");
    string tifFileName2 = Path.Combine(LEAD_VARS.ImagesDir, "Ocr2.tif");
    string outputDirectory = Path.Combine(LEAD_VARS.ImagesDir, "OutputDirectory");

    // Start from an empty output directory
    if (Directory.Exists(outputDirectory))
    {
        Directory.Delete(outputDirectory, true);
    }
    Directory.CreateDirectory(outputDirectory);

    // Create an instance of the engine
    using (IOcrEngine ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.LEAD))
    {
        // Start the engine using default parameters
        Console.WriteLine("Starting up the engine...");
        ocrEngine.Startup(null, null, null, LEAD_VARS.OcrLEADRuntimeDir);

        // Create the OCR document
        Console.WriteLine("Creating the OCR document...");
        IOcrDocumentManager ocrDocumentManager = ocrEngine.DocumentManager;
        using (IOcrDocument ocrDocument = ocrDocumentManager.CreateDocument())
        {
            // Add the pages to the document
            Console.WriteLine("Adding the pages...");
            ocrDocument.Pages.AddPage(tifFileName1, null);
            ocrDocument.Pages.AddPage(tifFileName2, null);

            // Recognize the pages of this document. Note, we did not call AutoZone; it will implicitly be called by Recognize
            Console.WriteLine("Recognizing all the pages...");
            ocrDocument.Pages.Recognize(null);

            // Save to all the formats supported by this OCR engine
            Array formats = Enum.GetValues(typeof(DocumentFormat));
            foreach (DocumentFormat format in formats)
            {
                // DocumentFormat.User means "save with IOcrDocumentManager.EngineFormat" and throws
                // when the engine has no native format. EngineFormat has not been set at this point,
                // so skip it here; the native-format loop below saves with DocumentFormat.User properly.
                if (format == DocumentFormat.User)
                {
                    continue;
                }

                string friendlyName = DocumentWriter.GetFormatFriendlyName(format);
                Console.WriteLine("Saving (using default options) to {0}...", friendlyName);

                // Construct the output file name (output_directory + document_format_name + . + extension)
                string extension = DocumentWriter.GetFormatFileExtension(format);
                string outputFileName = Path.Combine(outputDirectory, format.ToString() + "." + extension);

                // Save the document
                ocrDocument.Save(outputFileName, format, null);

                // If this is the LTD format, convert it to PDF
                if (format == DocumentFormat.Ltd)
                {
                    Console.WriteLine("Converting the LTD file to PDF...");
                    string pdfFileName = Path.Combine(outputDirectory, format.ToString() + "_pdf.pdf");
                    DocumentWriter docWriter = ocrEngine.DocumentWriterInstance;
                    docWriter.Convert(outputFileName, pdfFileName, DocumentFormat.Pdf);
                }
            }

            // Now save to all the engine native formats (if any) supported by the engine
            string[] engineFormats = ocrDocumentManager.GetSupportedEngineFormats();
            foreach (string engineFormat in engineFormats)
            {
                string friendlyName = ocrDocumentManager.GetEngineFormatFriendlyName(engineFormat);
                Console.WriteLine("Saving to engine native format {0}...", friendlyName);

                // Construct the output file name (output_directory + "engine" + engine_format_name + . + extension)
                string extension = ocrDocumentManager.GetEngineFormatFileExtension(engineFormat);
                string outputFileName = Path.Combine(outputDirectory, "engine_" + engineFormat + "." + extension);

                // To use this format, set it in the IOcrDocumentManager.EngineFormat and do a normal save using DocumentFormat.User
                ocrDocumentManager.EngineFormat = engineFormat;
                ocrDocument.Save(outputFileName, DocumentFormat.User, null);
            }
        }

        // Shutdown the engine
        // Note: calling Dispose will also automatically shutdown the engine if it has been started
        Console.WriteLine("Shutting down...");
        ocrEngine.Shutdown();
    }
}
/// <summary>
/// Fixed file-system locations used by the example code.
/// </summary>
static class LEAD_VARS
{
    /// <summary>Path passed to the OCR engine's Startup call as its runtime directory.</summary>
    public const string OcrLEADRuntimeDir = @"C:\LEADTOOLS23\Bin\Common\OcrLEADRuntime";

    /// <summary>Directory holding the sample input images; example output is written beneath it.</summary>
    public const string ImagesDir = @"C:\LEADTOOLS23\Resources\Images";
}