Imports Leadtools
Imports Leadtools.Codecs
Imports Leadtools.Forms.DocumentWriters
Imports Leadtools.Forms.Ocr
''' <summary>
''' OCRs each page of a multi-page image into a temporary LTD file, appends every
''' page LTD to one main LTD file, and finally converts the main LTD to a PDF
''' written to the images directory.
''' </summary>
''' <remarks>
''' Both temporary LTD files are removed in a Finally block so they do not
''' accumulate in the temp directory, including when an OCR step throws.
''' </remarks>
Public Sub AppendLtdExample()
   ' Get a multi-page source file
   Dim inputFileName As String = GetImageFileName()
   Dim outputFileName As String = Path.Combine(LEAD_VARS.ImagesDir, "AppendLtdExample.pdf")

   ' This is the LTD file we will use to append all recognition data
   Dim mainLtdFileName As String = Path.GetTempFileName()
   ' Scratch LTD file re-used for every page; assigned once the engine has started
   Dim pageLtdFileName As String = Nothing

   Try
      If File.Exists(outputFileName) Then File.Delete(outputFileName)
      ' Path.GetTempFileName creates the file on disk; remove it so the first
      ' AppendLtd call sees a main LTD that does not exist yet
      If File.Exists(mainLtdFileName) Then File.Delete(mainLtdFileName)

      ' Use OCR Advantage engine
      Using ocrEngine As IOcrEngine = OcrEngineManager.CreateEngine(OcrEngineType.Advantage, False)
         ocrEngine.Startup(Nothing, Nothing, Nothing, LEAD_VARS.OcrAdvantageRuntimeDir)

         ' We can re-use LTD files, so create one
         pageLtdFileName = Path.GetTempFileName()

         ' Get number of pages
         Dim pageCount As Integer = ocrEngine.RasterCodecsInstance.GetTotalPages(inputFileName)

         For pageNumber As Integer = 1 To pageCount
            ' OCR this page and save it as LTD
            Console.WriteLine("Processing page {0} of {1}", pageNumber, pageCount)
            RecognizeAndSaveLtd(ocrEngine, inputFileName, pageNumber, pageLtdFileName)

            ' Append this LTD to the main one
            ' Notice, first time, the main LTD does not exist, AppendLtd will
            ' just copy the data over from the source file
            ocrEngine.DocumentWriterInstance.AppendLtd(pageLtdFileName, mainLtdFileName)
         Next

         ' We are done, convert the LTD to final format, here, we will
         ' use PDF
         Console.WriteLine("Converting to final format")
         ocrEngine.DocumentWriterInstance.Convert(mainLtdFileName, outputFileName, DocumentFormat.Pdf)
      End Using
   Finally
      ' The intermediate LTD files are not needed once conversion has finished or failed
      If pageLtdFileName IsNot Nothing AndAlso File.Exists(pageLtdFileName) Then File.Delete(pageLtdFileName)
      If File.Exists(mainLtdFileName) Then File.Delete(mainLtdFileName)
   End Try

   Console.WriteLine("Success, file {0} is created", outputFileName)
End Sub
''' <summary>
''' Loads one page of the input file into a new OCR document, auto-zones and
''' recognizes it, then saves the document to the given file in LTD format.
''' </summary>
''' <param name="ocrEngine">Engine used to create the OCR document.</param>
''' <param name="inputFileName">Image file that contains the page.</param>
''' <param name="pageNumber">Page number passed as both the first and last page to load.</param>
''' <param name="pageLtdFileName">Target LTD file; an existing file is deleted first.</param>
Private Shared Sub RecognizeAndSaveLtd(ocrEngine As IOcrEngine, inputFileName As String, pageNumber As Integer, pageLtdFileName As String)
   ' Start from a clean slate: drop any LTD left over from a previous page
   If File.Exists(pageLtdFileName) Then
      File.Delete(pageLtdFileName)
   End If

   ' The document only lives for the duration of this one page
   Using document As IOcrDocument = ocrEngine.DocumentManager.CreateDocument()
      ' Bring in just the requested page
      Console.WriteLine(" Loading the page")
      document.Pages.AddPages(inputFileName, pageNumber, pageNumber, Nothing)
      Dim loadedPage As IOcrPage = document.Pages(0)

      ' Find the zones automatically
      Console.WriteLine(" Auto-zoning the page")
      loadedPage.AutoZone(Nothing)

      ' Run recognition on the page
      Console.WriteLine(" Recognizing the page")
      loadedPage.Recognize(Nothing)

      ' Write the result out as LTD
      Console.WriteLine(" Saving the page")
      document.Save(pageLtdFileName, DocumentFormat.Ltd, Nothing)
   End Using
End Sub
''' <summary>
''' Builds a multi-page TIF named "AppendLtdExample.tif" in the images directory
''' by stitching together Ocr1.tif through Ocr4.tif, and returns its full path.
''' </summary>
''' <returns>Full path of the multi-page TIF file that was written.</returns>
Private Shared Function GetImageFileName() As String
   Dim pageTileTemplate As String = Path.Combine(LEAD_VARS.ImagesDir, "Ocr{0}.tif")
   Dim multiPageImageFileName As String = Path.Combine(LEAD_VARS.ImagesDir, "AppendLtdExample.tif")
   If File.Exists(multiPageImageFileName) Then File.Delete(multiPageImageFileName)

   ' Create a multi-page TIF file by stitching OCR1 to OCR4.tif shipped with LEADTOOLS
   Using codecs As New RasterCodecs()
      Dim finalImage As RasterImage = Nothing
      Try
         For page As Integer = 1 To 4
            Dim pageImage As RasterImage = codecs.Load(String.Format(pageTileTemplate, page))
            If finalImage Is Nothing Then
               ' The first page becomes the image that collects all other pages
               finalImage = pageImage
            Else
               Try
                  finalImage.AddPage(pageImage)
               Finally
                  ' Release the per-page image even if adding it failed
                  pageImage.Dispose()
               End Try
            End If
         Next

         ' Save the final image
         codecs.Save(finalImage, multiPageImageFileName, RasterImageFormat.CcittGroup4, 1)
      Finally
         ' The stitched image is no longer needed once it has been saved (or loading failed)
         If finalImage IsNot Nothing Then finalImage.Dispose()
      End Try
   End Using

   Return multiPageImageFileName
End Function
''' <summary>
''' Fixed file-system locations used by the AppendLtd example.
''' </summary>
Public NotInheritable Class LEAD_VARS
   ''' <summary>Directory passed to the OCR engine's Startup call as the Advantage runtime location.</summary>
   Public Const OcrAdvantageRuntimeDir As String = "C:\LEADTOOLS 19\Bin\Common\OcrAdvantageRuntime"

   ''' <summary>Directory the example reads its sample TIF pages from and writes its output files to.</summary>
   Public Const ImagesDir As String = "C:\Users\Public\Documents\LEADTOOLS Images"
End Class
using Leadtools;
using Leadtools.Codecs;
using Leadtools.Forms.DocumentWriters;
using Leadtools.Forms.Ocr;
/// <summary>
/// OCRs each page of a multi-page image into a temporary LTD file, appends every
/// page LTD to one main LTD file, and finally converts the main LTD to a PDF
/// written to the images directory.
/// </summary>
/// <remarks>
/// Both temporary LTD files are removed in a finally block so they do not
/// accumulate in the temp directory, including when an OCR step throws.
/// </remarks>
public void AppendLtdExample()
{
    // Get a multi-page source file
    var inputFileName = GetImageFileName();
    var outputFileName = Path.Combine(LEAD_VARS.ImagesDir, "AppendLtdExample.pdf");

    // This is the LTD file we will use to append all recognition data
    string mainLtdFileName = Path.GetTempFileName();
    // Scratch LTD file re-used for every page; assigned once the engine has started
    string pageLtdFileName = null;

    try
    {
        if (File.Exists(outputFileName))
        {
            File.Delete(outputFileName);
        }

        // Path.GetTempFileName creates the file on disk; remove it so the first
        // AppendLtd call sees a main LTD that does not exist yet
        if (File.Exists(mainLtdFileName))
        {
            File.Delete(mainLtdFileName);
        }

        // Use OCR Advantage engine
        using (var ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.Advantage, false))
        {
            ocrEngine.Startup(null, null, null, LEAD_VARS.OcrAdvantageRuntimeDir);

            // We can re-use LTD files, so create one
            pageLtdFileName = Path.GetTempFileName();

            // Get number of pages
            var pageCount = ocrEngine.RasterCodecsInstance.GetTotalPages(inputFileName);

            for (var pageNumber = 1; pageNumber <= pageCount; pageNumber++)
            {
                // OCR this page and save it as LTD
                Console.WriteLine("Processing page {0} of {1}", pageNumber, pageCount);
                RecognizeAndSaveLtd(ocrEngine, inputFileName, pageNumber, pageLtdFileName);

                // Append this LTD to the main one
                // Notice, first time, the main LTD does not exist, AppendLtd will
                // just copy the data over from the source file
                ocrEngine.DocumentWriterInstance.AppendLtd(pageLtdFileName, mainLtdFileName);
            }

            // We are done, convert the LTD to final format, here, we will
            // use PDF
            Console.WriteLine("Converting to final format");
            ocrEngine.DocumentWriterInstance.Convert(mainLtdFileName, outputFileName, DocumentFormat.Pdf);
        }
    }
    finally
    {
        // The intermediate LTD files are not needed once conversion has finished or failed
        if (pageLtdFileName != null && File.Exists(pageLtdFileName))
        {
            File.Delete(pageLtdFileName);
        }

        if (File.Exists(mainLtdFileName))
        {
            File.Delete(mainLtdFileName);
        }
    }

    Console.WriteLine("Success, file {0} is created", outputFileName);
}
/// <summary>
/// Loads one page of the input file into a new OCR document, auto-zones and
/// recognizes it, then saves the document to the given file in LTD format.
/// </summary>
/// <param name="ocrEngine">Engine used to create the OCR document.</param>
/// <param name="inputFileName">Image file that contains the page.</param>
/// <param name="pageNumber">Page number passed as both the first and last page to load.</param>
/// <param name="pageLtdFileName">Target LTD file; an existing file is deleted first.</param>
private static void RecognizeAndSaveLtd(IOcrEngine ocrEngine, string inputFileName, int pageNumber, string pageLtdFileName)
{
    // Start from a clean slate: drop any LTD left over from a previous page
    if (File.Exists(pageLtdFileName))
    {
        File.Delete(pageLtdFileName);
    }

    // The document only lives for the duration of this one page
    using (IOcrDocument document = ocrEngine.DocumentManager.CreateDocument())
    {
        // Bring in just the requested page
        Console.WriteLine(" Loading the page");
        document.Pages.AddPages(inputFileName, pageNumber, pageNumber, null);
        IOcrPage loadedPage = document.Pages[0];

        // Find the zones automatically
        Console.WriteLine(" Auto-zoning the page");
        loadedPage.AutoZone(null);

        // Run recognition on the page
        Console.WriteLine(" Recognizing the page");
        loadedPage.Recognize(null);

        // Write the result out as LTD
        Console.WriteLine(" Saving the page");
        document.Save(pageLtdFileName, DocumentFormat.Ltd, null);
    }
}
/// <summary>
/// Builds a multi-page TIF named "AppendLtdExample.tif" in the images directory
/// by stitching together Ocr1.tif through Ocr4.tif, and returns its full path.
/// </summary>
/// <returns>Full path of the multi-page TIF file that was written.</returns>
private static string GetImageFileName()
{
    var pageTileTemplate = Path.Combine(LEAD_VARS.ImagesDir, "Ocr{0}.tif");
    var multiPageImageFileName = Path.Combine(LEAD_VARS.ImagesDir, "AppendLtdExample.tif");
    if (File.Exists(multiPageImageFileName))
    {
        File.Delete(multiPageImageFileName);
    }

    // Create a multi-page TIF file by stitching OCR1 to OCR4.tif shipped with LEADTOOLS
    using (var codecs = new RasterCodecs())
    {
        RasterImage finalImage = null;
        try
        {
            for (int page = 1; page <= 4; page++)
            {
                var pageImage = codecs.Load(string.Format(pageTileTemplate, page));
                if (finalImage == null)
                {
                    // The first page becomes the image that collects all other pages
                    finalImage = pageImage;
                }
                else
                {
                    try
                    {
                        finalImage.AddPage(pageImage);
                    }
                    finally
                    {
                        // Release the per-page image even if adding it failed
                        pageImage.Dispose();
                    }
                }
            }

            // Save the final image
            codecs.Save(finalImage, multiPageImageFileName, RasterImageFormat.CcittGroup4, 1);
        }
        finally
        {
            // The stitched image is no longer needed once it has been saved (or loading failed)
            if (finalImage != null)
            {
                finalImage.Dispose();
            }
        }
    }

    return multiPageImageFileName;
}
/// <summary>
/// Fixed file-system locations used by the AppendLtd example.
/// </summary>
static class LEAD_VARS
{
    /// <summary>Directory passed to the OCR engine's Startup call as the Advantage runtime location.</summary>
    public const string OcrAdvantageRuntimeDir = @"C:\LEADTOOLS 19\Bin\Common\OcrAdvantageRuntime";

    /// <summary>Directory the example reads its sample TIF pages from and writes its output files to.</summary>
    public const string ImagesDir = @"C:\Users\Public\Documents\LEADTOOLS Images";
}