- Start Visual Studio .NET.
- Choose File->New->Project... from the menu.
- In the New Project dialog box, choose either "Visual C# Projects" or "Visual Basic Projects" in the Projects Type List, and choose "Windows Application" in the Templates List.
- Type the project name as "ScanToSearchablePDF" in the Project Name field, and then choose OK. If desired, type a new location for your project or select a directory using the Browse button, and then choose OK.
- In the "Solution Explorer" window, right-click on the "References" folder, and select "Add Reference..." from the context menu. In the "Add Reference..." dialog box, select the ".NET" tab, browse to the LEADTOOLS for .NET "\LEAD Technologies\LEADTOOLS xx\Bin\DotNet\Win32" folder, and select the following DLLs:
- Leadtools.dll
- Leadtools.Codecs.dll
- Leadtools.Forms.DocumentWriters.dll
- Leadtools.Forms.Ocr.dll
- Leadtools.Forms.Ocr.Advantage.dll
- Leadtools.ImageProcessing.Core.dll
- Leadtools.Twain.dll
- Leadtools.WinForms.dll
- Add three buttons to the form, and set their Text, Name and Click event handler as follows:
Text | Name | Event |
Select Output Directory | buttonOutputDirectory | _miOutputDir_Click |
Select Scanning Device | buttonScanningDevice | _miSelectScanner_Click |
Acquire | buttonAcquire | _miScan_Click |
[Visual Basic]
Imports Leadtools
Imports Leadtools.Codecs
Imports Leadtools.Forms.DocumentWriters
Imports Leadtools.Forms.Ocr
Imports Leadtools.ImageProcessing.Core
Imports Leadtools.Twain
[C#]
using Leadtools;
using Leadtools.Codecs;
using Leadtools.Forms.DocumentWriters;
using Leadtools.Forms.Ocr;
using Leadtools.ImageProcessing.Core;
using Leadtools.Twain;
In the Form1 class (Form1.vb for Visual Basic or Form1.cs for C#), declare the following class-level variables. Each object is created once at class level to avoid destroying and recreating it every time it is used.
[Visual Basic]
Public m_strSavePath As String = "C:\"
Public WithEvents twain As TwainSession
Private _ocrEngine As IOcrEngine
Private _ocrDocument As IOcrDocument
Dim deskew As New DeskewCommand
Dim despeckle As New DespeckleCommand
Dim dotRemove As New DotRemoveCommand
Dim holepunchRemove As New HolePunchRemoveCommand
Dim lineRemove As New LineRemoveCommand
[C#]
public string m_strSavePath = @"C:\";
public TwainSession twain;
private IOcrEngine _ocrEngine;
private IOcrDocument _ocrDocument;
DeskewCommand deskew = new DeskewCommand();
DespeckleCommand despeckle = new DespeckleCommand();
DotRemoveCommand dotRemove = new DotRemoveCommand();
HolePunchRemoveCommand holepunchRemove = new HolePunchRemoveCommand();
LineRemoveCommand lineRemove = new LineRemoveCommand();
Next, in the Form1_Load event, create and initialize the objects. The user can now select an output directory (_miOutputDir_Click). If no output directory is selected, the program will default to C:\. The user can also choose the device to be used to scan the document (_miSelectScanner_Click). Also note that this tutorial is using the "RasterDocumentFormatType.Pdf" output format. Alternatively, the "RasterDocumentFormatType.PdfImageOnText" format could be used.
[Visual Basic]
Private Sub Form1_Load(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles MyBase.Load
   'One-time start-up work for the form: unlock the toolkit features, create the
   'OCR engine and document, start the TWAIN session and configure the
   'clean-up commands that are run on every acquired page.

   'Unlock support for these features
   RasterSupport.Unlock(RasterSupportType.Document, "YourKeyHere")
   RasterSupport.Unlock(RasterSupportType.OcrAdvantage, "YourKeyHere")
   RasterSupport.Unlock(RasterSupportType.OcrAdvantagePdfLeadOutput, "YourKeyHere")

   'Initialize the OCR objects. These are the class-level _ocrEngine and
   '_ocrDocument fields that the scan, acquire and closing handlers use, so
   'they must be created here before any of those handlers run.
   _ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.Advantage, False)
   'Nothing for every argument requests the engine defaults
   '(see the IOcrEngine.Startup documentation)
   _ocrEngine.Startup(Nothing, Nothing, Nothing, Nothing)
   'Create the OCR document that collects the scanned pages
   _ocrDocument = _ocrEngine.DocumentManager.CreateDocument()

   'Initialize Twain object
   twain = New TwainSession
   twain.Startup(Me, "Manufacturer", "Product Family", "Version", "Application", TwainStartupFlags.None)

   'Initialize DotRemove
   dotRemove.Flags = DotRemoveCommandFlags.UseDiagonals Or DotRemoveCommandFlags.UseSize
   dotRemove.MaximumDotHeight = 8
   dotRemove.MaximumDotWidth = 8
   dotRemove.MinimumDotHeight = 2
   dotRemove.MinimumDotWidth = 2

   'Initialize HolePunchRemove
   holepunchRemove.Flags = HolePunchRemoveCommandFlags.UseDpi Or HolePunchRemoveCommandFlags.UseCount Or _
                           HolePunchRemoveCommandFlags.UseLocation
   holepunchRemove.Location = HolePunchRemoveCommandLocation.Left

   'Initialize LineRemove
   lineRemove.MaximumLineWidth = 9
   lineRemove.MinimumLineLength = 400
   lineRemove.Wall = 15
   lineRemove.MaximumWallPercent = 10
   lineRemove.Variance = 3
   lineRemove.GapLength = 3
End Sub
[C#]
/// <summary>
/// One-time start-up work for the form: unlocks the toolkit features, creates
/// the OCR engine and document, starts the TWAIN session (hooking its
/// AcquirePage event) and configures the clean-up commands that are run on
/// every acquired page.
/// </summary>
/// <param name="sender">The form raising the Load event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Form1_Load(object sender, System.EventArgs e)
{
   //Unlock support for these features
   RasterSupport.Unlock(RasterSupportType.Document, "YourKeyHere");
   RasterSupport.Unlock(RasterSupportType.OcrAdvantage, "YourKeyHere");
   RasterSupport.Unlock(RasterSupportType.OcrAdvantagePdfLeadOutput, "YourKeyHere");

   //Initialize the OCR objects. These are the class-level _ocrEngine and
   //_ocrDocument fields that the scan, acquire and closing handlers use, so
   //they must be created here before any of those handlers run.
   _ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.Advantage, false);
   //null for every argument requests the engine defaults
   //(see the IOcrEngine.Startup documentation)
   _ocrEngine.Startup(null, null, null, null);
   //Create the OCR document that collects the scanned pages
   _ocrDocument = _ocrEngine.DocumentManager.CreateDocument();

   //Initialize Twain object
   twain = new TwainSession();
   twain.Startup(this, "Manufacturer", "Product Family", "Version", "Application", TwainStartupFlags.None);
   twain.AcquirePage += new EventHandler<TwainAcquirePageEventArgs>(twain_AcquirePage);

   //Initialize DotRemove
   dotRemove.Flags = DotRemoveCommandFlags.UseDiagonals | DotRemoveCommandFlags.UseSize;
   dotRemove.MaximumDotHeight = 8;
   dotRemove.MaximumDotWidth = 8;
   dotRemove.MinimumDotHeight = 2;
   dotRemove.MinimumDotWidth = 2;

   //Initialize HolePunchRemove
   holepunchRemove.Flags = HolePunchRemoveCommandFlags.UseDpi
                         | HolePunchRemoveCommandFlags.UseCount
                         | HolePunchRemoveCommandFlags.UseLocation;
   holepunchRemove.Location = HolePunchRemoveCommandLocation.Left;

   //Initialize LineRemove
   lineRemove.MaximumLineWidth = 9;
   lineRemove.MinimumLineLength = 400;
   lineRemove.Wall = 15;
   lineRemove.MaximumWallPercent = 10;
   lineRemove.Variance = 3;
   lineRemove.GapLength = 3;
}
Add code to the Form1_FormClosing event handler to free the OCR and TWAIN resources when the form closes, as follows:
[Visual Basic]
Private Sub Form1_FormClosing(ByVal sender As System.Object, ByVal e As System.Windows.Forms.FormClosingEventArgs) Handles MyBase.FormClosing
   'Free resources. Each object is checked first because it is still Nothing
   'if Form1_Load did not get as far as creating it; without the checks the
   'form would throw a NullReferenceException while closing.

   'Destroy the document
   If _ocrDocument IsNot Nothing Then
      _ocrDocument.Dispose()
      _ocrDocument = Nothing
   End If

   'Shut down the OCR engine, then release it
   If _ocrEngine IsNot Nothing Then
      _ocrEngine.Shutdown()
      _ocrEngine.Dispose()
      _ocrEngine = Nothing
   End If

   'Shut down the TWAIN session
   If twain IsNot Nothing Then
      twain.Shutdown()
      twain = Nothing
   End If
End Sub
[C#]
/// <summary>
/// Releases the OCR document, the OCR engine and the TWAIN session when the
/// form is closing.
/// </summary>
/// <param name="sender">The form raising the FormClosing event (unused).</param>
/// <param name="e">Event data (unused; closing is never cancelled here).</param>
private void Form1_FormClosing(object sender, System.Windows.Forms.FormClosingEventArgs e)
{
   // Free resources. Each object is checked first because it is still null
   // if Form1_Load did not get as far as creating it; without the checks the
   // form would throw a NullReferenceException while closing.

   // Destroy the document
   if (_ocrDocument != null)
   {
      _ocrDocument.Dispose();
      _ocrDocument = null;
   }

   // Shut down the OCR engine, then release it
   if (_ocrEngine != null)
   {
      _ocrEngine.Shutdown();
      _ocrEngine.Dispose();
      _ocrEngine = null;
   }

   // Shut down the TWAIN session
   if (twain != null)
   {
      twain.Shutdown();
      twain = null;
   }
}
Add code to select the output directory using the _miOutputDir_Click method, as follows:
[Visual Basic]
Private Sub _miOutputDir_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles buttonOutputDirectory.Click
   'Lets the user pick the folder the result PDF is saved to. m_strSavePath is
   'left unchanged if the dialog is cancelled.
   'The Handles clause names the "Select Output Directory" button
   '(buttonOutputDirectory) created earlier in this tutorial.
   Using dlg As New System.Windows.Forms.FolderBrowserDialog()
      If dlg.ShowDialog() = System.Windows.Forms.DialogResult.OK Then
         Dim folder As String = dlg.SelectedPath
         'm_strSavePath is concatenated directly with a file name, so it must end
         'with exactly one separator (a drive root such as "C:\" already has one)
         If Not folder.EndsWith("\", System.StringComparison.Ordinal) Then
            folder &= "\"
         End If
         m_strSavePath = folder
      End If
   End Using
End Sub
[C#]
/// <summary>
/// Lets the user pick the folder the result PDF is saved to. m_strSavePath is
/// left unchanged if the dialog is cancelled.
/// </summary>
/// <param name="sender">The control raising the Click event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void _miOutputDir_Click(object sender, System.EventArgs e)
{
   // The dialog is IDisposable, so release it as soon as we are done with it
   using (System.Windows.Forms.FolderBrowserDialog dlg = new System.Windows.Forms.FolderBrowserDialog())
   {
      if (dlg.ShowDialog() != System.Windows.Forms.DialogResult.OK)
      {
         return;
      }

      string folder = dlg.SelectedPath;
      // m_strSavePath is concatenated directly with a file name, so it must end
      // with exactly one separator (a drive root such as "C:\" already has one)
      if (!folder.EndsWith("\\", System.StringComparison.Ordinal))
      {
         folder += "\\";
      }
      m_strSavePath = folder;
   }
}
Add code to select the scanner device using twain.SelectSource in _miSelectScanner_Click, as follows:
[Visual Basic]
Private Sub _miSelectScanner_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles buttonScanningDevice.Click
   'Asks the TWAIN session to select the source (scanner) to acquire from.
   'An empty source name is passed, as in the original tutorial code.
   'The Handles clause names the "Select Scanning Device" button
   '(buttonScanningDevice) created earlier in this tutorial.
   twain.SelectSource(String.Empty)
End Sub
[C#]
/// <summary>
/// Asks the TWAIN session to select the source (scanner) to acquire from,
/// passing an empty source name.
/// </summary>
/// <param name="sender">The control raising the Click event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void _miSelectScanner_Click(object sender, System.EventArgs e)
{
   string sourceName = string.Empty;
   this.twain.SelectSource(sourceName);
}
Add code to initiate the scanning process. In the _miScan_Click event, any previous temp files created by the OCR will be deleted and any previous pages added to the OCR will be removed. Once twain.Acquire is called, the scanner interface is exposed, the user makes a selection, and the scanning process starts.
For each page acquired by the scanner, the twain_AcquirePage event will be called. In this event, each page is cleaned up as it is scanned and then added to the OCR document. Once all pages are scanned, the twain.Acquire function will return. The program then recognizes (OCRs) all the pages (_ocrDocument.Pages.Recognize) and saves the results to a searchable PDF (_ocrDocument.Save).
Passing TwainUserInterfaceFlags.Show as the only parameter shows the TWAIN driver's user interface, which lets the user adjust the scanner settings. Pass TwainUserInterfaceFlags.None instead so as not to show the TWAIN driver's interface. Opt to do this if the user is not to be able to adjust the scanner settings or if a customized user interface is to be displayed.
[Visual Basic]
Private Sub _miScan_Click(ByVal sender As Object, ByVal e As EventArgs) Handles buttonAcquire.Click
   'Scans pages, recognizes them and saves the result as "Results.pdf" in
   'm_strSavePath, then opens that file.
   'The Handles clause names the "Acquire" button (buttonAcquire) created
   'earlier in this tutorial.

   'Remove all previous pages
   Do While _ocrDocument.Pages.Count > 0
      _ocrDocument.Pages.Remove(_ocrDocument.Pages(0))
   Loop

   'Scan images; twain_AcquirePage adds each acquired page to _ocrDocument
   twain.Acquire(TwainUserInterfaceFlags.Show)

   'Nothing was acquired (for example the user cancelled the scan), so there
   'is nothing to recognize, save or open
   If _ocrDocument.Pages.Count = 0 Then
      Return
   End If

   'OCR all the images
   _ocrDocument.Pages.Recognize(Nothing)

   'Save the results to PDF
   Dim resultFile As String = m_strSavePath & "Results.pdf"
   _ocrDocument.Save(resultFile, DocumentFormat.Pdf, Nothing)

   'Show the PDF file we just saved
   System.Diagnostics.Process.Start(resultFile)
End Sub
[C#]
/// <summary>
/// Scans pages, recognizes them and saves the result as "Results.pdf" in
/// m_strSavePath, then opens that file. Does nothing after the scan if no
/// page was acquired.
/// </summary>
/// <param name="sender">The control raising the Click event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void _miScan_Click(object sender, EventArgs e)
{
   //Remove all previous pages
   while (_ocrDocument.Pages.Count > 0)
   {
      _ocrDocument.Pages.Remove(_ocrDocument.Pages[0]);
   }

   //Scan images; twain_AcquirePage adds each acquired page to _ocrDocument
   twain.Acquire(TwainUserInterfaceFlags.Show);

   //Nothing was acquired (for example the user cancelled the scan), so there
   //is nothing to recognize, save or open
   if (_ocrDocument.Pages.Count == 0)
   {
      return;
   }

   //OCR all the images
   _ocrDocument.Pages.Recognize(null);

   //Save the results to PDF
   string resultFile = m_strSavePath + "Results.pdf";
   _ocrDocument.Save(resultFile, DocumentFormat.Pdf, null);

   // Show the PDF file we just saved
   System.Diagnostics.Process.Start(resultFile);
}
Add code to acquire the document using twain_AcquirePage, as follows:
[Visual Basic]
Private Sub twain_AcquirePage(ByVal sender As Object, ByVal e As Leadtools.Twain.TwainAcquirePageEventArgs) Handles twain.AcquirePage
   'Runs the clean-up commands on the acquired page, in a fixed order, and
   'then appends the page to the OCR document.
   Dim scannedPage As RasterImage = e.Image

   'Deskew, despeckle, dot removal and hole punch removal
   deskew.Run(scannedPage)
   despeckle.Run(scannedPage)
   dotRemove.Run(scannedPage)
   holepunchRemove.Run(scannedPage)

   'Line removal: the same command is run twice, vertical lines first and
   'horizontal lines second
   Dim directions() As LineRemoveCommandType = {LineRemoveCommandType.Vertical, LineRemoveCommandType.Horizontal}
   For Each direction As LineRemoveCommandType In directions
      lineRemove.Type = direction
      lineRemove.Run(scannedPage)
   Next

   'Add page to the OCR engine
   _ocrDocument.Pages.AddPage(scannedPage, Nothing)
End Sub
[C#]
/// <summary>
/// Runs the clean-up commands on the acquired page, in a fixed order, and
/// then appends the page to the OCR document.
/// </summary>
/// <param name="sender">The object raising the AcquirePage event (unused).</param>
/// <param name="e">Event data; its Image property is the acquired page.</param>
private void twain_AcquirePage(object sender, Leadtools.Twain.TwainAcquirePageEventArgs e)
{
   RasterImage scannedPage = e.Image;

   //Deskew, despeckle, dot removal and hole punch removal
   deskew.Run(scannedPage);
   despeckle.Run(scannedPage);
   dotRemove.Run(scannedPage);
   holepunchRemove.Run(scannedPage);

   //Line removal: the same command is run twice, vertical lines first and
   //horizontal lines second
   LineRemoveCommandType[] directions =
   {
      LineRemoveCommandType.Vertical,
      LineRemoveCommandType.Horizontal
   };
   foreach (LineRemoveCommandType direction in directions)
   {
      lineRemove.Type = direction;
      lineRemove.Run(scannedPage);
   }

   //Add page to the OCR engine
   _ocrDocument.Pages.AddPage(scannedPage, null);
}
Build and run the program to test it.