←Select platform

UpdateWord Method

Summary

Updates or deletes a word in the recognized words list.

Syntax

C#
VB
C++
void UpdateWord(  
   IList<OcrWord> words, 
   int zoneIndex, 
   int wordIndex, 
   string newValue 
) 
Sub UpdateWord( _ 
   ByVal words As IList(Of OcrWord), _ 
   ByVal zoneIndex As Integer, _ 
   ByVal wordIndex As Integer, _ 
   ByVal newValue As String _ 
)  
void UpdateWord(  
   IList<OcrWord>^ words, 
   int zoneIndex, 
   int wordIndex, 
   String^ newValue 
)  

Parameters

words
A list of OcrWord items. In most cases, this is the same list obtained from IOcrZoneCharacters.GetWords.

zoneIndex
The 0-based zone index of the words.

wordIndex
The 0-based index of the word in words to update.

newValue
The value of the new word. Use null to delete the word.

Remarks

You can use UpdateWord to modify the OCR recognition results by updating or deleting words before optionally saving the results to the final output document. The C# and VB OCR Edit Demos use this technique, as does the example below.

Example

This example recognizes a document, then capitalizes every "the", deletes every "a", and replaces every instance of "color" with "water".

C#
VB
using Leadtools; 
using Leadtools.Codecs; 
using Leadtools.Forms.Ocr; 
using Leadtools.Forms; 
using Leadtools.Forms.DocumentWriters; 
using Leadtools.WinForms; 
using Leadtools.Drawing; 
using Leadtools.ImageProcessing; 
using Leadtools.ImageProcessing.Color; 
 
/// <summary>
/// Recognizes Ocr1.tif, saves the untouched results as a PDF, then edits the recognized
/// words (capitalizes "the", deletes "a", replaces "color" with "water") and saves the
/// edited results as a second PDF for comparison.
/// </summary>
public void OcrUpdateWordExample()
{
   // Input image and the two output documents (before and after editing)
   string sourceImage = Path.Combine(LEAD_VARS.ImagesDir, "Ocr1.tif");
   string originalPdf = Path.Combine(LEAD_VARS.ImagesDir, "Ocr1_Original.pdf");
   string modifiedPdf = Path.Combine(LEAD_VARS.ImagesDir, "Ocr1_Modified.pdf");

   using (IOcrEngine engine = OcrEngineManager.CreateEngine(OcrEngineType.Advantage, false))
   {
      engine.Startup(null, null, null, LEAD_VARS.OcrAdvantageRuntimeDir);

      using (IOcrDocument document = engine.DocumentManager.CreateDocument())
      {
         // Load the TIFF as a page and run recognition on it
         IOcrPage page = document.Pages.AddPage(sourceImage, null);
         page.Recognize(null);

         // Keep a copy of the unmodified results so the two PDFs can be compared
         document.Save(originalPdf, DocumentFormat.Pdf, null);

         // Walk the recognized characters zone by zone
         IOcrPageCharacters recognized = page.GetRecognizedCharacters();
         foreach (IOcrZoneCharacters zone in recognized)
         {
            IList<OcrWord> zoneWords = zone.GetWords(page.DpiX, page.DpiY, LogicalUnit.Pixel);

            // The word list must not be changed while it is being enumerated, so each pass
            // asks FindWord for the next match, updates that one word, and searches again
            // from the start until FindWord reports -1.
            int hit;

            // Capitalize every "the" (case-sensitive search, so "THE" is not matched again)
            while ((hit = FindWord(zoneWords, "the", false)) != -1)
            {
               recognized.UpdateWord(zoneWords, zone.ZoneIndex, hit, "THE");
            }

            // Delete every "a" (a null new value removes the word)
            while ((hit = FindWord(zoneWords, "a", true)) != -1)
            {
               recognized.UpdateWord(zoneWords, zone.ZoneIndex, hit, null);
            }

            // Swap every "color" for "water"
            while ((hit = FindWord(zoneWords, "color", true)) != -1)
            {
               recognized.UpdateWord(zoneWords, zone.ZoneIndex, hit, "water");
            }
         }

         // Push the edited characters back into the page
         page.SetRecognizedCharacters(recognized);

         // Save the edited results
         document.Save(modifiedPdf, DocumentFormat.Pdf, null);
      }
   }
}
 
/// <summary>
/// Finds the first word in <paramref name="words"/> whose text equals <paramref name="value"/>.
/// </summary>
/// <param name="words">The words to search. May be null or empty.</param>
/// <param name="value">The text to look for.</param>
/// <param name="ignoreCase">true to match regardless of letter case; false for an exact-case match.</param>
/// <returns>The 0-based index of the first matching word, or -1 if no word matches.</returns>
private static int FindWord(IList<OcrWord> words, string value, bool ignoreCase)
{
   if (words == null)
   {
      return -1;
   }

   // Compare ordinally so the result does not depend on the current thread culture;
   // the culture-sensitive string.Compare(string, string, bool) overload can match
   // differently from one machine locale to another.
   System.StringComparison comparison = ignoreCase
      ? System.StringComparison.OrdinalIgnoreCase
      : System.StringComparison.Ordinal;

   // An empty list simply skips the loop and falls through to "not found"
   for (int i = 0; i < words.Count; i++)
   {
      if (string.Equals(words[i].Value, value, comparison))
      {
         // Found it
         return i;
      }
   }

   // Not found
   return -1;
}
 
/// <summary>
/// Fixed locations used by the example code.
/// </summary>
static class LEAD_VARS 
{ 
   /// <summary>Folder the example combines with its input and output file names.</summary>
   public const string ImagesDir = @"C:\Users\Public\Documents\LEADTOOLS Images"; 
   /// <summary>Folder the example passes as the last argument to the OCR engine Startup call.</summary>
   public const string OcrAdvantageRuntimeDir = @"C:\LEADTOOLS 19\Bin\Common\OcrAdvantageRuntime"; 
} 
Imports Leadtools 
Imports Leadtools.Codecs 
Imports Leadtools.Forms.Ocr 
Imports Leadtools.Forms 
Imports Leadtools.Forms.DocumentWriters 
Imports Leadtools.WinForms 
Imports Leadtools.Drawing 
Imports Leadtools.ImageProcessing 
Imports Leadtools.ImageProcessing.Color 
 
''' <summary>
''' Recognizes Ocr1.tif, saves the untouched results as a PDF, then edits the recognized
''' words (capitalizes "the", deletes "a", replaces "color" with "water") and saves the
''' edited results as a second PDF for comparison.
''' </summary>
<TestMethod>
Public Sub OcrUpdateWordExample()
   ' Input image and the two output documents (before and after editing)
   Dim sourceImage As String = Path.Combine(LEAD_VARS.ImagesDir, "Ocr1.tif")
   Dim originalPdf As String = Path.Combine(LEAD_VARS.ImagesDir, "Ocr1_Original.pdf")
   Dim modifiedPdf As String = Path.Combine(LEAD_VARS.ImagesDir, "Ocr1_Modified.pdf")

   Using engine As IOcrEngine = OcrEngineManager.CreateEngine(OcrEngineType.Advantage, False)
      engine.Startup(Nothing, Nothing, Nothing, LEAD_VARS.OcrAdvantageRuntimeDir)

      Using document As IOcrDocument = engine.DocumentManager.CreateDocument()
         ' Load the TIFF as a page and run recognition on it
         Dim page As IOcrPage = document.Pages.AddPage(sourceImage, Nothing)
         page.Recognize(Nothing)

         ' Keep a copy of the unmodified results so the two PDFs can be compared
         document.Save(originalPdf, DocumentFormat.Pdf, Nothing)

         ' Walk the recognized characters zone by zone
         Dim recognized As IOcrPageCharacters = page.GetRecognizedCharacters()
         For Each zone As IOcrZoneCharacters In recognized
            Dim zoneWords As IList(Of OcrWord) = zone.GetWords(page.DpiX, page.DpiY, LogicalUnit.Pixel)

            ' The word list must not be changed while it is being enumerated, so each pass
            ' asks FindWord for the next match, updates that one word, and searches again
            ' from the start until FindWord reports -1.
            Dim hit As Integer

            ' Capitalize every "the" (case-sensitive search, so "THE" is not matched again)
            hit = FindWord(zoneWords, "the", False)
            While hit <> -1
               recognized.UpdateWord(zoneWords, zone.ZoneIndex, hit, "THE")
               hit = FindWord(zoneWords, "the", False)
            End While

            ' Delete every "a" (a Nothing new value removes the word)
            hit = FindWord(zoneWords, "a", True)
            While hit <> -1
               recognized.UpdateWord(zoneWords, zone.ZoneIndex, hit, Nothing)
               hit = FindWord(zoneWords, "a", True)
            End While

            ' Swap every "color" for "water"
            hit = FindWord(zoneWords, "color", True)
            While hit <> -1
               recognized.UpdateWord(zoneWords, zone.ZoneIndex, hit, "water")
               hit = FindWord(zoneWords, "color", True)
            End While
         Next

         ' Push the edited characters back into the page
         page.SetRecognizedCharacters(recognized)

         ' Save the edited results
         document.Save(modifiedPdf, DocumentFormat.Pdf, Nothing)
      End Using
   End Using
End Sub
 
''' <summary>
''' Finds the first word in <paramref name="words"/> whose text equals <paramref name="value"/>.
''' </summary>
''' <param name="words">The words to search. May be Nothing or empty.</param>
''' <param name="value">The text to look for.</param>
''' <param name="ignoreCase">True to match regardless of letter case; False for an exact-case match.</param>
''' <returns>The 0-based index of the first matching word, or -1 if no word matches.</returns>
Private Shared Function FindWord(words As IList(Of OcrWord), value As String, ignoreCase As Boolean) As Integer
   If words Is Nothing Then
      Return -1
   End If

   ' Compare ordinally so the result does not depend on the current thread culture;
   ' the culture-sensitive String.Compare(String, String, Boolean) overload can match
   ' differently from one machine locale to another.
   Dim comparison As System.StringComparison = If(ignoreCase, System.StringComparison.OrdinalIgnoreCase, System.StringComparison.Ordinal)

   ' An empty list simply skips the loop and falls through to "not found"
   For i As Integer = 0 To words.Count - 1
      If String.Equals(words(i).Value, value, comparison) Then
         ' Found it
         Return i
      End If
   Next

   ' Not found
   Return -1
End Function
 
''' <summary>
''' Fixed locations used by the example code.
''' </summary>
Public NotInheritable Class LEAD_VARS 
   ''' <summary>Folder the example combines with its input and output file names.</summary>
   Public Const ImagesDir As String = "C:\Users\Public\Documents\LEADTOOLS Images" 
   ''' <summary>Folder the example passes as the last argument to the OCR engine Startup call.</summary>
   Public Const OcrAdvantageRuntimeDir As String = "C:\LEADTOOLS 19\Bin\Common\OcrAdvantageRuntime" 
End Class 

Requirements

Target Platforms

Products | Support | Contact Us | Copyright Notices
© 1991-2017 LEAD Technologies, Inc. All Rights Reserved.
Leadtools.Forms.Ocr Assembly
Click or drag to resize