public interface IOcrPageCharacters : IList<IOcrZoneCharacters>, ICollection<IOcrZoneCharacters>, IEnumerable<IOcrZoneCharacters>, IEnumerable

To get the recognized characters of a page, call IOcrPage.GetRecognizedCharacters after calling IOcrPage.Recognize.
To update the recognized characters of a page, call IOcrPage.SetRecognizedCharacters before calling IOcrDocument.Save or IOcrDocument.SaveXml.
IOcrPageCharacters implements the standard IList<T>, ICollection<T> and IEnumerable<T> interfaces with items of type IOcrZoneCharacters. Thus, each item in the IOcrPageCharacters is the character collection of one zone of the page.
The IOcrZoneCharacters interface contains a collection of the characters of a particular zone.
IOcrZoneCharacters also implements the IList<T>, ICollection<T> and IEnumerable<T> interfaces, but with items of type OcrCharacter. Each item in the IOcrZoneCharacters is a single recognized character of the zone.
The IOcrPageCharacters interface also contains the IOcrPageCharacters.UpdateWord method, which allows you to modify the OCR recognition results by updating or deleting words before optionally saving the results to the final output document.
IOcrPageCharacters.UpdateWord can also be used to delete any word from the recognition results.

For example, if you are interested in iterating through the characters of the 2nd zone in the page, you can do the following:
// Get the page characters
IOcrPageCharacters pageCharacters = ocrPage.GetRecognizedCharacters();

// Get the 2nd zone characters. Note, index is zero-based so 2nd zone is index 1
// You can also iterate through the pageCharacters collection and find the
// IOcrZoneCharacters item with ZoneIndex = 1
IOcrZoneCharacters zoneCharacters = pageCharacters.FindZoneCharacters(1);

// NOTE(review): guard in case the page has no zone with this index; confirm
// whether FindZoneCharacters returns null or throws when the zone is not found
if (zoneCharacters != null)
{
    // Loop through the characters
    foreach (OcrCharacter ocrCharacter in zoneCharacters)
    {
        // Do something with ocrCharacter
    }
}
using System;
using System.Drawing;
using System.IO;
using Leadtools;
using Leadtools.Codecs;
using Leadtools.Ocr;
using Leadtools.Forms.Common;
using Leadtools.Document.Writer;
using Leadtools.WinForms;
using Leadtools.Drawing;
using Leadtools.ImageProcessing;
using Leadtools.ImageProcessing.Color;

/// <summary>
/// Draws three lines of text into an in-memory image, recognizes the image with the
/// LEAD OCR engine, dumps every recognized character to a text file, modifies the
/// characters (capitalizes the first letter of each word; adds italic and underline
/// to bold characters) and saves the modified results as a PDF/A document.
/// </summary>
public void RecognizedCharactersExample()
{
    // Create an image with some text in it
    RasterImage image = new RasterImage(RasterMemoryFlags.Conventional, 640, 200, 24, RasterByteOrder.Bgr, RasterViewPerspective.TopLeft, null, IntPtr.Zero, 0);
    Rectangle imageRect = new Rectangle(0, 0, image.ImageWidth, image.ImageHeight);

    IntPtr hdc = RasterImagePainter.CreateLeadDC(image);
    try
    {
        using (Graphics g = Graphics.FromHdc(hdc))
        {
            g.SmoothingMode = System.Drawing.Drawing2D.SmoothingMode.HighQuality;
            g.FillRectangle(Brushes.White, imageRect);

            using (Font f = new Font("Arial", 20, FontStyle.Regular))
            {
                g.DrawString("Normal line", f, Brushes.Black, 0, 0);
            }

            using (Font f = new Font("Arial", 20, FontStyle.Bold))
            {
                g.DrawString("Bold, italic and underline", f, Brushes.Black, 0, 40);
            }

            using (Font f = new Font("Courier New", 20, FontStyle.Regular))
            {
                g.DrawString("Monospaced line", f, Brushes.Black, 0, 80);
            }
        }
    }
    finally
    {
        // Release the device context even if drawing fails
        RasterImagePainter.DeleteLeadDC(hdc);
    }

    string textFileName = Path.Combine(LEAD_VARS.ImagesDir, "MyImageWithTest.txt");
    string pdfFileName = Path.Combine(LEAD_VARS.ImagesDir, "MyImageWithTest.pdf");

    // Create an instance of the engine
    using (IOcrEngine ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.LEAD))
    {
        // Start the engine using default parameters
        ocrEngine.Startup(null, null, null, LEAD_VARS.OcrLEADRuntimeDir);

        // Create an OCR page
        IOcrPage ocrPage = ocrEngine.CreatePage(image, OcrImageSharingMode.AutoDispose);

        // Recognize this page
        ocrPage.Recognize(null);

        // Dump the characters into a text file
        using (StreamWriter writer = File.CreateText(textFileName))
        {
            IOcrPageCharacters ocrPageCharacters = ocrPage.GetRecognizedCharacters();

            foreach (IOcrZoneCharacters ocrZoneCharacters in ocrPageCharacters)
            {
                // Show the words found in this zone together with their bounds
                ICollection<OcrWord> words = ocrZoneCharacters.GetWords();

                Console.WriteLine("Words:");
                foreach (OcrWord word in words)
                {
                    Console.WriteLine("Word: {0}, at {1}, characters index from {2} to {3}", word.Value, word.Bounds, word.FirstCharacterIndex, word.LastCharacterIndex);
                }

                // The first character of a zone always starts a new word
                bool nextCharacterIsNewWord = true;

                for (int i = 0; i < ocrZoneCharacters.Count; i++)
                {
                    OcrCharacter ocrCharacter = ocrZoneCharacters[i];

                    // Capitalize the first letter if this is a new word.
                    // Invariant casing keeps the output independent of the current culture.
                    if (nextCharacterIsNewWord)
                    {
                        ocrCharacter.Code = char.ToUpperInvariant(ocrCharacter.Code);
                    }

                    writer.WriteLine("Code: {0}, Confidence: {1}, WordIsCertain: {2}, Bounds: {3}, Position: {4}, FontSize: {5}, FontStyle: {6}",
                        ocrCharacter.Code,
                        ocrCharacter.Confidence,
                        ocrCharacter.WordIsCertain,
                        ocrCharacter.Bounds,
                        ocrCharacter.Position,
                        ocrCharacter.FontSize,
                        ocrCharacter.FontStyle);

                    // If the character is bold, also make it italic and underlined
                    if ((ocrCharacter.FontStyle & OcrCharacterFontStyle.Bold) == OcrCharacterFontStyle.Bold)
                    {
                        ocrCharacter.FontStyle |= OcrCharacterFontStyle.Italic;
                        ocrCharacter.FontStyle |= OcrCharacterFontStyle.Underline;
                    }

                    // Check if next character is the start of a new word
                    nextCharacterIsNewWord =
                        (ocrCharacter.Position & OcrCharacterPosition.EndOfWord) == OcrCharacterPosition.EndOfWord ||
                        (ocrCharacter.Position & OcrCharacterPosition.EndOfLine) == OcrCharacterPosition.EndOfLine;

                    // Store the modified copy back into the zone collection
                    ocrZoneCharacters[i] = ocrCharacter;
                }
            }

            // Replace the characters with the modified ones before we save
            ocrPage.SetRecognizedCharacters(ocrPageCharacters);
        }

        // Create an OCR document so we can save the results
        using (IOcrDocument ocrDocument = ocrEngine.DocumentManager.CreateDocument(null, OcrCreateDocumentOptions.AutoDeleteFile))
        {
            // Add the page and dispose it
            ocrDocument.Pages.Add(ocrPage);
            ocrPage.Dispose();

            // Set the PDF options to save as PDF/A text only.
            // Direct cast: an unexpected options type fails right here with an
            // InvalidCastException instead of a NullReferenceException on the next line.
            PdfDocumentOptions pdfOptions = (PdfDocumentOptions)ocrEngine.DocumentWriterInstance.GetOptions(DocumentFormat.Pdf);
            pdfOptions.DocumentType = PdfDocumentType.PdfA;
            pdfOptions.ImageOverText = false;
            ocrEngine.DocumentWriterInstance.SetOptions(DocumentFormat.Pdf, pdfOptions);

            ocrDocument.Save(pdfFileName, DocumentFormat.Pdf, null);

            // Open and check the result file, it should contain text similar to the following
            // (the first letter of every word is capitalized by the loop above):
            // "Normal Line"
            // "Bold, Italic And Underline"
            // "Monospaced Line"
            // With the second line bold, italic and underlined now
        }

        // Shutdown the engine
        // Note: calling Dispose will also automatically shutdown the engine if it has been started
        ocrEngine.Shutdown();
    }
}

// Installation-specific paths used by the example
static class LEAD_VARS
{
    public const string ImagesDir = @"C:\LEADTOOLS22\Resources\Images";
    public const string OcrLEADRuntimeDir = @"C:\LEADTOOLS22\Bin\Common\OcrLEADRuntime";
}
IOcrPage.SetRecognizedCharacters
IOcrPage.GetRecognizedCharacters
IOcrPageCharacters Interface