public interface IOcrZoneCharacters : IList<OcrCharacter>, ICollection<OcrCharacter>, IEnumerable<OcrCharacter>, IEnumerable
To get the recognized characters of a page, call IOcrPage.GetRecognizedCharacters after IOcrPage.Recognize.
To update the recognized characters of a page, call IOcrPage.SetRecognizedCharacters before calling IOcrDocument.Save or IOcrDocument.SaveXml.
IOcrPageCharacters implements the standard IList<T>, ICollection<T> and IEnumerable<T> interfaces with items of type IOcrZoneCharacters. Each item in the IOcrPageCharacters is the character collection of one zone of the page.
The IOcrZoneCharacters interface contains a collection of the characters of a particular zone.
IOcrZoneCharacters also implements the IList<T>, ICollection<T> and IEnumerable<T> interfaces, but with items of type OcrCharacter. Each item in the IOcrZoneCharacters is one character of the zone.
For example, if you are interested in iterating through the characters of the 2nd zone in the page, you can do the following:
// Get the page characters
IOcrPageCharacters pageCharacters = ocrPage.GetRecognizedCharacters();

// Get the 2nd zone characters. Note, the index is zero-based so the 2nd zone is at index 1.
// You can also iterate through the pageCharacters collection and find the
// IOcrZoneCharacters item with ZoneIndex = 1.
IOcrZoneCharacters zoneCharacters = pageCharacters.FindZoneCharacters(1);

// Loop through the characters
foreach (OcrCharacter ocrCharacter in zoneCharacters)
{
    // Do something with ocrCharacter
}
using System;
using System.Drawing;
using System.IO;
using Leadtools;
using Leadtools.Codecs;
using Leadtools.Ocr;
using Leadtools.Forms.Common;
using Leadtools.Document.Writer;
using Leadtools.WinForms;
using Leadtools.Drawing;
using Leadtools.ImageProcessing;
using Leadtools.ImageProcessing.Color;

/// <summary>
/// Draws three lines of text into an in-memory image, recognizes the image with the LEAD OCR engine,
/// dumps every recognized character to a text file, modifies the characters (capitalizes the first
/// letter of each word, adds italic and underline to bold characters), writes the modified characters
/// back to the page and saves the result as a PDF/A document.
/// </summary>
public void RecognizedCharactersExample()
{
    // Create an image with some text in it
    RasterImage image = new RasterImage(RasterMemoryFlags.Conventional, 640, 200, 24, RasterByteOrder.Bgr, RasterViewPerspective.TopLeft, null, IntPtr.Zero, 0);
    Rectangle imageRect = new Rectangle(0, 0, image.ImageWidth, image.ImageHeight);

    IntPtr hdc = RasterImagePainter.CreateLeadDC(image);
    try
    {
        using (Graphics g = Graphics.FromHdc(hdc))
        {
            g.SmoothingMode = System.Drawing.Drawing2D.SmoothingMode.HighQuality;
            g.FillRectangle(Brushes.White, imageRect);

            using (Font f = new Font("Arial", 20, FontStyle.Regular))
            {
                g.DrawString("Normal line", f, Brushes.Black, 0, 0);
            }

            using (Font f = new Font("Arial", 20, FontStyle.Bold))
            {
                g.DrawString("Bold, italic and underline", f, Brushes.Black, 0, 40);
            }

            using (Font f = new Font("Courier New", 20, FontStyle.Regular))
            {
                g.DrawString("Monospaced line", f, Brushes.Black, 0, 80);
            }
        }
    }
    finally
    {
        // Always release the device context, even if drawing failed
        RasterImagePainter.DeleteLeadDC(hdc);
    }

    string textFileName = Path.Combine(LEAD_VARS.ImagesDir, "MyImageWithTest.txt");
    string pdfFileName = Path.Combine(LEAD_VARS.ImagesDir, "MyImageWithTest.pdf");

    // Create an instance of the engine
    using (IOcrEngine ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.LEAD))
    {
        // Start the engine using default parameters
        ocrEngine.Startup(null, null, null, LEAD_VARS.OcrLEADRuntimeDir);

        // Create an OCR page
        IOcrPage ocrPage = ocrEngine.CreatePage(image, OcrImageSharingMode.AutoDispose);

        // Recognize this page
        ocrPage.Recognize(null);

        // Dump the characters into a text file
        using (StreamWriter writer = File.CreateText(textFileName))
        {
            IOcrPageCharacters ocrPageCharacters = ocrPage.GetRecognizedCharacters();

            foreach (IOcrZoneCharacters ocrZoneCharacters in ocrPageCharacters)
            {
                // Get the words found in this zone and show them together with their bounds
                ICollection<OcrWord> words = ocrZoneCharacters.GetWords();

                Console.WriteLine("Words:");
                foreach (OcrWord word in words)
                {
                    Console.WriteLine("Word: {0}, at {1}, characters index from {2} to {3}", word.Value, word.Bounds, word.FirstCharacterIndex, word.LastCharacterIndex);
                }

                // The first character of every zone starts a new word
                bool nextCharacterIsNewWord = true;

                for (int i = 0; i < ocrZoneCharacters.Count; i++)
                {
                    OcrCharacter ocrCharacter = ocrZoneCharacters[i];

                    // Capitalize the first letter if this is a new word
                    if (nextCharacterIsNewWord)
                    {
                        ocrCharacter.Code = Char.ToUpper(ocrCharacter.Code);
                    }

                    writer.WriteLine("Code: {0}, Confidence: {1}, WordIsCertain: {2}, Bounds: {3}, Position: {4}, FontSize: {5}, FontStyle: {6}",
                        ocrCharacter.Code,
                        ocrCharacter.Confidence,
                        ocrCharacter.WordIsCertain,
                        ocrCharacter.Bounds,
                        ocrCharacter.Position,
                        ocrCharacter.FontSize,
                        ocrCharacter.FontStyle);

                    // If the character is bold, make it italic and underlined as well
                    if ((ocrCharacter.FontStyle & OcrCharacterFontStyle.Bold) == OcrCharacterFontStyle.Bold)
                    {
                        ocrCharacter.FontStyle |= OcrCharacterFontStyle.Italic;
                        ocrCharacter.FontStyle |= OcrCharacterFontStyle.Underline;
                    }

                    // The next character starts a new word if this one ends a word or a line
                    nextCharacterIsNewWord =
                        (ocrCharacter.Position & OcrCharacterPosition.EndOfWord) == OcrCharacterPosition.EndOfWord ||
                        (ocrCharacter.Position & OcrCharacterPosition.EndOfLine) == OcrCharacterPosition.EndOfLine;

                    // Store the modified copy back into the zone collection
                    ocrZoneCharacters[i] = ocrCharacter;
                }
            }

            // Replace the characters with the modified ones before we save
            ocrPage.SetRecognizedCharacters(ocrPageCharacters);
        }

        // Create an OCR document so we can save the results
        using (IOcrDocument ocrDocument = ocrEngine.DocumentManager.CreateDocument(null, OcrCreateDocumentOptions.AutoDeleteFile))
        {
            // Add the page and dispose it
            ocrDocument.Pages.Add(ocrPage);
            ocrPage.Dispose();

            // Set the PDF options to save as PDF/A text only.
            // A direct cast is used so an unexpected options type fails here with a clear
            // InvalidCastException instead of a NullReferenceException on the next line.
            PdfDocumentOptions pdfOptions = (PdfDocumentOptions)ocrEngine.DocumentWriterInstance.GetOptions(DocumentFormat.Pdf);
            pdfOptions.DocumentType = PdfDocumentType.PdfA;
            pdfOptions.ImageOverText = false;
            ocrEngine.DocumentWriterInstance.SetOptions(DocumentFormat.Pdf, pdfOptions);

            ocrDocument.Save(pdfFileName, DocumentFormat.Pdf, null);

            // Open and check the result file, it should contain the following text
            // (first letter of each word capitalized):
            // "Normal Line"
            // "Bold, Italic And Underline"
            // "Monospaced Line"
            // With the second line bold, italic and underlined now
        }

        // Shutdown the engine
        // Note: calling Dispose will also automatically shutdown the engine if it has been started
        ocrEngine.Shutdown();
    }
}

/// <summary>
/// Installation-specific paths used by the example.
/// </summary>
static class LEAD_VARS
{
    public const string ImagesDir = @"C:\LEADTOOLS23\Resources\Images";
    public const string OcrLEADRuntimeDir = @"C:\LEADTOOLS23\Bin\Common\OcrLEADRuntime";
}
IOcrPage.SetRecognizedCharacters
IOcrPage.GetRecognizedCharacters
IOcrZoneCharacters Interface