/// <summary>
/// Gets or sets the index of the table cell that contains this character.
/// </summary>
/// <remarks>
/// Applies only to characters in OcrZoneType.Table zones, where the value is an index
/// into the zone's cells array.
/// </remarks>
public int CellIndex { get; set; }
The index of the cell in the table that contains this character.
This value applies only to zones of type OcrZoneType.Table, where it is the index into the zone's cells array. You can use it to determine the row and column of a character or word inside the table. This information is only available if IOcrPage.TableZoneManager contains a valid IOcrTableZoneManager object and the engine supports cell manipulation. Currently, only the LEADTOOLS OmniPage engine supports table cells.
To get the recognized characters of a page, call IOcrPage.GetRecognizedCharacters after IOcrPage.Recognize.
To update the recognized characters of a page, call IOcrPage.SetRecognizedCharacters before calling IOcrDocument.Save or IOcrDocument.SaveXml.
using Leadtools;
using Leadtools.Codecs;
using Leadtools.Ocr;
using Leadtools.Forms.Common;
using Leadtools.Document.Writer;
using Leadtools.WinForms;
using Leadtools.Drawing;
using Leadtools.ImageProcessing;
using Leadtools.ImageProcessing.Color;
/// <summary>
/// Demonstrates reading, modifying and writing back the recognized characters of an OCR page.
/// </summary>
/// <remarks>
/// Draws three lines of text into an in-memory image, recognizes it with the LEAD OCR engine,
/// dumps every recognized character to a text file, capitalizes the first letter of each word,
/// adds italic and underline to every bold character, and saves the result as a PDF/A file.
/// </remarks>
public void RecognizedCharactersExample()
{
    string textFileName = Path.Combine(LEAD_VARS.ImagesDir, "MyImageWithTest.txt");
    string pdfFileName = Path.Combine(LEAD_VARS.ImagesDir, "MyImageWithTest.pdf");

    // Create an image with some text in it
    RasterImage image = CreateSampleTextImage();
    try
    {
        // Create an instance of the engine
        using (IOcrEngine ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.LEAD))
        {
            // Start the engine using default parameters
            ocrEngine.Startup(null, null, null, LEAD_VARS.OcrLEADRuntimeDir);

            // Create an OCR page. With OcrImageSharingMode.AutoDispose the page takes over
            // the image, so from here on this method must not dispose it itself.
            IOcrPage ocrPage = ocrEngine.CreatePage(image, OcrImageSharingMode.AutoDispose);
            image = null;
            try
            {
                // Recognize this page
                ocrPage.Recognize(null);

                // Dump the characters into a text file and push the modified ones back into the page
                DumpAndRestyleCharacters(ocrPage, textFileName);

                // Create an OCR document so we can save the results
                using (IOcrDocument ocrDocument = ocrEngine.DocumentManager.CreateDocument(null, OcrCreateDocumentOptions.AutoDeleteFile))
                {
                    // Add the page and dispose it
                    ocrDocument.Pages.Add(ocrPage);
                    ocrPage.Dispose();
                    ocrPage = null; // already disposed; keeps the finally block from disposing twice

                    // Set the PDF options to save as PDF/A text only.
                    // Direct cast: an unexpected options type fails here with a clear
                    // InvalidCastException instead of a NullReferenceException on the next line.
                    PdfDocumentOptions pdfOptions = (PdfDocumentOptions)ocrEngine.DocumentWriterInstance.GetOptions(DocumentFormat.Pdf);
                    pdfOptions.DocumentType = PdfDocumentType.PdfA;
                    pdfOptions.ImageOverText = false;
                    ocrEngine.DocumentWriterInstance.SetOptions(DocumentFormat.Pdf, pdfOptions);

                    ocrDocument.Save(pdfFileName, DocumentFormat.Pdf, null);

                    // Open and check the result file, it should contain the following text
                    // "Normal Line"
                    // "Bold, Italic And Underline"
                    // "Monospaced Line"
                    // With the second line bold, italic and underlined now
                }
            }
            finally
            {
                // Only reached with a live page when something failed before the hand-off above
                if (ocrPage != null)
                {
                    ocrPage.Dispose();
                }
            }

            // Shutdown the engine
            // Note: calling Dispose will also automatically shutdown the engine if it has been started
            ocrEngine.Shutdown();
        }
    }
    finally
    {
        // Only reached with a live image when the page was never created
        if (image != null)
        {
            image.Dispose();
        }
    }
}

/// <summary>
/// Creates a 640x200, 24-bit white image with three lines of black text drawn on it.
/// </summary>
/// <returns>The new image. The caller is responsible for disposing it.</returns>
private static RasterImage CreateSampleTextImage()
{
    RasterImage image = new RasterImage(RasterMemoryFlags.Conventional, 640, 200, 24, RasterByteOrder.Bgr, RasterViewPerspective.TopLeft, null, IntPtr.Zero, 0);
    try
    {
        Rectangle imageRect = new Rectangle(0, 0, image.ImageWidth, image.ImageHeight);
        IntPtr hdc = RasterImagePainter.CreateLeadDC(image);
        try
        {
            using (Graphics g = Graphics.FromHdc(hdc))
            {
                g.SmoothingMode = System.Drawing.Drawing2D.SmoothingMode.HighQuality;
                g.FillRectangle(Brushes.White, imageRect);

                using (Font f = new Font("Arial", 20, FontStyle.Regular))
                {
                    g.DrawString("Normal line", f, Brushes.Black, 0, 0);
                }

                // Drawn bold only; the italic and underline styles are added to the
                // recognized characters later, so the text describes the final result.
                using (Font f = new Font("Arial", 20, FontStyle.Bold))
                {
                    g.DrawString("Bold, italic and underline", f, Brushes.Black, 0, 40);
                }

                using (Font f = new Font("Courier New", 20, FontStyle.Regular))
                {
                    g.DrawString("Monospaced line", f, Brushes.Black, 0, 80);
                }
            }
        }
        finally
        {
            // Release the device context even if drawing failed
            RasterImagePainter.DeleteLeadDC(hdc);
        }

        return image;
    }
    catch
    {
        // The caller never receives the image on failure, so release it here
        image.Dispose();
        throw;
    }
}

/// <summary>
/// Writes every recognized character of the page to a text file, capitalizes the first
/// character of each word, adds italic and underline to bold characters, and stores the
/// modified characters back into the page.
/// </summary>
/// <param name="ocrPage">A page that has already been recognized.</param>
/// <param name="textFileName">Path of the text file to create with the character dump.</param>
private static void DumpAndRestyleCharacters(IOcrPage ocrPage, string textFileName)
{
    using (StreamWriter writer = File.CreateText(textFileName))
    {
        IOcrPageCharacters ocrPageCharacters = ocrPage.GetRecognizedCharacters();

        foreach (IOcrZoneCharacters ocrZoneCharacters in ocrPageCharacters)
        {
            // Show the words found in this zone together with their bounds
            ICollection<OcrWord> words = ocrZoneCharacters.GetWords();
            Console.WriteLine("Words:");
            foreach (OcrWord word in words)
            {
                Console.WriteLine("Word: {0}, at {1}, characters index from {2} to {3}", word.Value, word.Bounds, word.FirstCharacterIndex, word.LastCharacterIndex);
            }

            // The first character of every zone starts a new word
            bool nextCharacterIsNewWord = true;

            for (int i = 0; i < ocrZoneCharacters.Count; i++)
            {
                OcrCharacter ocrCharacter = ocrZoneCharacters[i];

                // Capitalize the first letter if this is a new word
                if (nextCharacterIsNewWord)
                {
                    ocrCharacter.Code = Char.ToUpper(ocrCharacter.Code);
                }

                // Dumped after capitalization but before the font style is changed below
                writer.WriteLine("Code: {0}, Confidence: {1}, WordIsCertain: {2}, Bounds: {3}, Position: {4}, FontSize: {5}, FontStyle: {6}",
                    ocrCharacter.Code,
                    ocrCharacter.Confidence,
                    ocrCharacter.WordIsCertain,
                    ocrCharacter.Bounds,
                    ocrCharacter.Position,
                    ocrCharacter.FontSize,
                    ocrCharacter.FontStyle);

                // If the character is bold, also make it italic and underlined
                if ((ocrCharacter.FontStyle & OcrCharacterFontStyle.Bold) == OcrCharacterFontStyle.Bold)
                {
                    ocrCharacter.FontStyle |= OcrCharacterFontStyle.Italic;
                    ocrCharacter.FontStyle |= OcrCharacterFontStyle.Underline;
                }

                // The next character starts a new word when this one ends a word or a line
                nextCharacterIsNewWord =
                    (ocrCharacter.Position & OcrCharacterPosition.EndOfWord) == OcrCharacterPosition.EndOfWord ||
                    (ocrCharacter.Position & OcrCharacterPosition.EndOfLine) == OcrCharacterPosition.EndOfLine;

                // Store the modified character back into the zone collection
                ocrZoneCharacters[i] = ocrCharacter;
            }
        }

        // Replace the characters with the modified ones before we save
        ocrPage.SetRecognizedCharacters(ocrPageCharacters);
    }
}
/// <summary>
/// Machine-specific directory locations used by the example code.
/// </summary>
static class LEAD_VARS
{
    /// <summary>Directory passed to the OCR engine startup call as its runtime location.</summary>
    public const string OcrLEADRuntimeDir = @"C:\LEADTOOLS23\Bin\Common\OcrLEADRuntime";

    /// <summary>Directory in which the example creates its text and PDF output files.</summary>
    public const string ImagesDir = @"C:\LEADTOOLS23\Resources\Images";
}