Contains the text characters and words found in a document page.
[SerializableAttribute()][DataContractAttribute(Name="DocumentPageText")]public class DocumentPageText
<SerializableAttribute()>Public Class DocumentPageText
[SerializableAttribute()]public ref class DocumentPageText
The text of a document page can be read by using the DocumentObjectManager.ParsePageText method. The text characters found in the page will be set in the Characters property of the returned DocumentPageText object.
The text words are created from the characters found in the document based on the DocumentCharacter.IsEndOfWord value returned by the document reader engine. Whenever an "end of word" is found, the last set of characters is grouped together and stored as an item in the DocumentPageText.Words list.
The overall text string (with no extra properties) can be obtained using the DocumentPageText.BuildText method.
''' <summary>
''' Prompts for a document, parses the text of its first page, draws every word onto a
''' bitmap sized to that page, and offers to save the bitmap as a PNG file.
''' </summary>
Public Sub DocumentPageTextExample()
   Dim documentFileName As String
   Using dlg As New OpenFileDialog()
      If dlg.ShowDialog() <> System.Windows.Forms.DialogResult.OK Then
         Return
      End If
      documentFileName = dlg.FileName
   End Using

   ' Load the document at 200 DPI
   Dim loadOptions As New DocumentReaderLoadOptions()
   loadOptions.Resolution = 200

   Dim ocrEngine As IOcrEngine = Nothing
   Dim reader As DocumentReader = DocumentReader.Create(documentFileName, loadOptions)
   Try
      ' If this is a Raster document such as TIFF or JPEG, we must use an OCR engine
      If reader.ReaderType = DocumentReaderType.Raster Then
         ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.Advantage, False)
         ocrEngine.Startup(Nothing, Nothing, Nothing, LEAD_VARS.OcrAdvantageRuntimeDir)
      End If

      reader.ObjectManager.BeginParse(ocrEngine)
      Try
         ' Get the text of the first page
         Dim page As DocumentReaderPage = reader.Pages(0)
         Dim pageText As DocumentPageText = reader.ObjectManager.ParsePageText(page)

         ' Create the bitmap to draw the objects to
         Using btmp As New Bitmap(page.PixelWidth, page.PixelHeight)
            btmp.SetResolution(CType(page.DpiX, Single), CType(page.DpiY, Single))

            Using g As Graphics = Graphics.FromImage(btmp)
               g.Clear(Color.White)

               ' Render the objects
               ' Text is a word at a time
               Dim textRect As LogicalRectangle = LogicalRectangle.Empty
               Dim textFontHeight As Double = 0
               Dim textWord As New StringBuilder()
               Dim lastCharacter As DocumentCharacter = Nothing

               For Each character As DocumentCharacter In pageText.Characters
                  ' Add the text code and rects together
                  textWord.Append(character.Code)
                  If textRect.IsEmpty Then
                     textRect = character.Bounds
                  Else
                     textRect = LogicalRectangle.Union(textRect, character.Bounds)
                  End If
                  textFontHeight = Math.Max(textFontHeight, character.FontSize)
                  lastCharacter = character

                  ' If this is the last object in a word, render it
                  If character.IsEndOfWord OrElse character.IsEndOfLine OrElse _
                     character.IsEndOfParagraph OrElse character.IsEndOfPage Then
                     RenderText(g, pageText, textWord.ToString(), textRect, character, textFontHeight)

                     ' Start the next word from a clean state. The font height must be
                     ' reset too, otherwise one large word enlarges every word after it.
                     textWord.Length = 0
                     textRect = LogicalRectangle.Empty
                     textFontHeight = 0
                  End If
               Next

               ' Render any trailing characters that were not closed by an end-of-word,
               ' end-of-line, end-of-paragraph or end-of-page flag.
               If textWord.Length > 0 Then
                  RenderText(g, pageText, textWord.ToString(), textRect, lastCharacter, textFontHeight)
               End If

               ' Save the result as PNG
               Using saveDlg As New SaveFileDialog()
                  saveDlg.Filter = "PNG files|*.png"
                  If saveDlg.ShowDialog() = System.Windows.Forms.DialogResult.OK Then
                     btmp.Save(saveDlg.FileName, System.Drawing.Imaging.ImageFormat.Png)
                  End If
               End Using
            End Using
         End Using
      Finally
         ' Always pair BeginParse with EndParse, even when parsing or drawing fails
         reader.ObjectManager.EndParse()
      End Try
   Finally
      ' Release the OCR engine (if one was created) and the reader on every exit path
      If ocrEngine IsNot Nothing Then
         ocrEngine.Dispose()
      End If
      reader.Dispose()
   End Try
End Sub

''' <summary>
''' Draws one word in black, centered inside its bounding rectangle, using the font
''' that the page text reports for the given character.
''' </summary>
''' <param name="g">Graphics surface to draw on.</param>
''' <param name="pageText">Page text whose Fonts list is indexed by character.FontIndex.</param>
''' <param name="text">The word to draw.</param>
''' <param name="textRect">Bounding rectangle of the word; its coordinates are truncated to integers.</param>
''' <param name="character">Character that supplies the font index for the word.</param>
''' <param name="textFontHeight">Largest FontSize among the characters of the word.</param>
Private Shared Sub RenderText(ByVal g As Graphics, ByVal pageText As DocumentPageText, _
                              ByVal text As String, ByVal textRect As LogicalRectangle, _
                              ByVal character As DocumentCharacter, ByVal textFontHeight As Double)
   ' Create the font
   Dim font As DocumentFont = pageText.Fonts(character.FontIndex)
   Dim faceName As String = font.FaceName
   If String.IsNullOrEmpty(faceName) Then
      ' Could be an embedded font, use Arial
      faceName = "Arial"
   End If

   ' Translate the document font flags into their System.Drawing equivalents.
   ' The local is named "style" so that it does not shadow the FontStyle type.
   Dim style As FontStyle = FontStyle.Regular
   If (font.FontStyle And DocumentFontStyle.Bold) = DocumentFontStyle.Bold Then
      style = style Or FontStyle.Bold
   End If
   If (font.FontStyle And DocumentFontStyle.Italic) = DocumentFontStyle.Italic Then
      style = style Or FontStyle.Italic
   End If
   If (font.FontStyle And DocumentFontStyle.Underline) = DocumentFontStyle.Underline Then
      style = style Or FontStyle.Underline
   End If

   ' NOTE(review): the "* 72 / DpiY" scaling presumably converts a pixel height into
   ' points for the Font constructor - confirm the unit of DocumentCharacter.FontSize.
   Using f As New Font(faceName, CType(textFontHeight * 72 / g.DpiY, Single), style)
      Dim rect As New Rectangle(CType(textRect.X, Integer), CType(textRect.Y, Integer), _
                                CType(textRect.Width, Integer), CType(textRect.Height, Integer))

      Using sf As New StringFormat()
         sf.Alignment = StringAlignment.Center
         sf.LineAlignment = StringAlignment.Center
         sf.FormatFlags = sf.FormatFlags Or StringFormatFlags.NoClip Or StringFormatFlags.NoWrap
         g.DrawString(text, f, Brushes.Black, rect, sf)
      End Using
   End Using
End Sub
/// <summary>
/// Prompts for a document, parses the text of its first page, draws every word onto a
/// bitmap sized to that page, and offers to save the bitmap as a PNG file.
/// </summary>
public void DocumentPageTextExample()
{
   string documentFileName;
   using (OpenFileDialog dlg = new OpenFileDialog())
   {
      if (dlg.ShowDialog() != DialogResult.OK)
      {
         return;
      }
      documentFileName = dlg.FileName;
   }

   // Load the document at 200 DPI
   DocumentReaderLoadOptions loadOptions = new DocumentReaderLoadOptions();
   loadOptions.Resolution = 200;

   IOcrEngine ocrEngine = null;
   DocumentReader reader = DocumentReader.Create(documentFileName, loadOptions);
   try
   {
      // If this is a Raster document such as TIFF or JPEG, we must use an OCR engine
      if (reader.ReaderType == DocumentReaderType.Raster)
      {
         ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.Advantage, false);
         ocrEngine.Startup(null, null, null, LEAD_VARS.OcrAdvantageRuntimeDir);
      }

      reader.ObjectManager.BeginParse(ocrEngine);
      try
      {
         // Get the text of the first page
         DocumentReaderPage page = reader.Pages[0];
         DocumentPageText pageText = reader.ObjectManager.ParsePageText(page);

         // Create the bitmap to draw the objects to
         using (Bitmap btmp = new Bitmap(page.PixelWidth, page.PixelHeight))
         {
            btmp.SetResolution((float)page.DpiX, (float)page.DpiY);

            using (Graphics g = Graphics.FromImage(btmp))
            {
               g.Clear(Color.White);

               // Render the objects
               // Text is a word at a time
               LogicalRectangle textRect = LogicalRectangle.Empty;
               double textFontHeight = 0;
               StringBuilder textWord = new StringBuilder();
               DocumentCharacter lastCharacter = default(DocumentCharacter);

               foreach (DocumentCharacter character in pageText.Characters)
               {
                  // Add the text code and rects together
                  textWord.Append(character.Code);
                  if (textRect.IsEmpty)
                  {
                     textRect = character.Bounds;
                  }
                  else
                  {
                     textRect = LogicalRectangle.Union(textRect, character.Bounds);
                  }
                  textFontHeight = Math.Max(textFontHeight, character.FontSize);
                  lastCharacter = character;

                  // If this is the last object in a word, render it
                  if (character.IsEndOfWord || character.IsEndOfLine ||
                      character.IsEndOfParagraph || character.IsEndOfPage)
                  {
                     RenderText(g, pageText, textWord.ToString(), textRect, character, textFontHeight);

                     // Start the next word from a clean state. The font height must be
                     // reset too, otherwise one large word enlarges every word after it.
                     textWord.Length = 0;
                     textRect = LogicalRectangle.Empty;
                     textFontHeight = 0;
                  }
               }

               // Render any trailing characters that were not closed by an end-of-word,
               // end-of-line, end-of-paragraph or end-of-page flag.
               if (textWord.Length > 0)
               {
                  RenderText(g, pageText, textWord.ToString(), textRect, lastCharacter, textFontHeight);
               }

               // Save the result as PNG
               using (SaveFileDialog saveDlg = new SaveFileDialog())
               {
                  saveDlg.Filter = "PNG files|*.png";
                  if (saveDlg.ShowDialog() == DialogResult.OK)
                  {
                     btmp.Save(saveDlg.FileName, System.Drawing.Imaging.ImageFormat.Png);
                  }
               }
            }
         }
      }
      finally
      {
         // Always pair BeginParse with EndParse, even when parsing or drawing fails
         reader.ObjectManager.EndParse();
      }
   }
   finally
   {
      // Release the OCR engine (if one was created) and the reader on every exit path
      if (ocrEngine != null)
      {
         ocrEngine.Dispose();
      }
      reader.Dispose();
   }
}

/// <summary>
/// Draws one word in black, centered inside its bounding rectangle, using the font
/// that the page text reports for the given character.
/// </summary>
/// <param name="g">Graphics surface to draw on.</param>
/// <param name="pageText">Page text whose Fonts list is indexed by character.FontIndex.</param>
/// <param name="text">The word to draw.</param>
/// <param name="textRect">Bounding rectangle of the word; its coordinates are truncated to integers.</param>
/// <param name="character">Character that supplies the font index for the word.</param>
/// <param name="textFontHeight">Largest FontSize among the characters of the word.</param>
private static void RenderText(Graphics g, DocumentPageText pageText, string text,
                               LogicalRectangle textRect, DocumentCharacter character,
                               double textFontHeight)
{
   // Create the font
   DocumentFont font = pageText.Fonts[character.FontIndex];
   string faceName = font.FaceName;
   if (string.IsNullOrEmpty(faceName))
   {
      // Could be an embedded font, use Arial
      faceName = "Arial";
   }

   // Translate the document font flags into their System.Drawing equivalents
   FontStyle fontStyle = FontStyle.Regular;
   if ((font.FontStyle & DocumentFontStyle.Bold) == DocumentFontStyle.Bold)
   {
      fontStyle |= FontStyle.Bold;
   }
   if ((font.FontStyle & DocumentFontStyle.Italic) == DocumentFontStyle.Italic)
   {
      fontStyle |= FontStyle.Italic;
   }
   if ((font.FontStyle & DocumentFontStyle.Underline) == DocumentFontStyle.Underline)
   {
      fontStyle |= FontStyle.Underline;
   }

   // NOTE(review): the "* 72 / DpiY" scaling presumably converts a pixel height into
   // points for the Font constructor - confirm the unit of DocumentCharacter.FontSize.
   using (Font f = new Font(faceName, (float)textFontHeight * 72 / g.DpiY, fontStyle))
   {
      Rectangle rect = new Rectangle((int)textRect.X, (int)textRect.Y,
                                     (int)textRect.Width, (int)textRect.Height);

      using (StringFormat sf = new StringFormat())
      {
         sf.Alignment = StringAlignment.Center;
         sf.LineAlignment = StringAlignment.Center;
         sf.FormatFlags |= StringFormatFlags.NoClip | StringFormatFlags.NoWrap;
         g.DrawString(text, f, Brushes.Black, rect, sf);
      }
   }
}
Target Platforms: Windows 7, Windows Vista SP1 or later, Windows XP SP3, Windows Server 2008 (Server Core not supported), Windows Server 2008 R2 (Server Core supported with SP1 or later), Windows Server 2003 SP2
Raster .NET | C API | C++ Class Library | JavaScript HTML5
Document .NET | C API | C++ Class Library | JavaScript HTML5
Medical .NET | C API | C++ Class Library | JavaScript HTML5
Medical Web Viewer .NET
