Leadtools.Forms.DocumentReaders Namespace : DocumentReaderPage Class |
public class DocumentReaderPage
'Declaration Public Class DocumentReaderPage
'Usage Dim instance As DocumentReaderPage
public ref class DocumentReaderPage
The DocumentReaderPage class is used as the type of the DocumentReader.Pages collection.
The DocumentReader.Pages collection is automatically created when a new DocumentReader object is created with the DocumentReader.Create method. This collection is read-only and cannot be modified since the DocumentReader object is a read-only view of a document. Each item in the collection corresponds to a page in the document.
Each item in the Pages collection corresponds to a page in the document. So the item at index 0 contains the properties of page 1, the item at index 1 contains the properties of page 2, and so on. Even though the DocumentReaderPage class contains the PageNumber property that specifies the number of the page, this information is for convenience only; the DocumentReader creator or load method will always populate the collection in the correct order from the first page to the last page.
The DocumentReaderPage class contains the size of the page in logical units (inches), stored in the Size property. The size is read from the document directly. The page resolution is stored in the DpiX and DpiY properties. The physical size of the page is stored in the PixelWidth and PixelHeight properties, and the bits per pixel is stored in the BitsPerPixel property.
The total number of pages in the document is DocumentReader.Pages.Count.
You can get a raster image render of a page or its thumbnail by using the DocumentReader.ImageManager property.
You can get the text properties of a page, such as the characters, words, fonts and location information, using the DocumentReader.ObjectManager property.
''' <summary>
''' Prompts for a document file, loads it with DocumentReader.Create, then shows:
''' the reader type, page count and document properties; the size of every page;
''' and finally the text of each page in turn until the user answers "No".
''' </summary>
''' <remarks>
''' The reader, the OCR engine (created only for raster documents) and the
''' BeginParse/EndParse pair are released in Using / Try...Finally blocks so that
''' they are cleaned up even when an exception is thrown part-way through.
''' </remarks>
Public Sub DocumentReaderPageExample()
   Dim documentFileName As String
   Using dlg As New OpenFileDialog()
      If dlg.ShowDialog() <> System.Windows.Forms.DialogResult.OK Then
         Return
      End If
      documentFileName = dlg.FileName
   End Using

   ' Load the document using default options; Using guarantees the reader is disposed
   Using reader As DocumentReader = DocumentReader.Create(documentFileName, Nothing)
      ' Show the document properties.
      ' Visual Basic string literals have no escape sequences, so "\n" would be shown
      ' literally; AppendLine() is used to terminate each line instead.
      Dim sb As New StringBuilder()
      sb.AppendFormat("Reader used: {0}", reader.ReaderType).AppendLine()
      sb.AppendFormat("Document has {0} pages", reader.Pages.Count).AppendLine()

      ' Get the properties (meta data)
      Dim props As IDictionary(Of String, String) = reader.GetProperties()
      For Each prop As KeyValuePair(Of String, String) In props
         sb.AppendFormat("{0}: {1}", prop.Key, prop.Value).AppendLine()
      Next
      MessageBox.Show(sb.ToString())

      ' Now show the pages sizes
      sb = New StringBuilder()
      For Each page As DocumentReaderPage In reader.Pages
         sb.AppendFormat("Page: {0} size: {1}", page.PageNumber, page.Size).AppendLine()
      Next
      MessageBox.Show(sb.ToString())

      ' Now loop and show the text for each page until the user cancels
      ' If this is a Raster document such as TIFF or JPEG, we must use an OCR engine
      Dim ocrEngine As IOcrEngine = Nothing
      Try
         If reader.ReaderType = DocumentReaderType.Raster Then
            ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.Advantage, False)
            ocrEngine.Startup(Nothing, Nothing, Nothing, LEAD_VARS.OcrAdvantageRuntimeDir)
         End If

         reader.ObjectManager.BeginParse(ocrEngine)
         Try
            For Each page As DocumentReaderPage In reader.Pages
               ' Parse this page
               Dim pageText As DocumentPageText = reader.ObjectManager.ParsePageText(page)
               Dim text As String = pageText.BuildText()
               Dim caption As String = String.Format("Page {0} text, continue to next page?", page.PageNumber)
               If MessageBox.Show(text, caption, MessageBoxButtons.YesNo) = System.Windows.Forms.DialogResult.No Then
                  Exit For
               End If
            Next
         Finally
            ' Always close the parse session opened by BeginParse
            reader.ObjectManager.EndParse()
         End Try
      Finally
         ' The engine only exists for raster documents
         If ocrEngine IsNot Nothing Then
            ocrEngine.Dispose()
         End If
      End Try
   End Using
End Sub
/// <summary>
/// Prompts for a document file, loads it with <c>DocumentReader.Create</c>, then shows:
/// the reader type, page count and document properties; the size of every page;
/// and finally the text of each page in turn until the user answers "No".
/// </summary>
/// <remarks>
/// The reader, the OCR engine (created only for raster documents) and the
/// BeginParse/EndParse pair are released in using / try-finally blocks so that
/// they are cleaned up even when an exception is thrown part-way through.
/// </remarks>
public void DocumentReaderPageExample()
{
   string documentFileName;
   using (OpenFileDialog dlg = new OpenFileDialog())
   {
      if (dlg.ShowDialog() != DialogResult.OK)
      {
         return;
      }
      documentFileName = dlg.FileName;
   }

   // Load the document using default options; using guarantees the reader is disposed
   using (DocumentReader reader = DocumentReader.Create(documentFileName, null))
   {
      // Show the document properties
      StringBuilder sb = new StringBuilder();
      sb.AppendFormat("Reader used: {0}\n", reader.ReaderType);
      sb.AppendFormat("Document has {0} pages\n", reader.Pages.Count);

      // Get the properties (meta data)
      IDictionary<string, string> props = reader.GetProperties();
      foreach (KeyValuePair<string, string> prop in props)
      {
         sb.AppendFormat("{0}: {1}\n", prop.Key, prop.Value);
      }
      MessageBox.Show(sb.ToString());

      // Now show the pages sizes
      sb = new StringBuilder();
      foreach (DocumentReaderPage page in reader.Pages)
      {
         sb.AppendFormat("Page: {0} size: {1}\n", page.PageNumber, page.Size);
      }
      MessageBox.Show(sb.ToString());

      // Now loop and show the text for each page until the user cancels
      // If this is a Raster document such as TIFF or JPEG, we must use an OCR engine
      IOcrEngine ocrEngine = null;
      try
      {
         if (reader.ReaderType == DocumentReaderType.Raster)
         {
            ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.Advantage, false);
            ocrEngine.Startup(null, null, null, LEAD_VARS.OcrAdvantageRuntimeDir);
         }

         reader.ObjectManager.BeginParse(ocrEngine);
         try
         {
            foreach (DocumentReaderPage page in reader.Pages)
            {
               // Parse this page
               DocumentPageText pageText = reader.ObjectManager.ParsePageText(page);
               string text = pageText.BuildText();
               string caption = string.Format("Page {0} text, continue to next page?", page.PageNumber);
               if (MessageBox.Show(text, caption, MessageBoxButtons.YesNo) == DialogResult.No)
               {
                  break;
               }
            }
         }
         finally
         {
            // Always close the parse session opened by BeginParse
            reader.ObjectManager.EndParse();
         }
      }
      finally
      {
         // The engine only exists for raster documents
         if (ocrEngine != null)
         {
            ocrEngine.Dispose();
         }
      }
   }
}
Target Platforms: Windows 7, Windows Vista SP1 or later, Windows XP SP3, Windows Server 2008 (Server Core not supported), Windows Server 2008 R2 (Server Core supported with SP1 or later), Windows Server 2003 SP2