DocumentAnalyzer Class

Summary

Class that organizes the operations to load and execute document analysis steps.

Syntax

C#

C++/CLI

Python

public class DocumentAnalyzer

public: 
   ref class DocumentAnalyzer

class DocumentAnalyzer:

Example

This example shows how to load and run document analysis.

C#

Java

using Leadtools; 
using Leadtools.Document.Analytics; 
using Leadtools.Document; 
using Leadtools.Document.Unstructured; 
using Leadtools.Document.Data; 
 
using Leadtools.Ocr; 
 
// Loads a document, runs a DocumentAnalyzer over it, and writes the value of
// every returned element to the console.
public void Sample()
{
   // Initialize the OCR Engine; the using block disposes it when the sample finishes
   using (IOcrEngine ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.LEAD))
   {
      // Load the document to be analyzed; it is disposed before the engine
      using (LEADDocument document = DocumentFactory.LoadFromFile(@"C:\Desktop\test.docx", new LoadDocumentOptions()))
      {
         // Hand the OCR engine to the document's text settings
         document.Text.OcrEngine = ocrEngine;

         // Initialize the Document Analyzer with an unstructured-data reader and
         // a FileRepositoryContext pointing at test.json
         var analyzer = new DocumentAnalyzer()
         {
            Reader = new UnstructuredDataReader(),
            QueryContext = new FileRepositoryContext(@"C:\Desktop\test.json")
         };

         // Initialize the run options with a RepositoryQuery as the element query
         var options = new DocumentAnalyzerRunOptions()
         {
            ElementQuery = new RepositoryQuery()
         };

         // Run the analysis and collect the list of results
         List<ElementSetResult> results = analyzer.Run(document, options);

         // Walk every result set and write each item's value to the console,
         // separated by single spaces
         foreach (ElementSetResult result in results)
         {
            foreach (ElementResult item in result.Items)
            {
               Console.Write($"{(item.Value)} ");
            }
         }
      }
   }
}

 
import static org.junit.Assert.assertTrue; 
 
import java.io.File; 
import java.io.IOException; 
import java.util.List; 
 
import org.junit.*; 
import org.junit.runner.JUnitCore; 
import org.junit.runner.Result; 
import org.junit.runner.notification.Failure; 
 
import leadtools.document.DocumentFactory; 
import leadtools.document.analytics.*; 
import leadtools.document.data.FileRepositoryContext; 
import leadtools.document.data.RepositoryQuery; 
import leadtools.document.unstructured.UnstructuredDataReader; 
import leadtools.document.*; 
import leadtools.ocr.OcrEngine; 
import leadtools.ocr.OcrEngineManager; 
import leadtools.ocr.OcrEngineType; 
 
 
// Runs a document analysis end to end: creates an OCR engine, loads a document,
// configures a DocumentAnalyzer, runs it, and prints the results twice (first
// each item itself, then each item's value) before checking the set count.
public void sample() {
   // NOTE(review): nothing in this sample releases the OCR engine or the
   // document once the analysis is done -- confirm whether they need to be
   // disposed explicitly.

   // Initialize the OCR Engine
   OcrEngine ocrEngine = OcrEngineManager.createEngine(OcrEngineType.LEAD);

   // Load the document to be analyzed and attach the OCR engine to its text settings
   String documentPath = "C:\\LEADTOOLS23\\Resources\\Images\\Forms\\Unstructured\\ClientInfoSheet.docx";
   LoadDocumentOptions loadOptions = new LoadDocumentOptions();
   LEADDocument document = DocumentFactory.loadFromFile(documentPath, loadOptions);
   document.getText().setOcrEngine(ocrEngine);

   // Initialize the Document Analyzer with a reader and a query context
   DocumentAnalyzer analyzer = new DocumentAnalyzer();
   analyzer.getReaders().add(new UnstructuredDataReader());
   String repositoryPath = "C:\\LEADTOOLS23\\Resources\\Images\\Forms\\Unstructured\\ClientInfoSheet.json";
   FileRepositoryContext queryContext = new FileRepositoryContext(repositoryPath);
   analyzer.setQueryContext(queryContext);

   // Initialize the run options
   DocumentAnalyzerRunOptions options = new DocumentAnalyzerRunOptions();
   RepositoryQuery elementQuery = new RepositoryQuery();
   options.setElementQuery(elementQuery);

   // Run the analysis to create the list of results
   List<ElementSetResult> results = analyzer.run(document, options);

   // First pass: print every result item as-is
   for (ElementSetResult resultSet : results) {
      for (ElementResult entry : resultSet.getItems()) {
         System.out.println(entry);
      }
   }

   // Second pass: print each item's value, counting the result sets visited
   int visitedSets = 0;
   while (visitedSets < results.size()) {
      ElementSetResult resultSet = results.get(visitedSets);
      visitedSets++;
      for (ElementResult entry : resultSet.getItems()) {
         System.out.println(entry.getValue());
      }
   }

   System.out.println(results.size());

   // NOTE(review): the counter is incremented once per result set, so this
   // check holds whenever the loop above completes.
   assertTrue(results.size() == visitedSets);
   System.out.println("All items have been accounted for");
}

Requirements

Target Platforms