PDFRedact Structure

Summary

Redact item data.

Syntax

C++/CLI

Java

Python

[SerializableAttribute()] 
public struct PDFRedact

public class PDFRedact

public: 
   [SerializableAttribute] 
   value class PDFRedact sealed

class PDFRedact:

Remarks

PDFRedact is used with PDFFile to quickly redact an existing PDF document in place.

Example

C# and Java

using Leadtools.WinForms; 
using Leadtools; 
using Leadtools.Codecs; 
using Leadtools.Controls; 
using Leadtools.Drawing; 
using Leadtools.ImageProcessing; 
using Leadtools.Pdf; 
using Leadtools.Svg; 
 
 
/// <summary>
/// Redacts every word containing "LEADTOOLS" from a copy of 'leadtools.pdf'.
/// The words are located with PDFDocument, the areas are removed with PDFFile.Redact,
/// and the result is parsed again to check that no matching word is left.
/// </summary>
private static void RedactExample()
{
   const string toRedact = "LEADTOOLS";

   // Work on a copy of 'leadtools.pdf' installed with LEADTOOLS, since the file is redacted in place
   string imagesDir = @"C:\LEADTOOLS22\Resources\Images";
   string pdfFileName = Path.Combine(imagesDir, "leadtools-redacted.pdf");
   File.Copy(Path.Combine(imagesDir, "leadtools.pdf"), pdfFileName, true);

   // Use PDFDocument to collect the words (and their positions) of every parsed page.
   // The document is disposed before PDFFile.Redact touches the same file.
   var allWords = new List<MyPDFWord>();
   using (var pdfDocument = new PDFDocument(pdfFileName))
   {
      pdfDocument.ParsePages(PDFParsePagesOptions.Objects, 1, -1);

      // Build the words for each page from PDFDocumentPage.Objects
      foreach (PDFDocumentPage pdfPage in pdfDocument.Pages)
      {
         allWords.AddRange(GetPageWords(pdfPage));
      }
   }

   // Create one PDFRedact item for each word that contains the value we want to redact.
   // The match is case-insensitive; OrdinalIgnoreCase avoids allocating a lowered copy of every word.
   var pdfRedacts = new List<PDFRedact>();
   foreach (MyPDFWord word in allWords)
   {
      if (word.Value.IndexOf(toRedact, StringComparison.OrdinalIgnoreCase) < 0)
         continue;

      Console.WriteLine($"Found {word.Value} at {word.Bounds} in page {word.PageNumber}");
      var pdfRedact = new PDFRedact
      {
         PageNumber = word.PageNumber,
         Bounds = new PDFRect(word.Bounds.Left, word.Bounds.Top, word.Bounds.Right, word.Bounds.Bottom)
      };
      pdfRedacts.Add(pdfRedact);
   }

   // Redact the document
   PDFFile.Redact(pdfFileName, null, pdfRedacts);

   // Finally, verify that the redacted PDF does not contain the redacted words anymore
   using (var pdfDocument = new PDFDocument(pdfFileName))
   {
      pdfDocument.ParsePages(PDFParsePagesOptions.Objects, 1, -1);

      // Build the words for each page from PDFDocumentPage.Objects
      foreach (PDFDocumentPage pdfPage in pdfDocument.Pages)
      {
         foreach (MyPDFWord word in GetPageWords(pdfPage))
         {
            Debug.Assert(
               word.Value.IndexOf(toRedact, StringComparison.OrdinalIgnoreCase) < 0,
               $"'{word.Value}' on page {word.PageNumber} still contains '{toRedact}'");
         }
      }
   }
}
 
/// <summary>
/// Simple holder describing one word found on a PDF page.
/// </summary>
class MyPDFWord
{
   /// <summary>Number of the page the word was found on.</summary>
   public int PageNumber;

   /// <summary>Characters of the word, as a string.</summary>
   public string Value;

   /// <summary>Bounding rectangle of the word on its page.</summary>
   public LeadRectD Bounds;
}
 
/// <summary>
/// Groups the text objects of a parsed PDF page into words.
/// </summary>
/// <param name="pdfPage">Page whose <c>Objects</c> list, <c>Height</c> and <c>PageNumber</c> are read.</param>
/// <returns>
/// One <c>MyPDFWord</c> per run of objects that contains at least one text object.
/// The list is empty when the page has no objects.
/// </returns>
/// <remarks>
/// Text and bounds are accumulated together in a single pass, so <c>Value</c> and <c>Bounds</c>
/// always describe the same characters. A non-text object inside a run is skipped and does not
/// discard the text that was read before it.
/// </remarks>
private static IList<MyPDFWord> GetPageWords(PDFDocumentPage pdfPage)
{
   var words = new List<MyPDFWord>();

   IList<PDFObject> objects = pdfPage.Objects;
   if (objects == null || objects.Count == 0)
      return words;

   int objectIndex = 0;
   int objectCount = objects.Count;

   double pageHeight = pdfPage.Height;

   // Loop through all the objects, one word (run of objects) per iteration
   while (objectIndex < objectCount)
   {
      // Characters and total bounding rectangle of the word being assembled
      var text = new StringBuilder();
      LeadRectD wordBounds = LeadRectD.Empty;
      bool hasText = false;

      // Loop till we reach EndOfWord/EndOfLine or reach the end of the objects
      bool more = true;
      while (more)
      {
         PDFObject pdfObject = objects[objectIndex];

         // Only text objects contribute to the word; anything else is skipped
         if (pdfObject.ObjectType == PDFObjectType.Text)
         {
            PDFRect pdfBounds = pdfObject.Bounds;

            // pdfBounds are in bottom-left coordinate, convert it to top-left
            LeadRectD objectBounds = LeadRectD.FromLTRB(pdfBounds.Left, pageHeight - pdfBounds.Top, pdfBounds.Right, pageHeight - pdfBounds.Bottom);

            // Add the bounding rectangle of this object
            if (wordBounds.IsEmpty)
               wordBounds = objectBounds;
            else
               wordBounds = LeadRectD.UnionRects(wordBounds, objectBounds);

            text.Append(pdfObject.Code);
            hasText = true;
         }

         objectIndex++;
         more = (objectIndex < objectCount) && !pdfObject.TextProperties.IsEndOfWord && !pdfObject.TextProperties.IsEndOfLine;
      }

      // A run made only of non-text objects does not produce a word
      if (!hasText)
         continue;

      // Add this word to the list
      var word = new MyPDFWord();
      word.PageNumber = pdfPage.PageNumber;
      word.Value = text.ToString();
      word.Bounds = wordBounds;
      words.Add(word);
   }

   return words;
}

 
import java.io.ByteArrayInputStream; 
import java.io.File; 
import java.io.FileInputStream; 
import java.io.FileNotFoundException; 
import java.io.FileOutputStream; 
import java.io.FileReader; 
import java.io.IOException; 
import java.io.InputStream; 
import java.nio.file.Files; 
import java.nio.file.Paths; 
import java.nio.file.StandardCopyOption; 
import java.util.ArrayList; 
import java.util.Date; 
import java.util.List; 
import java.util.Scanner; 
 
import org.junit.*; 
import org.junit.runner.JUnitCore; 
import org.junit.runner.Result; 
import org.junit.runner.notification.Failure; 
import static org.junit.Assert.*; 
 
import leadtools.*; 
import leadtools.codecs.*; 
import leadtools.pdf.*; 
 
 
/**
 * Redacts every word containing "LEADTOOLS" from a copy of 'leadtools.pdf'.
 * The words are located with PDFDocument, the areas are removed with PDFFile.redact,
 * and the result is parsed again to check that no matching word is left.
 *
 * @throws IOException if the source PDF cannot be copied
 */
public void pdfFileRedactExample() throws IOException {

   final String toRedact = "LEADTOOLS";

   // Work on a copy of 'leadtools.pdf' installed with LEADTOOLS, since the file is redacted in place
   final String LEAD_VARS_IMAGES_DIR = "C:\\LEADTOOLS23\\Resources\\Images";
   String pdfFileName = combine(LEAD_VARS_IMAGES_DIR, "leadtools-redacted.pdf");
   Files.copy(Paths.get(combine(LEAD_VARS_IMAGES_DIR, "leadtools.pdf")), Paths.get(pdfFileName),
         StandardCopyOption.REPLACE_EXISTING);

   // We will use PDFDocument to find the position of the words to redact

   // Collect the words of every parsed page
   ArrayList<MyPDFWord> allWords = new ArrayList<MyPDFWord>();
   PDFDocument pdfDocument = new PDFDocument(pdfFileName);
   pdfDocument.parsePages(PDFParsePagesOptions.OBJECTS.getValue(), 1, -1);

   // Build the words for each page from PDFDocumentPage.Objects
   for (PDFDocumentPage pdfPage : pdfDocument.getPages()) {
      allWords.addAll(GetPageWords(pdfPage));
   }
   pdfDocument = null;

   // Now create a PDFRedact object for each word that contains the value we want
   // to redact. Lower-casing uses Locale.ROOT so the case-insensitive match does
   // not depend on the default locale of the machine running the example.
   String toRedactLower = toRedact.toLowerCase(java.util.Locale.ROOT);
   ArrayList<PDFRedact> pdfRedacts = new ArrayList<PDFRedact>();
   for (MyPDFWord word : allWords) {
      if (!word.Value.toLowerCase(java.util.Locale.ROOT).contains(toRedactLower)) {
         continue;
      }

      System.out.println("Found " + word.Value + " at " + word.Bounds + " in page " + word.PageNumber);
      PDFRedact pdfRedact = new PDFRedact();
      pdfRedact.setPageNumber(word.PageNumber);
      pdfRedact.setBounds(new PDFRect(word.Bounds.getLeft(), word.Bounds.getTop(), word.Bounds.getRight(),
            word.Bounds.getBottom()));
      pdfRedacts.add(pdfRedact);
   }

   // Redact the document
   PDFFile.redact(pdfFileName, null, pdfRedacts);

   // Finally, verify that the redacted PDF does not contain the redacted words anymore
   pdfDocument = new PDFDocument(pdfFileName);
   pdfDocument.parsePages(PDFParsePagesOptions.OBJECTS.getValue(), 1, -1);

   // Build the words for each page from PDFDocumentPage.Objects
   for (PDFDocumentPage pdfPage : pdfDocument.getPages()) {
      for (MyPDFWord word : GetPageWords(pdfPage)) {
         assertFalse("'" + word.Value + "' on page " + word.PageNumber + " still contains '" + toRedact + "'",
               word.Value.toLowerCase(java.util.Locale.ROOT).contains(toRedactLower));
      }
   }
   pdfDocument = null;
}
 
/** Simple holder describing one word found on a PDF page. */
class MyPDFWord {

   /** Number of the page the word was found on. */
   public int PageNumber;

   /** Characters of the word, as a string. */
   public String Value;

   /** Bounding rectangle of the word on its page. */
   public LeadRectD Bounds;

}
 
/**
 * Groups the text objects of a parsed PDF page into words.
 *
 * <p>Text and bounds are accumulated together in a single pass, so {@code Value} and
 * {@code Bounds} always describe the same characters. A non-text object inside a run is
 * skipped and does not discard the text that was read before it.
 *
 * @param pdfPage page whose objects, height and page number are read
 * @return one MyPDFWord per run of objects that contains at least one text object;
 *         empty when the page has no objects
 */
private static ArrayList<MyPDFWord> GetPageWords(PDFDocumentPage pdfPage) {
   ArrayList<MyPDFWord> words = new ArrayList<MyPDFWord>();

   List<PDFObject> objects = pdfPage.getObjects();
   if (objects == null || objects.size() == 0)
      return words;

   int objectIndex = 0;
   int objectCount = objects.size();

   double pageHeight = pdfPage.getHeight();

   // Loop through all the objects, one word (run of objects) per iteration
   while (objectIndex < objectCount) {
      // Characters and total bounding rectangle of the word being assembled
      StringBuilder text = new StringBuilder();
      LeadRectD wordBounds = LeadRectD.getEmpty();
      boolean hasText = false;

      // Loop till we reach EndOfWord/EndOfLine or reach the end of the objects
      boolean more = true;
      while (more) {
         PDFObject pdfObject = objects.get(objectIndex);

         // Only text objects contribute to the word; anything else is skipped
         if (pdfObject.getObjectType() == PDFObjectType.TEXT) {
            PDFRect pdfBounds = pdfObject.getBounds();

            // pdfBounds are in bottom-left coordinate, convert it to top-left
            LeadRectD objectBounds = LeadRectD.fromLTRB(pdfBounds.getLeft(),
                  pageHeight - pdfBounds.getTop(), pdfBounds.getRight(),
                  pageHeight - pdfBounds.getBottom());

            // Add the bounding rectangle of this object
            if (wordBounds.isEmpty())
               wordBounds = objectBounds;
            else
               wordBounds = LeadRectD.unionRects(wordBounds, objectBounds);

            text.append(pdfObject.getCode());
            hasText = true;
         }

         objectIndex++;
         more = (objectIndex < objectCount) && !pdfObject.getTextProperties().isEndOfWord()
               && !pdfObject.getTextProperties().isEndOfLine();
      }

      // A run made only of non-text objects does not produce a word
      if (!hasText) {
         continue;
      }

      // MyPDFWord is created through an enclosing PDFFilesExamples instance
      // because this method is static
      PDFFilesExamples ex = new PDFFilesExamples();
      MyPDFWord word = ex.new MyPDFWord();
      word.PageNumber = pdfPage.getPageNumber();
      word.Value = text.toString();
      word.Bounds = wordBounds;
      words.add(word);
   }
   return words;
}

Requirements

Target Platforms

Reference

Leadtools.Pdf Namespace

Download our FREE evaluation

Help Version 23.0.2024.2.29

Leadtools.Pdf Assembly

Introduction

Getting Started

Namespaces

Leadtools.Pdf Namespace

Assemblies