image2data

Der Tech-Blog

...weitermachen, wo OCR aufhört

Textextraktion über alle Seiten (sample_4.i2dspt)

Dez 01 2014
// ***************************************************************************
// * sample_4.i2dspt                                                         *
// *                                                                         *
// * - Retrieving the text from all pages of a pdf using pdf text extraction *
// * - Showing the result in the i2dTextViewer                               *
// *                                                                         *
// * Press F9 to execute the code or F7/F8 to debug it                       *
// *                                                                         *
// * Be sure that the sample data was installed!                             *
// *                                                                         *
// * Contact www.norpa.eu for more information                               *
// ***************************************************************************
 
var
  // Loop counter over the pages of the PDF (pages are addressed starting at 1)
  i: Integer;

  // Number of pages reported for the sample PDF
  iPageCount: Integer;
  // Full path of the sample PDF, built once from the configured "in"-folder
  sFileName: String;
  // Accumulates the extracted text of all pages
  sText: String;

  rProcessSettings: Ti2dProcessSettings;

begin
  // Get the configured process settings. We must know the configured "in"-folder
  // because that is where the sample documents are located.
  i2dGetProcessSettings(rProcessSettings);

  // Build the path once instead of concatenating it again for every page
  sFileName := rProcessSettings.InPath + 'sample_invoice_5.pdf';

  // Get the number of pages of the PDF file.
  // NOTE(review): the second argument is passed as an empty string here -
  // presumably a document password; confirm against the i2d function reference.
  iPageCount := i2dGetPDFPageCount(sFileName, '');

  // Start from an explicitly empty result so the appends below never depend on
  // the implicit initial value of the variable
  sText := '';

  for i := 1 to iPageCount do begin
    // Perform a pdf text extraction on page number i using the extraction flag 1
    // (valid range: 0 to 6). The terminating semicolon was missing in the
    // original sample, which made the following statement a syntax error.
    sText := sText + i2dExtractPDFText(sFileName, '', i, 1);

    // Append a page-end tag as a visual separator between pages
    sText := sText + #13#10'<page end>'#13#10;
  end;

  // Show the result
  i2dShowInTextViewer(sText);
end.

Atom

powered by Nibbleblog