image2data

Der Tech-Blog

...weitermachen, wo OCR aufhört

Seite segmentieren, dann Texterkennung auf Segmente (sample_9.i2dspt)

Dez 01 2014
// *****************************************************
// * sample_9.i2dspt                                   *
// *                                                   *
// * - Retrieving the objects of an image              *
// * - Retrieving the structure of an image            *
// * - Performing an OCR on every found zone           *
// * - Showing the result in the i2dTextViewer         *
// *                                                   *
// * Press F9 to execute the code or F7/F8 to debug it *
// *                                                   *
// * Be sure that the sample data was installed!       *
// *                                                   *
// * Contact www.norpa.eu for more information         *
// *****************************************************
 
var
  // Index of the zone currently being processed
  i: Integer;
  // Accumulated OCR text of all zones, one zone after the other
  sText: String;

  // oPage is the working bitmap (cropped per zone); oPageBak keeps the
  // deskewed full page so it can be restored before every crop
  oPage: TBitmap;
  oPageBak: TBitmap;

  // Results of the object detection and of the structure (zone) detection
  aOCRObjects: Ti2dOCRObjects;
  aOCRZones: Ti2dOCRZones;

  rProcessSettings: Ti2dProcessSettings;

begin
  // Fetch the configured process settings; the sample documents are loaded
  // from the configured "in" folder (rProcessSettings.InPath)
  i2dGetProcessSettings(rProcessSettings);

  // Start with an explicitly empty result instead of relying on implicit
  // initialisation
  sText := '';

  // Each bitmap gets its own try/finally: if creating the second bitmap
  // (or freeing one of them) raises, the other one is still released
  oPage := TBitmap.Create;
  try
    oPageBak := TBitmap.Create;
    try
      // Load page 1 of the sample invoice into the working bitmap
      i2dLoadBitmap(rProcessSettings.InPath + 'sample_invoice_1.tif', 1, oPage);

      // Deskew the page before analysing it
      i2dDeskewBitmap(oPage);

      // Detect the image objects.
      // NOTE(review): the meaning of the numeric tuning parameters is not
      // visible in this script - see the i2dGetBitmapObjects documentation
      i2dGetBitmapObjects(oPage, 2, 2, 4, 11, 5, 5, 5, 5, 4, aOCRObjects);

      // Derive the page structure (zones) from the detected objects.
      // NOTE(review): numeric parameters are undocumented here as well
      i2dGetBitmapStructure(oPage, 200, 100, 10, 10, aOCRObjects, aOCRZones);

      // Keep a copy of the deskewed page; cropping below modifies oPage
      oPageBak.Assign(oPage);

      // Run the OCR once per detected zone
      for i := 0 to Length(aOCRZones) - 1 do begin
        // Restore the full page, because the previous iteration cropped it
        oPage.Assign(oPageBak);

        // Reduce the working bitmap to the current zone
        i2dCropBitmap(oPage, aOCRZones[i].Left, aOCRZones[i].Top, aOCRZones[i].Width, aOCRZones[i].Height);

        // Recognise the text of the cropped zone and append it
        sText := sText + i2dOCRBitmap(oPage, 0, False);

        // Make sure the collected text ends with a line feed. The emptiness
        // check is a separate statement so the index access below can never
        // run on an empty string, whether or not the script engine
        // short-circuits boolean expressions
        if Length(sText) > 0 then begin
          if sText[Length(sText)] <> #10 then
            sText := sText + #10;
        end;
      end;

      // Show the collected text in the text viewer
      i2dShowInTextViewer(sText);
    finally
      // Release the backup bitmap
      oPageBak.Free;
    end;
  finally
    // Release the working bitmap
    oPage.Free;
  end;
end.

Texterkennung eines Scanstapels getrennt nach Barcodes (sample_5.i2dspt)

Dez 01 2014
// ***********************************************************************************
// * sample_5.i2dspt                                                                 *
// *                                                                                 *
// * - Retrieving the text from all pages of an image using OCR                      * 
// * - Separate invoices by searching a barcode in the upper left corner of any page *
// *                                                                                 *
// * Press F9 to execute the code or F7/F8 to debug it                               *
// *                                                                                 *
// * Be sure that the sample data was installed!                                     *
// *                                                                                 *
// * Contact www.norpa.eu for more information                                       *
// ***********************************************************************************
 
var
  // Number of the page currently being processed (starts at 1)
  i: Integer;

  iPageCount: Integer;
  // Full path of the multi-page sample image
  sFileName: String;
  // Accumulated OCR text of all pages including the inserted tags
  sText: String;

  // Receives the barcodes found by i2dOBCRBitmap; only their count is used
  aBarcodes: Ti2dBarcodes;

  // oPage holds the full current page, oPageCorner its cropped upper left corner
  oPage: TBitmap;
  oPageCorner: TBitmap;

  rProcessSettings: Ti2dProcessSettings;

begin
  // Fetch the configured process settings; the sample documents are loaded
  // from the configured "in" folder (rProcessSettings.InPath)
  i2dGetProcessSettings(rProcessSettings);

  // Build the file name once instead of on every loop iteration
  sFileName := rProcessSettings.InPath + 'sample_all.tif';

  // Start with an explicitly empty result instead of relying on implicit
  // initialisation
  sText := '';

  // Each bitmap gets its own try/finally: if creating the second bitmap
  // (or freeing one of them) raises, the other one is still released
  oPage := TBitmap.Create;
  try
    oPageCorner := TBitmap.Create;
    try
      // Get the number of pages contained in the image file
      iPageCount := i2dGetImagePageCount(sFileName);

      for i := 1 to iPageCount do begin
        // Load page i of the image into the bitmap object
        i2dLoadBitmap(sFileName, i, oPage);

        // Copy the page and crop the copy to its upper left corner. If a
        // barcode exists on the page, it is expected to be located there
        oPageCorner.Assign(oPage);
        i2dCropBitmap(oPageCorner, 0, 0, 400, 1000);

        // Perform a barcode recognition (OBCR) on the corner bitmap, whose
        // size is now only 400 x 1000 pixels. The barcode value is
        // unimportant; at least one barcode on the page marks it as the
        // first page of a new invoice
        if i2dOBCRBitmap(oPageCorner, 0, aBarcodes) > 0 then
          sText := sText + #13#10'<new invoice>'#13#10;

        // Perform a character recognition (OCR) on the full page; the script
        // passes True to request format preservation
        sText := sText + i2dOCRBitmap(oPage, 0, True);

        // Append a page end tag to visually separate the pages
        sText := sText + #13#10'<page end>'#13#10;
      end;

      // Show the result in the text viewer
      i2dShowInTextViewer(sText);
    finally
      // Release the corner bitmap
      oPageCorner.Free;
    end;
  finally
    // Release the page bitmap
    oPage.Free;
  end;
end.

Atom

powered by Nibbleblog