diff --git a/bin/MindeeCliCommand.php b/bin/MindeeCliCommand.php index 3476f4e8..772f9758 100644 --- a/bin/MindeeCliCommand.php +++ b/bin/MindeeCliCommand.php @@ -5,7 +5,6 @@ namespace Mindee\Cli; use Exception; -use Mindee\Error\V1\MindeeV1HttpException; use Mindee\Input\InputSource; use Mindee\Input\PageOptions; use Mindee\Input\PathInput; @@ -13,6 +12,7 @@ use Mindee\V1\Client; use Mindee\V1\ClientOptions\PredictMethodOptions; use Mindee\V1\ClientOptions\PredictOptions; +use Mindee\V1\Error\MindeeV1HttpException; use Mindee\V1\Parsing\Common\AsyncPredictResponse; use Mindee\V1\Parsing\Common\PredictResponse; use Symfony\Component\Console\Command\Command; diff --git a/src/Image/ImageExtractor.php b/src/Image/ImageExtractor.php index 6d72ebb6..7170dc52 100644 --- a/src/Image/ImageExtractor.php +++ b/src/Image/ImageExtractor.php @@ -9,7 +9,6 @@ use ImagickException; use Mindee\Dependency\DependencyChecker; use Mindee\Error\ErrorCode; -use Mindee\Error\MindeeGeometryException; use Mindee\Error\MindeeImageException; use Mindee\Error\MindeePdfException; use Mindee\Geometry\BBox; @@ -17,7 +16,6 @@ use Mindee\Geometry\Point; use Mindee\Geometry\Polygon; use Mindee\Input\LocalInputSource; -use Mindee\V1\Parsing\Standard\BaseField; use function count; use function sprintf; @@ -128,20 +126,6 @@ public function getPageCount(): int return count($this->pageImages); } - /** - * Extract multiple images on a given page from a list of fields having position data. - * - * @param array> $fields List of Fields to extract. - * @param integer $pageIndex The page index to extract, begins at 0. - * @param null|string $outputName The base output filename, must have an image extension. - * - * @return array a list of extracted images - */ - public function extractImagesFromPage(array $fields, int $pageIndex, ?string $outputName = null): array - { - $outputName ??= $this->filename; - return $this->extractFromPage($fields, $pageIndex, $outputName); - } /** * Extracts images from a page. @@ -212,47 +196,6 @@ public function extractPolygonFromPage( return new ExtractedImage($extractedImageData, $filename, $format, $pageIndex, $index); } - /** - * Extracts a single image from a Position field. - * - * @param BaseField $field The field to extract. - * @param integer $pageIndex The page index to extract, begins at 0. - * @param integer $index The index to use for naming the extracted image. - * @param string $filename The output filename. - * @param string $format The output format. - * - * @return null|ExtractedImage The extracted image, or null if the field does not have valid position data. - * - * @throws MindeeGeometryException Throws if a field does not contain positional data. - */ - public function extractImage( - BaseField $field, - int $pageIndex, - int $index, - string $filename, - string $format - ): ?ExtractedImage { - $polygon = null; - - if (!empty($field->polygon)) { - $polygon = $field->polygon; - } elseif (!empty($field->boundingBox)) { - $polygon = $field->boundingBox; - } elseif (!empty($field->quadrangle)) { - $polygon = $field->quadrangle; - } elseif (!empty($field->rectangle)) { - $polygon = $field->rectangle; - } - - if (null === $polygon) { - throw new MindeeGeometryException( - 'Provided field has no valid position data.', - ErrorCode::GEOMETRIC_OPERATION_FAILED - ); - } - - return $this->extractPolygonFromPage($polygon, $pageIndex, $index, $filename, $format); - } /** * Getter for the local input source. @@ -262,33 +205,6 @@ public function getInputSource(): LocalInputSource return $this->inputSource; } - /** - * Extracts images from a page. - * - * @param array> $fields List of Fields to extract. - * @param integer $pageIndex The page index to extract, begins at 0. - * @param string $outputName Name of the created file. - * @param string $format The output format. - * - * @return array An array of created images - */ - protected function extractFromPage(array $fields, int $pageIndex, string $outputName, string $format = 'jpg'): array - { - $extractedImages = []; - - $i = 0; - foreach ($fields as $field) { - $filename = sprintf('%s_page%d-%d.%s', $outputName, $pageIndex, $i, $format); - $extractedImage = $this->extractImage($field, $pageIndex, $i, $filename, $format); - if (null !== $extractedImage) { - $extractedImages[] = $extractedImage; - } - ++$i; - } - - return $extractedImages; - } - /** * Extracts an image from a set of coordinates. * diff --git a/src/Pdf/PdfExtractor.php b/src/Pdf/PdfExtractor.php index d819bda9..7c8991a2 100644 --- a/src/Pdf/PdfExtractor.php +++ b/src/Pdf/PdfExtractor.php @@ -10,7 +10,6 @@ use Mindee\Dependency\DependencyChecker; use Mindee\Error\MindeePdfException; use Mindee\Input\LocalInputSource; -use Mindee\V1\Product\InvoiceSplitter\InvoiceSplitterV1InvoicePageGroups; use setasign\Fpdi\Fpdi; use setasign\Fpdi\PdfParser\CrossReference\CrossReferenceException; use setasign\Fpdi\PdfParser\Filter\FilterException; @@ -18,7 +17,6 @@ use setasign\Fpdi\PdfReader\PdfReaderException; use function count; -use function is_array; use function sprintf; /** @@ -39,8 +37,8 @@ class PdfExtractor /** * @param LocalInputSource $localInput Local Input, accepts all compatible formats. * - * @throws MindeePdfException Throws if PDF operations aren't supported, or if the file - * can't be read, respectively. + * @throws MindeePdfException|ImagickException Throws if PDF operations aren't supported, or if the file + * can't be read, respectively. */ public function __construct(LocalInputSource $localInput) { @@ -86,14 +84,14 @@ public function getPageCount(): int /** * Extracts sub-documents from the source document using list of page indexes. * - * @param array>|InvoiceSplitterV1InvoicePageGroups $pageIndexes List of sub-lists of pages to keep. + * @param array> $pageIndexes List of sub-lists of pages to keep. * * @return ExtractedPdf[] list of extracted documents * * @throws MindeePdfException Throws if FDPF/FPDI wasn't able to handle the pdf during the extraction. * @throws InvalidArgumentException Throws if invalid indexes are provided. */ - public function extractSubDocuments(array|InvoiceSplitterV1InvoicePageGroups $pageIndexes): array + public function extractSubDocuments(array $pageIndexes): array { $extractedPdfs = []; @@ -141,58 +139,17 @@ public function extractSubDocuments(array|InvoiceSplitterV1InvoicePageGroups $pa /** * Extracts invoices as complete PDFs from the document. * - * @param array>|InvoiceSplitterV1InvoicePageGroups $pageIndexes List of sub-lists of pages to keep. + * @param array> $pageIndexes List of sub-lists of pages to keep. * @param boolean $strict Whether to trust confidence scores or not. * * @return ExtractedPdf[] a list of extracted invoices */ - public function extractInvoices(array|InvoiceSplitterV1InvoicePageGroups $pageIndexes, bool $strict = false): array + public function extractInvoices(array $pageIndexes, bool $strict = false): array { if (empty($pageIndexes)) { return []; } - if (!$strict) { - $indexes = array_map(static fn($invoicePageIndexes) => $invoicePageIndexes->pageIndexes, (array) $pageIndexes); - - return $this->extractSubDocuments($indexes); - } - if (is_array($pageIndexes[0])) { - return $this->extractSubDocuments($pageIndexes); - } - - $correctPageIndexes = []; - $currentList = []; - $previousConfidence = null; - - $i = 0; - foreach ($pageIndexes as $pageIndex) { - $confidence = $pageIndex->confidence; - $pageList = $pageIndex->pageIndexes; - - if ($confidence >= 0.5 && null === $previousConfidence) { - $currentList = $pageList; - } elseif ($confidence >= 0.5 && $i !== count($pageIndexes) - 1) { - if (!empty($currentList)) { - $correctPageIndexes[] = $currentList; - } - $currentList = $pageList; - } elseif ($confidence < 0.5 && $i === count($pageIndexes) - 1) { - $currentList = array_merge($currentList, $pageList); - if (!empty($currentList)) { - $correctPageIndexes[] = $currentList; - } - } else { - if (!empty($currentList)) { - $correctPageIndexes[] = $currentList; - } - $correctPageIndexes[] = $pageList; - } - - $previousConfidence = $confidence; - ++$i; - } - - return $this->extractSubDocuments($correctPageIndexes); + return $this->extractSubDocuments($pageIndexes); } /** diff --git a/src/V1/Client.php b/src/V1/Client.php index 21b8a02a..a5fdc433 100644 --- a/src/V1/Client.php +++ b/src/V1/Client.php @@ -16,14 +16,14 @@ use Mindee\Error\ErrorCode; use Mindee\Error\MindeeApiException; use Mindee\Error\MindeeException; -use Mindee\Error\V1\MindeeV1ClientException; -use Mindee\Error\V1\MindeeV1HttpException; use Mindee\Input\InputSource; use Mindee\Input\LocalInputSource; use Mindee\Input\LocalResponse; use Mindee\Input\PageOptions; use Mindee\V1\ClientOptions\PredictMethodOptions; use Mindee\V1\ClientOptions\WorkflowOptions; +use Mindee\V1\Error\MindeeV1ClientException; +use Mindee\V1\Error\MindeeV1HttpException; use Mindee\V1\Http\Endpoint; use Mindee\V1\Http\MindeeApi; use Mindee\V1\Http\MindeeWorkflowApi; diff --git a/src/Error/V1/MindeeV1ClientException.php b/src/V1/Error/MindeeV1ClientException.php similarity index 89% rename from src/Error/V1/MindeeV1ClientException.php rename to src/V1/Error/MindeeV1ClientException.php index 2eba70f6..febb5c02 100644 --- a/src/Error/V1/MindeeV1ClientException.php +++ b/src/V1/Error/MindeeV1ClientException.php @@ -7,7 +7,7 @@ * Mindee Client Exceptions. */ -namespace Mindee\Error\V1; +namespace Mindee\V1\Error; use Mindee\Error\MindeeException; diff --git a/src/Error/V1/MindeeV1HttpException.php b/src/V1/Error/MindeeV1HttpException.php similarity index 99% rename from src/Error/V1/MindeeV1HttpException.php rename to src/V1/Error/MindeeV1HttpException.php index 383511ca..448800f1 100644 --- a/src/Error/V1/MindeeV1HttpException.php +++ b/src/V1/Error/MindeeV1HttpException.php @@ -7,7 +7,7 @@ * Mindee HTTP Exceptions. */ -namespace Mindee\Error\V1; +namespace Mindee\V1\Error; use Mindee\Error\ErrorCode; use Mindee\Error\MindeeException; diff --git a/src/V1/Image/ImageExtractor.php b/src/V1/Image/ImageExtractor.php index aa8a88d7..9a71af84 100644 --- a/src/V1/Image/ImageExtractor.php +++ b/src/V1/Image/ImageExtractor.php @@ -4,9 +4,102 @@ namespace Mindee\V1\Image; +use Mindee\Error\ErrorCode; +use Mindee\Error\MindeeGeometryException; +use Mindee\Geometry\Polygon; +use Mindee\Image\ExtractedImage; use Mindee\Image\ImageExtractor as BaseImageExtractor; +use Mindee\V1\Parsing\Standard\BaseField; + +use function sprintf; /** * Wrapper class for V1 of the BaseImageExtractor. */ -class ImageExtractor extends BaseImageExtractor {} +class ImageExtractor extends BaseImageExtractor +{ + /** + * Extract multiple images on a given page from a list of fields having position data. + * + * @param array> $fields List of Fields to extract. + * @param integer $pageIndex The page index to extract, begins at 0. + * @param null|string $outputName The base output filename, must have an image extension. + * + * @return array a list of extracted images + */ + public function extractImagesFromPage(array $fields, int $pageIndex, ?string $outputName = null): array + { + $outputName ??= $this->filename; + return $this->extractFromPage($fields, $pageIndex, $outputName); + } + + /** + * Extracts a single image from a Position field. + * + * @param BaseField $field The field to extract. + * @param integer $pageIndex The page index to extract, begins at 0. + * @param integer $index The index to use for naming the extracted image. + * @param string $filename The output filename. + * @param string $format The output format. + * + * @return null|ExtractedImage The extracted image, or null if the field does not have valid position data. + * + * @throws MindeeGeometryException Throws if a field does not contain positional data. + */ + public function extractImage( + BaseField $field, + int $pageIndex, + int $index, + string $filename, + string $format + ): ?ExtractedImage { + $polygon = null; + + if (!empty($field->polygon)) { + $polygon = $field->polygon; + } elseif (!empty($field->boundingBox)) { + $polygon = $field->boundingBox; + } elseif (!empty($field->quadrangle)) { + $polygon = $field->quadrangle; + } elseif (!empty($field->rectangle)) { + $polygon = $field->rectangle; + } + + if (null === $polygon) { + throw new MindeeGeometryException( + 'Provided field has no valid position data.', + ErrorCode::GEOMETRIC_OPERATION_FAILED + ); + } + + return $this->extractPolygonFromPage($polygon, $pageIndex, $index, $filename, $format); + } + + /** + * Extracts images from a page. + * + * @param array> $fields List of Fields to extract. + * @param integer $pageIndex The page index to extract, begins at 0. + * @param string $outputName Name of the created file. + * @param string $format The output format. + * + * @return array An array of created images + */ + protected function extractFromPage(array $fields, int $pageIndex, string $outputName, string $format = 'jpg'): array + { + $extractedImages = []; + + $i = 0; + foreach ($fields as $field) { + $filename = sprintf('%s_page%d-%d.%s', $outputName, $pageIndex, $i, $format); + $extractedImage = $this->extractImage($field, $pageIndex, $i, $filename, $format); + if (null !== $extractedImage) { + $extractedImages[] = $extractedImage; + } + ++$i; + } + + return $extractedImages; + } + +} diff --git a/src/V1/Pdf/PdfExtractor.php b/src/V1/Pdf/PdfExtractor.php new file mode 100644 index 00000000..ecdc9df9 --- /dev/null +++ b/src/V1/Pdf/PdfExtractor.php @@ -0,0 +1,95 @@ +>|InvoiceSplitterV1InvoicePageGroups $pageIndexes List of sub-lists of pages to keep. + * + * @return ExtractedPdf[] list of extracted documents + * + * @throws MindeePdfException Throws if FDPF/FPDI wasn't able to handle the pdf during the extraction. + * @throws InvalidArgumentException Throws if invalid indexes are provided. + */ + public function extractSubDocuments(array|InvoiceSplitterV1InvoicePageGroups $pageIndexes): array + { + if (is_array($pageIndexes[0])) { + $indexes = $pageIndexes; + } else { + $indexes = array_map(static fn($pageIndex) => $pageIndex->pageIndexes, (array) $pageIndexes); + } + return parent::extractSubDocuments($indexes); + } + + + /** + * Extracts invoices as complete PDFs from the document. + * + * @param array>|InvoiceSplitterV1InvoicePageGroups $pageIndexes List of sub-lists of pages to keep. + * @param boolean $strict Whether to trust confidence scores or not. + * + * @return ExtractedPdf[] a list of extracted invoices + */ + public function extractInvoices(array|InvoiceSplitterV1InvoicePageGroups $pageIndexes, bool $strict = false): array + { + if (empty($pageIndexes)) { + return []; + } + if (!$strict) { + $indexes = array_map(static fn($invoicePageIndexes) => $invoicePageIndexes->pageIndexes, (array) $pageIndexes); + + return $this->extractSubDocuments($indexes); + } + if (is_array($pageIndexes[0])) { + return parent::extractInvoices($pageIndexes, $strict); + } + + $correctPageIndexes = []; + $currentList = []; + $previousConfidence = null; + + $i = 0; + foreach ($pageIndexes as $pageIndex) { + $confidence = $pageIndex->confidence; + $pageList = $pageIndex->pageIndexes; + + if ($confidence >= 0.5 && null === $previousConfidence) { + $currentList = $pageList; + } elseif ($confidence >= 0.5 && $i !== count($pageIndexes) - 1) { + if (!empty($currentList)) { + $correctPageIndexes[] = $currentList; + } + $currentList = $pageList; + } elseif ($confidence < 0.5 && $i === count($pageIndexes) - 1) { + $currentList = array_merge($currentList, $pageList); + if (!empty($currentList)) { + $correctPageIndexes[] = $currentList; + } + } else { + if (!empty($currentList)) { + $correctPageIndexes[] = $currentList; + } + $correctPageIndexes[] = $pageList; + } + + $previousConfidence = $confidence; + ++$i; + } + + return $this->extractSubDocuments($correctPageIndexes); + } +} diff --git a/src/Error/V2/MindeeV2HttpException.php b/src/V2/Error/MindeeV2HttpException.php similarity index 97% rename from src/Error/V2/MindeeV2HttpException.php rename to src/V2/Error/MindeeV2HttpException.php index 21be8d06..3fe9c708 100644 --- a/src/Error/V2/MindeeV2HttpException.php +++ b/src/V2/Error/MindeeV2HttpException.php @@ -2,7 +2,7 @@ declare(strict_types=1); -namespace Mindee\Error\V2; +namespace Mindee\V2\Error; use Mindee\Error\MindeeException; use Mindee\V2\Parsing\Error\ErrorItem; diff --git a/src/Error/V2/MindeeV2HttpUnknownException.php b/src/V2/Error/MindeeV2HttpUnknownException.php similarity index 96% rename from src/Error/V2/MindeeV2HttpUnknownException.php rename to src/V2/Error/MindeeV2HttpUnknownException.php index 66bc0c84..8ed4ea46 100644 --- a/src/Error/V2/MindeeV2HttpUnknownException.php +++ b/src/V2/Error/MindeeV2HttpUnknownException.php @@ -2,7 +2,7 @@ declare(strict_types=1); -namespace Mindee\Error\V2; +namespace Mindee\V2\Error; use Mindee\V2\Parsing\Error\ErrorResponse; diff --git a/src/V2/Http/MindeeApiV2.php b/src/V2/Http/MindeeApiV2.php index 67c24af5..b1efefc4 100644 --- a/src/V2/Http/MindeeApiV2.php +++ b/src/V2/Http/MindeeApiV2.php @@ -13,12 +13,12 @@ use Mindee\Error\ErrorCode; use Mindee\Error\MindeeApiException; use Mindee\Error\MindeeException; -use Mindee\Error\V2\MindeeV2HttpException; -use Mindee\Error\V2\MindeeV2HttpUnknownException; use Mindee\Input\InputSource; use Mindee\Input\LocalInputSource; use Mindee\Input\UrlInputSource; use Mindee\V2\ClientOptions\BaseParameters; +use Mindee\V2\Error\MindeeV2HttpException; +use Mindee\V2\Error\MindeeV2HttpUnknownException; use Mindee\V2\Parsing\Error\ErrorResponse; use Mindee\V2\Parsing\Inference\BaseResponse; use Mindee\V2\Parsing\Job\JobResponse; @@ -30,7 +30,6 @@ use function call_user_func; use function dirname; -use const Mindee\V1\Http\API_KEY_ENV_NAME; use const Mindee\VERSION; // phpcs:disable diff --git a/tests/ClientTest.php b/tests/ClientTest.php index 262dd542..01910820 100644 --- a/tests/ClientTest.php +++ b/tests/ClientTest.php @@ -5,13 +5,13 @@ use Mindee\ClientOptions\PollingOptions; use Mindee\Error\MindeeApiException; use Mindee\Error\MindeeMimeTypeException; -use Mindee\Error\V1\MindeeV1HttpException; use Mindee\Input\LocalResponse; use Mindee\Input\PageOptions; use Mindee\Input\PathInput; use Mindee\Input\UrlInputSource; use Mindee\V1\Client; use Mindee\V1\ClientOptions\PredictMethodOptions; +use Mindee\V1\Error\MindeeV1HttpException; use Mindee\V1\Product\Generated\GeneratedV1; use Mindee\V1\Product\Invoice\InvoiceV4; use Mindee\V1\Product\InvoiceSplitter\InvoiceSplitterV1; diff --git a/tests/V1/Error/MindeeHttpExceptionTest.php b/tests/V1/Error/MindeeHttpExceptionTest.php index e5db2bc3..de85d894 100644 --- a/tests/V1/Error/MindeeHttpExceptionTest.php +++ b/tests/V1/Error/MindeeHttpExceptionTest.php @@ -4,9 +4,9 @@ namespace V1\Error; -use Mindee\Error\V1\MindeeV1HttpException; use Mindee\Input\PathInput; use Mindee\V1\Client; +use Mindee\V1\Error\MindeeV1HttpException; use Mindee\V1\Product\Invoice\InvoiceV4; use PHPUnit\Framework\TestCase; use TestingUtilities; diff --git a/tests/V1/Image/InvoiceSplitterAutoExtractionTestFunctional.php b/tests/V1/Image/InvoiceSplitterAutoExtractionTestFunctional.php index 6d52da49..dcc932ff 100644 --- a/tests/V1/Image/InvoiceSplitterAutoExtractionTestFunctional.php +++ b/tests/V1/Image/InvoiceSplitterAutoExtractionTestFunctional.php @@ -5,7 +5,7 @@ namespace V1\Image; use Mindee\Input\PathInput; -use Mindee\Pdf\PdfExtractor; +use Mindee\V1\Pdf\PdfExtractor; use Mindee\V1\Client; use Mindee\V1\Parsing\Common\Document; use Mindee\V1\Product\Invoice\InvoiceV4; diff --git a/tests/V1/Pdf/PdfExtractorTest.php b/tests/V1/Pdf/PdfExtractorTest.php index f9858b16..653b21d8 100644 --- a/tests/V1/Pdf/PdfExtractorTest.php +++ b/tests/V1/Pdf/PdfExtractorTest.php @@ -6,7 +6,7 @@ use Mindee\Input\LocalResponse; use Mindee\Input\PathInput; -use Mindee\Pdf\PdfExtractor; +use Mindee\V1\Pdf\PdfExtractor; use Mindee\V1\Client; use Mindee\V1\Product\InvoiceSplitter\InvoiceSplitterV1; use PHPUnit\Framework\TestCase; diff --git a/tests/V2/ClientV2TestFunctional.php b/tests/V2/ClientV2TestFunctional.php index 578ea599..ea23eb7d 100644 --- a/tests/V2/ClientV2TestFunctional.php +++ b/tests/V2/ClientV2TestFunctional.php @@ -4,10 +4,10 @@ namespace V2; -use Mindee\Error\V2\MindeeV2HttpException; use Mindee\Input\PathInput; use Mindee\Input\UrlInputSource; use Mindee\V2\Client; +use Mindee\V2\Error\MindeeV2HttpException; use Mindee\V2\Product\Extraction\ExtractionResponse; use Mindee\V2\Product\Extraction\Params\ExtractionParameters; use PHPUnit\Framework\TestCase;