Compare commits

...

3 Commits

Author SHA1 Message Date
Jan Böhmer
e36119e4ce Started working on price import 2025-03-30 16:36:20 +02:00
Jan Böhmer
3626570a0b Use panther to retrieve more data from Aliexpress 2025-03-25 23:14:58 +01:00
Jan Böhmer
2fdd837354 Started working on an aliexpress provider 2025-03-25 21:55:54 +01:00
7 changed files with 502 additions and 69 deletions

View File

@@ -4,6 +4,8 @@ APP_SECRET='$ecretf0rt3st'
SYMFONY_DEPRECATIONS_HELPER=999999
PANTHER_APP_ENV=panther
PANTHER_ERROR_SCREENSHOT_DIR=./var/error-screenshots
PANTHER_APP_ENV=panther
PANTHER_ERROR_SCREENSHOT_DIR=./var/error-screenshots
DATABASE_URL="sqlite:///%kernel.project_dir%/var/app_test.db"
# Doctrine automatically adds an _test suffix to database name in test env

2
.gitignore vendored
View File

@@ -8,6 +8,8 @@
/vendor/
###< symfony/framework-bundle ###
drivers/
###> symfony/phpunit-bridge ###
.phpunit
.phpunit.result.cache

View File

@@ -17,6 +17,7 @@
"brick/math": "0.12.1 as 0.11.0",
"composer/ca-bundle": "^1.3",
"composer/package-versions-deprecated": "^1.11.99.5",
"dbrekelmans/bdi": "^1.4",
"doctrine/data-fixtures": "^2.0.0",
"doctrine/dbal": "^4.0.0",
"doctrine/doctrine-bundle": "^2.0",
@@ -65,6 +66,7 @@
"symfony/http-kernel": "6.4.*",
"symfony/mailer": "6.4.*",
"symfony/monolog-bundle": "^3.1",
"symfony/panther": "^2.2",
"symfony/polyfill-php82": "^1.28",
"symfony/process": "6.4.*",
"symfony/property-access": "6.4.*",

342
composer.lock generated
View File

@@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
"content-hash": "75643d42e05fce4684644d375bff2d0a",
"content-hash": "d894170eb8b24ff5376bf32a2fa71204",
"packages": [
{
"name": "amphp/amp",
@@ -1569,6 +1569,55 @@
},
"time": "2024-04-12T12:12:48+00:00"
},
{
"name": "dbrekelmans/bdi",
"version": "1.4.0",
"source": {
"type": "git",
"url": "https://github.com/dbrekelmans/bdi.git",
"reference": "fa2ff9b5ed0508ddf5cd574f9dfa6fea954a9acd"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/dbrekelmans/bdi/zipball/fa2ff9b5ed0508ddf5cd574f9dfa6fea954a9acd",
"reference": "fa2ff9b5ed0508ddf5cd574f9dfa6fea954a9acd",
"shasum": ""
},
"require": {
"ext-json": "*",
"ext-zip": "*",
"ext-zlib": "*",
"php": "^8.1"
},
"bin": [
"bdi",
"bdi.phar"
],
"type": "library",
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Daniël Brekelmans",
"homepage": "https://github.com/dbrekelmans"
},
{
"name": "Contributors",
"homepage": "https://github.com/dbrekelmans/bdi/graphs/contributors"
}
],
"description": "PHAR distribution of dbrekelmans/browser-driver-installer.",
"homepage": "https://github.com/dbrekelmans/bdi",
"keywords": [
"browser-driver-installer"
],
"support": {
"source": "https://github.com/dbrekelmans/bdi/tree/1.4.0"
},
"time": "2024-12-12T18:36:47+00:00"
},
{
"name": "doctrine/collections",
"version": "2.3.0",
@@ -6269,6 +6318,72 @@
},
"time": "2024-03-15T13:55:21+00:00"
},
{
"name": "php-webdriver/webdriver",
"version": "1.15.2",
"source": {
"type": "git",
"url": "https://github.com/php-webdriver/php-webdriver.git",
"reference": "998e499b786805568deaf8cbf06f4044f05d91bf"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/php-webdriver/php-webdriver/zipball/998e499b786805568deaf8cbf06f4044f05d91bf",
"reference": "998e499b786805568deaf8cbf06f4044f05d91bf",
"shasum": ""
},
"require": {
"ext-curl": "*",
"ext-json": "*",
"ext-zip": "*",
"php": "^7.3 || ^8.0",
"symfony/polyfill-mbstring": "^1.12",
"symfony/process": "^5.0 || ^6.0 || ^7.0"
},
"replace": {
"facebook/webdriver": "*"
},
"require-dev": {
"ergebnis/composer-normalize": "^2.20.0",
"ondram/ci-detector": "^4.0",
"php-coveralls/php-coveralls": "^2.4",
"php-mock/php-mock-phpunit": "^2.0",
"php-parallel-lint/php-parallel-lint": "^1.2",
"phpunit/phpunit": "^9.3",
"squizlabs/php_codesniffer": "^3.5",
"symfony/var-dumper": "^5.0 || ^6.0 || ^7.0"
},
"suggest": {
"ext-SimpleXML": "For Firefox profile creation"
},
"type": "library",
"autoload": {
"files": [
"lib/Exception/TimeoutException.php"
],
"psr-4": {
"Facebook\\WebDriver\\": "lib/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"description": "A PHP client for Selenium WebDriver. Previously facebook/webdriver.",
"homepage": "https://github.com/php-webdriver/php-webdriver",
"keywords": [
"Chromedriver",
"geckodriver",
"php",
"selenium",
"webdriver"
],
"support": {
"issues": "https://github.com/php-webdriver/php-webdriver/issues",
"source": "https://github.com/php-webdriver/php-webdriver/tree/1.15.2"
},
"time": "2024-11-21T15:12:59+00:00"
},
{
"name": "phpdocumentor/reflection-common",
"version": "2.2.0",
@@ -8157,6 +8272,74 @@
],
"time": "2024-10-25T15:07:50+00:00"
},
{
"name": "symfony/browser-kit",
"version": "v6.4.19",
"source": {
"type": "git",
"url": "https://github.com/symfony/browser-kit.git",
"reference": "ce95f3e3239159e7fa3be7690c6ce95a4714637f"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/symfony/browser-kit/zipball/ce95f3e3239159e7fa3be7690c6ce95a4714637f",
"reference": "ce95f3e3239159e7fa3be7690c6ce95a4714637f",
"shasum": ""
},
"require": {
"php": ">=8.1",
"symfony/dom-crawler": "^5.4|^6.0|^7.0"
},
"require-dev": {
"symfony/css-selector": "^5.4|^6.0|^7.0",
"symfony/http-client": "^5.4|^6.0|^7.0",
"symfony/mime": "^5.4|^6.0|^7.0",
"symfony/process": "^5.4|^6.0|^7.0"
},
"type": "library",
"autoload": {
"psr-4": {
"Symfony\\Component\\BrowserKit\\": ""
},
"exclude-from-classmap": [
"/Tests/"
]
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Fabien Potencier",
"email": "fabien@symfony.com"
},
{
"name": "Symfony Community",
"homepage": "https://symfony.com/contributors"
}
],
"description": "Simulates the behavior of a web browser, allowing you to make requests, click on links and submit forms programmatically",
"homepage": "https://symfony.com",
"support": {
"source": "https://github.com/symfony/browser-kit/tree/v6.4.19"
},
"funding": [
{
"url": "https://symfony.com/sponsor",
"type": "custom"
},
{
"url": "https://github.com/fabpot",
"type": "github"
},
{
"url": "https://tidelift.com/funding/github/packagist/symfony/symfony",
"type": "tidelift"
}
],
"time": "2025-02-14T11:23:16+00:00"
},
{
"name": "symfony/cache",
"version": "v6.4.19",
@@ -10610,6 +10793,95 @@
],
"time": "2024-11-20T10:57:02+00:00"
},
{
"name": "symfony/panther",
"version": "v2.2.0",
"source": {
"type": "git",
"url": "https://github.com/symfony/panther.git",
"reference": "b7e0f834c9046918972edb3dde2ecc4a20f6155e"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/symfony/panther/zipball/b7e0f834c9046918972edb3dde2ecc4a20f6155e",
"reference": "b7e0f834c9046918972edb3dde2ecc4a20f6155e",
"shasum": ""
},
"require": {
"ext-dom": "*",
"ext-libxml": "*",
"php": ">=8.0",
"php-webdriver/webdriver": "^1.8.2",
"symfony/browser-kit": "^5.4 || ^6.4 || ^7.0",
"symfony/dependency-injection": "^5.4 || ^6.4 || ^7.0",
"symfony/deprecation-contracts": "^2.4 || ^3",
"symfony/dom-crawler": "^5.4 || ^6.4 || ^7.0",
"symfony/http-client": "^6.4 || ^7.0",
"symfony/http-kernel": "^5.4 || ^6.4 || ^7.0",
"symfony/process": "^5.4 || ^6.4 || ^7.0"
},
"require-dev": {
"symfony/css-selector": "^5.4 || ^6.4 || ^7.0",
"symfony/framework-bundle": "^5.4 || ^6.4 || ^7.0",
"symfony/mime": "^5.4 || ^6.4 || ^7.0",
"symfony/phpunit-bridge": "^7.2.0"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-main": "2.0.x-dev"
}
},
"autoload": {
"psr-4": {
"Symfony\\Component\\Panther\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Kévin Dunglas",
"email": "dunglas@gmail.com",
"homepage": "https://dunglas.fr"
},
{
"name": "Symfony Community",
"homepage": "https://symfony.com/contributors"
}
],
"description": "A browser testing and web scraping library for PHP and Symfony.",
"homepage": "https://dunglas.fr",
"keywords": [
"e2e",
"scraping",
"selenium",
"symfony",
"testing",
"webdriver"
],
"support": {
"issues": "https://github.com/symfony/panther/issues",
"source": "https://github.com/symfony/panther/tree/v2.2.0"
},
"funding": [
{
"url": "https://www.panthera.org/donate",
"type": "custom"
},
{
"url": "https://github.com/dunglas",
"type": "github"
},
{
"url": "https://tidelift.com/funding/github/packagist/symfony/panther",
"type": "tidelift"
}
],
"time": "2025-01-30T13:11:55+00:00"
},
{
"name": "symfony/password-hasher",
"version": "v6.4.13",
@@ -18332,74 +18604,6 @@
],
"time": "2020-09-28T06:39:44+00:00"
},
{
"name": "symfony/browser-kit",
"version": "v6.4.19",
"source": {
"type": "git",
"url": "https://github.com/symfony/browser-kit.git",
"reference": "ce95f3e3239159e7fa3be7690c6ce95a4714637f"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/symfony/browser-kit/zipball/ce95f3e3239159e7fa3be7690c6ce95a4714637f",
"reference": "ce95f3e3239159e7fa3be7690c6ce95a4714637f",
"shasum": ""
},
"require": {
"php": ">=8.1",
"symfony/dom-crawler": "^5.4|^6.0|^7.0"
},
"require-dev": {
"symfony/css-selector": "^5.4|^6.0|^7.0",
"symfony/http-client": "^5.4|^6.0|^7.0",
"symfony/mime": "^5.4|^6.0|^7.0",
"symfony/process": "^5.4|^6.0|^7.0"
},
"type": "library",
"autoload": {
"psr-4": {
"Symfony\\Component\\BrowserKit\\": ""
},
"exclude-from-classmap": [
"/Tests/"
]
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Fabien Potencier",
"email": "fabien@symfony.com"
},
{
"name": "Symfony Community",
"homepage": "https://symfony.com/contributors"
}
],
"description": "Simulates the behavior of a web browser, allowing you to make requests, click on links and submit forms programmatically",
"homepage": "https://symfony.com",
"support": {
"source": "https://github.com/symfony/browser-kit/tree/v6.4.19"
},
"funding": [
{
"url": "https://symfony.com/sponsor",
"type": "custom"
},
{
"url": "https://github.com/fabpot",
"type": "github"
},
{
"url": "https://tidelift.com/funding/github/packagist/symfony/symfony",
"type": "tidelift"
}
],
"time": "2025-02-14T11:23:16+00:00"
},
{
"name": "symfony/debug-bundle",
"version": "v6.4.13",

View File

@@ -28,6 +28,7 @@
</testsuite>
</testsuites>
<extensions>
<extension class="Symfony\Component\Panther\ServerExtension" />
<extension class="DAMA\DoctrineTestBundle\PHPUnit\PHPUnitExtension"/>
</extensions>
<listeners>

View File

@@ -0,0 +1,213 @@
<?php
/*
* This file is part of Part-DB (https://github.com/Part-DB/Part-DB-symfony).
*
* Copyright (C) 2019 - 2025 Jan Böhmer (https://github.com/jbtronics)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
declare(strict_types=1);
namespace App\Services\InfoProviderSystem\Providers;
use App\Services\InfoProviderSystem\DTOs\PartDetailDTO;
use App\Services\InfoProviderSystem\DTOs\PriceDTO;
use App\Services\InfoProviderSystem\DTOs\PurchaseInfoDTO;
use App\Services\InfoProviderSystem\DTOs\SearchResultDTO;
use Facebook\WebDriver\Chrome\ChromeOptions;
use Facebook\WebDriver\WebDriverDimension;
use Symfony\Component\DependencyInjection\Attribute\Autowire;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Component\Panther\Client;
use Symfony\Component\Panther\DomCrawler\Link;
use Symfony\Contracts\HttpClient\HttpClientInterface;
/**
 * Info provider that scrapes part information from the Aliexpress web shop.
 *
 * The keyword search uses a plain HTTP request and parses the returned HTML.
 * The detail page is rendered by JavaScript, so it is loaded through a
 * chromedriver-controlled browser (symfony/panther).
 */
class AliexpressProvider implements InfoProviderInterface
{
    /** Absolute path of the chromedriver binary used to control the browser. */
    private readonly string $chromiumDriverPath;

    /**
     * @param HttpClientInterface $client     HTTP client used for the keyword search
     * @param string              $projectDir Project root; the driver is expected in its "drivers/" subdirectory
     */
    public function __construct(
        private readonly HttpClientInterface $client,
        #[Autowire('%kernel.project_dir%')]
        private readonly string $projectDir,
    ) {
        //The driver binary only carries the ".exe" suffix on Windows
        $driverName = PHP_OS_FAMILY === 'Windows' ? 'chromedriver.exe' : 'chromedriver';
        $this->chromiumDriverPath = $this->projectDir . '/drivers/' . $driverName;
    }

    /**
     * Returns the static metadata describing this provider.
     *
     * @return array<string, string>
     */
    public function getProviderInfo(): array
    {
        return [
            'name' => 'Aliexpress',
            'description' => 'Webscrapping from aliexpress.com to get part information',
            'url' => 'https://aliexpress.com/',
            //There is no enable switch for this provider (see isActive()), so no env variable is named here
            'disabled_help' => 'This provider is always active and needs no configuration',
        ];
    }

    /**
     * Returns the unique key identifying this provider.
     */
    public function getProviderKey(): string
    {
        return "aliexpress";
    }

    /**
     * This provider is unconditionally active.
     */
    public function isActive(): bool
    {
        return true;
    }

    /**
     * Returns the base URL of the shop, without a trailing slash.
     */
    public function getBaseURL(): string
    {
        return 'https://de.aliexpress.com';
    }

    /**
     * Searches the shop for the given keyword and returns the found products.
     *
     * Result cards that lack a link, a title or a recognizable product ID are skipped,
     * so a single malformed card does not abort the whole search.
     *
     * @return SearchResultDTO[]
     */
    public function searchByKeyword(string $keyword): array
    {
        $response = $this->client->request('GET', $this->getBaseURL() . '/wholesale', [
            'query' => [
                'SearchText' => $keyword,
                'CatId' => 0,
                'd' => 'y',
            ],
        ]);

        $dom = new Crawler($response->getContent());

        $results = [];

        //Iterate over each div.search-item-card-wrapper-gallery
        $dom->filter('div.search-item-card-wrapper-gallery')->each(function (Crawler $node) use (&$results): void {
            $link = $node->filter('a');
            $title = $node->filter('div[title]');

            //attr() throws on an empty node list, therefore check for the elements first
            if ($link->count() === 0 || $title->count() === 0) {
                return;
            }

            $href = $link->first()->attr('href');
            if ($href === null || $href === '') {
                return;
            }

            $productURL = $this->cleanProductURL($href);
            $productID = $this->extractProductID($productURL);

            //Skip results where we cannot extract a product ID
            if ($productID === null) {
                return;
            }

            $image = $node->filter('img');

            $results[] = new SearchResultDTO(
                provider_key: $this->getProviderKey(),
                provider_id: $productID,
                name: $title->attr('title') ?? '',
                description: "",
                preview_image_url: $image->count() > 0 ? $image->first()->attr('src') : null,
                provider_url: $productURL
            );
        });

        return $results;
    }

    /**
     * Strips the query string from a product URL and turns a protocol-relative
     * URL ("//host/path") into an absolute https URL.
     */
    private function cleanProductURL(string $url): string
    {
        $withoutQuery = explode('?', $url)[0];

        if (str_starts_with($withoutQuery, '//')) {
            return 'https:' . $withoutQuery;
        }

        return $withoutQuery;
    }

    /**
     * Extracts the numeric product ID, which precedes ".html" in a product URL.
     *
     * @return string|null The product ID, or null if the URL contains none
     */
    private function extractProductID(string $url): ?string
    {
        if (preg_match('/\/(\d+)\.html/', $url, $matches) !== 1) {
            return null;
        }

        return $matches[1];
    }

    /**
     * Extracts the first number from a price text and normalizes it to a string
     * that uses "." as decimal separator and has no thousands separators.
     *
     * @return string The normalized number, or "0" if the text contains no digits
     */
    private function parsePrice(string $priceText): string
    {
        //Require a leading digit, so that stray punctuation is never taken for a number
        if (preg_match('/\d[\d.,]*/', $priceText, $matches) !== 1) {
            return '0';
        }

        $number = rtrim($matches[0], '.,');
        $lastComma = strrpos($number, ',');
        $lastDot = strrpos($number, '.');

        if ($lastComma !== false && $lastDot !== false) {
            //Both separators are used: the one occurring last is the decimal separator
            if ($lastComma > $lastDot) {
                return str_replace(',', '.', str_replace('.', '', $number));
            }

            return str_replace(',', '', $number);
        }

        if ($lastComma !== false) {
            //Several commas can only be thousands separators, a single one is treated as decimal comma
            return substr_count($number, ',') > 1
                ? str_replace(',', '', $number)
                : str_replace(',', '.', $number);
        }

        return $number;
    }

    /**
     * Loads the product page in a browser and extracts name, description and price.
     *
     * @param string $id The Aliexpress product ID (digits only)
     *
     * @throws \InvalidArgumentException If $id does not consist solely of digits
     */
    public function getDetails(string $id): PartDetailDTO
    {
        //Only accept plain digits; is_numeric() would also allow values like "1e5", "-3" or "1.5"
        if (!ctype_digit($id)) {
            throw new \InvalidArgumentException("The id must be numeric");
        }

        $product_page = $this->getBaseURL() . "/item/{$id}.html";

        //Create panther client
        $chromeOptions = new ChromeOptions();
        //Disable W3C mode, to avoid issues with getting html() from elements. See https://github.com/symfony/panther/issues/478
        $chromeOptions->setExperimentalOption('w3c', false);

        $client = Client::createChromeClient(
            $this->chromiumDriverPath,
            options: ['capabilities' => [ChromeOptions::CAPABILITY => $chromeOptions]]
        );

        try {
            $client->manage()->deleteAllCookies();
            $client->manage()->window()->setSize(new WebDriverDimension(1920, 1080));

            $client->request('GET', $product_page);

            //Dismiss cookie consent
            $dom = $client->waitFor('div.global-gdpr-wrap button.btn-accept');
            $dom->filter('div.global-gdpr-wrap button.btn-accept')->first()->click();

            $dom = $client->waitFor('h1[data-pl="product-title"]');
            $name = $dom->filter('h1[data-pl="product-title"]')->text();

            //Click on the description button
            $dom->filter('a[href="#nav-description"]')->first()->click();

            $dom = $client->waitFor('#product-description');
            $description = $dom->filter('#product-description')->html();

            //Find price
            $dom = $client->waitFor('span.product-price-value');
            $price_str = $dom->filter('span.product-price-value')->text();
        } finally {
            //Always shut the browser down, otherwise a failed scrape leaves the process running
            $client->quit();
        }

        //Remove any script tags. This is just to prevent any weird output in the notes field, this is not really a security measure
        //preg_replace() returns null on failure; fall back to an empty note in that case
        $description = preg_replace('/<script\b[^>]*>(.*?)<\/script>/is', "", $description) ?? '';

        $priceDTO = new PriceDTO(
            minimum_discount_amount: 1,
            price: $this->parsePrice($price_str),
            currency_iso_code: "EUR"
        );

        $vendor_info = new PurchaseInfoDTO(
            distributor_name: "Aliexpress",
            order_number: $id,
            prices: [$priceDTO],
            product_url: $product_page
        );

        return new PartDetailDTO(
            provider_key: $this->getProviderKey(),
            provider_id: $id,
            name: $name,
            description: "",
            provider_url: $product_page,
            notes: $description,
            vendor_infos: [$vendor_info]
        );
    }

    /**
     * Returns the kinds of information this provider declares to supply.
     *
     * NOTE(review): PICTURE is declared, but getDetails() does not set any image;
     * only the search results carry a preview image. Confirm whether this is intended.
     *
     * @return ProviderCapabilities[]
     */
    public function getCapabilities(): array
    {
        return [
            ProviderCapabilities::BASIC,
            ProviderCapabilities::PICTURE,
            ProviderCapabilities::PRICE,
        ];
    }
}

View File

@@ -559,6 +559,15 @@
"symfony/options-resolver": {
"version": "v4.2.3"
},
"symfony/panther": {
"version": "2.2",
"recipe": {
"repo": "github.com/symfony/recipes",
"branch": "main",
"version": "1.0",
"ref": "bc2de681f79db177eac72d5b04c23bd59bea2b46"
}
},
"symfony/password-hasher": {
"version": "v5.3.8"
},