mirror of
https://github.com/trezor/trezor-suite.git
synced 2026-03-03 05:55:03 +01:00
feat(utils): add extractUrlsFromText util
This commit is contained in:
35
packages/utils/src/extractUrlsFromText.ts
Normal file
35
packages/utils/src/extractUrlsFromText.ts
Normal file
@@ -0,0 +1,35 @@
|
||||
/**
 * Matches either of two URL shapes (global, case-insensitive):
 *
 * 1. Text starting with `http://`, `https://` or `www.` followed by URL characters.
 *    The match must end on a word boundary, so punctuation that merely follows
 *    the URL in a sentence (e.g. `,` before a space) is not part of the match.
 * 2. A bare dotted host name such as `bit.ly`, whose last label is at least two letters.
 *    Only the host is matched; a path that follows it stays in the surrounding text.
 */
const URL_REGEX =
    /\b(?:https?:\/\/|www\.)[a-zA-Z0-9-._~:/?#[\]@!$&'()*+,;=%]+\b|(?:[a-zA-Z0-9-]+\.)+[a-zA-Z]{2,}(?=\b|\s|$|\])/gi;

/**
 * Splits `text` into the URLs it contains and the plain-text fragments around them.
 *
 * Empty fragments are not emitted: text that starts with a URL has no leading `''`
 * in `textParts`, and two URLs with nothing between them produce no fragment.
 * The one exception is text that consists solely of URLs, where `textParts` is `['']`
 * so that it is never empty when at least one URL was found.
 *
 * @param text - Arbitrary text, e.g. a token name or a message.
 * @returns `urls` in order of appearance and `textParts` holding the non-empty
 * text fragments in order of appearance. Both arrays are empty for an empty `text`.
 */
export const extractUrlsFromText = (text: string): { textParts: string[]; urls: string[] } => {
    const urls: string[] = [];
    const textParts: string[] = [];
    let lastIndex = 0;

    // matchAll iterates over a copy of the regex, so the shared global URL_REGEX
    // carries no lastIndex state from one call to the next.
    for (const match of text.matchAll(URL_REGEX)) {
        const url = match[0];
        // `index` is typed as optional. Falling back to the current position keeps the
        // offsets monotonic instead of corrupting the following slices with a sentinel.
        const index = match.index ?? lastIndex;

        // Capture the text between the previous URL (or the start) and this URL.
        if (lastIndex < index) {
            textParts.push(text.slice(lastIndex, index));
        }

        urls.push(url);
        lastIndex = index + url.length;
    }

    // Capture any remaining text after the last URL.
    if (lastIndex < text.length) {
        textParts.push(text.slice(lastIndex));
    }

    // The text was nothing but URLs: keep textParts non-empty for callers.
    if (textParts.length === 0 && urls.length > 0) {
        textParts.push('');
    }

    return { textParts, urls };
};
|
||||
@@ -43,3 +43,4 @@ export * from './logs';
|
||||
export * from './logsManager';
|
||||
export * from './bigNumber';
|
||||
export * from './throttler';
|
||||
export * from './extractUrlsFromText';
|
||||
|
||||
111
packages/utils/tests/extractUrlsFromText.test.ts
Normal file
111
packages/utils/tests/extractUrlsFromText.test.ts
Normal file
@@ -0,0 +1,111 @@
|
||||
import { extractUrlsFromText } from '../src/extractUrlsFromText';
|
||||
|
||||
describe('extractUrlsFromText', () => {
    // One row per test: title, input text, expected text fragments and expected URLs.
    const scenarios: Array<{
        title: string;
        input: string;
        expectedTextParts: string[];
        expectedUrls: string[];
    }> = [
        {
            title: 'should return textParts and urls for text with URLs',
            input: 'Go to this page to claim your rewards: http://example.com/url and also check out www.phishing-site.com',
            expectedTextParts: ['Go to this page to claim your rewards: ', ' and also check out '],
            expectedUrls: ['http://example.com/url', 'www.phishing-site.com'],
        },
        {
            title: 'should handle text without URLs',
            input: 'This is a message very nice Ethereum token',
            expectedTextParts: ['This is a message very nice Ethereum token'],
            expectedUrls: [],
        },
        {
            title: 'should not match invalid URLs like "2.0" in token name',
            input: 'Liquid staked Ether 2.0',
            expectedTextParts: ['Liquid staked Ether 2.0'],
            expectedUrls: [],
        },
        {
            title: 'should handle text with multiple URLs related to Ethereum tokens correctly',
            input: 'Visit https://etherscan.io, http://mycrypto.com, and www.ethereum.org to claim your tokens.',
            expectedTextParts: ['Visit ', ', ', ', and ', ' to claim your tokens.'],
            expectedUrls: ['https://etherscan.io', 'http://mycrypto.com', 'www.ethereum.org'],
        },
        {
            title: 'should correctly extract URLs from phishing messages related to Ethereum',
            input: 'Attention! Go to http://phishing-site.com to secure your Ethereum wallet immediately.',
            expectedTextParts: [
                'Attention! Go to ',
                ' to secure your Ethereum wallet immediately.',
            ],
            expectedUrls: ['http://phishing-site.com'],
        },
        {
            title: 'should not match any url in case of USDT name',
            input: 'Tether USD',
            expectedTextParts: ['Tether USD'],
            expectedUrls: [],
        },
        {
            title: 'should not match any url in case of USDT symbol',
            input: 'USDT',
            expectedTextParts: ['USDT'],
            expectedUrls: [],
        },
        {
            title: 'should match url in case of just url',
            input: 'USDT.io',
            expectedTextParts: [''],
            expectedUrls: ['USDT.io'],
        },
        {
            title: 'should match url in case of scam',
            input: '$ USDCXmas.com',
            expectedTextParts: ['$ '],
            expectedUrls: ['USDCXmas.com'],
        },
        {
            title: 'should match url in case scam name with emoji',
            input: '🎁10K$ gift at [bit.ly/tpepe]',
            expectedTextParts: ['🎁10K$ gift at [', '/tpepe]'],
            expectedUrls: ['bit.ly'],
        },
        {
            title: 'should not match url in of contract address',
            input: '0xcDa4e840411C00a614aD9205CAEC807c7458a0E3',
            expectedTextParts: ['0xcDa4e840411C00a614aD9205CAEC807c7458a0E3'],
            expectedUrls: [],
        },
        {
            title: 'should match two urls next to each other',
            input: 'Visit trezor.io ledger.com',
            expectedTextParts: ['Visit ', ' '],
            expectedUrls: ['trezor.io', 'ledger.com'],
        },
    ];

    // Register the scenarios in declaration order, one `it` per row.
    scenarios.forEach(({ title, input, expectedTextParts, expectedUrls }) => {
        it(title, () => {
            const result = extractUrlsFromText(input);

            expect(result.textParts).toEqual(expectedTextParts);
            expect(result.urls).toEqual(expectedUrls);
        });
    });
});
|
||||
Reference in New Issue
Block a user