diff --git a/src/core/config/Categories.json b/src/core/config/Categories.json index 36576b273..1891c4600 100755 --- a/src/core/config/Categories.json +++ b/src/core/config/Categories.json @@ -168,6 +168,7 @@ "ops": [ "Encode text", "Decode text", + "Remove Diacritics", "Unescape Unicode Characters" ] }, diff --git a/src/core/operations/RemoveDiacritics.mjs b/src/core/operations/RemoveDiacritics.mjs new file mode 100644 index 000000000..217fafe1b --- /dev/null +++ b/src/core/operations/RemoveDiacritics.mjs @@ -0,0 +1,41 @@ +/** + * @author Klaxon [klaxon@veyr.com] + * @copyright Crown Copyright 2018 + * @license Apache-2.0 + */ + +import Operation from "../Operation"; + +/** + * Remove Diacritics operation. Decomposes the input to Unicode NFD and deletes combining marks in the range U+0300 to U+036F. + */ +class RemoveDiacritics extends Operation { + + /** + * RemoveDiacritics constructor. Declares string input and output types and an empty argument list. + */ + constructor() { + super(); + + this.name = "Remove Diacritics"; + this.module = "Default"; + this.description = "Replaces accented characters with their latin character equivalent."; + this.infoURL = "https://wikipedia.org/wiki/Diacritic"; + this.inputType = "string"; + this.outputType = "string"; + this.args = []; + } + + /** + * @param {string} input - Text to strip diacritics from + * @param {Object[]} args - Not read by this operation + * @returns {string} NFD-normalised input with code points U+0300 to U+036F removed; the result is not recomposed to NFC + */ + run(input, args) { + // reference: https://stackoverflow.com/questions/990904/remove-accents-diacritics-in-a-string-in-javascript/37511463 + return input.normalize("NFD").replace(/[\u0300-\u036f]/g, ""); + } + +} + +export default RemoveDiacritics; diff --git a/test/index.mjs b/test/index.mjs index 7d60618e9..e40ad9d0b 100644 --- a/test/index.mjs +++ b/test/index.mjs @@ -63,6 +63,7 @@ import "./tests/operations/ParseIPRange"; import "./tests/operations/PowerSet"; import "./tests/operations/Regex"; import "./tests/operations/Register"; +import "./tests/operations/RemoveDiacritics"; import "./tests/operations/Rotate"; import "./tests/operations/SeqUtils"; import "./tests/operations/SetDifference"; diff --git a/test/tests/operations/RemoveDiacritics.mjs 
b/test/tests/operations/RemoveDiacritics.mjs new file mode 100644 index 000000000..52bf188fe --- /dev/null +++ b/test/tests/operations/RemoveDiacritics.mjs @@ -0,0 +1,23 @@ + +/** + * Remove Diacritics tests. Covers grave, acute, circumflex, tilde, diaeresis and ring accents on Latin letters in both cases. + * + * @author Klaxon [klaxon@veyr.com] + * @copyright Crown Copyright 2017 + * @license Apache-2.0 + */ +import TestRegister from "../../TestRegister"; + +TestRegister.addTests([ + { + name: "Remove Diacritics", + input: "\xe0, \xe8, \xec, \xf2, \xf9 \xc0, \xc8, \xcc, \xd2, \xd9\n\xe1, \xe9, \xed, \xf3, \xfa, \xfd \xc1, \xc9, \xcd, \xd3, \xda, \xdd\n\xe2, \xea, \xee, \xf4, \xfb \xc2, \xca, \xce, \xd4, \xdb\n\xe3, \xf1, \xf5 \xc3, \xd1, \xd5\n\xe4, \xeb, \xef, \xf6, \xfc, \xff \xc4, \xcb, \xcf, \xd6, \xdc, \u0178\n\xe5, \xc5", + expectedOutput: "a, e, i, o, u A, E, I, O, U\na, e, i, o, u, y A, E, I, O, U, Y\na, e, i, o, u A, E, I, O, U\na, n, o A, N, O\na, e, i, o, u, y A, E, I, O, U, Y\na, A", + recipeConfig: [ + { + "op": "Remove Diacritics", + "args": [] + }, + ], + }, +]);