Skip to content

Commit

Permalink
fix($shared-utils): replace diacritics with regex (#1855)
Browse files Browse the repository at this point in the history
Co-authored-by: Sergey Larionov <sergeylarionov@cloud.upwork.com>
  • Loading branch information
larionov and Sergey Larionov committed Aug 19, 2020
1 parent dfe43f6 commit a03e93d
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 8 deletions.
30 changes: 30 additions & 0 deletions packages/@vuepress/shared-utils/__tests__/slugify.spec.ts
@@ -0,0 +1,30 @@
import slugify from '../src/slugify'

describe('slugify', () => {
  test('should slugify', () => {
    // Table of raw heading text (key) -> slug that slugify must produce (value).
    const expectedSlugs: Record<string, string> = {
      'Привет': 'привет',
      'Лед üäöß': 'лед-uaoß',
      'hangul 가': 'hangul-가',
      'ع': 'ع',
      'džℍΩ': 'dzhω',
      'カi⁹': 'カi9',
      // The single character ㌀ expands to the katakana word アパート in its
      // decomposed form (ハ followed by a combining mark); the expected value is
      // written percent-encoded so that exact code point sequence is preserved.
      '㌀': decodeURIComponent('%E3%82%A2%E3%83%8F%E3%82%9A%E3%83%BC%E3%83%88'),
      '¼': '_1⁄4',
      'džℍΩカi⁹¼': 'dzhωカi91⁄4',
      'Iлtèrnåtïonɑlíƶatï߀ԉ': 'iлternationɑliƶati߀ԉ',
      'Båcòл ípѕùm ðoɭ߀r ѕït aϻèt âùþê aԉᏧ߀üïlɭê ƃëéf culρá fïlèt ϻiǥnòn cuρiᏧatat ut êлim tòлɢùê.':
        'bacoл-ipѕum-ðoɭ߀r-ѕit-aϻet-auþe-aԉꮷ߀uilɭe-ƃeef-culρa-filet-ϻiǥnon-cuρiꮷatat-ut-eлim-toлɢue',
      'ᴎᴑᴅᴇȷʂ': 'ᴎᴑᴅᴇȷʂ',
      'hambúrguer': 'hamburguer',
      'hŒllœ': 'hœllœ',
      'Fußball': 'fußball',
      'ABCDEFGHIJKLMNOPQRSTUVWXYZé': 'abcdefghijklmnopqrstuvwxyze'
    }

    // Check every pair in insertion order; the first mismatch fails the test.
    for (const [rawText, slug] of Object.entries(expectedSlugs)) {
      expect(slugify(rawText)).toBe(slug)
    }
  })
})
1 change: 0 additions & 1 deletion packages/@vuepress/shared-utils/package.json
Expand Up @@ -31,7 +31,6 @@
},
"dependencies": {
"chalk": "^2.3.2",
"diacritics": "^1.3.0",
"escape-html": "^1.0.3",
"fs-extra": "^7.0.1",
"globby": "^9.2.0",
Expand Down
9 changes: 6 additions & 3 deletions packages/@vuepress/shared-utils/src/slugify.ts
@@ -1,14 +1,17 @@
// string.js's slugify drops non-ASCII characters, so we have to
// use a custom implementation here
import { remove as removeDiacritics } from 'diacritics'

// Patterns used by slugify's .replace() chain below. All are global (/g) so
// every occurrence in the input is processed.

// rControl: code points U+0000–U+001F (control characters), which slugify
// strips out. The lint rule is disabled because the range is intentional.
// eslint-disable-next-line no-control-regex
const rControl = /[\u0000-\u001f]/g
// rSpecial: a run of one or more whitespace, ASCII punctuation, curly-quote or
// en/em-dash characters; slugify replaces each whole run with a single '-'.
const rSpecial = /[\s~`!@#$%^&*()\-_+=[\]{}|\\;:"'“”‘’–—<>,.?/]+/g
// rCombining: code points U+0300–U+036F (combining diacritical marks). After
// NFKD normalization splits accented letters into base + mark, removing these
// leaves the unaccented base letter.
const rCombining = /[\u0300-\u036F]/g

export = function slugify (str: string): string {
return removeDiacritics(str)
// Remove control characters
// Split accented characters into components
return str.normalize('NFKD')
// Remove accents
.replace(rCombining, '')
// Remove control characters
.replace(rControl, '')
// Replace special characters
.replace(rSpecial, '-')
Expand Down
4 changes: 0 additions & 4 deletions yarn.lock
Expand Up @@ -5125,10 +5125,6 @@ dezalgo@^1.0.0:
asap "^2.0.0"
wrappy "1"

diacritics@^1.3.0:
version "1.3.0"
resolved "https://registry.yarnpkg.com/diacritics/-/diacritics-1.3.0.tgz#3efa87323ebb863e6696cebb0082d48ff3d6f7a1"

didyoumean@^1.2.1:
version "1.2.1"
resolved "https://registry.yarnpkg.com/didyoumean/-/didyoumean-1.2.1.tgz#e92edfdada6537d484d73c0172fd1eba0c4976ff"
Expand Down

0 comments on commit a03e93d

Please sign in to comment.