Skip to content
This repository has been archived by the owner on Apr 6, 2023. It is now read-only.

Commit

Permalink
ci(crawl): ignore <a> links without hrefs (#7925)
Browse files: browse the repository at this point in the history.
Co-authored-by: Alexander Lichter <github@lichter.io>
  • Loading branch information
danielroe and manniL committed Oct 3, 2022
1 parent d25dafe commit c116303
Showing 1 changed file with 12 additions and 1 deletion.
13 changes: 12 additions & 1 deletion scripts/crawl.mjs
Original file line number | Diff line number | Diff line change
Expand Up @@ -32,6 +32,13 @@ const erroredUrls = new Set()
* @param {string | undefined} referrer The referring page
*/
function queue (path, referrer) {
if (!path) {
const message = chalk.red(`${chalk.bold('✗')} ${referrer} linked to empty href`)
if (isCI) { actions.error(message) }
logger.log(message)
return
}

if (urlsToOmit.some(url => path.startsWith(url))) { return }

const { pathname, origin } = new URL(path, referrer)
Expand Down Expand Up @@ -80,7 +87,11 @@ const crawler = new Crawler({
return done()
}

$('a:not([href*=mailto])').each((_, el) => 'attribs' in el && queue(el.attribs.href, uri))
$('a:not([href*=mailto]):not([href*=tel])').each((_, el) => {
if ('attribs' in el && 'href' in el.attribs) {
queue(el.attribs.href, uri)
}
})

logger.success(chalk.green(uri))
logger.debug(uri, `[${crawler.queueSize} / ${urls.size}]`)
Expand Down

0 comments on commit c116303

Please sign in to comment.