From 905140979cc92c579396324563a5caae09b30fb5 Mon Sep 17 00:00:00 2001 From: Michael Boelen Date: Sat, 1 Jul 2023 11:34:27 +0200 Subject: [PATCH] Uitbreiden link-checker --- scripts/check-all-links.sh | 39 +++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/scripts/check-all-links.sh b/scripts/check-all-links.sh index 09826bd..83ab005 100755 --- a/scripts/check-all-links.sh +++ b/scripts/check-all-links.sh @@ -4,16 +4,37 @@ set -eu if [ -f ./check-all-links.tmp ]; then rm ./check-all-links.tmp; fi -if [ -d ./public ]; then - grep -ri --only-matching --no-filename "https://[a-zA-Z0-9]*\.\([a-z0-9\.\_/~-]\)*" ./public | grep "^http" | sed 's/\\n.*//g' | sort --unique > check-all-links.tmp - for URL in $(cat ./check-all-links.tmp); do - HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" "${URL}") - if [ ! "${HTTP_CODE}" = "200" ]; then - echo "[WARNING] ${URL} (HTTP: ${HTTP_CODE})" - fi - sleep 5 - done + +if [ -d ./public ]; then + + # Recursief zoeken, case-insensitive, alleen de woorden die matchen, bestandsnaam niet tonen + # Alleen de items tonen die starten met http + # Strip items die eindigen met een \ (o.a. uit feed.json) + # Strip items die eindigen met een \n en aanvullende tekst (b.v. in abstracts) + # Negeer items van GitHub/Schema.org + # Sorteren en uniek maken van de lijst + grep -ri --only-matching --no-filename "https://[a-zA-Z0-9]*\.\([a-z0-9\.\_/~-]\)*" ./public \ + | grep "^http" \ + | sed 's/\\$//' \ + | sed 's/\\n.*$//' \ + | grep -v 'https://github.com/NLUUG/website' \ + | grep -v 'https://schema.org' \ + | sort --unique > check-all-links.tmp + + # URL's testen + if [ -f ./check-all-links.tmp ]; then + for URL in $(cat ./check-all-links.tmp); do + HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" "${URL}") + if [ ! "${HTTP_CODE}" = "200" ]; then + echo "[WARNING] ${URL} (HTTP: ${HTTP_CODE})" + fi + sleep 2 + done + else + echo "Kan bestand met links niet vinden." 
+ exit 1 + fi else echo "Dit script dient gedraaid te worden in de hoofddirectory van de website." exit 1