diff --git a/doc.zih.tu-dresden.de/util/grep-forbidden-words.sh b/doc.zih.tu-dresden.de/util/grep-forbidden-words.sh index eff2fecea5c8e24f3d3d36581a9d25104d572a7e..6d975eb0c7b0210c552bbc7923042a60949e8110 100755 --- a/doc.zih.tu-dresden.de/util/grep-forbidden-words.sh +++ b/doc.zih.tu-dresden.de/util/grep-forbidden-words.sh @@ -2,6 +2,35 @@ set -euo pipefail +scriptpath=${BASH_SOURCE[0]} +basedir=`dirname "$scriptpath"` +basedir=`dirname "$basedir"` +forbiddenpatternsfile=$(realpath $basedir/forbidden.patterns) + +#This is the ruleset. Each line represents a rule of tab-separated fields. +#The first field represents whether the match should be case-sensitive (s) or insensitive (i). +#The second field represents the pattern that should not be contained in any file that is checked. +#Further fields represent patterns with exceptions. +#For example, the first rule says: +# The pattern \<io\> should not be present in any file (case-insensitive match), except when it appears as ".io". +ruleset="i \<io\> \.io +s \<SLURM\> +i file \+system +i \<taurus\> taurus\.hrsk /taurus +i \<hrskii\> +i hpc \+system +i hpc[ -]\+da\>" + +function grepExceptions(){ +if [ $# -gt 0 ]; then +firstPattern=$1 +shift +grep -v "$firstPattern" | grepExceptions "$@" +else +cat - +fi +} + branch="preview" if [ -n "$CI_MERGE_REQUEST_TARGET_BRANCH_NAME" ]; then branch="origin/$CI_MERGE_REQUEST_TARGET_BRANCH_NAME" @@ -9,49 +38,25 @@ fi any_fails=false -files=$(git diff --name-only "$(git merge-base HEAD "$branch")") +files=$(git diff --name-only "$(git merge-base HEAD "$branch")" | grep '.md$' | grep -v '^doc.zih.tu-dresden.de/README.md$') for f in $files; do - if [ "$f" != doc.zih.tu-dresden.de/README.md -a "${f: -3}" == ".md" ]; then - #The following checks assume that grep signals success when it finds something, - #while it signals failure if it doesn't find something. - #We assume that we are successful if we DON'T find the pattern, - #which is the other way around, hence the "!". - - echo "Checking wording of $f: IO" - #io must be the whole word - if ! grep -n -i '\<io\>' "$f" | grep -v '\.io'; then - any_fails=true - fi - echo "Checking wording of $f: SLURM" - #SLURM must be the whole word, otherwise it might match script variables - #such as SLURM_JOB_ID - if ! grep -n '\<SLURM\>' "$f"; then - any_fails=true - fi - echo "Checking wording of $f: file system" - #arbitrary white space in between - if ! grep -n -i 'file \+system' "$f"; then - any_fails=true - fi - #check for word taurus, except when used in conjunction with .hrsk or /taurus, - #which might appear in code snippets - echo "Checking wording of $f: taurus" - if ! grep -n -i '\<taurus\>' "$f" | grep -v 'taurus\.hrsk' | grep -v '/taurus'; then - any_fails=true - fi - echo "Checking wording of $f: hrskii" - if ! grep -n -i '\<hrskii\>' "$f"; then - any_fails=true - fi - echo "Checking wording of $f: hpc system" - if ! grep -n -i 'hpc \+system' "$f"; then - any_fails=true - fi - echo "Checking wording of $f: hpc-da" - if ! grep -n -i 'hpc[ -]\+da\>' "$f"; then - any_fails=true - fi - fi + while IFS=$'\t' read -r flags pattern exceptionPatterns; do + while IFS=$'\t' read -r -a exceptionPatternsArray; do + echo "Checking wording of $f: $pattern" + case "$flags" in + "i") + if grep -n -i "$pattern" "$f" | grepExceptions "${exceptionPatternsArray[@]}" ; then + any_fails=true + fi + ;; + "s") + if grep -n "$pattern" "$f" | grepExceptions "${exceptionPatternsArray[@]}" ; then + any_fails=true + fi + ;; + esac + done <<< $exceptionPatterns + done <<< $ruleset done if [ "$any_fails" == true ]; then