#!/bin/bash
#
# grep-forbidden-words.sh - search markdown files for forbidden wording.
#
# Lives at doc.zih.tu-dresden.de/util/grep-forbidden-words.sh. This version
# supersedes an earlier one that ran one hard-coded grep check per word; all
# checks are now driven by the ruleset table below.
#
# NOTE(review): the first line of the file was not visible during review; a
# bash shebang is assumed because the script relies on BASH_SOURCE, arrays and
# here-strings.
# NOTE(review): the file lists produced by git are relative to the repository
# root, so the script presumably has to be started from there - confirm.
#
# Environment:
#   CI_MERGE_REQUEST_TARGET_BRANCH_NAME  target branch to diff against
#                                        (default: preview)
# Exit status:
#   0  no rule matched in any checked file
#   1  at least one rule matched
#   2  invalid command-line option

set -euo pipefail

# Parent directory of the directory that contains this script.
basedir=$(dirname "$(dirname "${BASH_SOURCE[0]}")")
readonly basedir

# Field separator of the ruleset. It is spelled as an escape sequence so that
# editors or tools that rewrite whitespace cannot silently break the table.
readonly tab=$'\t'

#This is the ruleset. Each line represents a rule of tab-separated fields.
#The first field represents whether the match should be case-sensitive (s) or insensitive (i).
#The second field represents the pattern that should not be contained in any file that is checked.
#Further fields represent patterns with exceptions.
#For example, the first rule says:
# The pattern \<io\> should not be present in any file (case-insensitive match),
# except when it appears as ".io".
readonly ruleset="i${tab}\<io\>${tab}\.io
s${tab}\<SLURM\>
i${tab}file \+system
i${tab}\<taurus\>${tab}taurus\.hrsk${tab}/taurus
i${tab}\<hrskii\>
i${tab}hpc \+system
i${tab}hpc[ -]\+da\>"

#######################################
# Copy stdin to stdout, dropping every line that matches one of the given
# exception patterns.
# Arguments: zero or more grep patterns
# Returns:   with at least one pattern, grep's status of the last filter stage
#            (0 only if a line survived all filters); without patterns, the
#            status of cat.
#######################################
function grepExceptions () {
  if [[ $# -gt 0 ]]; then
    local firstPattern=$1
    shift
    grep -v -e "$firstPattern" | grepExceptions "$@"
  else
    cat
  fi
}

#######################################
# Print the help text to stdout.
#######################################
function usage () {
  echo "$0 [options]"
  echo "Search forbidden patterns in markdown files."
  echo ""
  echo "Options:"
  echo " -a Search in all markdown files (default: git-changed files)"
  echo " -s Silent mode"
  echo " -h Show help message"
}

#######################################
# Parse the options, collect the candidate files and apply every rule of the
# ruleset to every markdown file.
# Globals:   basedir, ruleset (read)
# Arguments: the command-line arguments of the script
# Outputs:   progress, matching lines (prefixed with their line number) and a
#            summary on stdout; option errors on stderr
#######################################
function main () {
  local all_files=false
  local silent=false
  local option
  while getopts ":ahs" option; do
    case "$option" in
      a)
        all_files=true
        ;;
      s)
        silent=true
        ;;
      h)
        usage
        exit 0
        ;;
      \?) # Invalid option
        # A usage error must not look like a successful run to CI.
        echo "Error: Invalid option." >&2
        usage >&2
        exit 2
        ;;
    esac
  done

  local files
  if [[ "$all_files" == true ]]; then
    echo "Search in all markdown files."
    # No extra grep for ".md" here: the loop below already restricts the
    # check to names ending in ".md", and an unmatched grep would abort the
    # script under "set -e".
    files=$(git ls-tree --full-tree -r --name-only HEAD "$basedir/docs/")
  else
    echo "Search in git-changed files."
    local branch="origin/${CI_MERGE_REQUEST_TARGET_BRANCH_NAME:-preview}"
    # Separate assignment so that a failing merge-base stops the script
    # instead of being hidden inside a nested command substitution.
    local merge_base
    merge_base=$(git merge-base HEAD "$branch")
    files=$(git diff --name-only "$merge_base")
  fi

  local cnt=0
  local f flags pattern exceptionPatterns
  local -a exceptions grep_opts
  # Read the list line by line so that file names are neither word-split nor
  # glob-expanded.
  while IFS= read -r f; do
    if [[ "$f" == doc.zih.tu-dresden.de/README.md || "$f" != *.md ]]; then
      continue
    fi
    echo "Check wording in file $f"
    while IFS=$'\t' read -r flags pattern exceptionPatterns; do
      if [[ "$silent" == false ]]; then
        echo " Pattern: $pattern"
      fi
      grep_opts=(-n)
      if [[ "$flags" == i ]]; then
        grep_opts+=(-i)
      fi
      # Split the remaining fields into one array element per exception.
      IFS=$'\t' read -r -a exceptions <<< "$exceptionPatterns"
      # The pipeline succeeds only if at least one matching line is left after
      # all exceptions were removed (pipefail covers the rule without
      # exceptions). The ${...+...} form keeps "set -u" from aborting on an
      # empty array in bash versions before 4.4.
      if grep "${grep_opts[@]}" -e "$pattern" -- "$f" \
        | grepExceptions ${exceptions[@]+"${exceptions[@]}"}; then
        cnt=$((cnt + 1))
      fi
    # Quoted on purpose: older bash versions word-split an unquoted
    # here-string, which would turn the tabs and newlines into spaces.
    done <<< "$ruleset"
  done <<< "$files"

  echo ""
  if [[ "$cnt" -eq 1 ]]; then
    echo "Forbidden Patterns: 1 match found"
  else
    echo "Forbidden Patterns: $cnt matches found"
  fi
  if [[ "$cnt" -gt 0 ]]; then
    exit 1
  fi
}

main "$@"