diff --git a/doc.zih.tu-dresden.de/util/grep-forbidden-patterns.sh b/doc.zih.tu-dresden.de/util/grep-forbidden-patterns.sh index 280e4003dc951164c86b44560d6c81e3a5dc640c..61fa310667e1cf995efa6f8869877f3a1878d456 100755 --- a/doc.zih.tu-dresden.de/util/grep-forbidden-patterns.sh +++ b/doc.zih.tu-dresden.de/util/grep-forbidden-patterns.sh @@ -6,41 +6,50 @@ scriptpath=${BASH_SOURCE[0]} basedir=`dirname "$scriptpath"` basedir=`dirname "$basedir"` -#This is the ruleset. Each line represents a rule of tab-separated fields. +#This is the ruleset. Each rule consists of a message (first line), a tab-separated list of files to skip (second line) and a pattern specification (third line). +#A pattern specification is a tab-separated list of fields: #The first field represents whether the match should be case-sensitive (s) or insensitive (i). #The second field represents the pattern that should not be contained in any file that is checked. #Further fields represent patterns with exceptions. #For example, the first rule says: # The pattern \<io\> should not be present in any file (case-insensitive match), except when it appears as ".io". ruleset="The word \"IO\" should not be used, use \"I/O\" instead. +doc.zih.tu-dresden.de/docs/contrib/content_rules.md i \<io\> \.io \"SLURM\" (only capital letters) should not be used, use \"Slurm\" instead. +doc.zih.tu-dresden.de/docs/contrib/content_rules.md s \<SLURM\> \"File system\" should be written as \"filesystem\", except when used as part of a proper name. +doc.zih.tu-dresden.de/docs/contrib/content_rules.md i file \+system HDFS Use \"ZIH systems\" or \"ZIH system\" instead of \"Taurus\". \"taurus\" is only allowed when used in ssh commands and other very specific situations. +doc.zih.tu-dresden.de/docs/contrib/content_rules.md i \<taurus\> taurus\.hrsk /taurus /TAURUS ssh ^[0-9]\+:Host taurus$ \"HRSKII\" should be avoided, use \"ZIH system\" instead. +doc.zih.tu-dresden.de/docs/contrib/content_rules.md i \<hrskii\> The term \"HPC-DA\" should be avoided. Depending on the situation, use \"data analytics\" or similar. +doc.zih.tu-dresden.de/docs/contrib/content_rules.md i hpc[ -]\+da\> \"ATTACHURL\" was a keyword in the old wiki, don't use it. + i attachurl Replace \"todo\" with real content. + i \<todo\> <!--.*todo.*--> Avoid spaces at end of lines. + i [[:space:]]$ When referencing partitions, put keyword \"partition\" in front of partition name, e. g. \"partition ml\", not \"ml partition\". +doc.zih.tu-dresden.de/docs/contrib/content_rules.md i \(alpha\|ml\|haswell\|romeo\|gpu\|smp\|julia\|hpdlf\|scs5\)-\?\(interactive\)\?[^a-z]*partition Give hints in the link text. Words such as \"here\" or \"this link\" are meaningless. +doc.zih.tu-dresden.de/docs/contrib/content_rules.md i \[\s\?\(documentation\|here\|this \(link\|page\|subsection\)\|slides\?\|manpage\)\s\?\] Use \"workspace\" instead of \"work space\" or \"work-space\". +doc.zih.tu-dresden.de/docs/contrib/content_rules.md i work[ -]\+space" -# Whitelisted files will be ignored -# Whitespace separated list with full path -whitelist=(doc.zih.tu-dresden.de/README.md doc.zih.tu-dresden.de/docs/contrib/content_rules.md) - function grepExceptions () { if [ $# -gt 0 ]; then firstPattern=$1 @@ -55,22 +64,29 @@ function checkFile(){ f=$1 echo "Check wording in file $f" while read message; do + IFS=$'\t' read -r -a files_to_skip + skipping="" + if (printf '%s\n' "${files_to_skip[@]}" | grep -xq $f); then + skipping=" -- skipping" + fi IFS=$'\t' read -r flags pattern exceptionPatterns while IFS=$'\t' read -r -a exceptionPatternsArray; do if [ $silent = false ]; then - echo " Pattern: $pattern" + echo " Pattern: $pattern$skipping" fi - grepflag= - case "$flags" in - "i") - grepflag=-i - ;; - esac - if grep -n $grepflag $color "$pattern" "$f" | grepExceptions "${exceptionPatternsArray[@]}" ; then - number_of_matches=`grep -n $grepflag $color "$pattern" "$f" | grepExceptions "${exceptionPatternsArray[@]}" | wc -l` - ((cnt=cnt+$number_of_matches)) - if [ $silent = false ]; then - echo " $message" + if [ -z "$skipping" ]; then + grepflag= + case "$flags" in + "i") + grepflag=-i + ;; + esac + if grep -n $grepflag $color "$pattern" "$f" | grepExceptions "${exceptionPatternsArray[@]}" ; then + number_of_matches=`grep -n $grepflag $color "$pattern" "$f" | grepExceptions "${exceptionPatternsArray[@]}" | wc -l` + ((cnt=cnt+$number_of_matches)) + if [ $silent = false ]; then + echo " $message" + fi fi fi done <<< $exceptionPatterns @@ -123,7 +139,7 @@ branch="origin/${CI_MERGE_REQUEST_TARGET_BRANCH_NAME:-preview}" if [ $all_files = true ]; then echo "Search in all markdown files." - files=$(git ls-tree --full-tree -r --name-only HEAD $basedir/docs/ | grep .md) + files=$(git ls-tree --full-tree -r --name-only HEAD $basedir/ | grep .md) elif [[ ! -z $file ]]; then files=$file else @@ -138,10 +154,6 @@ if [[ ! -z $file ]]; then else for f in $files; do if [ "${f: -3}" == ".md" -a -f "$f" ]; then - if (printf '%s\n' "${whitelist[@]}" | grep -xq $f); then - echo "Skip whitelisted file $f" - continue - fi checkFile $f fi done