diff --git a/doc.zih.tu-dresden.de/util/grep-forbidden-words.sh b/doc.zih.tu-dresden.de/util/grep-forbidden-patterns.sh
similarity index 57%
rename from doc.zih.tu-dresden.de/util/grep-forbidden-words.sh
rename to doc.zih.tu-dresden.de/util/grep-forbidden-patterns.sh
index cfb2b91b57457b701c5b80e76c6346d460cf4602..7d7b90cec5b88f1fb7468c2ca99e1dda48a17549 100755
--- a/doc.zih.tu-dresden.de/util/grep-forbidden-words.sh
+++ b/doc.zih.tu-dresden.de/util/grep-forbidden-patterns.sh
@@ -12,17 +12,30 @@ basedir=`dirname "$basedir"`
 #Further fields represent patterns with exceptions.
 #For example, the first rule says:
 # The pattern \<io\> should not be present in any file (case-insensitive match), except when it appears as ".io".
+#Each rule line is preceded by a line holding the message that is printed when the rule matches.
-ruleset="i	\<io\>	\.io
+ruleset="The word \"IO\" should not be used, use \"I/O\" instead.
+i	\<io\>	\.io
+\"SLURM\" (only capital letters) should not be used, use \"Slurm\" instead.
 s	\<SLURM\>
+\"File system\" should be written as filesystem, except when used as part of a proper name.
 i	file \+system	HDFS
-i	\<taurus\>	taurus\.hrsk	/taurus	/TAURUS
+Use \"ZIH systems\" or \"ZIH system\" instead of \"Taurus\". \"taurus\" is only allowed when used in ssh commands and other very specific situations.
+i	\<taurus\>	taurus\.hrsk	/taurus	/TAURUS	ssh
+\"HRSKII\" should be avoided, use \"ZIH system\" instead.
 i	\<hrskii\>
+The term \"HPC-DA\" should be avoided. Depending on the situation, use \"data analytics\" or similar.
 i	hpc[ -]\+da\>
+\"ATTACHURL\" was a keyword in the old wiki, don't use it.
 i	attachurl
+Replace \"todo\" with real content.
 i	\<todo\>	<!--.*todo.*-->
+Avoid spaces at end of lines.
 i	[[:space:]]$
+When referencing partitions, put keyword \"partition\" in front of partition name, e. g. \"partition ml\", not \"ml partition\".
 i	\(alpha\|ml\|haswell\|romeo\|gpu\|smp\|julia\|hpdlf\|scs5\)-\?\(interactive\)\?[^a-z]*partition
+Give hints in the link text. Words such as \"here\" or \"this link\" are meaningless.
 i	\[\s\?\(documentation\|here\|this \(link\|page\|subsection\)\|slides\?\|manpage\)\s\?\]
+Use \"workspace\" instead of \"work space\".
 i	work[ -]\+space"
 
 # Whitelisted files will be ignored
@@ -39,6 +52,42 @@ function grepExceptions () {
   fi
 }
 
+# Check a single file against every rule in $ruleset.
+# Arguments: $1 - path of the file to check
+# Globals:   ruleset, silent, color (read); cnt (increased by the number of matching lines)
+# Outputs:   the matching lines (grep -n) and, unless $silent is true, the pattern and the rule's message
+function checkFile(){
+  local f=$1
+  local message flags pattern exceptionPatterns grepflag number_of_matches
+  local -a exceptionPatternsArray
+  echo "Check wording in file $f"
+  # The ruleset alternates between a message line and a tab-separated rule line.
+  while read -r message; do
+    IFS=$'\t' read -r flags pattern exceptionPatterns
+    # Single-pass loop: it only splits the exception patterns into an array.
+    while IFS=$'\t' read -r -a exceptionPatternsArray; do
+      if [ "$silent" = false ]; then
+        echo " Pattern: $pattern"
+      fi
+      grepflag=
+      case "$flags" in
+        "i")
+          grepflag=-i
+        ;;
+      esac
+      # $grepflag is deliberately unquoted (it may be empty); $color presumably likewise.
+      if grep -n $grepflag $color "$pattern" "$f" | grepExceptions "${exceptionPatternsArray[@]}" ; then
+        # Same pipeline again, this time only to count the matching lines.
+        number_of_matches=$(grep -n $grepflag $color "$pattern" "$f" | grepExceptions "${exceptionPatternsArray[@]}" | wc -l)
+        ((cnt=cnt+number_of_matches))
+        if [ "$silent" = false ]; then
+          echo " $message"
+        fi
+      fi
+    done <<< "$exceptionPatterns"
+  done <<< "$ruleset"
+}
+
 function usage () {
   echo "$0 [options]"
   echo "Search forbidden patterns in markdown files."
@@ -95,31 +144,20 @@ fi
 
 echo "... $files ..."
 cnt=0
-for f in $files; do
-  if [ "${f: -3}" == ".md" -a -f "$f" ]; then
-    if (printf '%s\n' "${whitelist[@]}" | grep -xq $f); then
-      echo "Skip whitelisted file $f"
-      continue
+if [[ -n "$file" ]]; then
+  # An explicitly given file is checked as is: no .md filter, no whitelist lookup.
+  checkFile "$file"
+else
+  for f in $files; do
+    if [[ "${f: -3}" == ".md" && -f "$f" ]]; then
+      if (printf '%s\n' "${whitelist[@]}" | grep -xqF -- "$f"); then
+        echo "Skip whitelisted file $f"
+        continue
+      fi
+      checkFile "$f"
     fi
-    echo "Check wording in file $f"
-    while IFS=$'\t' read -r flags pattern exceptionPatterns; do
-      while IFS=$'\t' read -r -a exceptionPatternsArray; do
-        if [ $silent = false ]; then
-          echo " Pattern: $pattern"
-        fi
-        grepflag=
-        case "$flags" in
-          "i")
-            grepflag=-i
-          ;;
-        esac
-        if grep -n $grepflag $color "$pattern" "$f" | grepExceptions "${exceptionPatternsArray[@]}" ; then
-          ((cnt=cnt+1))
-        fi
-      done <<< $exceptionPatterns
-    done <<< $ruleset
-  fi
-done
+  done
+fi
 
 echo ""
 case $cnt in
diff --git a/doc.zih.tu-dresden.de/util/grep-forbidden-patterns.testdoc b/doc.zih.tu-dresden.de/util/grep-forbidden-patterns.testdoc
new file mode 100644
index 0000000000000000000000000000000000000000..2b674702cd81304662b439a61d2fe15246ef8215
--- /dev/null
+++ b/doc.zih.tu-dresden.de/util/grep-forbidden-patterns.testdoc
@@ -0,0 +1,46 @@
+# Diese Datei versucht alles falsch zu machen, worauf grep-forbidden-words.sh checkt.
+
+`i \[\s\?\(documentation\|here\|this \(link\|page\|subsection\)\|slides\?\|manpage\)\s\?\]`
+
+Man kann Workspace schreiben oder aber auch
+work-Space, beides sollte auffallen.
+
+Die ML-Partition,
+die Alpha-Partition,
+die Haswell-Partition,
+die Romeo-Partition,
+die GPU-Partition,
+die SMP-Partition,
+die Julia-Partition,
+die HPDLF-Partition,
+die scs5-Partition (was ist das überhaupt?),
+alle gibt es auch in interaktiv:
+Die ML-interactive partition,
+die Alpha-interactive partition,
+die Haswell-interactive Partition,
+die Romeo-interactive partition,
+die GPU-interactive partition,
+die SMP-interactive partition,
+die Julia-interactive partition,
+die HPDLF-interactive partition,
+die scs5-interactive partition (was ist das überhaupt?),
+alle diese Partitionen existieren, aber man darf sie nicht benennen.
+```
+Denn sonst kommt das Leerzeichenmonster und packt Leerzeichen ans Ende der Zeile. 
+```
+
+TODO: io sollte mit SLURM laufen.
+
+Das HDFS ist ein sehr gutes
+file system auf taurus.
+
+Taurus ist erreichbar per
+taurus.hrsk oder per
+/taurus oder per
+/TAURUS
+
+Was ist hrskii? Keine Ahnung!
+
+Was ist HPC-DA? Ist es ein attachurl? See (this page).
+Or (here).
+Or (manpage).
diff --git a/doc.zih.tu-dresden.de/util/pre-commit b/doc.zih.tu-dresden.de/util/pre-commit
index b86b75d9a07870a68118aa500ee80781e216c56b..eb63bbea24052eb1dff4ec16a17b8b5aba275e18 100755
--- a/doc.zih.tu-dresden.de/util/pre-commit
+++ b/doc.zih.tu-dresden.de/util/pre-commit
@@ -69,7 +69,7 @@ then
 fi
 
 echo "Forbidden words checking..."
-docker run --name=hpc-compendium --rm -w /docs --mount src="$(pwd)",target=/docs,type=bind hpc-compendium ./doc.zih.tu-dresden.de/util/grep-forbidden-words.sh
+docker run --name=hpc-compendium --rm -w /docs --mount src="$(pwd)",target=/docs,type=bind hpc-compendium ./doc.zih.tu-dresden.de/util/grep-forbidden-patterns.sh
 if [ $? -ne 0 ]
 then
   exit_ok=no
diff --git a/doc.zih.tu-dresden.de/util/test-grep-forbidden-patterns.sh b/doc.zih.tu-dresden.de/util/test-grep-forbidden-patterns.sh
new file mode 100755
index 0000000000000000000000000000000000000000..d5d13ffc4ffc2f1f63332ca628f1449525ee1690
--- /dev/null
+++ b/doc.zih.tu-dresden.de/util/test-grep-forbidden-patterns.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+# Regression test: runs grep-forbidden-patterns.sh on the test document and
+# compares the reported number of matches with the expected one.
+# Exit status: 0 if both numbers agree, 1 otherwise.
+
+expected_match_count=32
+
+# Take the number from the output line that contains "Forbidden Patterns:".
+# NOTE(review): -c is passed twice; confirm whether another option was intended.
+number_of_matches=$(bash ./doc.zih.tu-dresden.de/util/grep-forbidden-patterns.sh -f doc.zih.tu-dresden.de/util/grep-forbidden-patterns.testdoc -c -c | grep "Forbidden Patterns:" | sed -e 's/.*: //' | sed -e 's/ matches.*//')
+
+if [ "$number_of_matches" = "$expected_match_count" ]; then
+  echo "Test OK"
+  exit 0
+else
+  echo "Test failed: $expected_match_count matches expected, but $number_of_matches found"
+  exit 1
+fi