Corrected mistake in spell checks related to git diff output; unified
check-spelling.sh and check-spelling-changes.sh; added a few words to wordlist.aspell.

Corrected mistake in spell checks related to git diff output; unified
check-spelling.sh and check-spelling-changes.sh; added a few words to wordlist.aspell.
0c7797ee · Jan Frenzel · 761610f2 · 0c7797ee · 761610f2 · 0c7797ee
Commit 0c7797ee authored 3 years ago by Jan Frenzel
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -45,7 +45,7 @@ Check spelling for changed md-files:
    stage: test
    script:
        - docker run --rm -w /src -e CI_MERGE_REQUEST_TARGET_BRANCH_NAME "${DOCKER_IMAGE}"
-          doc.zih.tu-dresden.de/util/check-spelling-changes.sh
+          doc.zih.tu-dresden.de/util/check-spelling.sh
    only: [ merge_requests ]
 Check links for changed md-files:

--- a/doc.zih.tu-dresden.de/util/check-spelling-changes.sh
+++ b/doc.zih.tu-dresden.de/util/check-spelling-changes.sh
-#!/bin/bash
-set -euo pipefail
-scriptpath=${BASH_SOURCE[0]}
-basedir=`dirname "$scriptpath"`
-basedir=`dirname "$basedir"`
-wordlistfile=$(realpath $basedir/wordlist.aspell)
-function getNumberOfAspellOutputLines(){
-  cat - | aspell -p "$wordlistfile" --ignore 2 -l en_US list --mode=markdown | sort -u | wc -l
-}
-branch="preview"
-if [ -n "$CI_MERGE_REQUEST_TARGET_BRANCH_NAME" ]; then
-  branch="origin/$CI_MERGE_REQUEST_TARGET_BRANCH_NAME"
-fi
-any_fails=false
-source_hash=`git merge-base HEAD "$branch"`
-#Remove everything except lines beginning with --- or +++
-files=`git diff $source_hash | sed -n 's/^[-+]\{3,3\} //p'`
-#echo "$files"
-#echo "-------------------------"
-#Assume that we have pairs of lines (starting with --- and +++).
-while read oldfile; do
-  read newfile
-  if [ "${newfile: -3}" == ".md" ]; then
-    if [[ $newfile == *"accessibility.md"* ||
-          $newfile == *"data_protection_declaration.md"* ||
-          $newfile == *"legal_notice.md"* ]]; then
-      echo "Skip $newfile"
-    else
-      echo "Check $newfile"
-      if [ "$oldfile" == "/dev/null" ]; then
-        #Added files should not introduce new spelling mistakes
-        previous_count=0
-      else
-        previous_count=`git show "$source_hash:${oldfile:2}" | getNumberOfAspellOutputLines`
-      fi
-      if [ "$newfile" == "/dev/null" ]; then
-        #Deleted files do not contain any spelling mistakes
-        current_count=0
-      else
-        #Remove the prefix "b/"
-        newfile=${newfile:2}
-        current_count=`cat "$newfile" | getNumberOfAspellOutputLines`
-      fi
-      if [ $current_count -gt $previous_count ]; then
-        echo "-- File $newfile"
-        echo "Change increases spelling mistake count (from $previous_count to $current_count)"
-        any_fails=true
-      fi
-    fi
-  fi
-done <<< "$files"
-if [ "$any_fails" == true ]; then
-  exit 1
-fi
--- a/doc.zih.tu-dresden.de/util/check-spelling.sh
+++ b/doc.zih.tu-dresden.de/util/check-spelling.sh
 #!/bin/bash
+set -euo pipefail
 scriptpath=${BASH_SOURCE[0]}
 basedir=`dirname "$scriptpath"`
 basedir=`dirname "$basedir"`
-wordlistfile=$basedir/wordlist.aspell
+# Resolve the personal word list to an absolute path. The argument is quoted so
+# that a base directory containing spaces is passed to realpath as one word.
+wordlistfile=$(realpath "$basedir/wordlist.aspell")
-acmd="aspell -p $wordlistfile --ignore 2 -l en_US list --mode=markdown"
+# Branch to compare against: origin/<CI_MERGE_REQUEST_TARGET_BRANCH_NAME>, or
+# origin/preview when that variable is unset or empty.
+branch="origin/${CI_MERGE_REQUEST_TARGET_BRANCH_NAME:-preview}"
-function spell_check () {
-  file_to_check=$1
-  ret=$(cat "$file_to_check" | $acmd)
-  if [ ! -z "$ret" ]; then
-    echo "-- File $file_to_check"
-    echo "$ret" | sort -u
-  fi
-}
 function usage() {
  cat <<-EOF
 usage: $0 [file]
-Outputs all words of the file (or, if no argument given, all files in the current directory, recursively), that the spell checker cannot recognize.
+If file is given, outputs all words of the file, that the spell checker cannot recognize.
+If file is omitted, checks whether any changed file contains more unrecognizable words than before the change.
 If you are sure a word is correct, you can put it in $wordlistfile.
 EOF
 }
+# Reads markdown text from stdin and prints the sorted, de-duplicated list of
+# words aspell reports, using the personal word list in $wordlistfile.
+getAspellOutput() {
+  aspell -p "$wordlistfile" --ignore 2 -l en_US --mode=markdown list \
+    | sort -u
+}
+# Reads markdown text from stdin and prints how many lines getAspellOutput
+# produces for it.
+getNumberOfAspellOutputLines() {
+  getAspellOutput \
+    | wc -l
+}
+# Checks every markdown file changed since the merge base of HEAD and "$branch"
+# and reports those whose number of unrecognized words has grown.
+# Globals:   branch (read); calls getNumberOfAspellOutputLines
+# Outputs:   "Skip"/"Check" progress lines and one report per offending file
+# Returns:   1 if any checked file has more unrecognized words than before,
+#            0 otherwise; the failing command's status if git/aspell fails
+function isMistakeCountIncreasedByChanges(){
+  local any_fails=false
+  local source_hash files oldfile newfile previous_count current_count
+  source_hash=$(git merge-base HEAD "$branch") || return
+  #Keep only the paths of the "--- <old>" / "+++ <new>" header lines
+  #(either "/dev/null" or a path with a one-character prefix such as "a/").
+  files=$(git diff "$source_hash" | sed -n 's#^[-+]\{3,3\} \(\(/\|./\)[^[:space:]]\+\)$#\1#p') || return
+  #A here-string always delivers at least one (empty) line, so stop here when
+  #nothing matched; otherwise the second read would fail and abort the script
+  #under "set -e".
+  if [[ -z "$files" ]]; then
+    return 0
+  fi
+  #Assume that we have pairs of lines (starting with --- and +++).
+  while read -r oldfile; do
+    #An unpaired trailing line cannot be compared; stop instead of failing.
+    read -r newfile || break
+    #Only markdown files are checked. This also skips deleted files, whose
+    #new path is /dev/null.
+    if [[ "${newfile: -3}" != ".md" ]]; then
+      continue
+    fi
+    if [[ $newfile == *"accessibility.md"* ||
+          $newfile == *"data_protection_declaration.md"* ||
+          $newfile == *"legal_notice.md"* ]]; then
+      echo "Skip $newfile"
+      continue
+    fi
+    echo "Check $newfile"
+    if [[ "$oldfile" == "/dev/null" ]]; then
+      #Added files should not introduce new spelling mistakes
+      previous_count=0
+    else
+      #Remove the prefix "a/" and read the file as it was at the merge base
+      previous_count=$(git show "$source_hash:${oldfile:2}" | getNumberOfAspellOutputLines) || return
+    fi
+    #Remove the prefix "b/"
+    newfile=${newfile:2}
+    current_count=$(getNumberOfAspellOutputLines < "$newfile") || return
+    if (( current_count > previous_count )); then
+      echo "-- File $newfile"
+      echo "Change increases spelling mistake count (from $previous_count to $current_count)"
+      any_fails=true
+    fi
+  done <<< "$files"
+  if [[ "$any_fails" == true ]]; then
+    return 1
+  fi
+  return 0
+}
 if [ $# -eq 1 ]; then
  case $1 in
  help | -help | --help)
@@ -30,13 +79,11 @@ if [ $# -eq 1 ]; then
    exit
  ;;
  *)
-    spell_check $1
+    cat "$1" | getAspellOutput
  ;;
  esac
 elif [ $# -eq 0 ]; then
-  for i in `find -name \*.md`; do
+  isMistakeCountIncreasedByChanges
-  spell_check $i
-  done
 else
  usage
 fi
--- a/doc.zih.tu-dresden.de/wordlist.aspell
+++ b/doc.zih.tu-dresden.de/wordlist.aspell
 personal_ws-1.1 en 1805 
+Altix
 analytics
+BeeGFS
 benchmarking
+bsub
+ccNUMA
 citable
 CPU
+CPUs
 CUDA
+CXFS
+DFG
 EasyBuild
+fastfs
+filesystem
+Filesystem
 Flink
+Fortran
+GFLOPS
+gfortran
+gnuplot
+Gnuplot
 GPU
 hadoop
 Haswell
 HDFS
 Horovod
 HPC
+icc
+icpc
+ifort
 ImageNet
 Infiniband
+Itanium
+jpg
 Jupyter
 Keras
+LoadLeveler
+lsf
+LSF
+MEGWARE
+MIMD
+MKL
+Montecito
+mountpoint
 MPI
+mpicc
+mpiCC
+mpicxx
+mpif
+mpifort
+mpirun
+multicore
+multithreaded
+Neptun
+NFS
+NUMA
+NUMAlink
 OPARI
 OpenACC
 OpenCL
 OpenMP
+openmpi
+OpenMPI
+Opteron
 PAPI
+pdf
+pipelining
+png
 rome
 romeo
+RSA
 salloc
+Saxonid
 sbatch
 ScaDS
 Scalasca
 scancel
 scontrol
 scp
+SGI
+SHA
 SHMEM
+SLES
 Slurm
+SMP
 squeue
 srun
 SSD
+stderr
+stdout
+SUSE
+TBB
 TensorFlow
+TFLOPS
 Theano
+tmp
+Trition
 Vampir
 ZIH
-DFG
-NUMAlink
-ccNUMA
-NUMA
-Montecito
-Opteron
-Saxonid
-MIMD
-LSF
-lsf
-Itanium
-mpif
-mpicc
-mpiCC
-mpicxx
-mpirun
-mpifort
-ifort
-icc
-icpc
-gfortran
-Altix
-Neptun
-Trition
-SUSE
-SLES
-Fortran
-SMP
-MEGWARE
-SGI
-CXFS
-NFS
-CPUs
-GFLOPS
-TFLOPS
-png
-jpg
-pdf
-bsub
-OpenMPI
-openmpi
-multicore
-fastfs
-tmp
-MKL
-TBB
-LoadLeveler
-Gnuplot
-gnuplot
-RSA
-SHA
-pipelining
-stdout
-stderr
-multithreaded