From 0c7797ee480fc084db9fb67d241abce555976c86 Mon Sep 17 00:00:00 2001
From: Jan Frenzel <jan.frenzel@tu-dresden.de>
Date: Fri, 27 Aug 2021 12:51:29 +0200
Subject: [PATCH] Corrected mistake in spell checks related to git diff output;
 unified check-spelling.sh and check-spelling-changes.sh; added a few words to
 wordlist.aspell.

---
 .gitlab-ci.yml                                |   2 +-
 .../util/check-spelling-changes.sh            |  61 ----------
 doc.zih.tu-dresden.de/util/check-spelling.sh  |  79 +++++++++---
 doc.zih.tu-dresden.de/wordlist.aspell         | 114 +++++++++---------
 4 files changed, 123 insertions(+), 133 deletions(-)
 delete mode 100755 doc.zih.tu-dresden.de/util/check-spelling-changes.sh

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index af21749fe..f1875b348 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -45,7 +45,7 @@ Check spelling for changed md-files:
     stage: test
     script:
         - docker run --rm -w /src -e CI_MERGE_REQUEST_TARGET_BRANCH_NAME "${DOCKER_IMAGE}"
-          doc.zih.tu-dresden.de/util/check-spelling-changes.sh
+          doc.zih.tu-dresden.de/util/check-spelling.sh
     only: [ merge_requests ]
 
 Check links for changed md-files:
diff --git a/doc.zih.tu-dresden.de/util/check-spelling-changes.sh b/doc.zih.tu-dresden.de/util/check-spelling-changes.sh
deleted file mode 100755
index 670e687ce..000000000
--- a/doc.zih.tu-dresden.de/util/check-spelling-changes.sh
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/bin/bash
-
-set -euo pipefail
-
-scriptpath=${BASH_SOURCE[0]}
-basedir=`dirname "$scriptpath"`
-basedir=`dirname "$basedir"`
-wordlistfile=$(realpath $basedir/wordlist.aspell)
-
-function getNumberOfAspellOutputLines(){
-  cat - | aspell -p "$wordlistfile" --ignore 2 -l en_US list --mode=markdown | sort -u | wc -l
-}
-
-branch="preview"
-if [ -n "$CI_MERGE_REQUEST_TARGET_BRANCH_NAME" ]; then
-  branch="origin/$CI_MERGE_REQUEST_TARGET_BRANCH_NAME"
-fi
-
-any_fails=false
-
-source_hash=`git merge-base HEAD "$branch"`
-#Remove everything except lines beginning with --- or +++
-files=`git diff $source_hash | sed -n 's/^[-+]\{3,3\} //p'`
-#echo "$files"
-#echo "-------------------------"
-#Assume that we have pairs of lines (starting with --- and +++).
-while read oldfile; do
-  read newfile
-  if [ "${newfile: -3}" == ".md" ]; then
-    if [[ $newfile == *"accessibility.md"* ||
-          $newfile == *"data_protection_declaration.md"* ||
-          $newfile == *"legal_notice.md"* ]]; then
-      echo "Skip $newfile"
-    else
-      echo "Check $newfile"
-      if [ "$oldfile" == "/dev/null" ]; then
-        #Added files should not introduce new spelling mistakes
-        previous_count=0
-      else
-        previous_count=`git show "$source_hash:${oldfile:2}" | getNumberOfAspellOutputLines`
-      fi
-      if [ "$newfile" == "/dev/null" ]; then
-        #Deleted files do not contain any spelling mistakes
-        current_count=0
-      else
-        #Remove the prefix "b/"
-        newfile=${newfile:2}
-        current_count=`cat "$newfile" | getNumberOfAspellOutputLines`
-      fi
-      if [ $current_count -gt $previous_count ]; then
-        echo "-- File $newfile"
-        echo "Change increases spelling mistake count (from $previous_count to $current_count)"
-        any_fails=true
-      fi
-    fi
-  fi
-done <<< "$files"
-
-if [ "$any_fails" == true ]; then
-  exit 1
-fi
diff --git a/doc.zih.tu-dresden.de/util/check-spelling.sh b/doc.zih.tu-dresden.de/util/check-spelling.sh
index 8b7da3217..8630852ba 100755
--- a/doc.zih.tu-dresden.de/util/check-spelling.sh
+++ b/doc.zih.tu-dresden.de/util/check-spelling.sh
@@ -1,28 +1,77 @@
 #!/bin/bash
 
+set -euo pipefail
+
 scriptpath=${BASH_SOURCE[0]}
 basedir=`dirname "$scriptpath"`
 basedir=`dirname "$basedir"`
-wordlistfile=$basedir/wordlist.aspell
-acmd="aspell -p $wordlistfile --ignore 2 -l en_US list --mode=markdown"
-
-function spell_check () {
-  file_to_check=$1
-  ret=$(cat "$file_to_check" | $acmd)
-  if [ ! -z "$ret" ]; then
-    echo "-- File $file_to_check"
-    echo "$ret" | sort -u
-  fi
-}
+wordlistfile=$(realpath "$basedir/wordlist.aspell") #Quoted so that a checkout path containing whitespace is not split
+branch="origin/${CI_MERGE_REQUEST_TARGET_BRANCH_NAME:-preview}" #Comparison base; origin/preview if the CI variable is unset or empty
 
 function usage() {
   cat <<-EOF
 usage: $0 [file]
-Outputs all words of the file (or, if no argument given, all files in the current directory, recursively), that the spell checker cannot recognize.
+If file is given, outputs all words of the file, that the spell checker cannot recognize.
+If file is omitted, checks whether any changed file contains more unrecognizable words than before the change.
 If you are sure a word is correct, you can put it in $wordlistfile.
 EOF
 }
 
+function getAspellOutput(){ #Reads text from stdin; prints the words aspell lists for it (using $wordlistfile), sorted and deduplicated
+  aspell -p "$wordlistfile" --ignore 2 -l en_US --mode=markdown list | sort -u
+}
+
+function getNumberOfAspellOutputLines(){ #Reads text from stdin; prints the number of lines getAspellOutput produces for it
+  getAspellOutput | wc -l
+}
+
+function isMistakeCountIncreasedByChanges(){
+  #Compares, for each changed .md file, the count of unrecognized words before and after the change.
+  #Uses $branch as comparison base, prints progress to stdout, returns 1 if any count increased, else 0.
+  any_fails=false
+  source_hash=$(git merge-base HEAD "$branch")
+  #Remove everything except the paths from lines beginning with --- or +++
+  files=$(git diff "$source_hash" | sed -n 's#^[-+]\{3,3\} \(\(/\|./\)[^[:space:]]\+\)$#\1#p')
+  #The paths come in pairs of lines (--- old, +++ new). Both are read in the loop condition so
+  #that an empty diff or an unpaired last line ends the loop instead of aborting through set -e.
+  while read -r oldfile && read -r newfile; do
+    if [ "${newfile: -3}" == ".md" ]; then
+      if [[ $newfile == *"accessibility.md"* ||
+            $newfile == *"data_protection_declaration.md"* ||
+            $newfile == *"legal_notice.md"* ]]; then
+        echo "Skip $newfile"
+      else
+        echo "Check $newfile"
+        if [ "$oldfile" == "/dev/null" ]; then
+          #Added files should not introduce new spelling mistakes
+          previous_count=0
+        else
+          #Remove the prefix "a/"
+          previous_count=$(git show "$source_hash:${oldfile:2}" | getNumberOfAspellOutputLines)
+        fi
+        if [ "$newfile" == "/dev/null" ]; then
+          #Deleted files do not contain any spelling mistakes
+          current_count=0
+        else
+          #Remove the prefix "b/"
+          newfile=${newfile:2}
+          current_count=$(getNumberOfAspellOutputLines < "$newfile")
+        fi
+        if [ "$current_count" -gt "$previous_count" ]; then
+          echo "-- File $newfile"
+          echo "Change increases spelling mistake count (from $previous_count to $current_count)"
+          any_fails=true
+        fi
+      fi
+    fi
+  done <<< "$files"
+
+  if [ "$any_fails" == true ]; then
+    return 1
+  fi
+  return 0
+}
+
 if [ $# -eq 1 ]; then
   case $1 in
   help | -help | --help)
@@ -30,13 +79,11 @@ if [ $# -eq 1 ]; then
     exit
   ;;
   *)
-    spell_check $1
+    cat "$1" | getAspellOutput
   ;;
   esac
 elif [ $# -eq 0 ]; then
-  for i in `find -name \*.md`; do
-  spell_check $i
-  done
+  isMistakeCountIncreasedByChanges
 else
   usage
 fi
diff --git a/doc.zih.tu-dresden.de/wordlist.aspell b/doc.zih.tu-dresden.de/wordlist.aspell
index 3acfcf421..955db4276 100644
--- a/doc.zih.tu-dresden.de/wordlist.aspell
+++ b/doc.zih.tu-dresden.de/wordlist.aspell
@@ -1,97 +1,101 @@
 personal_ws-1.1 en 1805 
+Altix
 analytics
+BeeGFS
 benchmarking
+bsub
+ccNUMA
 citable
 CPU
+CPUs
 CUDA
+CXFS
+DFG
 EasyBuild
+fastfs
+filesystem
+Filesystem
 Flink
+Fortran
+GFLOPS
+gfortran
+gnuplot
+Gnuplot
 GPU
 hadoop
 Haswell
 HDFS
 Horovod
 HPC
+icc
+icpc
+ifort
 ImageNet
 Infiniband
+Itanium
+jpg
 Jupyter
 Keras
+LoadLeveler
+lsf
+LSF
+MEGWARE
+MIMD
+MKL
+Montecito
+mountpoint
 MPI
+mpicc
+mpiCC
+mpicxx
+mpif
+mpifort
+mpirun
+multicore
+multithreaded
+Neptun
+NFS
+NUMA
+NUMAlink
 OPARI
 OpenACC
 OpenCL
 OpenMP
+openmpi
+OpenMPI
+Opteron
 PAPI
+pdf
+pipelining
+png
 rome
 romeo
+RSA
 salloc
+Saxonid
 sbatch
 ScaDS
 Scalasca
 scancel
 scontrol
 scp
+SGI
+SHA
 SHMEM
+SLES
 Slurm
+SMP
 squeue
 srun
 SSD
+stderr
+stdout
+SUSE
+TBB
 TensorFlow
+TFLOPS
 Theano
+tmp
+Trition
 Vampir
 ZIH
-DFG
-NUMAlink
-ccNUMA
-NUMA
-Montecito
-Opteron
-Saxonid
-MIMD
-LSF
-lsf
-Itanium
-mpif
-mpicc
-mpiCC
-mpicxx
-mpirun
-mpifort
-ifort
-icc
-icpc
-gfortran
-Altix
-Neptun
-Trition
-SUSE
-SLES
-Fortran
-SMP
-MEGWARE
-SGI
-CXFS
-NFS
-CPUs
-GFLOPS
-TFLOPS
-png
-jpg
-pdf
-bsub
-OpenMPI
-openmpi
-multicore
-fastfs
-tmp
-MKL
-TBB
-LoadLeveler
-Gnuplot
-gnuplot
-RSA
-SHA
-pipelining
-stdout
-stderr
-multithreaded
-- 
GitLab