diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index af21749feafce056b6d6a5440ce2836fdcd30d46..f1875b3481da6d11053e5ad8aed49ae53033e5c4 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -45,7 +45,7 @@ Check spelling for changed md-files:
   stage: test
   script:
     - docker run --rm -w /src -e CI_MERGE_REQUEST_TARGET_BRANCH_NAME "${DOCKER_IMAGE}"
-      doc.zih.tu-dresden.de/util/check-spelling-changes.sh
+      doc.zih.tu-dresden.de/util/check-spelling.sh
   only: [ merge_requests ]
 
 Check links for changed md-files:
diff --git a/doc.zih.tu-dresden.de/util/check-spelling-changes.sh b/doc.zih.tu-dresden.de/util/check-spelling-changes.sh
deleted file mode 100755
index 670e687ce898a613264219a6cc37bd20479da0c3..0000000000000000000000000000000000000000
--- a/doc.zih.tu-dresden.de/util/check-spelling-changes.sh
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/bin/bash
-
-set -euo pipefail
-
-scriptpath=${BASH_SOURCE[0]}
-basedir=`dirname "$scriptpath"`
-basedir=`dirname "$basedir"`
-wordlistfile=$(realpath $basedir/wordlist.aspell)
-
-function getNumberOfAspellOutputLines(){
-  cat - | aspell -p "$wordlistfile" --ignore 2 -l en_US list --mode=markdown | sort -u | wc -l
-}
-
-branch="preview"
-if [ -n "$CI_MERGE_REQUEST_TARGET_BRANCH_NAME" ]; then
-  branch="origin/$CI_MERGE_REQUEST_TARGET_BRANCH_NAME"
-fi
-
-any_fails=false
-
-source_hash=`git merge-base HEAD "$branch"`
-#Remove everything except lines beginning with --- or +++
-files=`git diff $source_hash | sed -n 's/^[-+]\{3,3\} //p'`
-#echo "$files"
-#echo "-------------------------"
-#Assume that we have pairs of lines (starting with --- and +++).
-while read oldfile; do
-  read newfile
-  if [ "${newfile: -3}" == ".md" ]; then
-    if [[ $newfile == *"accessibility.md"* ||
-      $newfile == *"data_protection_declaration.md"* ||
-      $newfile == *"legal_notice.md"* ]]; then
-      echo "Skip $newfile"
-    else
-      echo "Check $newfile"
-      if [ "$oldfile" == "/dev/null" ]; then
-        #Added files should not introduce new spelling mistakes
-        previous_count=0
-      else
-        previous_count=`git show "$source_hash:${oldfile:2}" | getNumberOfAspellOutputLines`
-      fi
-      if [ "$newfile" == "/dev/null" ]; then
-        #Deleted files do not contain any spelling mistakes
-        current_count=0
-      else
-        #Remove the prefix "b/"
-        newfile=${newfile:2}
-        current_count=`cat "$newfile" | getNumberOfAspellOutputLines`
-      fi
-      if [ $current_count -gt $previous_count ]; then
-        echo "-- File $newfile"
-        echo "Change increases spelling mistake count (from $previous_count to $current_count)"
-        any_fails=true
-      fi
-    fi
-  fi
-done <<< "$files"
-
-if [ "$any_fails" == true ]; then
-  exit 1
-fi
diff --git a/doc.zih.tu-dresden.de/util/check-spelling.sh b/doc.zih.tu-dresden.de/util/check-spelling.sh
index 8b7da3217c3f086a44eeb042cc645fcd63d49364..7fa9d2824d4a61ce86ae258d656acfe90c574269 100755
--- a/doc.zih.tu-dresden.de/util/check-spelling.sh
+++ b/doc.zih.tu-dresden.de/util/check-spelling.sh
@@ -1,28 +1,98 @@
 #!/bin/bash
 
+set -euo pipefail
+
 scriptpath=${BASH_SOURCE[0]}
 basedir=`dirname "$scriptpath"`
 basedir=`dirname "$basedir"`
-wordlistfile=$basedir/wordlist.aspell
-acmd="aspell -p $wordlistfile --ignore 2 -l en_US list --mode=markdown"
-
-function spell_check () {
-  file_to_check=$1
-  ret=$(cat "$file_to_check" | $acmd)
-  if [ ! -z "$ret" ]; then
-    echo "-- File $file_to_check"
-    echo "$ret" | sort -u
-  fi
-}
+wordlistfile=$(realpath "$basedir/wordlist.aspell")
+branch="origin/${CI_MERGE_REQUEST_TARGET_BRANCH_NAME:-preview}"
+aspellmode=
+if aspell dump modes | grep -q markdown; then
+  aspellmode="--mode=markdown"
+fi
 
 function usage() {
   cat <<-EOF
 usage: $0 [file]
-Outputs all words of the file (or, if no argument given, all files in the current directory, recursively), that the spell checker cannot recognize.
+If file is given, outputs all words of the file, that the spell checker cannot recognize.
+If file is omitted, checks whether any changed file contains more unrecognizable words than before the change.
 If you are sure a word is correct, you can put it in $wordlistfile.
 EOF
 }
 
+#Reads text from stdin and prints the words aspell does not recognize,
+#sorted and without duplicates.
+function getAspellOutput(){
+  #$aspellmode is left unquoted on purpose: it expands to nothing if empty.
+  aspell -p "$wordlistfile" --ignore 2 -l en_US $aspellmode list | sort -u
+}
+
+#Reads text from stdin and prints the number of distinct unrecognized words.
+function getNumberOfAspellOutputLines(){
+  getAspellOutput | wc -l
+}
+
+#Compares every changed .md file with its version at the merge base of HEAD
+#and $branch. Returns 1 if the wordlist is unsorted or if a file contains
+#more unrecognized words than before the change, 0 otherwise.
+function isMistakeCountIncreasedByChanges(){
+  local any_fails=false
+  local source_hash files oldfile newfile previous_count current_count
+
+  #Unfortunately, sort depends on locale and docker does not provide much.
+  #Therefore, it uses bytewise comparison. We avoid problems with the command tr.
+  #LC_ALL=C makes a local run compare bytewise, too.
+  if ! sed 1d "$wordlistfile" | tr '[:upper:]' '[:lower:]' | LC_ALL=C sort -C; then
+    echo "Unsorted wordlist in $wordlistfile"
+    any_fails=true
+  fi
+
+  source_hash=$(git merge-base HEAD "$branch")
+  #Remove everything except lines beginning with --- or +++
+  files=$(git diff "$source_hash" | sed -E -n 's#^(---|\+\+\+) ((/|./)[^[:space:]]+)$#\2#p')
+  #echo "$files"
+  #echo "-------------------------"
+  #Assume that we have pairs of lines (starting with --- and +++).
+  while read -r oldfile; do
+    #An empty or odd-length list must end the loop instead of aborting via set -e.
+    read -r newfile || break
+    if [ "${newfile: -3}" == ".md" ]; then
+      if [[ $newfile == *"accessibility.md"* ||
+        $newfile == *"data_protection_declaration.md"* ||
+        $newfile == *"legal_notice.md"* ]]; then
+        echo "Skip $newfile"
+      else
+        echo "Check $newfile"
+        if [ "$oldfile" == "/dev/null" ]; then
+          #Added files should not introduce new spelling mistakes
+          previous_count=0
+        else
+          previous_count=$(git show "$source_hash:${oldfile:2}" | getNumberOfAspellOutputLines)
+        fi
+        if [ "$newfile" == "/dev/null" ]; then
+          #Deleted files do not contain any spelling mistakes
+          current_count=0
+        else
+          #Remove the prefix "b/"
+          newfile=${newfile:2}
+          current_count=$(getNumberOfAspellOutputLines < "$newfile")
+        fi
+        if [ "$current_count" -gt "$previous_count" ]; then
+          echo "-- File $newfile"
+          echo "Change increases spelling mistake count (from $previous_count to $current_count)"
+          any_fails=true
+        fi
+      fi
+    fi
+  done <<< "$files"
+
+  if [ "$any_fails" == true ]; then
+    return 1
+  fi
+  return 0
+}
+
 if [ $# -eq 1 ]; then
   case $1 in
     help | -help | --help)
@@ -30,13 +100,11 @@ if [ $# -eq 1 ]; then
       exit
     ;;
     *)
-      spell_check $1
+      getAspellOutput < "$1"
     ;;
   esac
 elif [ $# -eq 0 ]; then
-  for i in `find -name \*.md`; do
-    spell_check $i
-  done
+  isMistakeCountIncreasedByChanges
 else
   usage
 fi
diff --git a/doc.zih.tu-dresden.de/wordlist.aspell b/doc.zih.tu-dresden.de/wordlist.aspell
index 04443c556de09d533be9789e6c866c3b667e8be1..4c4eff0d4fe4817a17b36356d408afcfb848a1ca 100644
--- a/doc.zih.tu-dresden.de/wordlist.aspell
+++ b/doc.zih.tu-dresden.de/wordlist.aspell
@@ -1,166 +1,169 @@
-personal_ws-1.1 en 154
-APIs
+personal_ws-1.1 en 1805
 Altix
+analytics
+APIs
+BeeGFS
+benchmarking
+broadwell
+bsub
+ccNUMA
+centauri
+citable
+conda
 CPU
 CPUs
 CUDA
+cuDNN
 CXFS
-DDP
-DFG
+dask
 Dask
+dataframes
 DataFrames
 DataParallel
+DDP
+DFG
 DistributedDataParallel
 DockerHub
-ESSL
 EasyBuild
+env
+ESSL
+fastfs
+filesystem
+Filesystem
+filesystems
+Filesystems
 Flink
+foreach
 Fortran
 GFLOPS
+gfortran
+gnuplot
+Gnuplot
 GPU
 GPUs
-Gnuplot
-HDFS
-HPC
+hadoop
+haswell
 Haswell
+HDFS
 Horovod
+hostname
+HPC
+hyperparameter
 Hyperparameter
+hyperparameters
+icc
+icpc
+ifort
 ImageNet
 Infiniband
+IOPS
 Itanium
+jobqueue
+jpg
 Jupyter
 JupyterHub
 JupyterLab
 Keras
-LSF
+lapply
 LoadLeveler
+lsf
+LSF
+Mathematica
 MEGWARE
 MIMD
+Miniconda
 MKL
 MNIST
-MPI
-Mathematica
-Miniconda
 Montecito
+mountpoint
+mpi
+MPI
+mpicc
+mpiCC
+mpicxx
+mpif
+mpifort
+mpirun
+multicore
+multithreaded
 NCCL
+Neptun
 NFS
 NRINGS
 NUMA
 NUMAlink
-Neptun
 NumPy
-OPARI
+OME
 OmniOpt
+OPARI
 OpenACC
 OpenCL
 OpenMP
+openmpi
 OpenMPI
 Opteron
+overfitting
 PAPI
+parallelization
+parallelize
+pdf
 PESSL
 PGI
-PSOCK
+pipelining
+png
 PowerAI
+ppc
+PSOCK
+randint
+README
+Rmpi
+rome
+romeo
 RSA
 RStudio
-Rmpi
+salloc
+Saxonid
+sbatch
+ScaDS
+Scalasca
+scancel
+Scikit
+SciPy
+scontrol
+scp
 SGI
 SHA
 SHMEM
 SLES
+Slurm
 SMP
 SMT
+squeue
+srun
 SSD
+stderr
+stdout
 SUSE
-Saxonid
-ScaDS
-Scalasca
-SciPy
-Scikit
-Slurm
 TBB
 TCP
-TFLOPS
-TODO
 TensorBoard
 TensorFlow
+TFLOPS
 Theano
+tmp
+todo
 ToDo
+TODO
+transferability
 Trition
 Vampir
-XArray
-XGBoost
-XLC
-XLF
-ZIH
-analytics
-benchmarking
-broadwell
-bsub
-ccNUMA
-centauri
-citable
-conda
-cuDNN
-dask
-dataframes
-env
-fastfs
-foreach
-gfortran
-gnuplot
-hadoop
-haswell
-hyperparameter
-hyperparameters
-icc
-icpc
-ifort
-jobqueue
-jpg
-lapply
-lsf
-mpi
-mpiCC
-mpicc
-mpicxx
-mpif
-mpifort
-mpirun
-multicore
-openmpi
-overfitting
-parallelization
-parallelize
-pdf
-pipelining
-png
-ppc
-randint
-rome
-romeo
-salloc
-sbatch
-scancel
-scontrol
-scp
-squeue
-srun
-tmp
-transferability
 vectorization
 venv
 virtualenv
 workspace
 workspaces
-stdout
-stderr
-multithreaded
-hostname
-Filesystems
-IOPS
-OME
-README
-filesystem
-filesystems
-todo
+XArray
+XGBoost
+XLC
+XLF
+ZIH