#!/bin/bash

# Copyright (C) 2024 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Script to auto-correct common spelling mistakes.
#
# Example usage:
# $ ./gdb/contrib/spellcheck.sh gdb*

scriptdir=$(cd "$(dirname "$0")" || exit; pwd -P)
this_script=$scriptdir/$(basename "$0")

# Source of the downloaded dictionary, and where it and the derived grep
# pattern are cached.
url=https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines
cache_dir=$scriptdir/../../.git
cache_file=wikipedia-common-misspellings.txt
dictionary=$cache_dir/$cache_file
local_dictionary=$scriptdir/common-misspellings.txt
cache_file2=spell-check.pat1

# Return success if the running bash is at least version MAJOR.MINOR.
#
# Arguments:
#   $1 - required major version
#   $2 - required minor version
bash_version_at_least ()
{
    local major
    major="$1"
    local minor
    minor="$2"

    # BASH_VERSINFO holds the numeric version components, so there is no
    # need to parse BASH_VERSION with an external tool.
    local have_major have_minor
    have_major=${BASH_VERSINFO[0]}
    have_minor=${BASH_VERSINFO[1]}

    if [ "$have_major" -lt "$major" ]; then
	# Major version less than required, return false.
	return 1
    fi

    if [ "$have_major" -gt "$major" ]; then
	# Major version more than required, return true.
	return 0
    fi

    # Check minor version.
    [ "$have_minor" -ge "$minor" ]
}

# Separators: space, slash, tab, colon, comma, double quote.
# The grep variant needs a literal tab character; it is spelled $'\t' so
# that it survives whitespace normalization of this file.
declare -a grep_separators
grep_separators=(
    " "
    "/"
    $'\t'
    ":"
    ","
    "\""
)
# The sed variant passes the two-character escape "\t" on to sed.
declare -a sed_separators
sed_separators=(
    " "
    "/"
    "\t"
    ":"
    ","
    "\""
)

# Pre: start of line, left parenthesis.
declare -a grep_pre
grep_pre=(
    "^"
    "\("
)
declare -a sed_pre
sed_pre=(
    "^"
    "("
)

# Post: dot, right parenthesis, end of line.
declare -a grep_post
grep_post=(
    "\."
    "\)"
    "$"
)
declare -a sed_post
sed_post=(
    "\."
    ")"
    "$"
)

# Join the items $2... into a single string, separated by $1, and print it.
join ()
{
    local or
    or="$1"
    shift

    local res
    res=""

    local first item
    first=true
    for item in "$@"; do
	if $first; then
	    first=false
	    res="$item"
	else
	    res="$res$or$item"
	fi
    done

    printf '%s\n' "$res"
}

# Alternation operators: "|" for grep -E, "\|" for sed.
grep_or="|"
sed_or="\|"

# Print the arguments as one grep -E alternation group: (a|b|...).
grep_join ()
{
    local res
    res=$(join "$grep_or" "$@")
    printf '%s\n' "($res)"
}

# Print the arguments as one sed alternation group: \(a\|b\|...\).
sed_join ()
{
    local res
    res=$(join "$sed_or" "$@")
    printf '%s\n' "\($res\)"
}

# Print the command-line synopsis.
usage ()
{
    echo "usage: $(basename "$0") [--check] <file|dir>+"
    echo "       $(basename "$0") --print-dictionary"
}

# Print $1 as an absolute path; a relative path is taken relative to the
# current directory.
make_absolute ()
{
    local arg
    arg="$1"

    case "$arg" in
	/*)
	    ;;
	*)
	    arg=$(pwd -P)/"$arg"
	    ;;
    esac

    echo "$arg"
}

# Parse the command line.
#
# Sets print_dictionary=true for "--print-dictionary", check=true for
# "--check", and fills the array unique_files with the resolved, sorted,
# de-duplicated paths of all files named directly or found below the given
# directories, leaving out paths that contain "ChangeLog".
# Exits with status 1 on a usage error or a non-existent argument.
parse_args ()
{
    # Handle this before creating the temporary file, which would otherwise
    # be left behind by the early return.
    if [ $# -eq 1 ] && [ "$1" = "--print-dictionary" ]; then
	print_dictionary=true
	return
    fi

    while true; do
	case " $1 " in
	    " --check ")
		check=true
		shift
		;;
	    *)
		break
		;;
	esac
    done

    if [ $# -eq 0 ]; then
	usage
	exit 1
    fi

    local files
    files=$(mktemp) || exit 1
    trap 'rm -f "$files"' EXIT

    local arg
    for arg in "$@"; do
	if [ -f "$arg" ]; then
	    arg=$(make_absolute "$arg")
	    readlink -e "$arg" \
		     >> "$files"
	elif [ -d "$arg" ]; then
	    arg=$(make_absolute "$arg")
	    find "$arg" -type f -exec readlink -e {} + \
		 >> "$files"
	else
	    echo "Not a file or directory: $arg"
	    exit 1
	fi
    done

    mapfile -t unique_files \
	    < <(sort -u "$files" \
		    | grep -v ChangeLog)

    rm -f "$files"
    trap "" EXIT
}

# Make sure the downloaded dictionary exists in the cache, fetching it from
# $url if it is not there yet.
get_dictionary ()
{
    if [ -f "$dictionary" ]; then
	return
    fi

    local webpage
    webpage=$(mktemp) || exit 1
    trap 'rm -f "$webpage"' EXIT

    # Download web page containing table.  Bail out on failure rather than
    # caching an empty dictionary that later runs would silently reuse.
    if ! wget "$url" -O "$webpage"; then
	echo "Failed to download $url" >&2
	rm -f "$webpage"
	exit 1
    fi

    # Extract table from web page.
    awk '/<pre>/,/<\/pre>/' "$webpage" \
	| sed 's/<pre>//;s/<\/pre>//' \
	| grep -E -v "^$" \
	       > "$dictionary"

    rm -f "$webpage"
    trap "" EXIT
}

output_local_dictionary ()
{
    # Print the rules of the local dictionary file, leaving out comment
    # lines (starting with '#') and empty lines.
    grep -Ev "^#|^$" "$local_dictionary"
}

output_dictionaries ()
{
    # Print the local dictionary followed by the cached downloaded one,
    # dropping every rule that contains an upper-case ASCII letter.
    {
	output_local_dictionary
	cat "$dictionary"
    } | grep -Ev "[A-Z]"
}

parse_dictionary ()
{
    # Split every "word->replacement" rule over two parallel arrays:
    # words[n] is the misspelling, replacements[n] its correction.
    mapfile -t words \
	    < <(output_dictionaries | awk -F '->' '{print $1}')
    mapfile -t replacements \
	    < <(output_dictionaries | awk -F '->' '{print $2}')

    # Rules that must not be applied are blanked rather than removed, so
    # the two arrays stay aligned.
    local -A seen
    local idx word replacement
    for idx in "${!words[@]}"; do
	word=${words[idx]}
	replacement=${replacements[idx]}

	if [ "${seen[$word]}" == 1 ]; then
	    # An earlier rule for this word wins.  The local dictionary is
	    # listed first, so it overrides the wiki dictionary.
	    words[idx]=""
	    replacements[idx]=""
	    continue
	fi
	seen[$word]=1

	if [ "$word" = "$replacement" ]; then
	    # Identity rule, nothing to replace.
	    words[idx]=""
	    replacements[idx]=""
	fi
    done
}

print_dictionary ()
{
    # Print every rule whose word is non-empty as "word -> replacement",
    # one per line.
    local idx
    for idx in "${!words[@]}"; do
	if [ "${words[idx]}" != "" ]; then
	    echo "${words[idx]} -> ${replacements[idx]}"
	fi
    done
}

find_files_matching_words ()
{
    # Print those of the files given as arguments that contain at least one
    # dictionary word, using one combined grep pattern.  The pattern is
    # cached in a file whose name ends in a checksum of both dictionaries
    # and this script, so a change to any of them gives a new file name.
    local cache_id
    cache_id=$(cat "$local_dictionary" "$dictionary" "$this_script" | md5sum)
    # Keep only the checksum, dropping the file name part of the output.
    cache_id=${cache_id%% *}

    local patfile
    patfile="$cache_dir/$cache_file2".$cache_id

    local pat
    if [ ! -f "$patfile" ]; then
	# No pattern cached under this checksum: drop the other cached
	# patterns and build a new one.
	rm -f "$cache_dir/$cache_file2".*

	# Turn the first letter x of each non-empty word into [Xx], to
	# also match the capitalized word.
	local -a re_words
	mapfile -t re_words \
		< <(for f in "${words[@]}"; do
			if [ "$f" != "" ]; then
			    echo "$f"
			fi
		    done \
			| sed "s/^\(.\)/[\u\1\1]/")

	local before after
	before=$(grep_join "${grep_pre[@]}" "${grep_separators[@]}")
	after=$(grep_join "${grep_separators[@]}" "${grep_post[@]}")

	pat=$before$(grep_join "${re_words[@]}")$after

	echo "$pat" > "$patfile"
    else
	pat=$(< "$patfile")
    fi

    grep -E -l "$pat" "$@"
}

# Print those of the files $2... that contain the word $1, either as given
# or with its first letter capitalized, delimited by one of the recognized
# separators / line boundaries on both sides.
#
# Arguments:
#   $1    - word to look for (used as part of a grep -E pattern)
#   $2... - files to search
# Returns the exit status of grep: 0 if some file matched, 1 if none did.
find_files_matching_word ()
{
    local pat
    pat="$1"
    shift

    local before after
    before=$(grep_join \
		 "${grep_pre[@]}" \
		 "${grep_separators[@]}")
    after=$(grep_join \
		"${grep_separators[@]}" \
		"${grep_post[@]}")

    # Capitalized variant of the word.  ${var^} is available in every bash
    # that has mapfile and associative arrays, which this script needs
    # anyway, so no version check or sed fallback is required.
    local patc
    patc=${pat^}

    pat="$before($patc|$pat)$after"

    grep -E \
	-l \
	"$pat" \
	"$@"
}

# Replace, in place, every delimited occurrence of a word in a file, both
# as given and with its first letter capitalized (the replacement is then
# capitalized as well).
#
# Arguments:
#   $1 - word to replace (used as part of a sed pattern)
#   $2 - replacement
#   $3 - file to edit
replace_word_in_file ()
{
    local word
    word="$1"

    local replacement
    replacement="$2"

    local file
    file="$3"

    local before after
    before=$(sed_join \
		 "${sed_pre[@]}" \
		 "${sed_separators[@]}")
    after=$(sed_join \
		"${sed_separators[@]}" \
		"${sed_post[@]}")

    # Capitalized variants.  ${var^} is available in every bash that has
    # mapfile and associative arrays, which this script needs anyway, so no
    # version check or sed fallback is required.
    local wordc replacementc
    wordc=${word^}
    replacementc=${replacement^}

    # \1 and \2 put back the delimiters matched before and after the word.
    local repl1
    local repl2
    repl1="s%$before$word$after%\1$replacement\2%g"
    repl2="s%$before$wordc$after%\1$replacementc\2%g"

    sed -i \
	"$repl1;$repl2" \
	"$file"
}

# Apply one dictionary rule to a set of files and report the result.
#
# Arguments:
#   $1    - misspelled word
#   $2    - replacement
#   $3... - candidate files
# Outputs:
#   "WORD -> REPLACEMENT" if at least one file was modified,
#   "TODO: WORD -> REPLACEMENT" if the replacement contains a comma
#   (presumably a list of alternatives, which is not applied),
#   "TODO: WORD -> REPLACEMENT: replacement failed: FILE" for every file
#   that still matches the word afterwards.
replace_word_in_files ()
{
    local word
    word="$1"

    local replacement
    replacement="$2"

    shift 2

    local id
    id="$word -> $replacement"

    # Reduce set of files for sed to operate on.
    local -a files_matching_word
    mapfile -t files_matching_word \
	    < <(find_files_matching_word "$word" "$@")

    if [ ${#files_matching_word[@]} -eq 0 ]; then
	return
    fi

    if [[ "$replacement" == *,* ]]; then
	printf '%s\n' "TODO: $id"
	return
    fi

    # Compare checksums before and after to find out whether sed actually
    # changed anything.
    local changed f before after
    changed=false
    for f in "${files_matching_word[@]}"; do
	before=$(md5sum "$f")

	replace_word_in_file \
	    "$word" \
	    "$replacement" \
	    "$f"

	after=$(md5sum "$f")

	if [ "$after" != "$before" ]; then
	    changed=true
	fi
    done

    if $changed; then
	printf '%s\n' "$id"
    fi

    # Report files in which the word still occurs.  The rule text is passed
    # to printf as data, so characters like '%', '"' or '\' in it cannot
    # corrupt the message.
    while IFS= read -r f; do
	printf 'TODO: %s: replacement failed: %s\n' "$id" "$f"
    done < <(find_files_matching_word "$word" "${files_matching_word[@]}")
}

main ()
{
    # Script driver: parse the command line, load the dictionaries, then
    # print the dictionary, check the files, or fix them.
    local -a unique_files
    check=false
    print_dictionary=false
    parse_args "$@"

    get_dictionary

    local -a words
    local -a replacements
    parse_dictionary

    if [ "$print_dictionary" = true ]; then
	print_dictionary
	exit 0
    fi

    # Only files containing at least one dictionary word are worth handing
    # to the per-word replacement below.
    local -a files_matching_words
    mapfile -t files_matching_words \
	    < <(find_files_matching_words "${unique_files[@]}")

    # Nothing matched: nothing to report or fix.
    if [ ${#files_matching_words[@]} -eq 0 ]; then
	return
    fi

    # In check mode, matching files mean failure; nothing is modified.
    if [ "$check" = true ]; then
	exit 1
    fi

    # Apply every remaining rule; blanked-out rules are skipped.
    local idx
    for idx in "${!words[@]}"; do
	if [ "${words[idx]}" != "" ]; then
	    replace_word_in_files \
		"${words[idx]}" \
		"${replacements[idx]}" \
		"${files_matching_words[@]}"
	fi
    done
}

# Entry point: run main with the script's command-line arguments.
main "$@"