Remove reference to dwarf2_per_cu_data::dwarf2_per_objfile in queue_and_load_all_dwo_tus
[deliverable/binutils-gdb.git] / gdb / contrib / words.sh
CommitLineData
496af5c8
TV
1#!/bin/sh
2
b811d2c2 3# Copyright (C) 2019-2020 Free Software Foundation, Inc.
496af5c8
TV
4# This program is free software; you can redistribute it and/or modify
5# it under the terms of the GNU General Public License as published by
6# the Free Software Foundation; either version 3 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>.
16
3cf2f237 17# This script intends to facilitate spell checking of source/doc files.
496af5c8 18# It:
3cf2f237 19# - transforms the files into a list of lowercase words
496af5c8
TV
20# - prefixes each word with the frequency
21# - keeps only the words whose frequency lies within a given range
22# - sorts the words, longest first
23#
3cf2f237
TV
24# If '-c' is passed as option, it operates on the C comments only, rather than
25# on the entire file.
26#
496af5c8
TV
27# For:
28# ...
3cf2f237
TV
29# $ files=$(find gdb -type f \( -name "*.c" -o -name "*.h" \))
30# $ ./gdb/contrib/words.sh -c $files
496af5c8
TV
31# ...
32# it generates a list of ~15000 words prefixed with frequency.
33#
34# This could be used to generate a dictionary that is kept as part of the
35# sources, against which new code can be checked, generating a warning or
36# error. The hope is that misspellings would trigger this frequently, and rare
37# words rarely, otherwise the burden of updating the dictionary would be too
38# much.
39#
40# And for:
41# ...
3cf2f237
TV
42# $ files=$(find gdb -type f \( -name "*.c" -o -name "*.h" \))
43# $ ./gdb/contrib/words.sh -c -f 1 $files
496af5c8
TV
44# ...
45# it generates a list of ~5000 words with frequency 1.
46#
47# This can be used to scan for misspellings manually.
48#
49
# usage_error MESSAGE
# Report a command-line usage error on stderr and exit with status 2.
usage_error () {
    printf '%s: %s\n' "${0##*/}" "$1" >&2
    exit 2
}

# check_freq OPTION VALUE
# Ensure VALUE, the argument given to OPTION, is a non-negative integer.
# A missing argument arrives here as the empty string and is rejected too.
check_freq () {
    case "$2" in
        '' | *[!0-9]*)
            usage_error "option $1 requires a non-negative integer argument"
            ;;
    esac
}

# Frequency bounds; 0 means "no bound on this side".  They start out empty
# so the parser can tell whether a bound was given explicitly.
minfreq=
maxfreq=
# Whether to operate on the text of C comments only (-c).
c=false

while [ $# -gt 0 ]; do
    case "$1" in
        -c)
            c=true
            shift
            ;;
        --freq | -f)
            # Validate before "shift 2": with the argument missing the
            # shift fails, and in shells that merely return an error the
            # loop would never terminate.
            check_freq "$1" "${2-}"
            minfreq=$2
            maxfreq=$2
            shift 2
            ;;
        --min)
            check_freq "$1" "${2-}"
            minfreq=$2
            # Only a lower bound so far: leave the upper side unbounded.
            if [ "$maxfreq" = "" ]; then
                maxfreq=0
            fi
            shift 2
            ;;
        --max)
            check_freq "$1" "${2-}"
            maxfreq=$2
            # Only an upper bound so far: leave the lower side unbounded.
            if [ "$minfreq" = "" ]; then
                minfreq=0
            fi
            shift 2
            ;;
        *)
            # First word that is not a known option: everything from here
            # on is treated as an input file.
            break
            ;;
    esac
done

# No frequency option at all: keep every word.
if [ "$minfreq" = "" ] && [ "$maxfreq" = "" ]; then
    minfreq=0
    maxfreq=0
fi
88
# Temporary file holding the awk program that extracts the text of C
# comments (used when -c is given).  Bail out if it cannot be created
# rather than carrying on with an empty file name.
awkfile=$(mktemp) || exit 1
trap 'rm -f "$awkfile"' EXIT
# Not every shell runs the EXIT trap when it is killed by a signal; turn the
# usual ones into a regular exit so the temporary file is still removed.
trap 'exit 1' HUP INT TERM

# The here-document delimiter is quoted, so the awk program below is written
# out literally (no shell expansion, hence no escaping of "$0").
cat > "$awkfile" <<'EOF'
# Print, for every input line that contains or lies inside a /* ... */
# comment, the comment text found on that line (one output line per such
# input line; several comments on one line are joined with a space).
# Only /* */ delimiters are recognized: string literals and // comments are
# not understood.

BEGIN {
    in_comment = 0
}

# A comment left open at the end of one file must not leak into the next.
FNR == 1 {
    in_comment = 0
}

{
    rest = $0
    text = ""
    # True if any part of this line belongs to a comment.
    touched = in_comment

    # Walk the line delimiter by delimiter so that any number of comments
    # opening and closing on the same line is tracked correctly.
    while (rest != "") {
        if (in_comment) {
            pos = index(rest, "*/")
            if (pos == 0) {
                text = text rest
                rest = ""
            } else {
                text = text substr(rest, 1, pos - 1)
                rest = substr(rest, pos + 2)
                in_comment = 0
            }
        } else {
            pos = index(rest, "/*")
            if (pos == 0) {
                rest = ""
            } else {
                rest = substr(rest, pos + 2)
                in_comment = 1
                touched = 1
                # Keep words of adjacent comments from running together.
                if (text != "") {
                    text = text " "
                }
            }
        }
    }

    if (touched) {
        print text
    }
}
EOF
119
# Stabilize sort.
export LC_ALL=C

# read_input [FILE...]
# Write the text to be spell-checked to stdout: the C comment text only when
# -c was given ($c is "true"), the whole input otherwise.
read_input () {
    if $c; then
        awk \
            -f "$awkfile" \
            -- "$@"
    else
        cat -- "$@"
    fi
}

# split_words
# Turn stdin into one lowercase word per line.  Punctuation, brackets,
# quotes, whitespace and digit runs all act as separators.  The sed
# expressions rely on "\n" in the replacement text and "\t" inside a bracket
# expression, as accepted by GNU sed.  Empty lines left behind by adjacent
# separators are dropped so that they are not counted as a word.
split_words () {
    sed \
        -e 's/[!"?;:%^$~#{}`&=@,. \t\/_()|<>\+\*-]/\n/g' \
        -e 's/\[/\n/g' \
        -e 's/\]/\n/g' \
        -e "s/'/\n/g" \
        -e 's/[0-9][0-9]*/\n/g' \
        -e 's/[ \t]*//g' \
        | tr '[:upper:]' '[:lower:]' \
        | sed -e '/^$/d'
}

# filter_by_freq MIN MAX
# Pass through the "COUNT WORD" lines of stdin (uniq -c format) whose COUNT
# lies in [MIN, MAX]; a bound of 0 means "unbounded on that side".  The
# bounds are handed to awk as variables instead of being pasted into the
# program text, so an unexpected value cannot alter the awk program.
filter_by_freq () {
    awk -v min="$1" -v max="$2" '{
        if ((min + 0 == 0 || min + 0 <= $1 + 0) &&
            (max + 0 == 0 || $1 + 0 <= max + 0)) {
            print $0
        }
    }'
}

# longest_first
# Reorder the lines of stdin by decreasing length: prefix each line with
# its length, sort numerically in reverse, then strip the prefix again.
longest_first () {
    awk '{ print length($0) " " $0; }' \
        | sort -n -r \
        | cut -d ' ' -f 2-
}

read_input "$@" \
    | split_words \
    | sort \
    | uniq -c \
    | filter_by_freq "$minfreq" "$maxfreq" \
    | longest_first
This page took 0.08013 seconds and 4 git commands to generate.