-
-
Notifications
You must be signed in to change notification settings - Fork 4.2k
/
check-pr.sh
executable file
· 222 lines (182 loc) · 6.21 KB
/
check-pr.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
#!/usr/bin/env bash
# SPDX-License-Identifier: MIT
# shellcheck disable=SC2016,SC2059
# This script is executed by GitHub Actions for every pull request opened.
# It currently accomplishes the following objectives:
#
# 1. Detect pages that were just copied (i.e. cp pages/{common,linux}/7z.md).
# 2. Detect English pages that were added in a platform specific directory although
# they already exist under 'common'.
# 3. Detect translated pages that do not exist as English pages yet.
# 4. Detect outdated pages. A page is marked as outdated when the number of
# commands differs from the number of commands in the English page or the
# contents of the commands differ from the English page.
# 5. Detect other miscellaneous anomalies in the pages folder.
#
# Results are printed to stdout, logs and errors to stderr.
#
# NOTE: must be run from the repository root directory to work correctly!
# NOTE: no `set -e`, failure of this script should not invalidate the build.
# Report an English page whose file name already exists under 'pages/common'.
#
# Globals:   MSG_EXISTS (read) - printf format taking the page and a directory
# Arguments: $1 - page path in the format
#                 'pages<.language_code>/platform/pagename.md'
# Outputs:   one "- ..." line on stdout when a duplicate is found
# Returns:   1 when the page is not inside the English 'pages' folder,
#            0 otherwise
function check_duplicates {
  local page=$1
  local -a segments

  # Split the path on '/' into: language folder, platform, file name.
  IFS='/' read -r -a segments <<< "$page"
  local language_folder=${segments[0]}
  local platform=${segments[1]}
  local file=${segments[2]}

  # Duplicates are only checked for English pages.
  [[ "$language_folder" == "pages" ]] || return 1

  # Pages that live in 'common' themselves cannot duplicate a common page.
  if [[ "$platform" != "common" && -f "pages/common/$file" ]]; then
    # "\x2d" is an escaped hyphen: the message is emitted as a "- " item.
    printf "\x2d $MSG_EXISTS" "$page" 'common'
  fi
}
# Report a translated page that has no English counterpart.
#
# The English path is derived by replacing the leading 'pages<...>/' folder
# of the given path with 'pages/'.
#
# Globals:   MSG_NOT_EXISTS (read) - printf format taking the page and the
#            expected English page
# Arguments: $1 - page path
# Outputs:   one "- ..." line on stdout when the English page is missing
# Returns:   1 when the given page already is the English page, 0 otherwise
function check_missing_english_page() {
  local page=$1
  # Strip the shortest prefix matching 'pages*/' (e.g. 'pages.de/').
  local relative_path=${page#pages*\/}
  local english_page="pages/$relative_path"

  # Nothing to compare against when the page itself is the English one.
  [[ "$page" != "$english_page" ]] || return 1

  if [[ -f "$english_page" ]]; then
    return 0
  fi
  printf "\x2d $MSG_NOT_EXISTS" "$page" "$english_page"
}
# Print the number of lines in a file that match a regular expression.
#
# Arguments: $1 - file to inspect
#            $2 - grep regular expression selecting the command lines
# Outputs:   the match count on stdout
# Returns:   the exit status of grep (non-zero when nothing matched)
function count_commands() {
  local -r target_file="$1" pattern="$2"

  grep --count "$pattern" "$target_file"
}
# Print the command lines of a page with their variable parts removed, so
# that two pages can be compared on the command skeleton only.
#
# Each matching line is normalised, in this order:
#   {{...}} -> {{}}      <...> -> removed      (...) -> removed
#   "..."   -> ""        '...' -> removed      backticks -> removed
#
# Arguments: $1 - file to inspect
#            $2 - grep regular expression selecting the command lines
# Outputs:   all normalised lines on one stdout line, joined by the first
#            character of IFS
function strip_commands() {
  local file="$1"
  local regex="$2"
  local -a normalised=()

  # One sed process applies the substitutions sequentially to every line.
  mapfile -t normalised < <(
    grep "$regex" "$file" |
      sed \
        -e 's/{{[^}]*}}/{{}}/g' \
        -e 's/<[^>]*>//g' \
        -e 's/([^)]*)//g' \
        -e 's/"[^"]*"/""/g' \
        -e "s/'[^']*'//g" \
        -e 's/`//g'
  )

  printf "%s\n" "${normalised[*]}"
}
# Report a translated page that is out of sync with its English page.
#
# A page counts as outdated when its number of command lines differs from
# the English page, or - with equal counts - when the stripped command
# contents differ.
#
# Globals:   MSG_OUTDATED (read) - printf format taking the page and a reason
# Arguments: $1 - page path
# Outputs:   one "- ..." line on stdout when the page is outdated
# Returns:   1 when the page is the English page itself or when no English
#            page exists, 0 otherwise
function check_outdated_page() {
  local page=$1
  local english_page="pages/${page#pages*\/}"
  # A command line is a line consisting of a single backtick-quoted span.
  local command_regex='^`[^`]\+`$'
  local english_count page_count english_stripped page_stripped
  local reason=""

  if [[ "$page" = "$english_page" || ! -f "$english_page" ]]; then
    return 1
  fi

  english_count=$(count_commands "$english_page" "$command_regex")
  page_count=$(count_commands "$page" "$command_regex")
  english_stripped=$(strip_commands "$english_page" "$command_regex")
  page_stripped=$(strip_commands "$page" "$command_regex")

  # The count mismatch takes precedence over a content mismatch.
  if [[ "$english_count" != "$page_count" ]]; then
    reason="based on number of commands"
  elif [[ "$english_stripped" != "$page_stripped" ]]; then
    reason="based on the command contents itself"
  fi

  if [[ -n "$reason" ]]; then
    printf "\x2d $MSG_OUTDATED" "$page" "$reason"
  fi
}
# Report a page that is listed in 'more-info-links.txt'.
#
# The page path is searched as a fixed string (-F), not as a regular
# expression: page names may contain regex metacharacters (e.g. '[.md'),
# which would otherwise make grep fail or match unrelated paths.
#
# Globals:   MSG_MORE_INFO (read) - printf format taking the page
# Arguments: $1 - page path
# Outputs:   one "- ..." line on stdout when the page is listed
function check_more_info_link() {
  local page=$1

  # '--' keeps a path starting with '-' from being parsed as an option.
  if grep -qF -- "$page" "more-info-links.txt"; then
    printf "\x2d $MSG_MORE_INFO" "$page"
  fi
}
# Report a page that is listed in 'page-titles.txt'.
#
# The page path is searched as a fixed string (-F), not as a regular
# expression: page names may contain regex metacharacters (e.g. '[.md'),
# which would otherwise make grep fail or match unrelated paths.
#
# Globals:   MSG_PAGE_TITLE (read) - printf format taking the page
# Arguments: $1 - page path
# Outputs:   one "- ..." line on stdout when the page is listed
function check_page_title() {
  local page=$1

  # '--' keeps a path starting with '-' from being parsed as an option.
  if grep -qF -- "$page" "page-titles.txt"; then
    printf "\x2d $MSG_PAGE_TITLE" "$page"
  fi
}
# Inspect the git diff against origin/main and run the per-page checks on
# every added, copied or modified path below the pages*/ folders.
#
# The stdout of the two helper scripts is captured in 'more-info-links.txt'
# and 'page-titles.txt' in the current directory; check_more_info_link and
# check_page_title search these files, which are removed again at the end.
#
# Globals:   MSG_IS_COPY (read) - printf format taking copy, source, percent
# Outputs:   findings on stdout, log messages on stderr
# Returns:   0 when the diff is empty, otherwise the exit status of the
#            final rm
function check_diff {
  local git_diff line entry
  local change file1 file2 percentage

  git_diff=$(git diff --name-status --find-copies-harder --diff-filter=ACM origin/main -- pages*/)

  if [[ -z $git_diff ]]; then
    echo 'Check PR: git diff looks fine, no interesting changes detected.' >&2
    return 0
  fi
  echo -e "Check PR: git diff:\n$git_diff" >&2

  python3 scripts/set-more-info-link.py -Sn > more-info-links.txt
  python3 scripts/set-page-title.py -Sn > page-titles.txt

  while read -r line; do
    # Each diff line holds tab-separated fields: status, path[, second path].
    readarray -td$'\t' entry < <(printf '%s' "$line")
    change="${entry[0]}"
    file1="${entry[1]}"
    file2="${entry[2]}"

    case "$change" in
      C*) # file2 is a copy of file1
        # Drop the 'C' and up to two leading zeros from the score so that
        # printf's %d does not read the number as octal.
        percentage=${change#C}
        percentage=${percentage#0}
        percentage=${percentage#0}
        printf "\x2d $MSG_IS_COPY" "$file2" "$file1" "$percentage"
        ;;
      A | M) # file1 was newly added (A) or modified (M)
        # Only newly added pages are checked for duplicates.
        if [[ "$change" == "A" ]]; then
          check_duplicates "$file1"
        fi
        check_missing_english_page "$file1"
        check_outdated_page "$file1"
        check_more_info_link "$file1"
        check_page_title "$file1"
        ;;
    esac
  done <<< "$git_diff"

  rm more-info-links.txt page-titles.txt
}
# Check the English pages/ folder for structural anomalies.
#
# For every name listed in PLATFORMS, verify that 'pages/<name>' is a
# directory and that each entry directly inside it is a regular file whose
# name ends in '.md'. Only this one directory level is inspected.
#
# Globals:   PLATFORMS (read) - whitespace-separated platform names
#            MSG_NOT_DIR, MSG_NOT_FILE, MSG_NOT_MD (read) - printf formats
#            taking the offending path
# Outputs:   one "- ..." line on stdout per anomaly
function check_structure {
  # Keep the loop variables local so they do not leak into, or clobber,
  # variables of the same name in the caller's scope.
  local platform page

  # PLATFORMS is deliberately unquoted: it is split into individual names.
  for platform in $PLATFORMS; do
    if [[ ! -d "pages/$platform" ]]; then
      printf "\x2d $MSG_NOT_DIR" "pages/$platform"
      continue
    fi

    for page in "pages/$platform"/*; do
      if [[ ! -f "$page" ]]; then
        printf "\x2d $MSG_NOT_FILE" "$page"
      elif [[ "$page" != *.md ]]; then
        printf "\x2d $MSG_NOT_MD" "$page"
      fi
    done
  done
}
###################################
# MAIN
###################################

# printf format strings for the findings; the check functions print each of
# them behind a "- " prefix.
MSG_EXISTS='The page `%s` already exists in the `%s` directory.\n'
MSG_NOT_EXISTS='The page `%s` does not exists as English page `%s` yet.\n'
MSG_OUTDATED='The page `%s` is outdated, %s.\n'
MSG_IS_COPY='The page `%s` seems to be a copy of `%s` (%d%% matching).\n'
MSG_NOT_DIR='The file `%s` does not look like a directory.\n'
MSG_NOT_FILE='The file `%s` does not look like a regular file.\n'
MSG_NOT_MD='The file `%s` does not have a `.md` extension.\n'
MSG_MORE_INFO='The page `%s` has an outdated more info link.\n'
MSG_PAGE_TITLE='The page `%s` has an outdated page title.\n'

# Names of the entries directly below pages/; iterated by check_structure.
PLATFORMS=$(ls pages/)

# Only run for a pull request in CI of the tldr-pages/tldr repository;
# anywhere else, log the refusal and exit successfully.
if [[ $CI != true || $GITHUB_REPOSITORY != "tldr-pages/tldr" || -z $PULL_REQUEST_ID ]]; then
  echo 'Not a pull request, refusing to run.' >&2
  exit 0
fi

check_diff
check_structure