blob: 819f228b073dc816655c50a6adc73437a0d2ee54 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
|
#!/usr/bin/env sh
# Remove duplicate Macro entries from searchIndex.
#
# A Macro row is deleted when another row with the same name and type has a
# smaller rowid, so the earliest-inserted row of each duplicate group is kept.
# Rows of any other type are left alone.
#
# Globals: DB_PATH (read) - SQLite database file passed to sqlite3.
clean_table() {
  # 'Macro' is a single-quoted SQL string literal: SQLite parses double-quoted
  # tokens as identifiers first, and sqlite3 builds that disable the
  # double-quoted-string fallback reject "Macro" as an unknown column.
  sqlite3 "$DB_PATH" "DELETE FROM searchIndex WHERE EXISTS (SELECT 1 FROM searchIndex s2 WHERE searchIndex.name = s2.name AND searchIndex.type = s2.type AND searchIndex.type = 'Macro' AND searchIndex.rowid > s2.rowid)"
}
# Ensure the searchIndex table and its uniqueness index exist.
#
# Runs two idempotent DDL statements (both use IF NOT EXISTS), each in its own
# sqlite3 invocation: first the table, then a unique index named "anchor" over
# (name, type, path).
#
# Globals: DB_PATH (read) - SQLite database file passed to sqlite3.
create_table() {
  for ddl in \
    "CREATE TABLE IF NOT EXISTS searchIndex(id INTEGER PRIMARY KEY, name TEXT, type TEXT, path TEXT);" \
    "CREATE UNIQUE INDEX IF NOT EXISTS anchor ON searchIndex (name, type, path);"
  do
    sqlite3 "$DB_PATH" "$ddl"
  done
}
# Print the <title> text of an HTML file, prepared for the search index.
#
# Arguments: $1 - path of the HTML file to read.
# Outputs:   the title text on stdout, with every " (Autoconf)" occurrence
#            removed and every double quote doubled.
get_title() {
  # Both substitutions work line by line, so one sed with two scripts gives
  # the same result as two chained sed processes.
  pup -p -f "$1" 'title text{}' |
    sed -e 's/ (Autoconf)//g' -e 's/\"/\"\"/g'
}
# Classify an HTML page by the text it contains.
#
# Arguments: $1 - path of the HTML file to inspect.
# Outputs:   "Builtin" when the file has a line matching the redirect notice
#            followed by a Limitations-of-*.html target, otherwise "Macro"
#            when it merely contains the redirect notice, otherwise nothing.
get_type() {
  # Order matters: the more specific Builtin pattern must be tried first,
  # because every page it matches also matches the Macro pattern.
  if grep -Eq 'The node you are looking for is at.*Limitations-of-.*\.html' "$1"; then
    echo "Builtin"
  elif grep -Eq 'The node you are looking for is at' "$1"; then
    echo "Macro"
  fi
}
# Insert one (name, type, path) row into searchIndex.
#
# Globals:   DB_PATH (read) - SQLite database file passed to sqlite3.
# Arguments: $1 - entry name; callers in this file pass it with every double
#                 quote already doubled ("" for ")
#            $2 - entry type
#            $3 - page path / anchor
# Returns:   the exit status of sqlite3.
#
# The values are emitted as single-quoted SQL string literals. SQLite parses
# double-quoted tokens as identifiers first, and sqlite3 builds that disable
# the double-quoted-string fallback reject them outright.
insert() {
  # Undo the callers' doubling of double quotes in the name, then double any
  # single quotes so every value is safe inside a '...' literal.
  # Function-specific variable names keep this from overwriting the global
  # TYPE that the option parser also uses.
  _insert_name=$(printf '%s\n' "$1" | sed -e 's/""/"/g' -e "s/'/''/g")
  _insert_type=$(printf '%s\n' "$2" | sed -e "s/'/''/g")
  _insert_path=$(printf '%s\n' "$3" | sed -e "s/'/''/g")
  sqlite3 "$DB_PATH" "INSERT INTO searchIndex(name, type, path) VALUES ('$_insert_name','$_insert_type','$_insert_path');"
}
# Feed every index anchor found in the given index pages to insert_term.
#
# Arguments: $@ - paths of index HTML files; processing stops at the first
#                 empty argument.
# Returns:   always 0.
#
# For each file, grep extracts the portion of each line running from
# "<a href" through the last "></a>", and each extracted piece is handed to
# insert_term.
insert_index_terms() {
  for index_page in "$@"; do
    [ -n "$index_page" ] || break
    grep -Eo "<a href.*></a>" "$index_page" |
      while read -r anchor; do
        insert_term "$anchor"
      done
  done
  return 0
}
# Insert one searchIndex row per HTML page that has a title.
#
# Arguments: $@ - paths of HTML files; processing stops at the first empty
#                 argument.
# Globals:   PAGE_NAME, PAGE_TYPE (written) - title and type of the page
#            currently being processed.
# Returns:   always 0.
#
# Pages whose get_title output is empty are skipped. The type comes from
# get_type and falls back to "Guide" when get_type prints nothing. The stored
# path is the file's basename.
insert_pages() {
  for html_page in "$@"; do
    [ -n "$html_page" ] || break
    unset PAGE_TYPE
    PAGE_NAME="$(get_title "$html_page")"
    [ -n "$PAGE_NAME" ] || continue
    PAGE_TYPE="$(get_type "$html_page")"
    : "${PAGE_TYPE:=Guide}"
    insert "$PAGE_NAME" "$PAGE_TYPE" "$(basename "$html_page")"
  done
  return 0
}
# Insert one index entry described by an HTML anchor element.
#
# Arguments: $1 - the anchor markup (an "<a href=...>...</a>" fragment).
# Globals:   INDEX_TYPE (read) - entry type recorded for the term.
#
# The entry name is the anchor's text with newlines removed and every double
# quote doubled before it is handed to insert; the path is the anchor's href.
insert_term() {
  # printf rather than echo: under sh, echo may expand backslash escapes
  # (dash and other XSI echos do), which would corrupt entries containing "\".
  # Function-specific variable names avoid overwriting the global TYPE.
  _term_name="$(printf '%s\n' "$1" | pup -p 'a text{}' | sed 's/"/""/g' | tr -d '\n')"
  _term_path="$(printf '%s\n' "$1" | pup -p 'a attr{href}')"
  insert "$_term_name" "$INDEX_TYPE" "$_term_path"
}
# ---------------------------------------------------------------------------
# Entry point.
#
# Usage: <script> [-i|--index TYPE] DB_PATH FILE...
#
#   default            : each FILE is a manual page; insert one row per page.
#   -i | --index TYPE  : each FILE is an index page; insert every anchor in it
#                        as an entry of type TYPE, then drop duplicate Macros.
#
# MODE selects the branch below. It is deliberately not called TYPE, because
# insert() assigns to a global of that name.
# ---------------------------------------------------------------------------
MODE="PAGES"

# Check flags
while [ "$#" -gt 0 ]; do
  case "$1" in
    -i|--index)
      # The flag needs a value; without this check the second shift would
      # fail and the database path would silently end up empty.
      if [ "$#" -lt 2 ]; then
        printf '%s: option %s requires an entry type\n' "${0##*/}" "$1" >&2
        exit 2
      fi
      MODE="INDEX"
      INDEX_TYPE="$2"
      shift 2
      ;;
    *)
      break
      ;;
  esac
done

# The first remaining argument is the database; refuse to run without one
# rather than handing sqlite3 an empty path.
if [ "$#" -lt 1 ] || [ -z "$1" ]; then
  printf 'Usage: %s [-i|--index TYPE] DB_PATH FILE...\n' "${0##*/}" >&2
  exit 2
fi
DB_PATH="$1"
shift

create_table
case "$MODE" in
  PAGES)
    insert_pages "$@"
    ;;
  INDEX)
    insert_index_terms "$@"
    clean_table
    ;;
esac
|