summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Louie Shprung <lshprung@scu.edu> 2023-03-14 14:24:59 -0700
committer Louie Shprung <lshprung@scu.edu> 2023-03-14 14:24:59 -0700
commit 80dbf5531816e8c56b41bcf895e47d655a87b75a (patch)
tree b428c9fcd293056ada7c164c547b0ac3d9b42d2e /src
parent 0569fa14d9bfc07f5fcffebb3c53c3b9aa6a7592 (diff)
Bump default version; restructure source to use pup for html parsing
Diffstat (limited to 'src')
-rwxr-xr-x src/index.sh 65
1 files changed, 30 insertions, 35 deletions
diff --git a/src/index.sh b/src/index.sh
index 4d4b12b..05c066e 100755
--- a/src/index.sh
+++ b/src/index.sh
@@ -1,29 +1,20 @@
#!/usr/bin/env sh
-DB_PATH="$1"
-shift
+create_table() {
+ sqlite3 "$DB_PATH" "CREATE TABLE searchIndex(id INTEGER PRIMARY KEY, name TEXT, type TEXT, path TEXT);"
+ sqlite3 "$DB_PATH" "CREATE UNIQUE INDEX anchor ON searchIndex (name, type, path);"
+}
get_title() {
FILE="$1"
- PATTERN="<title>.*\(Autoconf Archive\).*</title>"
-
- #Find pattern in file
- grep -Eo "$PATTERN" "$FILE" |
- #Remove tag
- sed 's/<[^>]*>//g' | \
- #Remove '(automake)'
+ pup -p -f "$FILE" 'title text{}' | \
sed 's/(Autoconf Archive)//g' | \
- #Remove trailing space
- sed 's/[ ]*$//g' | \
- #Replace '&amp' with '&'
- sed 's/&amp/&/g' | \
- #Replace '&lt;' with '<'
- sed 's/&lt;/</g'
+ sed 's/\"/\"\"/g'
}
get_type() {
- FILE="$(basename $1)"
+ FILE="$(basename "$1")"
MACRO_PATTERN="^ax_"
if echo "$FILE" | grep -q "$MACRO_PATTERN"; then
@@ -39,23 +30,27 @@ insert() {
sqlite3 "$DB_PATH" "INSERT INTO searchIndex(name, type, path) VALUES (\"$NAME\",\"$TYPE\",\"$PAGE_PATH\");"
}
-# Create table
-sqlite3 "$DB_PATH" "CREATE TABLE searchIndex(id INTEGER PRIMARY KEY, name TEXT, type TEXT, path TEXT);"
-sqlite3 "$DB_PATH" "CREATE UNIQUE INDEX anchor ON searchIndex (name, type, path);"
-
-# Get title and insert into table for each html file
-while [ -n "$1" ]; do
- unset PAGE_NAME
- unset PAGE_TYPE
- PAGE_NAME="$(get_title "$1")"
- if [ -n "$PAGE_NAME" ]; then
- PAGE_TYPE="$(get_type "$1")"
- #get_type "$1"
- if [ -z "$PAGE_TYPE" ]; then
- PAGE_TYPE="Guide"
+insert_pages() {
+ # Get title and insert into table for each html file
+ while [ -n "$1" ]; do
+ unset PAGE_NAME
+ unset PAGE_TYPE
+ PAGE_NAME="$(get_title "$1")"
+ if [ -n "$PAGE_NAME" ]; then
+ PAGE_TYPE="$(get_type "$1")"
+ #get_type "$1"
+ if [ -z "$PAGE_TYPE" ]; then
+ PAGE_TYPE="Guide"
+ fi
+ #echo "$PAGE_TYPE"
+ insert "$PAGE_NAME" "$PAGE_TYPE" "$(basename "$1")"
fi
- #echo "$PAGE_TYPE"
- insert "$PAGE_NAME" "$PAGE_TYPE" "$(basename "$1")"
- fi
- shift
-done
+ shift
+ done
+}
+
+DB_PATH="$1"
+shift
+
+create_table
+insert_pages "$@"