diff options
author | Louie Shprung <lshprung@scu.edu> | 2023-03-14 14:24:59 -0700 |
---|---|---|
committer | Louie Shprung <lshprung@scu.edu> | 2023-03-14 14:24:59 -0700 |
commit | 80dbf5531816e8c56b41bcf895e47d655a87b75a (patch) | |
tree | b428c9fcd293056ada7c164c547b0ac3d9b42d2e /src | |
parent | 0569fa14d9bfc07f5fcffebb3c53c3b9aa6a7592 (diff) |
Bump default version; restructure source to use pup for HTML parsing
Diffstat (limited to 'src')
-rwxr-xr-x | src/index.sh | 65 |
1 file changed, 30 insertions, 35 deletions
diff --git a/src/index.sh b/src/index.sh index 4d4b12b..05c066e 100755 --- a/src/index.sh +++ b/src/index.sh @@ -1,29 +1,20 @@ #!/usr/bin/env sh -DB_PATH="$1" -shift +create_table() { + sqlite3 "$DB_PATH" "CREATE TABLE searchIndex(id INTEGER PRIMARY KEY, name TEXT, type TEXT, path TEXT);" + sqlite3 "$DB_PATH" "CREATE UNIQUE INDEX anchor ON searchIndex (name, type, path);" +} get_title() { FILE="$1" - PATTERN="<title>.*\(Autoconf Archive\).*</title>" - - #Find pattern in file - grep -Eo "$PATTERN" "$FILE" | - #Remove tag - sed 's/<[^>]*>//g' | \ - #Remove '(automake)' + pup -p -f "$FILE" 'title text{}' | \ sed 's/(Autoconf Archive)//g' | \ - #Remove trailing space - sed 's/[ ]*$//g' | \ - #Replace '&' with '&' - sed 's/&/&/g' | \ - #Replace '<' with '<' - sed 's/</</g' + sed 's/\"/\"\"/g' } get_type() { - FILE="$(basename $1)" + FILE="$(basename "$1")" MACRO_PATTERN="^ax_" if echo "$FILE" | grep -q "$MACRO_PATTERN"; then @@ -39,23 +30,27 @@ insert() { sqlite3 "$DB_PATH" "INSERT INTO searchIndex(name, type, path) VALUES (\"$NAME\",\"$TYPE\",\"$PAGE_PATH\");" } -# Create table -sqlite3 "$DB_PATH" "CREATE TABLE searchIndex(id INTEGER PRIMARY KEY, name TEXT, type TEXT, path TEXT);" -sqlite3 "$DB_PATH" "CREATE UNIQUE INDEX anchor ON searchIndex (name, type, path);" - -# Get title and insert into table for each html file -while [ -n "$1" ]; do - unset PAGE_NAME - unset PAGE_TYPE - PAGE_NAME="$(get_title "$1")" - if [ -n "$PAGE_NAME" ]; then - PAGE_TYPE="$(get_type "$1")" - #get_type "$1" - if [ -z "$PAGE_TYPE" ]; then - PAGE_TYPE="Guide" +insert_pages() { + # Get title and insert into table for each html file + while [ -n "$1" ]; do + unset PAGE_NAME + unset PAGE_TYPE + PAGE_NAME="$(get_title "$1")" + if [ -n "$PAGE_NAME" ]; then + PAGE_TYPE="$(get_type "$1")" + #get_type "$1" + if [ -z "$PAGE_TYPE" ]; then + PAGE_TYPE="Guide" + fi + #echo "$PAGE_TYPE" + insert "$PAGE_NAME" "$PAGE_TYPE" "$(basename "$1")" fi - #echo "$PAGE_TYPE" - insert "$PAGE_NAME" "$PAGE_TYPE" 
"$(basename "$1")" - fi - shift -done + shift + done +} + +DB_PATH="$1" +shift + +create_table +insert_pages "$@" |