diff options
author | Louie Shprung <lshprung@scu.edu> | 2022-11-22 21:45:29 -0800 |
---|---|---|
committer | Louie Shprung <lshprung@scu.edu> | 2022-11-22 21:45:29 -0800 |
commit | 20aa42ad2c7740d64247e900e02638317b650209 (patch) | |
tree | ada1ba445a2eefcafcec49c47a148cd6adca1980 /src/index.sh | |
parent | fd69ee62aba50908a3aee851ec16753cbd99e48d (diff) |
Rewrite in POSIX shell
Diffstat (limited to 'src/index.sh')
-rwxr-xr-x | src/index.sh | 66 |
1 files changed, 66 insertions, 0 deletions
#!/usr/bin/env sh
#
# src/index.sh - fill a docset search index from Autoconf HTML pages.
#
# Usage: index.sh DB_PATH [HTML_FILE...]
#
# Creates the searchIndex table and its unique index in the SQLite database
# at DB_PATH, then adds one row per HTML_FILE whose <title> contains
# "(Autoconf)". Files without such a title are skipped.
#
# Exit status: 0 on success, 1 if any sqlite3 call failed, 2 on bad usage.
# Written for POSIX sh: no `local`, arrays or `[[ ]]`.

#######################################
# Print the page title of an HTML file, without markup.
# Arguments: $1 - path to the HTML file
# Outputs:   the title with tags, the "(Autoconf)" marker and trailing
#            spaces removed and common HTML entities decoded; nothing when
#            the file has no <title> containing "(Autoconf)"
#######################################
get_title() {
  # `grep -o` keeps only the matching <title>...</title> span of a line.
  # `&amp;` is decoded last so that e.g. `&amp;lt;` becomes the literal
  # text `&lt;` rather than being decoded twice into `<`.
  grep -Eo -- '<title>.*\(Autoconf\).*</title>' "$1" |
    sed \
      -e 's/<[^>]*>//g' \
      -e 's/(Autoconf)//g' \
      -e 's/[ ]*$//' \
      -e 's/&lt;/</g' \
      -e 's/&gt;/>/g' \
      -e 's/&quot;/"/g' \
      -e 's/&amp;/\&/g'
}

#######################################
# Classify an HTML page by its content.
# Arguments: $1 - path to the HTML file
# Outputs:   "Builtin" when the page contains "The node you are looking for
#            is at" followed by a Limitations-of-*.html target, "Macro" when
#            it contains that phrase with any other target, nothing
#            otherwise (the caller supplies the default type)
#######################################
get_type() {
  # Order matters: the Builtin pattern is a special case of the Macro one.
  if grep -Eq -- 'The node you are looking for is at.*Limitations-of-.*\.html' "$1"; then
    printf '%s\n' 'Builtin'
  elif grep -Eq -- 'The node you are looking for is at' "$1"; then
    printf '%s\n' 'Macro'
  fi
}

#######################################
# Print a value as a single-quoted SQL string literal.
# Arguments: $1 - raw text
# Outputs:   the text wrapped in single quotes with embedded single quotes
#            doubled, so it can be spliced into a statement safely
#######################################
sql_quote() {
  printf "'%s'" "$(printf '%s\n' "$1" | sed "s/'/''/g")"
}

#######################################
# Add one entry to the searchIndex table.
# Globals:   DB_PATH (read) - path of the SQLite database
# Arguments: $1 - entry name, $2 - entry type, $3 - page path
# Returns:   the exit status of sqlite3
#######################################
insert() {
  # Values are passed as properly quoted SQL string literals; double-quoted
  # "strings" are identifiers in SQL and are rejected by some sqlite3 builds.
  # OR IGNORE lets the script be re-run without tripping the unique index.
  sqlite3 "$DB_PATH" "INSERT OR IGNORE INTO searchIndex(name, type, path) VALUES ($(sql_quote "$1"), $(sql_quote "$2"), $(sql_quote "$3"));"
}

#######################################
# Entry point: create the schema and index every HTML file given.
# Globals:   DB_PATH (written)
# Arguments: $1 - database path, remaining - HTML files to index
# Returns:   0 on success, 1 on sqlite3 failure, 2 on bad usage
#######################################
main() {
  if [ "$#" -lt 1 ] || [ -z "$1" ]; then
    printf 'Usage: %s DB_PATH [HTML_FILE...]\n' "${0##*/}" >&2
    return 2
  fi

  DB_PATH=$1
  shift

  # Without the table every later insert would fail, so stop right here.
  sqlite3 "$DB_PATH" \
    "CREATE TABLE IF NOT EXISTS searchIndex(id INTEGER PRIMARY KEY, name TEXT, type TEXT, path TEXT);
     CREATE UNIQUE INDEX IF NOT EXISTS anchor ON searchIndex (name, type, path);" ||
    return 1

  main_status=0
  for page in "$@"; do
    [ -n "$page" ] || continue

    page_name=$(get_title "$page")
    # Pages without an "(Autoconf)" title are not indexed.
    [ -n "$page_name" ] || continue

    page_type=$(get_type "$page")
    # Anything that is neither Builtin nor Macro is indexed as a Guide.
    insert "$page_name" "${page_type:-Guide}" "$(basename -- "$page")" ||
      main_status=1
  done

  return "$main_status"
}

main "$@"