diff options
author | Louie S <louie@example.com> | 2024-06-19 17:02:00 -0400 |
---|---|---|
committer | Louie S <louie@example.com> | 2024-06-19 17:02:55 -0400 |
commit | b949528c9a8bb9c3c7bd6723bd81d2c917b6d50e (patch) | |
tree | a8286dffa80c26aa5656c47a6d4b6e3498220de1 /src/index-terms.sh | |
parent | ff4a1dd269f2fb81f95d6f07f1a0dca998a8a4ba (diff) |
Update script
Diffstat (limited to 'src/index-terms.sh')
-rwxr-xr-x | src/index-terms.sh | 33 |
1 file changed, 33 insertions, 0 deletions
#!/usr/bin/env sh
#
# src/index-terms.sh
#
# Usage: index-terms.sh TYPE DB_PATH [INDEX_PAGE...]
#
# Scans each INDEX_PAGE for entries of the form "<a href=...>...</a>:" and
# hands every one to insert() as (DB_PATH, link text, TYPE, href).
#
# Requires the `pup` command-line HTML parser on PATH. create_table() and
# insert() are expected to come from the two scripts sourced below; their
# exact behavior is not visible from this file.

# shellcheck source=../../../scripts/create_table.sh
. "$(dirname "$0")"/../../../scripts/create_table.sh
# shellcheck source=../../../scripts/insert.sh
. "$(dirname "$0")"/../../../scripts/insert.sh

# Fail with a usage message instead of a bare "shift" error when the two
# mandatory arguments are missing.
if [ "$#" -lt 2 ]; then
  printf 'Usage: %s TYPE DB_PATH [INDEX_PAGE...]\n' "${0##*/}" >&2
  exit 2
fi

# Without pup every term would be inserted with an empty name and path, so
# stop before touching the database.
if ! command -v pup >/dev/null 2>&1; then
  printf '%s: pup is required but was not found on PATH\n' "${0##*/}" >&2
  exit 1
fi

TYPE="$1"
shift
DB_PATH="$1"
shift

# insert_index_terms PAGE...
#   For every non-empty PAGE argument, extract each "<a href...</a>:" match
#   and pass it to insert_term, one match per call.
insert_index_terms() {
  # Loop on the argument count rather than on "$1" being non-empty, so an
  # empty argument is skipped instead of silently ending the whole run.
  while [ "$#" -gt 0 ]; do
    if [ -n "$1" ]; then
      # "--" keeps a page name that starts with "-" from being parsed as a
      # grep option. IFS= stops read from trimming whitespace.
      grep -Eo -- "<a href.*</a>:" "$1" | while IFS= read -r line; do
        insert_term "$line"
      done
    fi

    shift
  done
}

# insert_term LINK
#   LINK is one "<a href=...>...</a>:" fragment. Its anchor text becomes the
#   term name and its href the page path; both are passed to insert() along
#   with the global DB_PATH and TYPE.
#   Sets the globals LINK, NAME and PAGE_PATH (POSIX sh has no `local`).
insert_term() {
  LINK="$1"
  # printf rather than echo: some sh implementations expand backslash
  # escapes in echo's arguments, which would corrupt the HTML fragment.
  # sed drops doubled double-quotes; tr joins multi-line text into one line.
  NAME="$(printf '%s\n' "$LINK" | pup -p 'a text{}' | sed 's/\"\"//g' | tr -d \\n)"
  PAGE_PATH="$(printf '%s\n' "$LINK" | pup -p 'a attr{href}')"

  insert "$DB_PATH" "$NAME" "$TYPE" "$PAGE_PATH"
}

create_table "$DB_PATH"
insert_index_terms "$@"