diff options
author | Louie Shprung <lshprung@tutanota.com> | 2024-07-18 18:32:20 -0400 |
---|---|---|
committer | Louie Shprung <lshprung@tutanota.com> | 2024-07-18 18:32:20 -0400 |
commit | 7210a1a695d59d928fc32079d09c1d965cf6e9ad (patch) | |
tree | 5e1b86ec0b8170dfad394025b436cae7186584f0 /src/scripts/gnu/index-terms-class.sh | |
parent | 95d98c95e84891b2809ce88bb6640e6881357451 (diff) |
Diffstat (limited to 'src/scripts/gnu/index-terms-class.sh')
-rwxr-xr-x | src/scripts/gnu/index-terms-class.sh | 35 |
1 file changed, 35 insertions, 0 deletions
#!/usr/bin/env sh

# Scan HTML index pages for entries carrying a given class attribute and
# insert every linked term into the database at DB_PATH.
#
# Usage: index-terms-class.sh TYPE INDEX_ENTRY_CLASS DB_PATH [INDEX_PAGE...]
#   TYPE               type value passed to insert for every term found
#   INDEX_ENTRY_CLASS  text expected right after "class=" on an entry line
#                      (interpolated into an extended regular expression)
#   DB_PATH            database path handed to create_table and insert
#   INDEX_PAGE         HTML files to scan, processed in the order given
#
# Requires the external `pup` command to extract link text and href.
# create_table and insert are expected to come from the helper scripts
# sourced below.

# shellcheck source=../create_table.sh
. "$(dirname "$0")"/../create_table.sh
# shellcheck source=../insert.sh
. "$(dirname "$0")"/../insert.sh

# Insert the terms of every index page given as an argument.
# Globals:   INDEX_ENTRY_CLASS (read)
# Arguments: paths of the HTML index pages
# Returns:   always 0
insert_index_terms() {
  for page in "$@"; do
    # An empty argument must not end the run early; skip it and keep going.
    if [ -z "$page" ]; then
      printf '%s: skipping empty index page argument\n' "${0##*/}" >&2
      continue
    fi

    # Keep only the part of each line from the class attribute through the
    # closing anchor tag, and hand every such fragment to insert_term.
    grep -Eo -e "class=${INDEX_ENTRY_CLASS}.*<a href.*</a>" -- "$page" |
      while IFS= read -r line; do
        insert_term "$line"
      done
  done

  # The page loop has always reported success regardless of individual
  # inserts; keep that so existing callers see the same exit status.
  return 0
}

# Extract the name and target of one index entry and insert it.
# Globals:   DB_PATH, TYPE (read); LINK, NAME, PAGE_PATH (written)
# Arguments: $1 - HTML fragment containing the entry's <a href=...> element
insert_term() {
  LINK="$1"
  # Double every '"' in the link text (presumably quoting for the SQL built
  # by insert - confirm against insert.sh) and drop newlines so the name is
  # a single line.
  NAME="$(printf '%s\n' "$LINK" | pup -p 'a text{}' | sed 's/"/""/g' | tr -d '\n')"
  PAGE_PATH="$(printf '%s\n' "$LINK" | pup -p 'a attr{href}')"

  insert "$DB_PATH" "$NAME" "$TYPE" "$PAGE_PATH"
}

# Entry point: validate the arguments, create the table, insert the terms.
# Arguments: TYPE INDEX_ENTRY_CLASS DB_PATH [INDEX_PAGE...]
# Returns:   2 when fewer than three arguments are given
main() {
  if [ "$#" -lt 3 ]; then
    printf 'Usage: %s TYPE INDEX_ENTRY_CLASS DB_PATH [INDEX_PAGE...]\n' \
      "${0##*/}" >&2
    return 2
  fi

  TYPE="$1"
  INDEX_ENTRY_CLASS="$2"
  DB_PATH="$3"
  shift 3

  create_table "$DB_PATH"
  insert_index_terms "$@"
}

main "$@"