summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Louie Shprung <lshprung@scu.edu> 2022-11-22 21:45:29 -0800
committer Louie Shprung <lshprung@scu.edu> 2022-11-22 21:45:29 -0800
commit 20aa42ad2c7740d64247e900e02638317b650209 (patch)
tree ada1ba445a2eefcafcec49c47a148cd6adca1980 /src
parent fd69ee62aba50908a3aee851ec16753cbd99e48d (diff)
Rewrite in POSIX shell
Diffstat (limited to 'src')
-rwxr-xr-x src/index.rb 40
-rwxr-xr-x src/index.sh 66
2 files changed, 66 insertions, 40 deletions
diff --git a/src/index.rb b/src/index.rb
deleted file mode 100755
index 81b042a..0000000
--- a/src/index.rb
+++ /dev/null
@@ -1,40 +0,0 @@
-require 'pathname'
-
-puts %Q[
- CREATE TABLE searchIndex(id INTEGER PRIMARY KEY, name TEXT, type TEXT, path TEXT);
- CREATE UNIQUE INDEX anchor ON searchIndex (name, type, path);
-]
-
-INSERT_SQL = %Q[
- INSERT INTO searchIndex(name, type, path) VALUES ('%s','%s','%s');
-]
-
-PATTERN = %r[<title>(.*)\(Autoconf\)(.*)</title>]
-
-BUILTIN_PATTERN = /The node you are looking for is at.*Limitations-of-.*\.html/
-MACRO_PATTERN = /The node you are looking for is at/
-
-def quote(s)
- s.gsub(/&amp;/, '&').gsub(/'/, "\\'").gsub(/&lt;/, '<')
-end
-
-ARGV.each do |arg|
- Pathname.glob(arg) do |path|
- macro_match = path.each_line.lazy.map { |line| MACRO_PATTERN.match(line) }.find { |m| m }
- builtin_match = path.each_line.lazy.map { |line| BUILTIN_PATTERN.match(line) }.find { |m| m }
- if builtin_match
- type = "Builtin"
- elsif macro_match
- type = "Macro"
- else
- type = "Guide"
- end
-
- match = path.each_line.lazy.map { |line| PATTERN.match(line) }.find { |m| m }
- if match
- printf INSERT_SQL, quote(match[1]), type, path.basename
- else
- $stderr.puts "%{path.basename}: no title found"
- end
- end
-end
diff --git a/src/index.sh b/src/index.sh
new file mode 100755
index 0000000..e81a0f7
--- /dev/null
+++ b/src/index.sh
@@ -0,0 +1,66 @@
+#!/usr/bin/env sh
+# Usage: index.sh DB_PATH HTML_FILE...
+# Fail early with a usage message when the database path is missing.
+DB_PATH="${1:?usage: $0 DB_PATH HTML_FILE...}"
+shift
+# Print the title of HTML page $1, taken from its <title> element, with
+# the markup and the "(Autoconf)" marker removed. Prints nothing when the
+# file has no matching <title> line.
+get_title() {
+    FILE="$1"
+    PATTERN="<title>.*\(Autoconf\).*</title>"
+
+    # sed steps, in order: strip tags, drop '(Autoconf)', trim trailing
+    # spaces, decode '&lt;', decode '&amp;'. '&amp;' is decoded last so
+    # '&amp;lt;' yields the text '&lt;'; the replacement is '\&' because
+    # a bare '&' in a sed replacement stands for the whole match.
+    grep -Eo "$PATTERN" "$FILE" |
+        sed -e 's/<[^>]*>//g' \
+            -e 's/(Autoconf)//g' \
+            -e 's/[ ]*$//g' \
+            -e 's/&lt;/</g' \
+            -e 's/&amp;/\&/g'
+}
+
+# Classify HTML page $1 by the notice text it contains.
+# Prints "Builtin" when a line has "The node you are looking for is at"
+# followed by a Limitations-of-*.html reference, "Macro" when the notice
+# appears without one, and nothing when the notice is absent.
+get_type() {
+    FILE="$1"
+    # Test the narrower pattern first: every Builtin page also matches Macro.
+    if grep -Eq "The node you are looking for is at.*Limitations-of-.*\.html" "$FILE"; then
+        echo "Builtin"
+    elif grep -Eq "The node you are looking for is at" "$FILE"; then
+        echo "Macro"
+    fi
+}
+
+# Insert row (name=$1, type=$2, path=$3) into $DB_PATH; ' is doubled so quotes stay data.
+insert() {
+    NAME="$(printf '%s\n' "$1" | sed "s/'/''/g")"
+    TYPE="$(printf '%s\n' "$2" | sed "s/'/''/g")"
+    PAGE_PATH="$(printf '%s\n' "$3" | sed "s/'/''/g")"
+    sqlite3 "$DB_PATH" "INSERT INTO searchIndex(name, type, path) VALUES ('$NAME','$TYPE','$PAGE_PATH');"
+}
+
+# Create the table and the index that keeps (name, type, path) unique
+sqlite3 "$DB_PATH" "CREATE TABLE searchIndex(id INTEGER PRIMARY KEY, name TEXT, type TEXT, path TEXT);"
+sqlite3 "$DB_PATH" "CREATE UNIQUE INDEX anchor ON searchIndex (name, type, path);"
+
+# Get title and insert into table for each html file given after DB_PATH.
+# Iterating over "$@" instead of testing "$1" for emptiness keeps an empty
+# argument from ending the run before the remaining files are indexed.
+for PAGE in "$@"; do
+    PAGE_NAME="$(get_title "$PAGE")"
+    if [ -z "$PAGE_NAME" ]; then
+        # Report pages without a usable <title> instead of dropping them silently
+        echo "$(basename "$PAGE"): no title found" >&2
+        continue
+    fi
+
+    # get_type prints nothing for pages matching neither pattern; those
+    # are recorded as "Guide"
+    PAGE_TYPE="$(get_type "$PAGE")"
+    insert "$PAGE_NAME" "${PAGE_TYPE:-Guide}" "$(basename "$PAGE")"
+done