From 9ae383cdf96656e03fdfdfe4d38ba6ee40c4512d Mon Sep 17 00:00:00 2001 From: epoch Date: Fri, 21 Jan 2022 19:35:42 +0000 Subject: removed the share/urn stuff. that lives in a repo named "data". uri title now checks for data files. urititle does a unshorten attempt first. --- .gitignore | 1 + Makefile | 1 - share/urn/fcc:frs | 22 ---------------------- urititle | 54 ++++++++++++++++++++++++++++++++++-------------------- urnresolve | 10 +++++----- 5 files changed, 40 insertions(+), 48 deletions(-) delete mode 100644 share/urn/fcc:frs diff --git a/.gitignore b/.gitignore index 6879c00..b48bde2 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ uriunescape urijoin shorten urimatchpairs +sqesc diff --git a/Makefile b/Makefile index 5d74d6c..6683bab 100644 --- a/Makefile +++ b/Makefile @@ -51,5 +51,4 @@ install: all install -t $(PREFIX)/bin shorten install -t $(PREFIX)/bin urnresolve install -t $(PREFIX)/bin urnstart - install -Dt $(PREFIX)/share/urn share/urn/* chgrp shorten $(PREFIX)/bin/shorten && chmod g+s $(PREFIX)/bin/shorten diff --git a/share/urn/fcc:frs b/share/urn/fcc:frs deleted file mode 100644 index bee917a..0000000 --- a/share/urn/fcc:frs +++ /dev/null @@ -1,22 +0,0 @@ -1 462.5625 -2 462.5875 -3 462.6125 -4 462.6375 -5 462.6625 -6 462.6875 -7 462.7125 -8 467.5625 -9 467.5875 -10 467.6125 -11 467.6375 -12 467.6625 -13 467.6875 -14 467.7125 -15 462.5500 -16 462.5750 -17 462.6000 -18 462.6250 -19 462.6500 -20 462.6750 -21 462.7000 -22 462.7250 diff --git a/urititle b/urititle index efac514..f687bd0 100755 --- a/urititle +++ b/urititle @@ -1,9 +1,23 @@ #!/usr/bin/env bash -scheme=$(printf "%s\n" "$1" | uricut -s) -path=$(printf "%s\n" "$1" | uricut -p) -qs=$(printf "%s\n" "$1" | uricut -q) -domain=$(printf "%s\n" "$1" | uricut -d) -port=$(printf "%s\n" "$1" | uricut -P) +uri="$1" + +scheme=$(printf "%s\n" "$uri" | uricut -s) +path=$(printf "%s\n" "$uri" | uricut -p) +qs=$(printf "%s\n" "$uri" | uricut -q) +domain=$(printf "%s\n" "$uri" | uricut -d) +port=$(printf "%s\n" "$uri" | uricut -P) +if [ "$scheme" = "http" -o "$scheme" = "https" ];then + ## dereference ONLY ONCE. give up after that. + newuri="$(unshorten.sh "$uri")" + if [ "$newuri" ];then + uri="$newuri" + fi +# newuri="$(unshorten.sh "$uri")" +# if [ "$newuri" ];then +# uri="$newuri" +# fi +fi + if [ ! "$port" ];then if [ "$scheme" = "https" ];then port=443 @@ -21,7 +35,7 @@ http*) if [ "$port" ];then UA="Mozilla/5.0 (impersonator)" # content_type="$(printf "HEAD %s HTTP/1.1\r\nHost: %s\r\nUser-Agent: %s\r\n\r\n" "$path" "$domain" "$UA" | ncat -4 $SSL "$domain" "$port" | grep -i '^Content-Type: ' | head -n1 | cut '-d ' -f2 | cut '-d;' -f1 | tr -d '\r\n')" - a_header="$((curl -gA "$UA" -Lsi "$1" || echo curl failed) | head -c 10000 | egrep -ai '^curl failed|^Location: |^Content-Type: ' | head -n1 | tr -d '\r\n')" + a_header="$((curl -gA "$UA" -Lsi "$uri" || echo curl failed) | head -c 10000 | egrep -ai '^curl failed|^Location: |^Content-Type: ' | head -n1 | tr -d '\r\n')" if printf "%s\n" "${a_header}" | grep -i '^Content-Type: ' 2>&1 >/dev/null 2>&1;then content_type="$(printf '%s\n' "${a_header}" | cut '-d ' -f2- | cut '-d;' -f1)" fi @@ -34,7 +48,7 @@ http*) ### main.lv doesn't have content-type on some pages, so if the content-type is missing or empty, we're assuming html if [ "${content_type}" = "text/html" -o "${content_type}" = "application/xhtml+xml" -o "${content_type}" = "" ];then # title="$(printf "GET %s HTTP/1.1\r\nHost: %s\r\nUser-Agent: %s\r\n\r\n" "$path" "$domain" "$UA" | ncat -4 $SSL "$domain" "$port" | head -c 10000 | tr -d '\n' | tr '<' '\n' | grep -A 10 '^title>' | grep -B 10 '^\/title>' | cut '-d>' -f2)" - title="$(curl -gsi "$1" | head -c 1000000 | tr -d '\n' | tr '<' '\n' | grep -iA 10 '^title' | grep -iB 10 '^\/title>' | cut '-d>' -f2 | tr '\t' ' ' | sed 's/^ *//g' | sed 's/ *$//g' | grep .)" + title="$(curl -gsi "$uri" | head -c 1000000 | tr -d '\n' | tr '<' '\n' | grep -iA 10 '^title' | grep -iB 10 '^\/title>' | cut '-d>' -f2 | tr '\t' ' ' | sed 's/^ *//g' | sed 's/ *$//g' | grep .)" if [ "${content_type}" = "" ];then printf "WTF: header: %s\n" "${a_header}" printf "WARNING: NO CONTENT-TYPE RETURNED FROM SERVER. Assuming text/html. title: %s\n" "$title" | html_entities_decode @@ -51,7 +65,7 @@ gemini) path="${path}?${qs}" fi first=1 - gemini-get "$1" | while read -r line;do + gemini-get "$uri" | while read -r line;do if [ "$first" ];then unset first type="$(printf "%s\n" "$line" | tr -s ' ' | cut '-d ' -f2 | tr -d '\r')" @@ -70,7 +84,7 @@ gemini) done | head -n1 ;; magnet) - printf "title: %s\n" "$(printf "%s\n" "$1" | tr '&' '\n' | grep ^dn= | cut -d= -f2- | uriunescape)" + printf "title: %s\n" "$(printf "%s\n" "$uri" | tr '&' '\n' | grep ^dn= | cut -d= -f2- | uriunescape)" ;; dns) if [ "$qs" ] ; then @@ -92,27 +106,27 @@ dns) echo # the tr above strips out the trailing \n ;; ftp) - curl -g "$1" 2>&1 | tail -n1 + curl -g "$uri" 2>&1 | tail -n1 ;; gopher) if [ "$qs" ];then path="${path}?${qs}" fi - type="$(printf "%s\n" "$1" | uricut -p | cut -b2- | cut -b1)" + type="$(printf "%s\n" "$uri" | uricut -p | cut -b2- | cut -b1)" if [ "$type" = 1 -o "$type" = "" ];then - printf "title: %s\n" "$(curl -gs "$1" | grep ^i | head -n1 | cut -f1 | cut -b2-)" + printf "title: %s\n" "$(curl -gs "$uri" | grep ^i | head -n1 | cut -f1 | cut -b2-)" elif [ "$type" = 0 ];then - printf "title: %s\n" "$(curl -gs "$1" | head -n1)" + printf "title: %s\n" "$(curl -gs "$uri" | head -n1)" elif [ "$type" = "h" ];then - printf "title: %s\n" "$(curl -gs "$1" | head -c 1000000 | tr -d '\n' | tr '<' '\n' | grep -iA 10 '^title' | grep -iB 10 '^\/title>' | cut '-d>' -f2 | tr '\t' ' ' | sed 's/^ *//g' | sed 's/ *$//g' | grep .)" + printf "title: %s\n" "$(curl -gs "$uri" | head -c 1000000 | tr -d '\n' | tr '<' '\n' | grep -iA 10 '^title' | grep -iB 10 '^\/title>' | cut '-d>' -f2 | tr '\t' ' ' | sed 's/^ *//g' | sed 's/ *$//g' | grep .)" else printf "title: don't know how to get title of non-1 gopher links" fi ;; urn) - nid="$(printf "%s\n" "$1" | cut -d: -f2)" - nss="$(printf "%s\n" "$1" | cut -d: -f3)" - nss2="$(printf "%s\n" "$1" | cut -d: -f4)" + nid="$(printf "%s\n" "$uri" | cut -d: -f2)" + nss="$(printf "%s\n" "$uri" | cut -d: -f3)" + nss2="$(printf "%s\n" "$uri" | cut -d: -f4)" if [ "$nid" = "ietf" ];then if [ "$nss" = "rfc" ];then cat "/var/db/rfc/rfc${nss2}.json" | jq .title @@ -129,11 +143,11 @@ urn) fi if [ "$nid" = "mitre" ];then if [ "$nss" = "cve" ];then - urititle "$(urnresolve "$1")" + urititle "$(urnresolve "$uri")" exit 0 fi fi - urnresolve "$1" + urnresolve "$uri" ;; ssh) if [ ! "$port" ];then @@ -150,5 +164,5 @@ file) fi ;; *) - printf "DONT KNOW HOW TO GET TITLE FOR THIS URL: %s\n" "$1" + printf "DONT KNOW HOW TO GET TITLE FOR THIS URL: %s\n" "$uri" esac diff --git a/urnresolve b/urnresolve index d59558f..4da8671 100755 --- a/urnresolve +++ b/urnresolve @@ -45,12 +45,12 @@ if [ "$nid" = "mitre" ];then fi fi -if [ "$nid" = "fcc" ];then - if [ "$nss" = "frs" ];then - grep "^$nss2\s" $PREFIX/share/urn/fcc:frs | cut -f2 - fi -fi if [ "$nid" = "btih" ];then btih2magnet "$nss" fi + +file="/usr/local/share/urn/$nid:$nss" +if [ -e "$file" ];then + grep "^${nss2}\s" "$file" | cut -f2- +fi -- cgit v1.2.3