author     epoch <epoch@hack.thebackupbox.net>  2022-01-21 19:35:42 +0000
committer  epoch <epoch@hack.thebackupbox.net>  2022-01-21 19:35:42 +0000
commit     9ae383cdf96656e03fdfdfe4d38ba6ee40c4512d (patch)
tree       4d262fb10eeff49dbdc5e4a284134333dd607fe3 /urititle
parent     c6b3499c83bd95bd111f3c658c6df68c4381992f (diff)
download   uritools-9ae383cdf96656e03fdfdfe4d38ba6ee40c4512d.tar.gz
           uritools-9ae383cdf96656e03fdfdfe4d38ba6ee40c4512d.zip
removed the share/urn stuff; that lives in a repo named "data". urititle now checks for data files, and it does an unshorten attempt first.
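The unshorten step added below only runs for http/https URIs and only dereferences one level of redirection. unshorten.sh itself is not part of this diff; a minimal sketch of what such a helper might look like, assuming it prints the target of a single redirect and prints nothing otherwise, is:

  #!/usr/bin/env bash
  # hypothetical unshorten.sh: print the Location target of one redirect, if any
  uri="$1"
  location="$(curl -gsI "$uri" | grep -i '^Location: ' | head -n1 | cut '-d ' -f2- | tr -d '\r')"
  if [ "$location" ];then
    printf "%s\n" "$location"
  fi

Because the caller below only replaces $uri when the helper prints something, a helper that stays silent on non-redirects leaves the original URI untouched.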
Diffstat (limited to 'urititle')
-rwxr-xr-x  urititle  54
1 file changed, 34 insertions, 20 deletions
diff --git a/urititle b/urititle
index efac514..f687bd0 100755
--- a/urititle
+++ b/urititle
@@ -1,9 +1,23 @@
#!/usr/bin/env bash
-scheme=$(printf "%s\n" "$1" | uricut -s)
-path=$(printf "%s\n" "$1" | uricut -p)
-qs=$(printf "%s\n" "$1" | uricut -q)
-domain=$(printf "%s\n" "$1" | uricut -d)
-port=$(printf "%s\n" "$1" | uricut -P)
+uri="$1"
+
+scheme=$(printf "%s\n" "$uri" | uricut -s)
+path=$(printf "%s\n" "$uri" | uricut -p)
+qs=$(printf "%s\n" "$uri" | uricut -q)
+domain=$(printf "%s\n" "$uri" | uricut -d)
+port=$(printf "%s\n" "$uri" | uricut -P)
+if [ "$scheme" = "http" -o "$scheme" = "https" ];then
+ ## dereference ONLY ONCE. give up after that.
+ newuri="$(unshorten.sh "$uri")"
+ if [ "$newuri" ];then
+ uri="$newuri"
+ fi
+# newuri="$(unshorten.sh "$uri")"
+# if [ "$newuri" ];then
+# uri="$newuri"
+# fi
+fi
+
if [ ! "$port" ];then
if [ "$scheme" = "https" ];then
port=443
@@ -21,7 +35,7 @@ http*)
if [ "$port" ];then
UA="Mozilla/5.0 (impersonator)"
# content_type="$(printf "HEAD %s HTTP/1.1\r\nHost: %s\r\nUser-Agent: %s\r\n\r\n" "$path" "$domain" "$UA" | ncat -4 $SSL "$domain" "$port" | grep -i '^Content-Type: ' | head -n1 | cut '-d ' -f2 | cut '-d;' -f1 | tr -d '\r\n')"
- a_header="$((curl -gA "$UA" -Lsi "$1" || echo curl failed) | head -c 10000 | egrep -ai '^curl failed|^Location: |^Content-Type: ' | head -n1 | tr -d '\r\n')"
+ a_header="$((curl -gA "$UA" -Lsi "$uri" || echo curl failed) | head -c 10000 | egrep -ai '^curl failed|^Location: |^Content-Type: ' | head -n1 | tr -d '\r\n')"
if printf "%s\n" "${a_header}" | grep -i '^Content-Type: ' 2>&1 >/dev/null 2>&1;then
content_type="$(printf '%s\n' "${a_header}" | cut '-d ' -f2- | cut '-d;' -f1)"
fi
@@ -34,7 +48,7 @@ http*)
### main.lv doesn't have content-type on some pages, so if the content-type is missing or empty, we're assuming html
if [ "${content_type}" = "text/html" -o "${content_type}" = "application/xhtml+xml" -o "${content_type}" = "" ];then
# title="$(printf "GET %s HTTP/1.1\r\nHost: %s\r\nUser-Agent: %s\r\n\r\n" "$path" "$domain" "$UA" | ncat -4 $SSL "$domain" "$port" | head -c 10000 | tr -d '\n' | tr '<' '\n' | grep -A 10 '^title>' | grep -B 10 '^\/title>' | cut '-d>' -f2)"
- title="$(curl -gsi "$1" | head -c 1000000 | tr -d '\n' | tr '<' '\n' | grep -iA 10 '^title' | grep -iB 10 '^\/title>' | cut '-d>' -f2 | tr '\t' ' ' | sed 's/^ *//g' | sed 's/ *$//g' | grep .)"
+ title="$(curl -gsi "$uri" | head -c 1000000 | tr -d '\n' | tr '<' '\n' | grep -iA 10 '^title' | grep -iB 10 '^\/title>' | cut '-d>' -f2 | tr '\t' ' ' | sed 's/^ *//g' | sed 's/ *$//g' | grep .)"
if [ "${content_type}" = "" ];then
printf "WTF: header: %s\n" "${a_header}"
printf "WARNING: NO CONTENT-TYPE RETURNED FROM SERVER. Assuming text/html. title: %s\n" "$title" | html_entities_decode
@@ -51,7 +65,7 @@ gemini)
path="${path}?${qs}"
fi
first=1
- gemini-get "$1" | while read -r line;do
+ gemini-get "$uri" | while read -r line;do
if [ "$first" ];then
unset first
type="$(printf "%s\n" "$line" | tr -s ' ' | cut '-d ' -f2 | tr -d '\r')"
@@ -70,7 +84,7 @@ gemini)
done | head -n1
;;
magnet)
- printf "title: %s\n" "$(printf "%s\n" "$1" | tr '&' '\n' | grep ^dn= | cut -d= -f2- | uriunescape)"
+ printf "title: %s\n" "$(printf "%s\n" "$uri" | tr '&' '\n' | grep ^dn= | cut -d= -f2- | uriunescape)"
;;
dns)
if [ "$qs" ] ; then
@@ -92,27 +106,27 @@ dns)
echo # the tr above strips out the trailing \n
;;
ftp)
- curl -g "$1" 2>&1 | tail -n1
+ curl -g "$uri" 2>&1 | tail -n1
;;
gopher)
if [ "$qs" ];then
path="${path}?${qs}"
fi
- type="$(printf "%s\n" "$1" | uricut -p | cut -b2- | cut -b1)"
+ type="$(printf "%s\n" "$uri" | uricut -p | cut -b2- | cut -b1)"
if [ "$type" = 1 -o "$type" = "" ];then
- printf "title: %s\n" "$(curl -gs "$1" | grep ^i | head -n1 | cut -f1 | cut -b2-)"
+ printf "title: %s\n" "$(curl -gs "$uri" | grep ^i | head -n1 | cut -f1 | cut -b2-)"
elif [ "$type" = 0 ];then
- printf "title: %s\n" "$(curl -gs "$1" | head -n1)"
+ printf "title: %s\n" "$(curl -gs "$uri" | head -n1)"
elif [ "$type" = "h" ];then
- printf "title: %s\n" "$(curl -gs "$1" | head -c 1000000 | tr -d '\n' | tr '<' '\n' | grep -iA 10 '^title' | grep -iB 10 '^\/title>' | cut '-d>' -f2 | tr '\t' ' ' | sed 's/^ *//g' | sed 's/ *$//g' | grep .)"
+ printf "title: %s\n" "$(curl -gs "$uri" | head -c 1000000 | tr -d '\n' | tr '<' '\n' | grep -iA 10 '^title' | grep -iB 10 '^\/title>' | cut '-d>' -f2 | tr '\t' ' ' | sed 's/^ *//g' | sed 's/ *$//g' | grep .)"
else
printf "title: don't know how to get title of non-1 gopher links"
fi
;;
urn)
- nid="$(printf "%s\n" "$1" | cut -d: -f2)"
- nss="$(printf "%s\n" "$1" | cut -d: -f3)"
- nss2="$(printf "%s\n" "$1" | cut -d: -f4)"
+ nid="$(printf "%s\n" "$uri" | cut -d: -f2)"
+ nss="$(printf "%s\n" "$uri" | cut -d: -f3)"
+ nss2="$(printf "%s\n" "$uri" | cut -d: -f4)"
if [ "$nid" = "ietf" ];then
if [ "$nss" = "rfc" ];then
cat "/var/db/rfc/rfc${nss2}.json" | jq .title
@@ -129,11 +143,11 @@ urn)
fi
if [ "$nid" = "mitre" ];then
if [ "$nss" = "cve" ];then
- urititle "$(urnresolve "$1")"
+ urititle "$(urnresolve "$uri")"
exit 0
fi
fi
- urnresolve "$1"
+ urnresolve "$uri"
;;
ssh)
if [ ! "$port" ];then
@@ -150,5 +164,5 @@ file)
fi
;;
*)
- printf "DONT KNOW HOW TO GET TITLE FOR THIS URL: %s\n" "$1"
+ printf "DONT KNOW HOW TO GET TITLE FOR THIS URL: %s\n" "$uri"
esac
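Usage sketch: urititle takes a single URI as its only argument and dispatches on the scheme. For a case fully visible in this diff, a magnet link carrying a dn= parameter (assuming uriunescape decodes percent-escapes) would yield something like:

  $ urititle 'magnet:?xt=urn:btih:abc123&dn=example%20file'
  title: example file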