#!/usr/bin/env bash
#
# Print a short, human-readable "title" for the URI given as the first
# argument, dispatching on the URI scheme (http/https, gemini, magnet, dns,
# ftp, gopher, urn, ssh, file).
#
# Usage: <this-script> URI
#
# External commands used (none of them are defined in this file):
#   uricut, unshorten.sh, curl, html_entities_decode, gemini-get, uriunescape,
#   query_param, dig, jq, urnresolve, urititle, nc, mime-type
#
# Output goes to stdout. Most branches print "title: <text>"; a few print the
# raw result of the underlying tool (see the individual functions).

#######################################
# Extract one component of a URI via `uricut`.
# Arguments: $1 - uricut flag (-s, -p, -q, -d or -P)
#            $2 - the URI
# Outputs:   whatever uricut prints for that flag
#######################################
uri_part() {
  printf '%s\n' "$2" | uricut "$1"
}

#######################################
# Read an HTML document (optionally preceded by HTTP headers) on stdin and
# print the text of its <title> element with surrounding blanks trimmed.
# Only the first 1000000 bytes are looked at.
# Outputs:   the title text; nothing when no non-empty title is found
# Returns:   exit status of the final `grep .` (non-zero when nothing printed)
#######################################
extract_html_title() {
  # Newlines are removed and every '<' becomes a line break, so each tag
  # starts its own line: "title>TEXT" followed by "/title>".
  # -a keeps grep from answering "Binary file matches" on pages whose bytes
  # are not valid in the current locale.
  head -c 1000000 \
    | tr -d '\n' \
    | tr '<' '\n' \
    | grep -aiA 10 '^title' \
    | grep -aiB 10 '^/title>' \
    | cut -d'>' -f2 \
    | tr '\t' ' ' \
    | sed -e 's/^ *//' -e 's/ *$//' \
    | grep -a .
}

#######################################
# Title for http/https URIs.
# Arguments: $1 - URI to fetch
#            $2 - port; when empty nothing is fetched or printed
# Outputs:   "title: ..." for HTML documents, otherwise the first interesting
#            response header line (or "curl failed")
#######################################
title_http() {
  local uri=$1 port=$2
  local ua='Mozilla/5.0 (impersonator)'
  local header content_type='' title

  if [[ -z "$port" ]]; then
    return 0
  fi

  # First of: our own "curl failed" marker, a Location header, or a
  # Content-Type header, taken from the first 10000 bytes of the response.
  header=$(
    { curl -gA "$ua" -Lsi "$uri" || echo 'curl failed'; } \
      | head -c 10000 \
      | grep -Eai '^curl failed|^Location: |^Content-Type: ' \
      | head -n1 \
      | tr -d '\r\n'
  )

  if printf '%s\n' "$header" | grep -qi '^Content-Type: '; then
    # Drop the header name and any ";charset=..." style parameters.
    content_type=$(printf '%s\n' "$header" | cut -d' ' -f2- | cut -d';' -f1)
  fi
  if printf '%s\n' "$header" | grep -qi '^curl failed'; then
    # Any non-empty, non-HTML value routes to the "print the header" branch
    # below instead of attempting a second fetch.
    content_type='curl failed. cert expired? dunno yet. TODO: code openssl checker.'
  fi

  # main.lv doesn't have content-type on some pages, so if the content-type
  # is missing or empty, we're assuming html.
  if [[ "$content_type" == 'text/html' \
    || "$content_type" == 'application/xhtml+xml' \
    || -z "$content_type" ]]; then
    title=$(curl -gsi "$uri" | extract_html_title)
    if [[ -z "$content_type" ]]; then
      printf 'WTF: header: %s\n' "$header"
      printf 'WARNING: NO CONTENT-TYPE RETURNED FROM SERVER. Assuming text/html. title: %s\n' "$title" \
        | html_entities_decode
    else
      printf 'title: %s\n' "$title" | html_entities_decode
    fi
  else
    printf '%s\n' "$header"
  fi
}

#######################################
# Read a gemini response (header line, then body) on stdin and print a title.
# The second whitespace-separated field of the header line is taken as the
# MIME type.
# Outputs:   text/gemini - "title: <first line starting with '#'>", or an
#                          empty "title: " when the body has no such line
#            text/html   - the <title> text
#            text/plain  - "title: <first body line>"
#            otherwise   - "title: <header line without its first field>"
#######################################
gemini_title_from_response() {
  local header mime line saw_body=''

  read -r header || return 0
  mime=$(printf '%s\n' "$header" | tr -s ' ' | cut -d' ' -f2 | tr -d '\r')

  case "$mime" in
    text/gemini)
      while read -r line; do
        saw_body=1
        if [[ "$line" == '#'* ]]; then
          printf 'title: %s\n' "$(printf '%s\n' "$line" | sed 's/^#* *//')"
          return 0
        fi
      done
      if [[ -n "$saw_body" ]]; then
        printf 'title: \n'
      fi
      ;;
    text/html)
      # NOTE(review): unlike every other branch this prints the bare title
      # without the "title: " prefix; kept as-is, confirm whether intended.
      extract_html_title
      ;;
    text/plain)
      printf 'title: %s\n' "$(head -n1)"
      ;;
    *)
      printf 'title: %s\n' \
        "$(printf '%s\n' "$header" | tr '\t' ' ' | tr -s ' ' | cut -d' ' -f2-)"
      ;;
  esac
}

#######################################
# Title for gemini URIs.
# Arguments: $1 - URI handed to gemini-get
# Outputs:   at most one line, see gemini_title_from_response
#######################################
title_gemini() {
  gemini-get "$1" | gemini_title_from_response | head -n1
}

#######################################
# "Title" for dns URIs: the short answer from dig, joined on one line.
# Arguments: $1 - name to look up (URI path)
#            $2 - query string; its "class" and "type" parameters, read
#                 through query_param, become dig's -c / -t options
#            $3 - server to query (URI domain); may be empty
# Globals:   QUERY_STRING (written and exported for query_param)
#######################################
title_dns() {
  local name=$1 qs=$2 server=$3
  local rr_class rr_type
  local -a dig_args=()

  if [[ -n "$qs" ]]; then
    # Lower-cased, with ';' separators rewritten to '&'.
    QUERY_STRING=$(printf '%s\n' "$qs" | tr 'A-Z' 'a-z' | tr ';' '&')
    export QUERY_STRING
    rr_class=$(query_param class)
    rr_type=$(query_param type)
    if [[ -n "$rr_class" ]]; then
      dig_args+=(-c "$rr_class")
    fi
    if [[ -n "$rr_type" ]]; then
      dig_args+=(-t "$rr_type")
    fi
  fi

  if [[ -n "$server" ]]; then
    # With a server present the path carries a leading '/', which is not
    # part of the name.
    name=${name#/}
    dig_args+=("$name" +short "@$server")
  else
    dig_args+=("$name" +short)
  fi

  dig "${dig_args[@]}" | tr '\n' ' '
  printf '\n' # the tr above strips out the trailing \n
}

#######################################
# Title for gopher URIs, chosen by the second byte of the URI path
# (NOTE(review): presumably the gopher item type - confirm).
# Arguments: $1 - URI
#######################################
title_gopher() {
  local uri=$1 item_type
  item_type=$(uri_part -p "$uri" | cut -b2)

  case "$item_type" in
    1 | '')
      # First line starting with 'i': its first tab field minus the 'i'.
      printf 'title: %s\n' \
        "$(curl -gs "$uri" | grep '^i' | head -n1 | cut -f1 | cut -b2-)"
      ;;
    0)
      printf 'title: %s\n' "$(curl -gs "$uri" | head -n1)"
      ;;
    h)
      printf 'title: %s\n' "$(curl -gs "$uri" | extract_html_title)"
      ;;
    *)
      printf "title: don't know how to get title of non-1 gopher links\n"
      ;;
  esac
}

#######################################
# Title for urn URIs. The URN is split on ':' into nid (2nd field), nss (3rd)
# and nss2 (4th).
#   urn:ietf:rfc:N        - .title from /var/db/rfc/rfcN.json
#   urn:phrack:ISSUE      - article count from /var/db/phrack/meta/ISSUE.tsv
#   urn:phrack:ISSUE:ART  - formatted row(s) from the same file
#   urn:mitre:cve:...     - urititle of whatever urnresolve returns
#   anything else         - raw output of urnresolve
# NOTE(review): nss/nss2 come straight from the URI and are interpolated into
# file paths and a grep pattern without validation.
# Arguments: $1 - URI
#######################################
title_urn() {
  local uri=$1 nid nss nss2 meta
  nid=$(printf '%s\n' "$uri" | cut -d: -f2)
  nss=$(printf '%s\n' "$uri" | cut -d: -f3)
  nss2=$(printf '%s\n' "$uri" | cut -d: -f4)

  if [[ "$nid" == 'ietf' && "$nss" == 'rfc' ]]; then
    jq .title "/var/db/rfc/rfc${nss2}.json"
    return 0
  fi

  if [[ "$nid" == 'phrack' ]]; then
    meta="/var/db/phrack/meta/${nss}.tsv"
    if [[ -z "$nss2" ]]; then
      printf 'issue %s of phrack has %d articles\n' "$nss" "$(grep -c '.' "$meta")"
      return 0
    fi
    # Split "ISSUE/ARTICLE" into two fields, then feed the tab-separated
    # fields to printf four at a time.
    grep "^${nss}/${nss2}"$'\t' "$meta" \
      | sed 's|/|\t|' \
      | tr '\n\t' '\0\0' \
      | xargs -n4 -0 printf 'issue %2s article %2s %79s by [%s]\n'
    return 0
  fi

  if [[ "$nid" == 'mitre' && "$nss" == 'cve' ]]; then
    urititle "$(urnresolve "$uri")"
    return 0
  fi

  urnresolve "$uri"
}

#######################################
# Title for ssh URIs: the first line the server sends on connect.
# Arguments: $1 - host
#            $2 - port; defaults to 22 when empty
#######################################
title_ssh() {
  local host=$1 port=${2:-22}
  printf 'title: %s\n' "$(printf '' | nc "$host" "$port" | head -n1)"
}

#######################################
# "Title" for file URIs: output of `mime-type -v` for the unescaped path.
# Arguments: $1 - URI path (still escaped)
#######################################
title_file() {
  local file
  file=$(uriunescape "$1")
  if [[ -e "$file" ]]; then
    mime-type -v "$file"
  else
    printf "file '%s' doesn't exist.\n" "$file"
  fi
}

#######################################
# Entry point.
# Arguments: $1 - the URI
#######################################
main() {
  local uri=${1-}
  local scheme path qs domain port resolved

  # Components are taken from the URI as given, i.e. before any shortener
  # resolution below.
  scheme=$(uri_part -s "$uri")
  path=$(uri_part -p "$uri")
  qs=$(uri_part -q "$uri")
  domain=$(uri_part -d "$uri")
  port=$(uri_part -P "$uri")

  if [[ "$scheme" == 'http' || "$scheme" == 'https' ]]; then
    ## dereference ONLY ONCE. give up after that.
    resolved=$(unshorten.sh "$uri")
    if [[ -n "$resolved" ]]; then
      uri=$resolved
    fi
  fi

  if [[ -z "$port" ]]; then
    case "$scheme" in
      https) port=443 ;;
      http) port=80 ;;
    esac
  fi

  case "$scheme" in
    http*)
      title_http "$uri" "$port"
      ;;
    gemini)
      title_gemini "$uri"
      ;;
    magnet)
      # The "dn" parameter of the magnet link.
      printf 'title: %s\n' \
        "$(printf '%s\n' "$uri" | tr '&' '\n' | grep '^dn=' | cut -d= -f2- | uriunescape)"
      ;;
    dns)
      title_dns "$path" "$qs" "$domain"
      ;;
    ftp)
      curl -g "$uri" 2>&1 | tail -n1
      ;;
    gopher)
      title_gopher "$uri"
      ;;
    urn)
      title_urn "$uri"
      ;;
    ssh)
      title_ssh "$domain" "$port"
      ;;
    file)
      title_file "$path"
      ;;
    *)
      printf 'DONT KNOW HOW TO GET TITLE FOR THIS URL: %s\n' "$uri"
      ;;
  esac
}

main "$@"