summaryrefslogtreecommitdiff
path: root/urititle
blob: 18897847baaf989f945965a2566ef68795c5908c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#!/usr/bin/env bash
# Split the URL given as $1 into the pieces the scheme handlers work with.
# Each call feeds the URL to uricut on stdin together with one selector
# flag and keeps whatever uricut prints.
uri_part() {
  printf "%s\n" "$1" | uricut "$2"
}
scheme=$(uri_part "$1" -s)
path=$(uri_part "$1" -p)
qs=$(uri_part "$1" -q)
domain=$(uri_part "$1" -d)
port=$(uri_part "$1" -P)

# When the URL carries no explicit port, fall back to the well-known one
# for http/https.  Every other scheme keeps an empty port here.
if [ -z "$port" ]; then
  case "$scheme" in
  https)
    port=443
    SSL=--ssl # remember that this URL needs TLS
    ;;
  http)
    port=80
    ;;
  esac
fi
# ---------------------------------------------------------------------------
# Scheme handlers.
#
# Each handler prints a short, one-line description of the URL on stdout
# (usually "title: ...").  Handlers that take an argument receive the full
# URL as $1; they also read the globals $port, $domain, $path and $qs that
# were filled in from the URL before the dispatch at the bottom.
# ---------------------------------------------------------------------------

# Print the text of the <title> element of the HTML document on stdin.
# Only the first 1000000 bytes are looked at.  The document is re-split so
# that every '<' starts a new line, which puts "title>TEXT" and "/title>"
# at the start of their own lines; the text after the '>' is kept, tabs
# become spaces, surrounding spaces are trimmed and empty lines dropped.
# Exit status is that of the final grep (non-zero when no title was found).
extract_html_title() {
  head -c 1000000 \
    | tr -d '\n' \
    | tr '<' '\n' \
    | grep -iA 10 '^title' \
    | grep -iB 10 '^/title>' \
    | cut '-d>' -f2 \
    | tr '\t' ' ' \
    | sed 's/^ *//g' \
    | sed 's/ *$//g' \
    | grep .
}

# http:// and https:// (any scheme starting with "http").
#   $1 - full URL
# Prints "title: ..." for HTML pages, the raw Content-Type/Location header
# line for anything else, or "curl failed" when the header probe fails.
title_from_http() {
  local ua a_header content_type title

  # Only http and https get a default port; without one do nothing.
  [ "$port" ] || return 0

  ua="Mozilla/5.0 (impersonator)"

  # First interesting line of the response headers: either a Location or a
  # Content-Type header, or the literal "curl failed" marker when curl
  # exits non-zero.
  a_header="$(
    { curl -gA "$ua" -Lsi "$1" || echo curl failed; } \
      | head -c 10000 \
      | grep -Eai '^curl failed|^Location: |^Content-Type: ' \
      | head -n1 \
      | tr -d '\r\n'
  )"

  content_type=""
  if printf '%s\n' "$a_header" | grep -qi '^Content-Type: '; then
    # "Content-Type: text/html; charset=x" -> "text/html"
    content_type="$(printf '%s\n' "$a_header" | cut '-d ' -f2- | cut '-d;' -f1)"
  fi
  if printf '%s\n' "$a_header" | grep -qi '^curl failed'; then
    # Any non-empty, non-HTML value routes us to the "print the header
    # line" branch below, which then reports "curl failed".
    content_type="curl failed. cert expired? dunno yet. TODO: code openssl checker."
  fi

  case "$content_type" in
  text/html | application/xhtml+xml | "")
    ### main.lv doesn't have content-type on some pages, so if the
    ### content-type is missing or empty, we're assuming html
    # NOTE(review): unlike the header probe, this fetch neither follows
    # redirects nor sends the User-Agent -- confirm that is intended.
    title="$(curl -gsi "$1" | extract_html_title)"
    if [ -z "$content_type" ]; then
      printf "WTF: header: %s\n" "$a_header"
      printf "WARNING: NO CONTENT-TYPE RETURNED FROM SERVER. Assuming text/html. title: %s\n" "$title" | html_entities_decode
    else
      printf "title: %s\n" "$title" | html_entities_decode
    fi
    ;;
  *)
    printf "%s\n" "$a_header"
    ;;
  esac
}

# gemini://
#   $1 - full URL
# The first line printed by gemini-get is treated as "<status> <meta>",
# where <meta> starts with the MIME type for successful responses.
#   text/gemini -> first heading line ('#'-prefixed) anywhere in the body
#   text/html   -> contents of <title>
#   text/plain  -> first line of the body
#   otherwise   -> the <meta> text itself (e.g. an error message)
# Prints nothing when gemini-get produces no output at all.
title_from_gemini() {
  local status_line meta mime heading
  gemini-get "$1" | (
    # read is a builtin and consumes exactly one line, so the commands
    # below see the rest of the response on the same stdin.
    read -r status_line || [ -n "$status_line" ] || exit 0
    meta="$(printf '%s\n' "$status_line" | tr -d '\r' | tr '\t' ' ' | tr -s ' ' | cut '-d ' -f2-)"
    # Drop MIME parameters such as "; charset=utf-8".
    mime="$(printf '%s\n' "$meta" | cut '-d ' -f1 | cut '-d;' -f1)"
    case "$mime" in
    text/gemini)
      heading="$(tr -d '\r' | grep '^#' | head -n1 | sed 's/^#* *//g')"
      printf "title: %s\n" "$heading"
      ;;
    text/html)
      printf "title: %s\n" "$(extract_html_title)"
      ;;
    text/plain)
      printf "title: %s\n" "$(head -n1)"
      ;;
    *)
      printf "title: %s\n" "$meta"
      ;;
    esac
  ) | head -n1
}

# magnet:
#   $1 - full URL
# Prints the unescaped value of the dn= parameter.  The URL is split on
# both '?' and '&' so dn= is found even when it is the first parameter.
title_from_magnet() {
  printf "title: %s\n" "$(printf "%s\n" "$1" | tr '?&' '\n\n' | grep '^dn=' | cut -d= -f2- | uriunescape)"
}

# dns:
# Resolves $path with dig and prints the answers on one line.  With a
# query string, the record type is taken from query_param "type"
# (query_param is handed the query string through QUERY_STRING).  With an
# authority part in the URL, that host is used as the server and the
# leading '/' is removed from the name.
lookup_dns() {
  local rrtype="" server="" name="$path"
  if [ "$qs" ]; then
    export QUERY_STRING="$qs"
    rrtype="-t $(query_param "type")"
  fi
  if [ "$domain" ]; then
    server="@$domain"
    name="$(printf "%s\n" "$path" | sed "s|^/||")"
  fi
  # $rrtype must split into two words and $server must vanish when empty,
  # so both are deliberately left unquoted.
  # shellcheck disable=SC2086
  dig $rrtype "$name" +short $server | tr '\n' ' '
  echo # the tr above strips out the trailing \n
}

# gopher://
#   $1 - full URL
# The first character after the leading '/' of the path selects how the
# resource is read: 1 or nothing = menu (first 'i' info line), 0 = text
# (first line), h = HTML (<title>).
title_from_gopher() {
  local item_type
  item_type="$(printf "%s\n" "$1" | uricut -p | cut -b2- | cut -b1)"
  case "$item_type" in
  1 | "")
    printf "title: %s\n" "$(curl -gs "$1" | grep ^i | head -n1 | cut -f1 | cut -b2-)"
    ;;
  0)
    printf "title: %s\n" "$(curl -gs "$1" | head -n1)"
    ;;
  h)
    printf "title: %s\n" "$(curl -gs "$1" | extract_html_title)"
    ;;
  *)
    printf "title: don't know how to get title of non-1 gopher links\n"
    ;;
  esac
}

# urn:<nid>:<nss>:<nss2>
#   $1 - full URN
# Known namespaces are answered from local databases under /var/db or by
# resolving the URN and recursing into urititle; everything else is just
# passed to urnresolve.
describe_urn() {
  local nid nss nss2 meta_file
  nid="$(printf "%s\n" "$1" | cut -d: -f2)"
  nss="$(printf "%s\n" "$1" | cut -d: -f3)"
  nss2="$(printf "%s\n" "$1" | cut -d: -f4)"
  case "$nid" in
  ietf)
    if [ "$nss" = "rfc" ]; then
      jq .title "/var/db/rfc/rfc${nss2}.json"
      return 0
    fi
    ;;
  phrack)
    meta_file="/var/db/phrack/meta/${nss}.tsv"
    if [ -z "$nss2" ]; then
      # urn:phrack:<issue> -- count the non-empty lines of the issue index.
      printf "issue %s of phrack has %d articles\n" "$nss" "$(grep -c '.' "$meta_file")"
      return 0
    fi
    # urn:phrack:<issue>:<article> -- pick the "<issue>/<article><TAB>..."
    # row, turn it into four NUL-separated fields and format them.
    grep "^${nss}/${nss2}"$'\t' "$meta_file" \
      | sed 's|/|\t|' \
      | tr '\n\t' '\0\0' \
      | xargs -n4 -0 printf 'issue %2s article %2s %79s by [%s]\n'
    return 0
    ;;
  mitre)
    if [ "$nss" = "cve" ]; then
      urititle "$(urnresolve "$1")"
      return 0
    fi
    ;;
  esac
  urnresolve "$1"
}

# ssh://
# Prints the first line the server sends on connect.  Port 22 is used
# when the URL has none.
title_from_ssh() {
  printf "title: %s\n" "$(printf "" | nc "$domain" "${port:-22}" | head -n1)"
}

case "$scheme" in
http*)
  title_from_http "$1"
  ;;
gemini)
  title_from_gemini "$1"
  ;;
magnet)
  title_from_magnet "$1"
  ;;
dns)
  lookup_dns
  ;;
ftp)
  curl -g "$1" 2>&1 | tail -n1
  ;;
gopher)
  title_from_gopher "$1"
  ;;
urn)
  describe_urn "$1"
  ;;
ssh)
  title_from_ssh
  ;;
*)
  printf "DONT KNOW HOW TO GET TITLE FOR THIS URL: %s\n" "$1"
  ;;
esac