summaryrefslogtreecommitdiff
path: root/urititle
blob: a79670a08e93340c5bacaf471184e001a10336f2 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/bin/sh
scheme=$(printf "%s\n" "$1" | uricut -s)
path=$(printf "%s\n" "$1" | uricut -p)
qs=$(printf "%s\n" "$1" | uricut -q)
domain=$(printf "%s\n" "$1" | uricut -d)
port=$(printf "%s\n" "$1" | uricut -P)
if [ ! "$port" ];then
  if [ "$scheme" = "https" ];then
    port=443
    SSL=--ssl
  fi
  if [ "$scheme" = "http" ];then
    port=80
  fi
fi
if [ "$qs" ];then
  path="${path}?${qs}"
fi
if [ "$port" ];then
  UA="Mozilla/5.0 (impersonator)"
#  content_type="$(printf "HEAD %s HTTP/1.1\r\nHost: %s\r\nUser-Agent: %s\r\n\r\n" "$path" "$domain" "$UA" | ncat -4 $SSL "$domain" "$port" | grep -i '^Content-Type: ' | head -n1 | cut '-d ' -f2 | cut '-d;' -f1 | tr -d '\r\n')"
  a_header="$(curl -Lsi "$1" | egrep -ai '^Location: |^Content-Type: ' | head -n1 | tr -d '\r\n')"
  if printf "%s\n" "${a_header}" | grep -i '^Content-Type: ' 2>&1 >/dev/null 2>&1;then
    content_type="$(printf '%s\n' "${a_header}" | cut '-d ' -f2- | cut '-d;' -f1)"
  else
    content_type="redirect probably"
  fi
  if [ "${content_type}" = "text/html" -o "${content_type}" = "application/xhtml+xml" ];then
#    title="$(printf "GET %s HTTP/1.1\r\nHost: %s\r\nUser-Agent: %s\r\n\r\n" "$path" "$domain" "$UA" | ncat -4 $SSL "$domain" "$port" | head -c 10000000 | tr -d '\n' | tr '<' '\n' | grep -A 10 '^title>' | grep -B 10 '^\/title>' | cut '-d>' -f2)"
    title="$(curl -si "$1" | head -c 1000000 | tr -d '\n' | tr '<' '\n' | grep -iA 10 '^title' | grep -iB 10 '^\/title>' | cut '-d>' -f2 | sed 's/^ .//g' | sed 's/ .$//g' | grep .)"
    printf "title: %s\n" "$title" | html_entities_decode
  else
    printf "%s\n" "${a_header}"
  fi
else
  printf "DONT KNOW HOW TO GET TITLE FOR THIS URL: %s\n" "$1"
fi