#!/usr/bin/env bash
## list grabbed from:
## https://github.com/newhouse/url-tracking-stripper/blob/master/assets/js/trackers.js
## referenced from:
## https://github.com/rknightuk/TrackerZapper/blob/3fe21dc2a59240dc2883569f8d435415010610d2/TrackerZapper/AppDelegate.swift#L162
## more:
## https://maxchadwick.xyz/tracking-query-params-registry/

## usages:
##   ./paramfilter URL1 URL2 URL3 ...
##   ./paramfilter < list.uris
## If any arguments are given, stdin is not read.
## To use paramfilter as a stream filter, pass no arguments.

# Strip known tracking parameters from the query string of each input URI.
#
# Input:  URIs given as arguments or, when no arguments are given, one URI
#         per line on stdin.
# Output: one line per URI on stdout. A URI for which `uricut -q` prints
#         nothing is echoed unchanged; otherwise every `name=value` pair
#         whose name is listed after the list marker at the bottom of this
#         file is removed and the URI is reassembled with `urijoin`.
# Needs:  the external `uricut` and `urijoin` commands in PATH.

# Build the grep pattern list once instead of re-reading this script for
# every URI: each non-blank line after the marker line becomes "^name=".
tracker_patterns="$(awk 'seen && NF { print "^" $0 "=" } /^__LIST__$/ { seen = 1 }' "$0")"

if (( $# > 0 )); then
  # Test the argument count rather than the value of "$1", so an empty
  # argument neither falls through to stdin nor cuts the list short.
  printf '%s\n' "$@"
else
  # Not a useless cat: it forwards the script's stdin into the pipeline.
  cat
fi | while read -r uri || [[ -n "$uri" ]]; do
  # -r keeps backslashes literal; the `|| [[ -n ... ]]` clause still handles
  # a final line that has no trailing newline. Surrounding whitespace is
  # trimmed by read (default IFS), as it always was.
  query="$(uricut -q "$uri")"
  if [[ -n "$query" ]]; then
    # One parameter per line, drop the tracked ones, re-join with '&'.
    # printf '%s' (no trailing newline) keeps an empty pattern list truly
    # empty, so grep -v then lets every parameter through.
    kept="$(printf '%s\n' "$query" \
      | tr '&' '\n' \
      | grep -v -f <(printf '%s' "$tracker_patterns") \
      | paste -s -d '&' -)"
    # Feed urijoin the fields printed by uricut followed by the filtered
    # query string.
    # NOTE(review): presumably urijoin lets the later query_string line
    # override the earlier one - confirm against urijoin.
    # urijoin's stderr is deliberately discarded (best effort).
    { uricut "$uri"; printf 'query_string: %s\n' "$kept"; } | urijoin 2>/dev/null
  else
    printf '%s\n' "$uri"
  fi
done

# Stop here so bash never tries to execute the data lines below.
exit 0
#this was generated with:
# wget -qO- https://github.com/mpchadwick/tracking-query-params-registry/raw/master/_data/params.csv | tail -n+2 | cut -d, -f1 >> uriqueryfilter
# echo ref_src >> uriqueryfilter
__LIST__
fbclid
gclid
gclsrc
utm_content
utm_term
utm_campaign
utm_medium
utm_source
utm_id
_ga
mc_cid
mc_eid
_bta_tid
_bta_c
trk_contact
trk_msg
trk_module
trk_sid
gdfms
gdftrk
gdffi
_ke
redirect_log_mongo_id
redirect_mongo_id
sb_referer_host
mkwid
pcrid
ef_id
s_kwcid
msclkid
dm_i
epik
pk_campaign
pk_kwd
pk_keyword
piwik_campaign
piwik_kwd
piwik_keyword
mtm_campaign
mtm_keyword
mtm_source
mtm_medium
mtm_content
mtm_cid
mtm_group
mtm_placement
matomo_campaign
matomo_keyword
matomo_source
matomo_medium
matomo_content
matomo_cid
matomo_group
matomo_placement
hsa_cam
hsa_grp
hsa_mt
hsa_src
hsa_ad
hsa_acc
hsa_net
hsa_kw
hsa_tgt
hsa_ver
ref_src