aboutsummaryrefslogtreecommitdiffstats
path: root/uriqueryfilter
blob: 72dca4722d120cdd73c6f7e26c3b54782aad66ed (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/usr/bin/env bash
## list grabbed from:
## https://github.com/newhouse/url-tracking-stripper/blob/master/assets/js/trackers.js
## referenced from:
## https://github.com/rknightuk/TrackerZapper/blob/3fe21dc2a59240dc2883569f8d435415010610d2/TrackerZapper/AppDelegate.swift#L162
## more:
## https://maxchadwick.xyz/tracking-query-params-registry/

## usage:
##   ./uriqueryfilter URL1 URL2 URL3 ...
##   ./uriqueryfilter < list.uris
## if you pass arguments, it won't try to read from stdin.
## so if you want to use uriqueryfilter as a filter, don't pass any arguments.

# Main pipeline: emit one URI per line (taken from the arguments, or from stdin
# when the first argument is missing or empty), then print each URI with the
# query parameters named in the __LIST__ section of this script removed.

# Turn every non-empty line after the __LIST__ marker into the anchored regex
# "^name=". This is done once, up front, instead of re-reading "$0" for every
# URI, and it places no upper bound on the number of list entries.
tracker_patterns="$(sed -e '1,/^__LIST__$/d' -e '/^$/d' -e 's/^/^/' -e 's/$/=/' "$0")"

if [[ -n "${1-}" ]]; then
  # Argument mode. Empty arguments are skipped rather than ending the loop, so
  # URIs that follow an empty argument are still processed.
  for uri in "$@"; do
    if [[ -n "$uri" ]]; then
      printf '%s\n' "$uri"
    fi
  done
else
  # Filter mode: this cat is what forwards our stdin into the loop below.
  cat
fi | while read -r line || [[ -n "$line" ]]; do
  # -r keeps backslashes in the URI intact. IFS is deliberately left at its
  # default so surrounding whitespace is still trimmed from each line. The
  # "|| [[ -n ... ]]" part handles a final line without a trailing newline.
  q="$(uricut -q "$line")"

  # No query string: print the line unchanged.
  if [[ -z "$q" ]]; then
    printf '%s\n' "$line"
    continue
  fi

  if [[ -n "$tracker_patterns" ]]; then
    # One parameter per line, drop those matching a "^name=" pattern, re-join.
    newq="$(
      printf '%s\n' "$q" \
        | tr '&' '\n' \
        | grep -v -f <(printf '%s\n' "$tracker_patterns") \
        | tr '\n' '&'
    )"
    # The re-join leaves one trailing separator behind.
    newq="${newq%&}"
  else
    # An empty pattern list must not reach grep: an empty pattern matches every
    # line, and with -v that would strip every parameter.
    newq="$q"
  fi

  # Feed urijoin the fields printed by uricut followed by a replacement
  # query_string field.
  # NOTE(review): presumably urijoin lets the later query_string field win over
  # the one uricut printed -- confirm against urijoin.
  # urijoin's stderr is discarded, as in the original (assumed best-effort).
  {
    uricut "$line"
    printf 'query_string: %s\n' "$newq"
  } | urijoin 2>/dev/null
done

# Stop here so that the data section below is never executed as shell code.
exit 0
#this was generated with:
# wget -qO- https://github.com/mpchadwick/tracking-query-params-registry/raw/master/_data/params.csv | tail -n+2 | cut -d, -f1 >> uriqueryfilter
# echo ref_src >> uriqueryfilter
__LIST__
fbclid
gclid
gclsrc
utm_content
utm_term
utm_campaign
utm_medium
utm_source
utm_id
_ga
mc_cid
mc_eid
_bta_tid
_bta_c
trk_contact
trk_msg
trk_module
trk_sid
gdfms
gdftrk
gdffi
_ke
redirect_log_mongo_id
redirect_mongo_id
sb_referer_host
mkwid
pcrid
ef_id
s_kwcid
msclkid
dm_i
epik
pk_campaign
pk_kwd
pk_keyword
piwik_campaign
piwik_kwd
piwik_keyword
mtm_campaign
mtm_keyword
mtm_source
mtm_medium
mtm_content
mtm_cid
mtm_group
mtm_placement
matomo_campaign
matomo_keyword
matomo_source
matomo_medium
matomo_content
matomo_cid
matomo_group
matomo_placement
hsa_cam
hsa_grp
hsa_mt
hsa_src
hsa_ad
hsa_acc
hsa_net
hsa_kw
hsa_tgt
hsa_ver
ref_src