summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorepoch <epoch@enzo.thebackupbox.net>2021-09-19 00:20:15 +0000
committerepoch <epoch@enzo.thebackupbox.net>2021-09-19 00:20:15 +0000
commit120031ad92c74d2f2e523a67772952e539424c18 (patch)
tree468c9f62d92dcca541c7b0eea34c0168c177ac79
parentd8002b33fad5e37e37a10f829f56aeb6b312e8d1 (diff)
downloaduritools-120031ad92c74d2f2e523a67772952e539424c18.tar.gz
uritools-120031ad92c74d2f2e523a67772952e539424c18.zip
forgot to actually add the tool. lol
-rwxr-xr-xuriqueryfilter99
1 files changed, 99 insertions, 0 deletions
diff --git a/uriqueryfilter b/uriqueryfilter
new file mode 100755
index 0000000..233937d
--- /dev/null
+++ b/uriqueryfilter
@@ -0,0 +1,99 @@
+#!/usr/bin/env bash
+## list grabbed from:
+## https://github.com/newhouse/url-tracking-stripper/blob/master/assets/js/trackers.js
+## referenced from:
+## https://github.com/rknightuk/TrackerZapper/blob/3fe21dc2a59240dc2883569f8d435415010610d2/TrackerZapper/AppDelegate.swift#L162
+## more:
+## https://maxchadwick.xyz/tracking-query-params-registry/
+
+## usage:
+## ./uriqueryfilter URL1 URL2 URL3 ...
+## ./uriqueryfilter < list.uris
+## if you pass arguments, it won't try to read from stdin.
+## so if you want to use uriqueryfilter as a filter, don't pass arguments.
+
+if [ "$1" ];then
+ while [ "$1" ];do
+ printf "%s\n" "$1"
+ shift
+ done
+else
+ cat #useless cat?
+fi | while read line;do
+ q="$(uricut -q "$line")"
+ if [ "$q" ];then
+ newq="$(printf "%s\n" "$q" | tr '&' '\n' | grep -vf <(grep -A 1000 "^__LIST__$" "$0" | tail -n+2 | sed 's/^/^/g;s/$/=/g') | tr '\n' '&' | sed 's/&$//')"
+ cat <(uricut "$line") <(printf "query_string: %s\n" "$newq") | urijoin 2>/dev/null
+ else
+ printf "%s\n" "$line"
+ fi
+done
+
+exit 0
+#this was generated with:
+# wget -qO- https://github.com/mpchadwick/tracking-query-params-registry/raw/master/_data/params.csv | tail -n+2 | cut -d, -f1 >> paramfilter.sh
+__LIST__
+fbclid
+gclid
+gclsrc
+utm_content
+utm_term
+utm_campaign
+utm_medium
+utm_source
+utm_id
+_ga
+mc_cid
+mc_eid
+_bta_tid
+_bta_c
+trk_contact
+trk_msg
+trk_module
+trk_sid
+gdfms
+gdftrk
+gdffi
+_ke
+redirect_log_mongo_id
+redirect_mongo_id
+sb_referer_host
+mkwid
+pcrid
+ef_id
+s_kwcid
+msclkid
+dm_i
+epik
+pk_campaign
+pk_kwd
+pk_keyword
+piwik_campaign
+piwik_kwd
+piwik_keyword
+mtm_campaign
+mtm_keyword
+mtm_source
+mtm_medium
+mtm_content
+mtm_cid
+mtm_group
+mtm_placement
+matomo_campaign
+matomo_keyword
+matomo_source
+matomo_medium
+matomo_content
+matomo_cid
+matomo_group
+matomo_placement
+hsa_cam
+hsa_grp
+hsa_mt
+hsa_src
+hsa_ad
+hsa_acc
+hsa_net
+hsa_kw
+hsa_tgt
+hsa_ver