#!/usr/bin/env bash
#
# paramfilter - remove known tracking parameters from the query string of URLs.
#
## list grabbed from:
## https://github.com/newhouse/url-tracking-stripper/blob/master/assets/js/trackers.js
## referenced from:
## https://github.com/rknightuk/TrackerZapper/blob/3fe21dc2a59240dc2883569f8d435415010610d2/TrackerZapper/AppDelegate.swift#L162
## more:
## https://maxchadwick.xyz/tracking-query-params-registry/

## usages:
## ./paramfilter URL1 URL2 URL3 ...
## ./paramfilter < list.uris
## if you use arguments it won't try to read from stdin.
## so if you want to use paramfilter as a filter don't use arguments.

# External helpers (not defined in this file): uricut and urijoin.
# NOTE(review): the code below assumes `uricut -q URL` prints the query string,
# `uricut URL` prints "field: value" lines, and urijoin lets a later
# "query_string:" line override an earlier one -- confirm against those tools.
#
# Strict mode (set -e / pipefail) is deliberately NOT enabled: grep -v exits
# non-zero when every parameter is filtered out, which is a normal outcome.

# Anchored regexes ("^name="), one per line, built ONCE from the parameter
# names that follow the list marker at the bottom of this file. Reading to end
# of file (instead of a fixed number of lines) means the list can grow freely.
# Blank lines are skipped so they can never turn into a stray "^=" pattern.
tracker_patterns="$(
  sed -e '1,/^__LIST__$/d' \
      -e '/^[[:space:]]*$/d' \
      -e 's/^/^/' \
      -e 's/$/=/' -- "$0"
)"

# Print the URLs to process, one per line.
# Arguments: the script's own positional parameters.
# With at least one argument every argument is emitted (stdin is not touched);
# with none, stdin is passed through unchanged.
emit_urls() {
  if (( $# > 0 )); then
    printf '%s\n' "$@"
  else
    cat
  fi
}

# Print the query string $1 with every "name=value" pair whose name is in the
# tracker list removed. Pairs are separated by '&'; order is preserved.
strip_trackers() {
  local query=$1

  # An empty pattern would match every line and wipe the whole query,
  # so with no patterns the query is returned untouched.
  if [[ -z "$tracker_patterns" ]]; then
    printf '%s\n' "$query"
    return 0
  fi

  printf '%s\n' "$query" \
    | tr '&' '\n' \
    | grep -v -f <(printf '%s\n' "$tracker_patterns") \
    | tr '\n' '&' \
    | sed 's/&$//'
}

# Read URLs line by line and write the cleaned URLs to stdout.
main() {
  local line query new_query

  # -r keeps backslashes literal; the `|| [[ -n ... ]]` clause processes a
  # final line that lacks a trailing newline. Default IFS is kept on purpose so
  # surrounding whitespace is still trimmed from each URL.
  emit_urls "$@" | while read -r line || [[ -n "$line" ]]; do
    query="$(uricut -q "$line")"
    if [[ -n "$query" ]]; then
      new_query="$(strip_trackers "$query")"
      # Re-emit the URL's components followed by the filtered query string and
      # let urijoin reassemble them; its diagnostics are intentionally muted.
      {
        uricut "$line"
        printf 'query_string: %s\n' "$new_query"
      } | urijoin 2>/dev/null
    else
      # No query string: nothing to filter, pass the line through.
      printf '%s\n' "$line"
    fi
  done
}

main "$@"

# Everything below the exit is data, never executed.
exit 0
#this was generated with:
# wget -qO- https://github.com/mpchadwick/tracking-query-params-registry/raw/master/_data/params.csv | tail -n+2 | cut -d, -f1 >> paramfilter.sh
__LIST__
fbclid
gclid
gclsrc
utm_content
utm_term
utm_campaign
utm_medium
utm_source
utm_id
_ga
mc_cid
mc_eid
_bta_tid
_bta_c
trk_contact
trk_msg
trk_module
trk_sid
gdfms
gdftrk
gdffi
_ke
redirect_log_mongo_id
redirect_mongo_id
sb_referer_host
mkwid
pcrid
ef_id
s_kwcid
msclkid
dm_i
epik
pk_campaign
pk_kwd
pk_keyword
piwik_campaign
piwik_kwd
piwik_keyword
mtm_campaign
mtm_keyword
mtm_source
mtm_medium
mtm_content
mtm_cid
mtm_group
mtm_placement
matomo_campaign
matomo_keyword
matomo_source
matomo_medium
matomo_content
matomo_cid
matomo_group
matomo_placement
hsa_cam
hsa_grp
hsa_mt
hsa_src
hsa_ad
hsa_acc
hsa_net
hsa_kw
hsa_tgt
hsa_ver