urimatchpairs is new, urigetline got rewritten in C and WEW is it fast now. compare with .sh version.

author: epoch <epoch@enzo.thebackupbox.net> 2021-11-14 04:26:25 +0000
committer: epoch <epoch@enzo.thebackupbox.net> 2021-11-14 04:26:25 +0000
commit: a0ef1e229f20ea97d5538c8c4a8f1b6045af0c0c (patch)
tree: d4bf05ad846af0f6a22c49a8ac50cf24a01b96fa
parent: e43385e2cd39e7f31d594b348228d62b41515a68 (diff)
download: uritools-a0ef1e229f20ea97d5538c8c4a8f1b6045af0c0c.tar.gz
uritools-a0ef1e229f20ea97d5538c8c4a8f1b6045af0c0c.zip
4 files changed, 201 insertions, 29 deletions
diff --git a/urigetline b/urigetline
deleted file mode 100755
index 3f6bb51..0000000
--- a/urigetline
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash
-uri="$1"
-### old way.
-# line="$(grep "^${scheme}:" ~/.config/uristart.conf | cut -d: -f2- | sed 's/^[ \t]*//g' | sed 's/\\/\\\\/g')"
-
-if [ ! "$uristart_config" ];then
-  uristart_config=~/.config/uristart.conf
-fi
-
-### fancy way.
-grep '^[^#]' "$uristart_config" \
-  | while read -r l;do
-        uritmp="$uri"
-        uritmp="$(printf "%s\n" "$l" \
-          | cut -d: -f1 \
-          | tr ' ' '\n' \
-          | paste '-d ' - - \
-          | while read -r a b;do
-                uritmp="$(printf "%s\n" "${uritmp}" | urimatch "$a" "$b")"
-                printf "%s\n" "$uritmp"
-            done | tail -n1)"
-        if [ "$uritmp" ];then
-                printf '%s\n' "$l"
-                break
-        fi
-    done \
-  | cut -d: -f2- \
-  | sed 's/^[ \t]*//g' \
-  | sed 's/\\/\\\\/g'
diff --git a/urigetline.c b/urigetline.c
new file mode 100644
index 0000000..2d6cd28
--- /dev/null
+++ b/urigetline.c
@@ -0,0 +1,118 @@
+#include "uri.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fnmatch.h>
+
+#define LINE_LENGTH 1024
+
+#define MATCH_UNEXIST 0 // rule prefix 'n': the URI part must be absent (NULL).
+#define MATCH_PATTERN 1 // rule prefix '-' or none: the URI part must match the glob.
+#define MATCH_REVERSE 2 // rule prefix 'r': the URI part must NOT match the glob. non-negative on purpose: rules travel in a plain char, which is unsigned on some ABIs, so -1 would never reach its case label there.
+// match(): decide whether one URI part satisfies one rule. part may be NULL (absent); arg is the fnmatch() glob and is not used for MATCH_UNEXIST.
+// return 1 if the match and rule should have the main function print this URL, 0 otherwise. a NULL part never satisfies MATCH_PATTERN or MATCH_REVERSE.
+int match(char rule,char *part,char *arg) {
+ switch(rule) {
+  case MATCH_UNEXIST:
+   if(part == 0) return 1;
+   break;
+  case MATCH_REVERSE:
+   if(part) {
+     if(!fnmatch(arg,part,FNM_NOESCAPE)) return 0; // fnmatch() returns 0 when the glob matches.
+     else return 1;
+   }
+   break;
+  case MATCH_PATTERN:
+   if(part) if(!fnmatch(arg,part,FNM_NOESCAPE)) return 1;
+   break;
+  default: // a rule value that is none of the MATCH_* constants.
+   fprintf(stderr,"oh god. what the hell happened to get here?\n");
+   break;
+ }
+ return 0;
+}
+
+int main(int argc,char *argv[]) {//argument needs to be the URI; rule lines are read from stdin. prints the command of the first matching line (every matching line with -a). exits 0 if something was printed, 1 if nothing matched or on a usage error, 2 or 3 on a malformed rule.
+  int j;
+  int ret=1;
+  struct uri u;
+  char matches;
+  char rule=MATCH_PATTERN;
+  char line[LINE_LENGTH];//fixed-size stack buffer: no allocation that could fail or leak.
+  char *a,*b,*c;
+  char *command;
+  char all=0;
+  if(argc > 1) {
+    if(!strcmp(argv[1],"-a")) {
+      all=1;
+      argv++;
+      argc--;
+    }
+  }
+  if(argc < 2) {
+   fprintf(stderr,"usage: urigetline [-a] uri < uristart.conf\n");
+   return 1;
+  }
+  urifromline(&u,argv[1]);//only argv[1] is a URI?
+  while(fgets(line,LINE_LENGTH-1,stdin)) {//each line comes from the config. we need to split it on spaces.
+    if(strchr(line,'\r')) *strchr(line,'\r')=0;
+    if(strchr(line,'\n')) *strchr(line,'\n')=0;
+    if(*line == '#') continue; //skip this line too. comment.
+    if((command=strchr(line,':'))) {
+      *command=0;
+      command++;
+      while(*command == '\t' || *command == ' ') command++;//the separator after ':' may be spaces as well as tabs.
+    } else continue;//skip this line. needs at least one : to work..
+    a=line;
+    matches=1;
+    for(;a;) {//a walks the space-separated rule tokens left of the ':'; c is the start of the next token or NULL.
+      rule=MATCH_PATTERN;
+      j=0;
+      switch(a[0]) {//optional one-letter prefix; j becomes the index of the URI part letter.
+        case '-': j=1; rule=MATCH_PATTERN; break;
+        case 'n': j=1; rule=MATCH_UNEXIST; break;
+        case 'r': j=1; rule=MATCH_REVERSE; break;
+        default: break;
+      }
+      b=0;
+      c=0;
+      if(rule != MATCH_UNEXIST) {//these rules consume a second token: the pattern, pointed to by b.
+        if(!(b=strchr(a,' '))) {
+          fprintf(stderr,"argument '%s' wants a value in the next argument and didn't get it. throwing a fit.\n",a);
+          return 2;
+        }
+        *b=0; b++;
+        if((c=strchr(b,' '))) {//not required
+          *c=0; c++;
+        }
+      } else {//'n' rules stand alone, so b stays NULL.
+        if((c=strchr(a,' '))) {
+          *c=0; c++;
+        }
+      }
+      switch(a[j]) {//every rule must hold: one failing rule clears matches for the whole line.
+        case 's': if(!match(rule,u.scheme,b))       { matches=0;} break;
+        case 'u': if(!match(rule,u.username,b))     { matches=0;} break;
+        case 'k': if(!match(rule,u.password,b))     { matches=0;} break;
+        case 'd': if(!match(rule,u.domain,b))       { matches=0;} break;
+        case 'P': if(!match(rule,u.port,b))         { matches=0;} break;
+        case 'p': if(!match(rule,u.path,b))         { matches=0;} break;
+        case 'q': if(!match(rule,u.query_string,b)) { matches=0;} break;
+        case 'f': if(!match(rule,u.fragment_id,b))  { matches=0;} break;
+          break;
+        default:
+          fprintf(stderr,"unknown url part letter! %s\n",a);
+          return 3;
+      }
+      //if(b) printf("two: %s %s\n",a,b);
+      //else printf("one: %s\n",a);
+      a=c;
+    }
+    if(matches) {
+      ret=0;//a command is being printed, so the run counts as a success.
+      printf("%s\n",command);
+      if(!all) return ret;//bail early if we only need first match
+    }
+  }
+  return ret;
+}
diff --git a/urigetline.sh b/urigetline.sh
new file mode 100755
index 0000000..8589880
--- /dev/null
+++ b/urigetline.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+### this script reads a config from stdin
+### and does multiple urimatches on the uri argument.
+### can optionally use -a to output all matching lines.
+### stdin format is:
+### [^\t]+:[ \t]+command\n
+
+if [ "$1" = "-a" ];then
+  all=1
+  shift
+fi
+uri="$1"
+### old way.
+# line="$(grep "^${scheme}:" ~/.config/uristart.conf | cut -d: -f2- | sed 's/^[ \t]*//g' | sed 's/\\/\\\\/g')"
+
+### going to use urimatchpairs until I get the whole thing rewritten in C
+grep '^[^#]' \
+  | while read -r l;do
+      uritmp="$uri"
+      uritmp="$(urimatchpairs $(printf "%s\n" "$l" | cut -d: -f1) \
+        | while read -r a b;do
+            #printf "pair: '%s' '%s'\n" "$a" "$b" >&2
+            uritmp="$(printf "%s\n" "${uritmp}" | urimatch $a $b)"
+            printf "%s\n" "${uritmp}"
+          done | tail -n1)"
+      if [ "$uritmp" ];then
+          #printf 'matching line: %s\n' "$l" >&2
+          printf "%s\n" "$l"
+          if [ ! "$all" ];then
+               break
+          fi
+      fi
+    done \
+  | cut -d: -f2- \
+  | sed 's/^[ \t]*//g' \
+  | sed 's/\\/\\\\/g'
+
+### previous version that doesn't work right for some of the match syntaxes
+#grep '^[^#]' \
+#  | while read -r l;do
+#        uritmp="$uri"
+#        uritmp="$(printf "%s\n" "$l" \
+#          | cut -d: -f1 \
+#          | tr ' ' '\n' \
+#          | paste '-d ' - - \
+#          | while read -r a b;do
+#                uritmp="$(printf "%s\n" "${uritmp}" | urimatch $a $b)"
+#                printf "%s\n" "$uritmp"
+#            done | tail -n1)"
+#        if [ "$uritmp" ];then
+#                printf 'matching line: %s\n' "$l" >&2
+#                printf '%s\n' "$l"
+#                if [ ! "$all" ]; then
+#                        break
+#	        fi
+#        fi
+#    done \
+#  | cut -d: -f2- \
+#  | sed 's/^[ \t]*//g' \
+#  | sed 's/\\/\\\\/g'
diff --git a/urimatchpairs.c b/urimatchpairs.c
new file mode 100644
index 0000000..6da5d92
--- /dev/null
+++ b/urimatchpairs.c
@@ -0,0 +1,23 @@
+#include <stdio.h>
+
+int main(int argc,char *argv[]) {//regroups rule arguments into one rule per output line: "nX" alone for 'n' rules, otherwise "X value" (or "rX value" for 'r' rules).
+  int i,j;
+  for(i=1;i<argc;i+=2) {
+    switch(argv[i][0]) {//optional one-letter prefix; j becomes the index of the URI part letter.
+      case '-': j=1; break;
+      case 'n': j=1; printf("%s\n",argv[i]); i--; continue;//'n' rules take no value: step back one so the loop's i+=2 advances by a single argument.
+      case 'r': j=1; printf("r"); break;
+      default: j=0; break;//no prefix: the part letter is the first character. j must be set on every path before argv[i][j] is read.
+    }
+    switch(argv[i][j]) {//arguments whose letter is not a known URI part print no pair.
+      case 's':
+      case 'u':
+      case 'k':
+      case 'd':
+      case 'P':
+      case 'p':
+      case 'q':
+      case 'f': printf("%c %s\n",argv[i][j],argv[i+1]);//NOTE(review): argv[i+1] is NULL when the last rule has no value.
+    }
+  }
+}
author	epoch <epoch@enzo.thebackupbox.net>	2021-11-14 04:26:25 +0000
committer	epoch <epoch@enzo.thebackupbox.net>	2021-11-14 04:26:25 +0000
commit	a0ef1e229f20ea97d5538c8c4a8f1b6045af0c0c (patch)
tree	d4bf05ad846af0f6a22c49a8ac50cf24a01b96fa
parent	e43385e2cd39e7f31d594b348228d62b41515a68 (diff)
download	uritools-a0ef1e229f20ea97d5538c8c4a8f1b6045af0c0c.tar.gz uritools-a0ef1e229f20ea97d5538c8c4a8f1b6045af0c0c.zip