From 6f402e2d2f052972886712f60d592684c8671982 Mon Sep 17 00:00:00 2001
From: epoch <epoch@hacking.allowed.org>
Date: Sat, 20 Apr 2019 05:32:27 -0500
Subject: rebased on an old copy of this repo. renamed everything. rewrote the
 uri parser. added uricmp. wew.

---
 .gitignore         |   7 +-
 Makefile           |  34 +++----
 cuturl.c           | 202 -----------------------------------------
 matchurl.c         |  69 --------------
 printfurl          |   5 -
 start              |   4 -
 start.conf.example |   8 --
 uri.h              | 253 +++++++++++++++++++++++++++++++++++++++++++++++++++
 uricmp.c           |  20 ++++
 uricut.c           | 158 ++++++++++++++++++++++++++++++++
 uriescape.c        |  14 +++
 urimatch.c         |  70 ++++++++++++++
 uriprintf          |   5 +
 uristart           |   4 +
 uriunescape.c      |  12 +++
 url.h              | 261 -----------------------------------------------------
 urlescape.c        |  14 ---
 urlunescape.c      |  12 ---
 18 files changed, 559 insertions(+), 593 deletions(-)
 delete mode 100644 cuturl.c
 delete mode 100644 matchurl.c
 delete mode 100755 printfurl
 delete mode 100755 start
 delete mode 100644 start.conf.example
 create mode 100644 uri.h
 create mode 100644 uricmp.c
 create mode 100644 uricut.c
 create mode 100644 uriescape.c
 create mode 100644 urimatch.c
 create mode 100755 uriprintf
 create mode 100755 uristart
 create mode 100644 uriunescape.c
 delete mode 100644 url.h
 delete mode 100644 urlescape.c
 delete mode 100644 urlunescape.c

diff --git a/.gitignore b/.gitignore
index 805d9dc..499d9be 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,5 @@
-matchurl
-cuturl
+uricmp
+uricut
+uriescape
+urimatch
+uriunescape
diff --git a/Makefile b/Makefile
index 7edc560..b67a2d6 100644
--- a/Makefile
+++ b/Makefile
@@ -1,26 +1,28 @@
-CFLAGS=-std=c99 -pedantic -Wall
-PREFIX=/usr/local
-CC=gcc
+CFLAGS:=-std=c11 -pedantic -Wall
+PREFIX:=/usr/local
+CC:=gcc
 
-all: cuturl matchurl urlunescape urlescape
+all: uricut urimatch uriunescape uriescape uricmp
 
-matchurl: matchurl.c url.h
+urimatch: urimatch.c uri.h
 
-cuturl: cuturl.c url.h
+uricut: uricut.c uri.h
 
-urlunescape: urlunescape.c url.h
+uricmp: uricmp.c uri.h
 
-urlescape: urlescape.c url.h
+uriunescape: uriunescape.c uri.h
+
+uriescape: uriescape.c uri.h
 
 clean:
-	rm -f matchurl
-	rm -f cuturl
+	rm -f uricut urimatch uriunescape uriescape uricmp
 	rm -f *.o
 
 install: all
-	install matchurl $(PREFIX)/bin/matchurl
-	install cuturl $(PREFIX)/bin/cuturl
-	install start $(PREFIX)/bin/start
-	install printfurl $(PREFIX)/bin/printfurl
-	install urlunescape $(PREFIX)/bin/urlunescape
-	install urlescape $(PREFIX)/bin/urlescape
+	install urimatch $(PREFIX)/bin/urimatch
+	install uricut $(PREFIX)/bin/uricut
+	install uricmp $(PREFIX)/bin/uricmp
+	install uristart $(PREFIX)/bin/uristart
+	install uriprintf $(PREFIX)/bin/uriprintf
+	install uriunescape $(PREFIX)/bin/uriunescape
+	install uriescape $(PREFIX)/bin/uriescape
diff --git a/cuturl.c b/cuturl.c
deleted file mode 100644
index f1cbf42..0000000
--- a/cuturl.c
+++ /dev/null
@@ -1,202 +0,0 @@
-#include <netdb.h>
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <unistd.h>
-#include <sys/wait.h>
-
-#include "url.h"
-
-#define MAGIC
-
-/*
- schemes are case sensitive but cononicals are lower case.
- domain is case insensitive. return it lowercased?
- port is optional and in decimal
- path
- scheme://username:password@domain:port/path?query_string#fragment_id
- mailto:username@domain
-
- optional stuff:
- scheme, username, password, port, path, query_string, fragment_id
-*/
-
-#define AorB(a,b) ((a)?(a):(b))
-
-#define F_SCHEME 1<<0
-#define F_USERNAME 1<<1
-#define F_PASSWORD 1<<2
-#define F_DOMAIN 1<<3
-#define F_PORT 1<<4
-#define F_PATH 1<<5
-#define F_QUERY_STRING 1<<6
-#define F_FRAGMENT_ID 1<<7
-#define F_WHOLE_URL 1<<8
-
-char *long_opts[]={"scheme","username","password","domain","port","path","query_string","fragment_id","URL",0};
-char *short_opts[]={"s","u","k","d","P","p","q","f","U"};
-
-int main(int argc,char *argv[]) {
- char *url;
- char *name[2];
- char *line=0;
- short args[256];//this needs to be a short to make room for the F_WHOLE_URL
- int i,j,c=0;
- int size=1024;
- int status;
- char fixme=0;
- char using_stdin=1;
- char malloced=0;
- struct url u;
- if(argc > 1) {
-  if(!strcmp(argv[1],"--help") || !strcmp(argv[1],"-h")) {
-   printf("usage: echo urls | cuturl [options]\n");
-   printf("usage: cuturl [options] url [options] [url]\n\n");
-   printf("options: \n");
-   for(i=0;long_opts[i];i++) {
-    printf("        -%s|--%s\n",short_opts[i],long_opts[i]);
-   }
-   printf("To set default values use environment variables like: CUTURL_[OPTION]\n");
-   return 2;
-  }
- }
- argv++;
- argc--;
- while(1) {
-  u.scheme=0;
-  u.username=0;
-  u.password=0;
-  u.domain=0;
-  u.port=0;
-  u.path=0;
-  u.query_string=0;
-  u.fragment_id=0;
-  if(!using_stdin) c=0;
-  if(argc >= 1) {
-   for(;argc>0;argc--,argv++) {
-    for(i=0;long_opts[i];i++) {
-     if(!strncmp(*argv,"--",2)) {
-      if(!strcmp(*argv+2,long_opts[i])) {
-       args[c]=1<<i;
-       c++;
-       break;
-      }
-     }
-    }
-    fixme=0;
-    if(**argv=='-' && argv[0][1] != '-') {
-     for(j=1;argv[0][j];j++) {
-      for(i=0;short_opts[i];i++) {
-       if(argv[0][j]==*short_opts[i]) {
-        args[c]=1<<i;
-        c++;
-        fixme=1;
-       }
-      }
-     }
-    }
-    if(fixme) continue;
-    if(long_opts[i]) continue;
-    //if we get here we are at data instead of flags. work on it.
-    line=*argv;
-    using_stdin=0;
-    argc--;
-    argv++;
-    break;
-   }
-  }
-  if(!argc && !line) {//if we are out of arguments and it didn't include data
-   using_stdin=1;
-  }
-  if(using_stdin) {
-   line=malloc(size+1);
-   malloced=1;
-   if(!fgets(line,size,stdin)) {
-    return 0;
-   }
-  }
-  if(!line) return 0;
-  for(i=0;line[i] && line[i] != '\n' && line[i] != '\r';i++);
-  line[i]=0;
-
-  url=strdup(line);
-  urlfromline(&u,line);
-
-  // printf("scheme://username:password@domain:port/path?query_string#fragment_id\n\n");
-  //let's set them to what'll get printed now...
-
-#ifdef MAGIC
-  magic_and_defaults(&u);
-/*
-  u.scheme=AorB(u.scheme,AorB(getenv("CUTURL_SCHEME"),"DEFAULT"));
-  u.username=AorB(u.username,AorB(getenv("CUTURL_USERNAME"),"DEFAULT"));
-  u.password=AorB(u.password,AorB(getenv("CUTURL_PASSWORD"),"DEFAULT"));
-  u.domain=AorB(u.domain,AorB(getenv("CURURL_DOMAIN"),"DEFAULT"));
-  serv=getservbyname(u.scheme,strcmp(u.scheme,"udp")?"tcp":"udp");//gets default port for the scheme. http -> 80
-  if(serv) snprintf(sport,sizeof(sport)-1,"%d",ntohs(serv->s_port));
-  u.port=AorB(u.port,AorB(getenv("CUTURL_PORT"),(serv?sport:"DEFAULT")));
-  u.path=AorB(u.path,AorB(getenv("CUTURL_PATH"),"DEFAULT"));
-  u.query_string=AorB(u.query_string,AorB(getenv("CUTURL_QUERY_STRING"),"DEFAULT"));
-  u.fragment_id=AorB(u.fragment_id,AorB(getenv("CUTURL_FRAGMENT_ID"),"DEFAULT"));
-*/
-#endif
-
-  if((name[0]=getenv("CUTURL__"))) {
-   setenv("CUTURL__SCHEME",u.scheme,1);
-   setenv("CUTURL__USERNAME",u.username,1);
-   setenv("CUTURL__PASSWORD",u.password,1);
-   setenv("CUTURL__DOMAIN",u.domain,1);
-   setenv("CUTURL__PORT",u.port,1);
-   setenv("CUTURL__PATH",u.path,1);
-   setenv("CUTURL__QUERY_STRING",u.query_string,1);
-   setenv("CUTURL__FRAGMENT_ID",u.fragment_id,1);
-   name[1]=0;
-   switch(fork()) {
-    case 0:
-     execv(name[0],name);
-     perror("execv");
-     return errno;
-    case -1:
-     perror("fork");
-     return errno;
-    default:
-     break;
-   }
-   wait(&status);
-  } else {
-   if(c) {
-    for(i=0;i<c;i++) {
-     if(args[i]&F_SCHEME) printf("%s\n",AorB(u.scheme,""));
-     if(args[i]&F_USERNAME) printf("%s\n",AorB(u.username,""));
-     if(args[i]&F_PASSWORD) printf("%s\n",AorB(u.password,""));
-     if(args[i]&F_DOMAIN) printf("%s\n",AorB(u.domain,""));
-     if(args[i]&F_PORT) printf("%s\n",AorB(u.port,""));
-     if(args[i]&F_PATH) printf("%s\n",AorB(u.path,""));
-     if(args[i]&F_QUERY_STRING) printf("%s\n",AorB(u.query_string,""));
-     if(args[i]&F_FRAGMENT_ID) printf("%s\n",AorB(u.fragment_id,""));
-     if(args[i]&F_WHOLE_URL) printf("%s\n",url);
-    }
-   } else {
-    printf("scheme: %s\n",u.scheme);
-    printf("username: %s\n",u.username);
-    printf("password: %s\n",u.password);
-    printf("domain: %s\n",u.domain);
-    printf("port: %s\n",u.port);
-    printf("path: %s\n",u.path);
-    printf("query_string: %s\n",u.query_string);
-    printf("fragment_id: %s\n",u.fragment_id);
-    printf("whole_url: %s\n",url);
-   }
-  }
-  free(url);//this is definitely malloc()d
-  if(malloced) {
-   free(line);
-   malloced=0;
-   line=0;
-  } else {
-   line=0;//???
-  }
- }
- return 0;
-}
diff --git a/matchurl.c b/matchurl.c
deleted file mode 100644
index 47a2a20..0000000
--- a/matchurl.c
+++ /dev/null
@@ -1,69 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "url.h"
-
-#define LINE_LENGTH 1024
-
-int match(char negate,char *part,char *arg) {
- if(negate) {
-  if(part == 0) return 1;//we found that the part isn't here!
- } else {
-  if(part) {
-   if(!strcmp(part,arg)) return 1;
-  }
- }
- return 0;
-}
-
-int main(int argc,char *argv[]) {
- int i;
- int ret=1;
- struct url u;
- char negate=0;
- char *line=malloc(LINE_LENGTH);
- char copy[LINE_LENGTH];
- if(argc < 2) {
-  printf("usage: matchurl [-][n][s|u|k|d|D|P|p|q|f] [string]\n");
-  printf("scheme://username:password@domain:port/path?query_string#fragment_id\n");
-  printf("s://u:k@d:P/p?q#f\n");
-  printf("The D flag is special. it matches its argument against the last bytes of the input url's domain.\n");
-  printf("This allows matching of subdomains, like `echo epoch.ano | matchurl -D ano` would match.\n");
-  printf("the 'n' flag can be put before any of the other flags to check for a missing.\n");
-  return 1;
- }
- while(fgets(line,LINE_LENGTH-1,stdin)) {
-  if(strchr(line,'\r')) *strchr(line,'\r')=0;
-  if(strchr(line,'\n')) *strchr(line,'\n')=0;
-  strcpy(copy,line);
-  memset(&u,0,sizeof(u));
-  urlfromline(&u,line);
-  //use the character in argv[1] to match stdin against argv[2]. if match print whole line.
-  for(i=1;i<argc;i+=2) {
-   if(negate) {i--;}//we didn't really need to go that far.
-   negate=0;
-   if(argv[i][0] == '-') argv[i]++;
-   if(argv[i][0] == 'n') {argv[i]++; negate=1; }//heh.
-   switch(argv[i][0]) {
-    case 's': if(match(negate,u.scheme,argv[i+1]))       { printf("%s\n",copy); ret=0;} break;
-    case 'u': if(match(negate,u.username,argv[i+1]))     { printf("%s\n",copy); ret=0;} break;
-    case 'k': if(match(negate,u.password,argv[i+1]))     { printf("%s\n",copy); ret=0;} break;
-    case 'd': if(match(negate,u.domain,argv[i+1]))       { printf("%s\n",copy); ret=0;} break;
-    case 'P': if(match(negate,u.port,argv[i+1]))         { printf("%s\n",copy); ret=0;} break;
-    case 'p': if(match(negate,u.path,argv[i+1]))         { printf("%s\n",copy); ret=0;} break;
-    case 'q': if(match(negate,u.query_string,argv[i+1])) { printf("%s\n",copy); ret=0;} break;
-    case 'f': if(match(negate,u.fragment_id,argv[i+1]))  { printf("%s\n",copy); ret=0;} break;
-    case 'D': //not sure how to look for a missing one of these. it'd be like d.
-     if(u.domain && argv[i+1] && strlen(u.domain) >= strlen(argv[i+1]) && !strncmp(u.domain+strlen(u.domain)-strlen(argv[i+1]),argv[i+1],strlen(argv[i+1]))) 
-      printf("%s\n",copy);
-      ret=0;
-     break;
-    default:
-     printf("unknown url part letter! '%c'\n",argv[i][0]);
-     return ret;
-   }
-  }
- }
- return ret;
-}
-
diff --git a/printfurl b/printfurl
deleted file mode 100755
index c141bc1..0000000
--- a/printfurl
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/bin/sh
-cutargs="$(printf "%s\n" "$1" | sed 's/%[^sukdPpqfU]//g' | tr '%' '\n' | tail -n+2 | sed 's/^\(.\).*/-\1/g' | tr '\n' ' ')"
-count="$(echo $cutargs | tr '-' '\n' | grep -c .)"
-printfargs="$(printf "%s\n" "$1" | sed 's/%[sukdPpqfU]/%s/g')"
-cuturl "$cutargs" | tr '\n' '\0' | xargs -n $count -0 printf "$printfargs"
diff --git a/start b/start
deleted file mode 100755
index 9a3dca6..0000000
--- a/start
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-scheme="$(printf "%s\n" "$1" | cuturl -s)"
-line="$(grep "^${scheme}:" ~/.config/start.conf | cut -d: -f2-)"
-eval "$(printf "%s\n" "$1" | sed 's/'\''/'\''\\'\'''\''/g' | printfurl "$line")"
diff --git a/start.conf.example b/start.conf.example
deleted file mode 100644
index 6864da3..0000000
--- a/start.conf.example
+++ /dev/null
@@ -1,8 +0,0 @@
-### start single-quote escapes the url parts so place them inside single-quote or else!
-### (if you're given a bad link someone might be able to run shell commands)
-finger:printf "%%s\r\n" '%p' | ncat '%d' 79 | tr -d '\r' | xmessage -file -
-### new! subshells works
-whois:whois "$(printf '%%s\\\\n' '%d' | sed 's/^..*$/-h/')" '%d' '%p' | xmessage -file -
-irc:x-terminal-emulator -e irssi -c '%d' -p '%P'
-http:dillo '%U'
-DEFAULT:xdg-open '%U'
diff --git a/uri.h b/uri.h
new file mode 100644
index 0000000..97ce3c2
--- /dev/null
+++ b/uri.h
@@ -0,0 +1,253 @@
+#ifndef uri_H
+#define uri_H
+
+#define _XOPEN_SOURCE 500 //for strdup
+#include <string.h>
+#include <netdb.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+//uri_reserved = gen-delims / sub-delims
+#define pe_gen_delims ":/?#[]@"
+#define pe_sub_delims "!$&'()*+,;="
+//char *pe_reserved[]=pe_gen_delims "" pe_sub_delims; 
+#define pe_ALPHA "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+#define pe_DIGIT "0123456789"
+#define pe_HPUT "-._~"
+//char *pe_unreserved[]=pe_ALPHA "" pe_DIGIT "" pe_HPUT;
+
+unsigned char rfc3086_percent_encoding[256];
+
+#define isxdigit(a) (((a) >= 'a' && (a) <= 'f') || ((a) >= '0' && (a) <= '9') || ((a) >= 'A' && (a) <= 'F')) //ascii hex digit test. the argument is evaluated several times so don't pass anything with side effects.
+#define toupper(a) (((a) >= 'a' && (a) <= 'z')?(a)-' ':(a)) //ascii only. 'a'-'A' is ' '. the argument is evaluated several times.
+
+char *uri_reserved={//every byte uriescapelength() and uriescape() let through unescaped. despite the name this is the reserved AND the unreserved characters glued into one string.
+  pe_gen_delims
+  pe_sub_delims
+  pe_ALPHA
+  pe_DIGIT
+  pe_HPUT
+};
+
+int uriescapelength(char *in,int len) {
+  int total=0;//bytes uriescape() would write for in[0..len). add one yourself if you plan on putting a null byte at the end.
+  for(;len>0;len--,in++) {
+    if(strchr(uri_reserved,*in)) total+=1;//found in the pass-through set. copied as-is.
+    else total+=3;//becomes %XX
+  }
+  return total;
+}
+
+// percent-encodes in[0..len) into out. bytes found in uri_reserved are copied as-is, every other byte becomes %XX (uppercase hex).
+// make sure your out char * has enough space! use uriescapelength for it. no null byte is written. len is the INPUT length.
+void uriescape(char *in,char *out,int len) {
+  int i,j;
+  for(i=0,j=0;i<len;i++) {
+    if(strchr(uri_reserved,in[i])) {
+      out[j]=in[i];
+      j++;
+    } else {
+      out[j]='%';
+      j++;
+      out[j]="0123456789ABCDEF"[((unsigned char)in[i] >> 4) & 0x0F];//high nibble. the mask used to be 0x15 which turned a space into %00.
+      j++;
+      out[j]="0123456789ABCDEF"[(unsigned char)in[i] & 0x0F];//low nibble. unsigned so bytes >= 0x80 can't make a negative index.
+      j++;
+    }
+  }
+}
+
+int uriunescape(char *in,char *out) {
+ //turns every %XX in 'in' (XX = two hex digits, either case) into the byte it names and writes the
+ //result to 'out' followed by a null byte. a % without two hex digits after it is kept as a plain %.
+ //returns the number of bytes written, not counting the null byte.
+ //in and out may be the same buffer. the result is never longer than the input.
+ char *src=in;
+ char *dst=out;
+ char hi,lo;
+ while(*src) {
+  if(*src != '%') {//ordinary byte. copy it.
+   *dst=*src;
+   dst++;
+   src++;
+   continue;
+  }
+  if(isxdigit(src[1]) && isxdigit(src[2])) {
+   hi=toupper(src[1]);
+   lo=toupper(src[2]);
+   *dst=((hi-'0'<10 ? hi-'0' : hi-'A'+10) << 4) + (lo-'0'<10 ? lo-'0' : lo-'A'+10);
+   src+=3;//skip the %XX
+  } else {
+   *dst='%';
+   src++;//skip just the %. whatever follows is looked at again on the next pass.
+  }
+  dst++;
+ }
+ *dst=0;
+ return dst-out;
+}
+
+struct uri {//the eight parts of a parsed uri. warning from the author: it is technically undefined behavior to set one half of a union then use the other half. NOTE(review): every member here is a char * so the bigger assumption is that the struct of eight pointers is laid out exactly like A[8] (no padding) - confirm.
+  union {
+    char *A[8];//same eight pointers as the named members below, in the same order. lets code loop over the parts (uricmp does).
+    struct {
+      union { char *s;char *scheme; };//A[0]. each part has a one letter name and a long name for the same pointer.
+      union { char *u;char *username; };//A[1]
+      union { char *k;char *password; };//A[2]
+      union { char *d;char *domain; };//A[3]
+      union { char *P;char *port; };//A[4]. capital P is port, small p is path.
+      union { char *p;char *path; };//A[5]
+      union { char *q;char *query_string; };//A[6]
+      union { char *f;char *fragment_id; };//A[7]
+    };
+  };
+};
+
+//compares two parsed uris part by part. returns 0 when nothing differs. for part i (index into A[]): bit i is set when both have the part but the strings differ, bit i+8 when only a has it, bit i+16 when only b has it.
+unsigned int uricmp(struct uri *a,struct uri *b) {
+  unsigned int diff=0;
+  int i=0;
+  char *x,*y;
+  while(i<8) {
+    x=a->A[i];
+    y=b->A[i];
+    if(x && y) {//both present. they have to be the same string.
+      if(strcmp(x,y) != 0) diff|=1u<<i;
+    } else if(x) diff|=1u<<(i+8);//we have a's but not b's
+    else if(y) diff|=1u<<(i+16);//we have b's but not a's
+    i++;
+  }
+  return diff;
+}
+
+/*
+ schemes are case insensitive but canonicals are lower case.
+ domain is case insensitive. return it lowercased?
+ port is optional and in decimal
+ path
+ scheme://username:password@domain:port/path?query_string#fragment_id
+ mailto:username@domain
+
+ optional stuff:
+ scheme, username, password, port, path, query_string, fragment_id
+*/
+
+//splits 'line' in place into the parts of 'u'. separators in 'line' are overwritten with null bytes and the
+//parts point into 'line', so 'line' must be writable and must stay around for as long as 'u' is used.
+//fragment_id, query_string and path are always assigned. the other parts are only written when the parser
+//reaches them, so zero *u before calling. characters are not validated and nothing is percent-decoded.
+//always returns 1. there is no failure case yet.
+int urifromline(struct uri *u,char *line) {
+  //these first two are easy. the rest... not so much.
+  char *t;
+//  memset(u,0,sizeof(struct uri)); //this function shouldn't do this.
+  if((u->fragment_id=strchr(line,'#'))) {
+    *u->fragment_id=0;
+    u->fragment_id++;
+  }
+  if((u->query_string=strchr(line,'?'))) {
+    *u->query_string=0;
+    u->query_string++;
+  }
+  //now we have scheme, user, pass, domain, port, and path. maybe.
+  //what character can we split on now? : is a terrible choice.
+  // how about /? first / is either a separator between scheme
+  //could find the first non-scheme character.
+  //so we might have... scheme://user:pass@host:port/path
+  //or... user:pass@host:port/path ?
+  //we need to do this based on /s
+  // we're either going to find the scheme and authority separator
+  // or we're going to find the start of a path.
+  //there: scheme:/path, scheme://host (empty path), or scheme:path/morepath
+  //or...  should we do paths without
+  //scheme must start with a-z
+/*  if(*line == '/' && *(line+1) != '/') { //we have a relative path. /like:this.maybe
+    u->path=line;
+    return;//we're done. nothing else to do.
+  }
+  if(*line == '.') { //we have a relative path like: ./derp or ../merp
+    u->path=line;
+    return;//we're done here. nothing else to do.
+  }*/
+  //let's see if this starts with a scheme
+  if(strchr(line,':') && ((*line >= 'a' && *line <= 'z') || (*line >= 'A' && *line <= 'Z'))) {
+    //walk backwards from the first : to the start of line. stop early on anything a scheme can't contain.
+    for(u->scheme=strchr(line,':')-1;u->scheme > line;u->scheme--) {
+      if((*u->scheme >= 'a' && *u->scheme <= 'z') ||
+         (*u->scheme >= 'A' && *u->scheme <= 'Z') ||
+         (*u->scheme >= '0' && *u->scheme <= '9') ||
+         *u->scheme == '+' || *u->scheme == '-' || *u->scheme == '.') {
+        //this is still a scheme.
+      } else {
+        break;
+      }
+    }
+    if(u->scheme == line) {//we got through the for loop alright. line starts with a scheme.
+      line=strchr(line,':');
+      *line=0;
+      line++;
+      for(t=u->scheme;*t;t++) {
+        if(*t >= 'A' && *t <= 'Z') *t+=' ';//lowercase it. 'a'-'A' is ' '
+      }
+    } else u->scheme=0;//what's in front of the first : isn't a scheme. don't leave scheme pointing into the middle of line.
+  }
+
+  //copy-pasted from above the scheme strip attempt.
+  if(*line == '/' && *(line+1) != '/') { //we have a relative path. /like:this.maybe
+    u->path=line;
+    return 1;//we're done. nothing else to do.
+  }
+  if(*line == '.') { //we have a relative path like: ./derp or ../merp
+    u->path=line;
+    return 1;//we're done here. nothing else to do.
+  }
+
+  if(*line == '/' && line[1] == '/') {//we have an authority section.
+    //let's left-shift this shit over until the third /
+    //that makes room for a null byte after the authority without eating the / the path starts with.
+    for(t=line+1;*(t+1) && *(t+1) != '/';t++) {
+      *t=*(t+1);
+    }
+    *t=0;
+    u->path=t+1;//if there was a /, path points at it and the stuff after.
+    //if there wasn't a /, it points at a null byte. so "empty"
+    u->username=line+1;
+  } else {
+    u->path=line;//no authority. whatever is left is the path, like mailto:user@host or urn:a:b. it used to be thrown away.
+    return 1;
+  }
+
+  if(u->username) {//this contains all of the authority.
+    if((u->domain=strchr(u->username,'@'))) {//we have user@host at least.
+      *u->domain=0;
+      u->domain++;
+    } else {//this isn't really a username. it is the domain.
+      u->domain=u->username;
+      u->username=0;
+    }
+  }
+  //if we still have u->username we try to split to user and password
+  if(u->username) {
+    if((u->password=strchr(u->username,':'))) {
+      *u->password=0;
+      u->password++;
+    }
+  }
+  if(u->domain) {
+    if((u->port=strchr(u->domain,']')) && *u->domain == '[') {//this is an IPv6 host. NOTE(review): the ] gets overwritten so domain is left as "[addr" without it, and port ends up "" instead of unset when no :port follows.
+      *u->port=0;
+      u->port++;
+      if(*u->port == ':') {
+        *u->port=0;
+        u->port++;//if it ends up being empty, whatever. that's a URI like: http://host:/path
+      }
+    } else { //we're safe to split port off at :
+      if((u->port=strchr(u->domain,':'))) {
+        *u->port=0;
+        u->port++;
+      } //there isn't a port. leave it unset.
+    }
+  }
+  //I dunno.
+  return 1;
+}
+
+#endif
diff --git a/uricmp.c b/uricmp.c
new file mode 100644
index 0000000..9af0fed
--- /dev/null
+++ b/uricmp.c
@@ -0,0 +1,20 @@
+#include "uri.h"
+#include <stdio.h>
+
+int main(int argc,char *argv[]) {//prints the parts of two uris side by side, then the uricmp() bits in hex. exit status: 0 same, 2 different, 1 bad usage or no memory.
+  int i,ret;
+  struct uri *a=calloc(1,sizeof(struct uri));//zeroed on purpose. urifromline leaves the parts it doesn't find untouched.
+  struct uri *b=calloc(1,sizeof(struct uri));
+  if(argc < 3) {
+    fprintf(stderr,"usage: uricmp uri1 uri2\n");//we didn't ask for usage so it goes to stderr
+    return 1;
+  }
+  if(!a || !b) return 1;//out of memory
+  urifromline(a,argv[1]);//splits argv[1] in place. the parts point into it.
+  urifromline(b,argv[2]);
+  for(i=0;i<8;i++) {
+    printf("%s ? %s\n",a->A[i]?a->A[i]:"(null)",b->A[i]?b->A[i]:"(null)");//handing %s a null pointer is undefined. spell out what glibc printed.
+  }
+  printf("%08x\n",ret=uricmp(a,b));
+  return ret > 0 ? 2 : 0;
+}
diff --git a/uricut.c b/uricut.c
new file mode 100644
index 0000000..5fe8764
--- /dev/null
+++ b/uricut.c
@@ -0,0 +1,158 @@
+#include "uri.h"
+
+#include <netdb.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+/*
+ schemes are case insensitive but canonicals are lower case.
+ domain is case insensitive. return it lowercased?
+ port is optional and in decimal
+ path
+ scheme://username:password@domain:port/path?query_string#fragment_id
+ mailto:username@domain
+
+ optional stuff:
+ scheme, username, password, port, path, query_string, fragment_id
+*/
+
+#define AorB(a,b) ((a)?(a):(b)) //a when it is non-zero, otherwise b. a gets evaluated twice.
+
+#define F_SCHEME (1<<0) //one bit per uri part. bit n belongs to long_opts[n] and short_opts[n].
+#define F_USERNAME (1<<1)
+#define F_PASSWORD (1<<2)
+#define F_DOMAIN (1<<3)
+#define F_PORT (1<<4)
+#define F_PATH (1<<5)
+#define F_QUERY_STRING (1<<6)
+#define F_FRAGMENT_ID (1<<7)
+#define F_WHOLE_URI (1<<8) //the ninth bit doesn't fit in a char.
+
+char *long_opts[]={"scheme","username","password","domain","port","path","query_string","fragment_id","URI",0};
+char *short_opts[]={"s","u","k","d","P","p","q","f","U",0};//0 terminated like long_opts so a loop of the form for(i=0;short_opts[i];i++) stops at the end.
+
+int main(int argc,char *argv[]) {//prints the requested parts of each uri, one per line. uris are taken from the arguments, or from stdin (one per line) when no argument is a uri.
+ char *uri;//untouched copy of the input. urifromline chops up the original.
+ char *line=0;
+ short args[256];//this needs to be a short to make room for the F_WHOLE_URI
+ int i,j,c=0;//c counts the entries used in args
+ int size=1024;
+ char fixme=0;//set when the current argument turned out to be short flags
+ char using_stdin=1;
+ char malloced=0;
+ struct uri u;
+ if(argc > 1) {
+  if(!strcmp(argv[1],"--help") || !strcmp(argv[1],"-h")) {
+   printf("usage: echo uris | uricut [options]\n");
+   printf("usage: uricut [options] uri [options] [uri]\n\n");
+   printf("options: \n");
+   for(i=0;long_opts[i];i++) {
+    printf("        -%s|--%s\n",short_opts[i],long_opts[i]);
+   }
+   printf("To set default values use environment variables like: CUTURI_[OPTION]\n");
+   return 2;
+  }
+ }
+ argv++;
+ argc--;
+ while(1) {//one pass per uri
+  u.scheme=0;
+  u.username=0;
+  u.password=0;
+  u.domain=0;
+  u.port=0;
+  u.path=0;
+  u.query_string=0;
+  u.fragment_id=0;
+  if(!using_stdin) c=0;//uris given as arguments each get their own flags. stdin lines all share the same ones.
+  if(argc >= 1) {
+   for(;argc>0;argc--,argv++) {
+    for(i=0;long_opts[i];i++) {
+     if(!strncmp(*argv,"--",2)) {
+      if(!strcmp(*argv+2,long_opts[i])) {
+       if(c < (int)(sizeof(args)/sizeof(*args))) args[c++]=1<<i;//flags past the end of args are dropped instead of written out of bounds.
+       //i stays at the matching index so the long_opts[i] test further down sees the match.
+       break;
+      }
+     }
+    }
+    fixme=0;
+    if(**argv=='-' && argv[0][1] != '-') {
+     for(j=1;argv[0][j];j++) {
+      for(i=0;long_opts[i];i++) {//short_opts runs parallel to long_opts. long_opts' 0 entry ends the walk over both.
+       if(argv[0][j]==*short_opts[i]) {
+        if(c < (int)(sizeof(args)/sizeof(*args))) args[c++]=1<<i;
+        //one recognised letter is enough to make the whole argument count as flags.
+        fixme=1;
+       }
+      }
+     }
+    }
+    if(fixme) continue;
+    if(long_opts[i]) continue;
+    //if we get here we are at data instead of flags. work on it.
+    line=*argv;
+    using_stdin=0;
+    argc--;
+    argv++;
+    break;
+   }
+  }
+  if(!argc && !line) {//if we are out of arguments and it didn't include data. NOTE(review): this also triggers after the last uri argument was handled, so the program then goes on to read stdin. confirm that is wanted.
+   using_stdin=1;
+  }
+  if(using_stdin) {
+   line=malloc(size+1);
+   malloced=1;
+   if(!line || !fgets(line,size,stdin)) {//out of memory or end of input
+    return 0;
+   }
+  }
+  if(!line) return 0;
+  for(i=0;line[i] && line[i] != '\n' && line[i] != '\r';i++);//cut the line at the first newline or carriage return
+  line[i]=0;
+
+  uri=strdup(line);
+  urifromline(&u,line);
+
+  // printf("scheme://username:password@domain:port/path?query_string#fragment_id\n\n");
+  //let's set them to what'll get printed now...
+
+   if(c) {//parts were asked for. print them in the order asked, missing ones as empty lines.
+    for(i=0;i<c;i++) {
+     if(args[i]&F_SCHEME) printf("%s\n",AorB(u.scheme,""));
+     if(args[i]&F_USERNAME) printf("%s\n",AorB(u.username,""));
+     if(args[i]&F_PASSWORD) printf("%s\n",AorB(u.password,""));
+     if(args[i]&F_DOMAIN) printf("%s\n",AorB(u.domain,""));
+     if(args[i]&F_PORT) printf("%s\n",AorB(u.port,""));
+     if(args[i]&F_PATH) printf("%s\n",AorB(u.path,""));
+     if(args[i]&F_QUERY_STRING) printf("%s\n",AorB(u.query_string,""));
+     if(args[i]&F_FRAGMENT_ID) printf("%s\n",AorB(u.fragment_id,""));
+     if(args[i]&F_WHOLE_URI) printf("%s\n",uri);
+    }
+   } else {//no flags. dump everything. handing %s a null pointer is undefined so spell out the (null) glibc printed.
+    printf("scheme: %s\n",AorB(u.scheme,"(null)"));
+    printf("username: %s\n",AorB(u.username,"(null)"));
+    printf("password: %s\n",AorB(u.password,"(null)"));
+    printf("domain: %s\n",AorB(u.domain,"(null)"));
+    printf("port: %s\n",AorB(u.port,"(null)"));
+    printf("path: %s\n",AorB(u.path,"(null)"));
+    printf("query_string: %s\n",AorB(u.query_string,"(null)"));
+    printf("fragment_id: %s\n",AorB(u.fragment_id,"(null)"));
+    printf("whole_uri: %s\n",AorB(uri,"(null)"));
+   }
+  free(uri);//this is definitely malloc()d
+  if(malloced) {
+   free(line);
+   malloced=0;
+   line=0;
+  } else {
+   line=0;//line pointed into argv. nothing to free.
+  }
+ }
+ return 0;
+}
diff --git a/uriescape.c b/uriescape.c
new file mode 100644
index 0000000..da3da7e
--- /dev/null
+++ b/uriescape.c
@@ -0,0 +1,14 @@
+#include "uri.h"
+#include <stdio.h>
+
+int main(int argc,char *argv[]) {//prints argv[1] percent-encoded. exit status 1 when there is no argument or no memory.
+  int len;//length of the ESCAPED text. the input length is strlen(argv[1]).
+  char *out;
+  if(argc < 2) return 1;
+  len=uriescapelength(argv[1],strlen(argv[1]));
+  if(!(out=malloc(len+1))) return 1;
+  uriescape(argv[1],out,strlen(argv[1]));//wants the input length. handing it the escaped length made it read past argv[1] and write past out.
+  out[len]=0;
+  printf("%s\n",out);
+  return 0;
+}
diff --git a/urimatch.c b/urimatch.c
new file mode 100644
index 0000000..42ee0aa
--- /dev/null
+++ b/urimatch.c
@@ -0,0 +1,70 @@
+#include "uri.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define LINE_LENGTH 1024
+
+//tests one uri part. returns 1 when the test passes and 0 when it doesn't.
+//negate set: passes when the part is missing (part == 0). arg is not looked at.
+//negate clear: passes when the part is there and is exactly the string arg.
+//part: the parsed piece of the uri or 0. arg: the string from the command line.
+//arg may be 0 when the flag was the last thing on the command line. that counts as no match.
+int match(char negate,char *part,char *arg) {
+ if(negate) return part == 0;//we found that the part isn't here!
+ if(!part || !arg) return 0;//nothing to compare. strcmp on a null pointer used to crash here.
+ return !strcmp(part,arg);
+}
+
+//filters uris read from stdin, one per line. a line is printed once for every flag it satisfies.
+//exit status is 0 when at least one line was printed and 1 otherwise.
+int main(int argc,char *argv[]) {
+ int i,hit;
+ int ret=1;
+ struct uri u;
+ char negate;//set while the current flag has the n prefix. such a flag takes no string after it.
+ char *flag,*part,*arg;//flag walks a copy of the pointer. argv is needed untouched for the next input line.
+ char *line=malloc(LINE_LENGTH);
+ char copy[LINE_LENGTH];
+ if(argc < 2) {
+  printf("usage: urimatch [-][n][s|u|k|d|D|P|p|q|f] [string]\n");
+  printf("scheme://username:password@domain:port/path?query_string#fragment_id\n");
+  printf("s://u:k@d:P/p?q#f\n");
+  printf("The D flag is special. it matches its argument against the last bytes of the input url's domain.\n");
+  printf("This allows matching of subdomains, like `echo epoch.ano | urimatch -D ano` would match.\n");
+  printf("the 'n' flag can be put before any of the other flags to check for a missing.\n");
+  return 1;
+ }
+ if(!line) return 1;//out of memory
+ while(fgets(line,LINE_LENGTH-1,stdin)) {
+  if(strchr(line,'\r')) *strchr(line,'\r')=0;
+  if(strchr(line,'\n')) *strchr(line,'\n')=0;
+  strcpy(copy,line);//urifromline chops line up. copy is what gets printed.
+  memset(&u,0,sizeof(u));
+  urifromline(&u,line);
+  for(i=1;i<argc;i+=(negate?1:2)) {//a negated flag is one argument. a plain flag is followed by its string.
+   flag=argv[i];
+   negate=0;
+   if(*flag == '-') flag++;
+   if(*flag == 'n') {flag++; negate=1; }//heh.
+   switch(*flag) {
+    case 's': part=u.scheme;       break;
+    case 'u': part=u.username;     break;
+    case 'k': part=u.password;     break;
+    case 'd': part=u.domain;       break;
+    case 'P': part=u.port;         break;
+    case 'p': part=u.path;         break;
+    case 'q': part=u.query_string; break;
+    case 'f': part=u.fragment_id;  break;
+    case 'D': part=u.domain;       break;//not sure how to look for a missing one of these. it'd be like d.
+    default:  printf("unknown url part letter! '%c'\n",*flag); return ret;
+   }
+   arg=argv[i+1];//0 when the flag is the last argument
+   if(*flag == 'D') hit=part && arg && strlen(part) >= strlen(arg) && !strncmp(part+strlen(part)-strlen(arg),arg,strlen(arg));//suffix test. n is ignored for D.
+   else hit=(negate || arg) && match(negate,part,arg);//a plain flag without a string never matches
+   if(hit) { printf("%s\n",copy); ret=0; }
+  }
+ }
+ return ret;
+}
+
diff --git a/uriprintf b/uriprintf
new file mode 100755
index 0000000..f79d067
--- /dev/null
+++ b/uriprintf
@@ -0,0 +1,5 @@
+#!/bin/sh
+cutargs="$(printf "%s\n" "$1" | sed 's/%[^sukdPpqfU]//g' | tr '%' '\n' | tail -n+2 | sed 's/^\(.\).*/-\1/g' | tr '\n' ' ')" # $1 is a format like '%s://%d'. this turns its %letters into uricut flags, in order: "-s -d "
+count="$(echo $cutargs | tr '-' '\n' | grep -c .)" # how many flags that is, so how many lines uricut prints per uri
+printfargs="$(printf "%s\n" "$1" | sed 's/%[sukdPpqfU]/%s/g')" # the same format with every uri %letter swapped for a plain %s
+uricut "$cutargs" | tr '\n' '\0' | xargs -n $count -0 printf "$printfargs" # uris come in on stdin. each group of $count parts becomes one printf call.
diff --git a/uristart b/uristart
new file mode 100755
index 0000000..78454b7
--- /dev/null
+++ b/uristart
@@ -0,0 +1,4 @@
+#!/bin/sh
+scheme="$(printf "%s\n" "$1" | uricut -s)" # the scheme of the uri in $1 picks the config line
+line="$(grep "^${scheme}:" ~/.config/uristart.conf | cut -d: -f2- | sed 's/^[[:space:]]*//')" # the command template, leading blanks stripped. [ \t] only means tab in GNU sed. elsewhere it ate a leading t or backslash.
+eval "$(printf "%s\n" "$1" | sed 's/'\''/'\''\\'\'''\''/g' | uriprintf "$line")" # every ' in the uri becomes '\'' before its parts are pasted into the template and run
diff --git a/uriunescape.c b/uriunescape.c
new file mode 100644
index 0000000..aafc6ea
--- /dev/null
+++ b/uriunescape.c
@@ -0,0 +1,12 @@
+#include "uri.h"
+#include <unistd.h>
+
+int main(int argc,char *argv[]) {
+  int i,n;//unescapes every argument in place and writes the results to stdout with single spaces between them.
+  for(i=1;i<argc;i++) {
+   n=uriunescape(argv[i],argv[i]);//the decoded text is never longer than the input so in place is fine.
+   write(1,argv[i],n);
+   if(i != argc-1) write(1," ",1);//nothing after the last one. no newline either.
+  }
+  return 0;
+}
diff --git a/url.h b/url.h
deleted file mode 100644
index 6304a48..0000000
--- a/url.h
+++ /dev/null
@@ -1,261 +0,0 @@
-#ifndef URL_H
-#define URL_H
-
-#include <netdb.h>
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-
-//reserved = gen-delims / sub-delims
-#define pe_gen_delims ":/?#[]@"
-#define pe_sub_delims "!$&'()*+,;="
-//char *pe_reserved[]=pe_gen_delims "" pe_sub_delims; 
-#define pe_ALPHA "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
-#define pe_DIGIT "0123456789"
-#define pe_HPUT "-._~"
-//char *pe_unreserved[]=pe_ALPHA "" pe_DIGIT "" pe_HPUT;
-
-unsigned char rfc3086_percent_encoding[256];
-
-#define isxdigit(a) ((a >= 'a' && a <= 'f') || (a >= '0' && a <= '9') || (a >= 'A' && a <= 'F'))
-#define toupper(a) ((a >= 'a' && a <= 'z')?a-' ':a)
-
-char *reserved={
-  pe_gen_delims
-  pe_sub_delims
-  pe_ALPHA
-  pe_DIGIT
-  pe_HPUT
-};
-
-int urlescapelength(char *in,int len) {
-  int rlen=0;//be sure to add one to this return value if you plan on putting a null byte at the end.
-  int i;
-  for(i=0;i<len;i++) {
-    rlen+=strchr(reserved,in[i])?1:3;
-  }
-  return rlen;
-}
-
-// make sure your out char * has enough space! use urlescapelength for it.
-void urlescape(char *in,char *out,int len) {
-  int i;
-  int j;
-  for(i=0,j=0;i<len;i++) {
-    if(strchr(reserved,in[i])) {
-      out[j]=in[i];
-      j++;
-    } else {
-      out[j]='%';
-      j++;
-      out[j]="0123456789ABCDEF"[(in[i] >> 4 & 0x15)];
-      j++;
-      out[j]="0123456789ABCDEF"[(in[i] % 16)];
-      j++;
-    }
-  }
-}
-
-int urlunescape(char *in,char *out) {
- char *o=out;
- char *t;
- char a,b;
- char *s=in;
- if(!strchr(s,'%')) memmove(out,in,strlen(in));
- while((t=strchr(s,'%'))) {
-  if(t-s) {//if there are actually bytes to copy.
-   memmove(o,s,t-s);
-   o+=(t-s);
-   s+=(t-s);
-  }
-  if(isxdigit(t[1]) && isxdigit(t[2])) {
-   s+=3;//skip the %XX
-   a=toupper(t[1]);
-   b=toupper(t[2]);
-   *o=((a-'0'<10 ? a-'0' : a-'A'+10) << 4) + (b-'0'<10 ? b-'0' : b-'A'+10); 
-   o++;
-  } else {
-   s++;//skip just the %. the next character might be a % //TODO: look up what the "right" thing to do here is.
-   *o='%';
-   o++;
-  }
- }
- //copy the last part.
- memmove(o,s,strlen(s));
- o[strlen(s)]=0;
- return o+strlen(s)-out;
-}
-
-struct url {
- char *scheme;
- char *username;
- char *password;
- char *domain;
- char *port;
- char *path;
- char *query_string;
- char *fragment_id;
-};
-
-
-/*
- schemes are case sensitive but cononicals are lower case.
- domain is case insensitive. return it lowercased?
- port is optional and in decimal
- path
- scheme://username:password@domain:port/path?query_string#fragment_id
- mailto:username@domain
-
- optional stuff:
- scheme, username, password, port, path, query_string, fragment_id
-*/
-
-void urlfromline(struct url *u,char *line) {
- int i;
- char hack=0;//we need to allow for // as host//path separator
-  //split at first single / into line and path
-  //this fails to split scheme://host//path into: scheme, host, /path. needs to be first single / or second double-or-more-/
-  for(i=0;line[i];i++) {
-   if(line[i] == '/' && line[i+1] == '/') {
-    if(!hack) {//only skip out on the first // because it is probably used in the scheme.
-     hack=1;
-     i++;
-     continue;
-    }
-   }
-   if(line[i] == '/') {
-    line[i]=0;
-    u->path=line+i+1;
-    break;
-   }
-  }
-  if(u->path) {
-   if(strchr(u->path,'?')) {
-    u->query_string=strchr(u->path,'?');
-    *u->query_string=0;
-    u->query_string++;
-   }
-  }
-
-  if(u->query_string) {
-   if(strchr(u->query_string,'#')) {
-    u->fragment_id=strchr(u->query_string,'#');
-    *u->fragment_id=0;
-    u->fragment_id++;
-   }
-  }
-
-  if(strstr(line,"://")) {
-   u->scheme=line;
-   u->domain=strstr(line,"://");
-   *u->domain=0;
-   u->domain+=3;
-  } else {
-   u->domain=line;
-  }
-
-  if(u->domain) {
-   if(strchr(u->domain,'@')) {
-    u->username=u->domain;
-    u->domain=strchr(u->domain,'@');
-    *u->domain=0;
-    u->domain++;
-   }
-  }
-
-  if(u->username) {
-   if(strchr(u->username,':')) {
-    u->password=strchr(u->username,':');
-    *u->password=0;
-    u->password++;
-   }
-  }
-
-  if(u->domain) {
-   if(strchr(u->domain,']')) {//the end of an IPv6 address
-    if(strchr(strchr(u->domain,']'),':')) {
-     u->port=strchr(strchr(u->domain,']'),':');
-     if(u->port[1] == '?') {//for magnet links
-      u->port=0;
-     } else {
-      *u->port=0;
-      u->port++;
-     }
-    }
-   } else {
-    if(strchr(u->domain,':')) {
-     u->port=strchr(u->domain,':');
-     if(u->port[1] == '?') {//for magnet links
-      u->port=0;
-     } else {
-      *u->port=0;
-      u->port++;
-     }
-    }
-   }
-  }
-  if(u->port) {
-   for(i=0;u->port[i];i++) {
-    if(u->port[i] < '0' || u->port[i] > '9') {
-     //this port number isn't a number!
-     //it is probably a different portion of the url then... and the domain is probably the scheme.
-     if(u->domain && !u->scheme) {
-      u->scheme=u->domain;
-      u->domain=0;
-     }
-     if(!u->path) {
-      u->path=u->port;
-      u->port=0;
-     }
-     break;
-    }
-   }
-  }
-
-  if(u->domain) {//for magnet links.
-   if(strchr(u->domain,'?')) {
-    u->query_string=strchr(u->domain,'?');
-    *u->query_string=0;
-    u->query_string++;
-   }
-  }
-
-  if(u->domain) {
-   if(strchr(u->domain,':') && !strchr(u->domain,']')) {//for scheme:?query_string
-    u->scheme=u->domain;
-    *strchr(u->scheme,':')=0;
-    u->domain=0;
-   }
-  }
-
-  if(!u->scheme && u->username) {//for mailto:
-   u->scheme=u->username;
-   u->username=u->password;
-   u->password=0;
-  }
-}
-
-#define AorB(a,b) ((a)?(a):(b))
-
-void magic_and_defaults(struct url *u) {
-  struct servent *serv;
-  char sport[10];
-  u->scheme=AorB(u->scheme,AorB(getenv("URL_SCHEME"),"DEFAULT"));
-  u->username=AorB(u->username,AorB(getenv("URL_USERNAME"),"DEFAULT"));
-  u->password=AorB(u->password,AorB(getenv("URL_PASSWORD"),"DEFAULT"));
-  u->domain=AorB(u->domain,AorB(getenv("URL_DOMAIN"),"DEFAULT"));
-  serv=getservbyname(u->scheme,strcmp(u->scheme,"udp")?"tcp":"udp");//gets default port for the scheme. http -> 80
-  if(serv) snprintf(sport,sizeof(sport)-1,"%d",ntohs(serv->s_port));
-//  else snprintf(sport,sizeof(sport)-1,"%d",serv);
-  u->port=AorB(u->port,AorB(getenv("URL_PORT"),(serv?strdup(sport):"DEFAULT")));
-
-//  if(!strcmp(u->port,"DEFAULT")) {
-   //this shouldn't happen most of the time. :/
-//   printf("serv: %d\nsport: %s\nu->scheme: %s\n",serv,sport,u->scheme);
-//  }
-
-  u->path=AorB(u->path,AorB(getenv("URL_PATH"),"DEFAULT"));
-  u->query_string=AorB(u->query_string,AorB(getenv("URL_QUERY_STRING"),"DEFAULT"));
-  u->fragment_id=AorB(u->fragment_id,AorB(getenv("URL_FRAGMENT_ID"),"DEFAULT"));
-}
-#endif
diff --git a/urlescape.c b/urlescape.c
deleted file mode 100644
index ce8e3e7..0000000
--- a/urlescape.c
+++ /dev/null
@@ -1,14 +0,0 @@
-#include <stdio.h>
-#include "url.h"
-
-int main(int argc,char *argv[]) {
-  int len;
-  char *out;
-  if(argc < 2) return 1;
-  len=urlescapelength(argv[1],strlen(argv[1]));
-  out=malloc(len+1);
-  urlescape(argv[1],out,len);
-  out[len]=0;
-  printf("%s\n",out);
-  return 0;
-}
diff --git a/urlunescape.c b/urlunescape.c
deleted file mode 100644
index 618cd64..0000000
--- a/urlunescape.c
+++ /dev/null
@@ -1,12 +0,0 @@
-#include <unistd.h>
-#include "url.h"
-
-int main(int argc,char *argv[]) {
-  int len;
-  for(argv++,argc--;argc;argc--,argv++) {
-   len=urlunescape(*argv,*argv);
-   write(1,*argv,len);
-   if(argc-1) write(1," ",1);
-  }
-  return 0;
-}
-- 
cgit v1.2.3