rebased on an old copy of this repo. renamed everything. rewrote the uri parser. added uricmp. wew.

author: epoch <epoch@hacking.allowed.org> 2019-04-20 05:32:27 -0500
committer: epoch <epoch@hacking.allowed.org> 2019-04-20 05:32:27 -0500
commit: 6f402e2d2f052972886712f60d592684c8671982 (patch)
tree: 47a09324bd3c5e577ec5b7059bd6c8834bead115
parent: d42135919f480c8bba4ca1f043fbabf44dac708f (diff)
download: uritools-6f402e2d2f052972886712f60d592684c8671982.tar.gz
uritools-6f402e2d2f052972886712f60d592684c8671982.zip
13 files changed, 328 insertions, 362 deletions
diff --git a/.gitignore b/.gitignore
index 805d9dc..499d9be 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,5 @@
-matchurl
-cuturl
+uricmp
+uricut
+uriescape
+urimatch
+uriunescape
diff --git a/Makefile b/Makefile
index 7edc560..b67a2d6 100644
--- a/Makefile
+++ b/Makefile
@@ -1,26 +1,28 @@
-CFLAGS=-std=c99 -pedantic -Wall
-PREFIX=/usr/local
-CC=gcc
+CFLAGS:=-std=c11 -pedantic -Wall
+PREFIX:=/usr/local
+CC:=gcc
 
-all: cuturl matchurl urlunescape urlescape
+all: uricut urimatch uriunescape uriescape uricmp
 
-matchurl: matchurl.c url.h
+urimatch: urimatch.c uri.h
 
-cuturl: cuturl.c url.h
+uricut: uricut.c uri.h
 
-urlunescape: urlunescape.c url.h
+uricmp: uricmp.c uri.h
 
-urlescape: urlescape.c url.h
+uriunescape: uriunescape.c uri.h
+
+uriescape: uriescape.c uri.h
 
 clean:
-	rm -f matchurl
-	rm -f cuturl
+	rm -f uricut urimatch uriunescape uriescape uricmp
 	rm -f *.o
 
 install: all
-	install matchurl $(PREFIX)/bin/matchurl
-	install cuturl $(PREFIX)/bin/cuturl
-	install start $(PREFIX)/bin/start
-	install printfurl $(PREFIX)/bin/printfurl
-	install urlunescape $(PREFIX)/bin/urlunescape
-	install urlescape $(PREFIX)/bin/urlescape
+	install urimatch $(PREFIX)/bin/urimatch
+	install uricut $(PREFIX)/bin/uricut
+	install uricmp $(PREFIX)/bin/uricmp
+	install uristart $(PREFIX)/bin/uristart
+	install uriprintf $(PREFIX)/bin/uriprintf
+	install uriunescape $(PREFIX)/bin/uriunescape
+	install uriescape $(PREFIX)/bin/uriescape
diff --git a/start b/start
deleted file mode 100755
index 9a3dca6..0000000
--- a/start
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-scheme="$(printf "%s\n" "$1" | cuturl -s)"
-line="$(grep "^${scheme}:" ~/.config/start.conf | cut -d: -f2-)"
-eval "$(printf "%s\n" "$1" | sed 's/'\''/'\''\\'\'''\''/g' | printfurl "$line")"
diff --git a/start.conf.example b/start.conf.example
deleted file mode 100644
index 6864da3..0000000
--- a/start.conf.example
+++ /dev/null
@@ -1,8 +0,0 @@
-### start single-quote escapes the url parts so place them inside single-quote or else!
-### (if you're given a bad link someone might be able to run shell commands)
-finger:printf "%%s\r\n" '%p' | ncat '%d' 79 | tr -d '\r' | xmessage -file -
-### new! subshells works
-whois:whois "$(printf '%%s\\\\n' '%d' | sed 's/^..*$/-h/')" '%d' '%p' | xmessage -file -
-irc:x-terminal-emulator -e irssi -c '%d' -p '%P'
-http:dillo '%U'
-DEFAULT:xdg-open '%U'
diff --git a/uri.h b/uri.h
new file mode 100644
index 0000000..97ce3c2
--- /dev/null
+++ b/uri.h
@@ -0,0 +1,253 @@
+#ifndef uri_H
+#define uri_H
+
+#define _XOPEN_SOURCE 500 //for strdup
+#include <string.h>
+#include <netdb.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+//RFC 3986 character classes (sections 2.2 and 2.3), kept as string literals so they can be concatenated.
+#define pe_gen_delims ":/?#[]@"
+#define pe_sub_delims "!$&'()*+,;="
+//reserved   = pe_gen_delims pe_sub_delims
+#define pe_ALPHA "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+#define pe_DIGIT "0123456789"
+#define pe_HPUT "-._~"
+//unreserved = pe_ALPHA pe_DIGIT pe_HPUT
+
+unsigned char rfc3086_percent_encoding[256];//not referenced anywhere in this header. NOTE(review): the name probably means RFC 3986.
+//ASCII-only stand-ins for the <ctype.h> names. arguments are parenthesized so any expression can be passed, but they are still evaluated more than once.
+#define isxdigit(a) (((a) >= 'a' && (a) <= 'f') || ((a) >= '0' && (a) <= '9') || ((a) >= 'A' && (a) <= 'F'))
+#define toupper(a) (((a) >= 'a' && (a) <= 'z')?(a)-' ':(a))
+//despite the name this is every byte the escape functions copy through unchanged: reserved AND unreserved characters.
+char *uri_reserved={
+  pe_gen_delims
+  pe_sub_delims
+  pe_ALPHA
+  pe_DIGIT
+  pe_HPUT
+};
+
+//counts the bytes uriescape() writes for the first len bytes of in: 1 per byte found in uri_reserved, 3 ("%XX") per other byte. add one yourself if you want room for a null byte.
+int uriescapelength(char *in,int len) {
+  int total=0;
+  for(;len > 0;len--,in++) {
+    total+=strchr(uri_reserved,*in) ? 1 : 3;
+  }
+  return total;
+}
+
+// percent-encodes the first len bytes of in into out. bytes that strchr() finds in uri_reserved are copied as-is,
+// every other byte becomes '%' followed by two uppercase hex digits. out is NOT null terminated by this function.
+// make sure your out char * has enough space! use uriescapelength for it.
+void uriescape(char *in,char *out,int len) {
+  static const char hex[]="0123456789ABCDEF";
+  int i;
+  int j=0;
+  for(i=0;i<len;i++) {
+    unsigned char c=(unsigned char)in[i];//unsigned so bytes >= 0x80 can't turn into a negative table index.
+    if(strchr(uri_reserved,in[i])) {
+      out[j++]=in[i];
+    } else {
+      out[j++]='%';
+      out[j++]=hex[c >> 4];//high nibble. this used to be masked with 0x15, which turned ' ' into "%00".
+      out[j++]=hex[c & 0x0F];//low nibble.
+    }
+  }
+}
+
+//decodes the %XX escapes of the null terminated string in into out and null terminates out.
+//a '%' that is not followed by two hex digits is copied through as a literal '%'.
+//out needs room for strlen(in)+1 bytes. in and out may be the same buffer; uriunescape.c calls it that way.
+//returns the number of bytes written to out, not counting the terminator. a decoded %00 counts like any other byte.
+int uriunescape(char *in,char *out) {
+ char *src=in;
+ char *dst=out;
+ char *pct;
+ char hi,lo;
+ size_t tail;
+ for(pct=strchr(src,'%');pct;pct=strchr(src,'%')) {
+  memmove(dst,src,pct-src);//the literal bytes in front of the '%'. a zero length move is harmless.
+  dst+=pct-src;
+  src=pct;
+  if(!(isxdigit(pct[1]) && isxdigit(pct[2]))) {
+   *dst++='%';//not an escape. keep the '%' and rescan right after it. //TODO: look up what the "right" thing to do here is.
+   src++;
+   continue;
+  }
+  hi=toupper(pct[1]);
+  lo=toupper(pct[2]);
+  *dst++=((hi <= '9' ? hi-'0' : hi-'A'+10) << 4) + (lo <= '9' ? lo-'0' : lo-'A'+10);
+  src+=3;//skip the %XX
+ }
+ tail=strlen(src);
+ memmove(dst,src,tail);//copy the last part.
+ dst[tail]=0;
+ return dst+tail-out;
+}
+
+struct uri {//the pieces of one parsed URI. each piece can be reached by short name, long name, or index into A[]. NOTE(review): the old warning here called reading the other half of a union undefined behavior; C11 6.5.2.3 permits it and every member is a char * anyway. the overlay does assume the eight members below are laid out like a char *[8].
+  union {
+    char *A[8];//the same eight pointers as the struct below, in declaration order: A[0] is scheme, A[7] is fragment_id.
+    struct {//scheme://username:password@domain:port/path?query_string#fragment_id
+      union { char *s;char *scheme; };
+      union { char *u;char *username; };
+      union { char *k;char *password; };
+      union { char *d;char *domain; };
+      union { char *P;char *port; };
+      union { char *p;char *path; };
+      union { char *q;char *query_string; };
+      union { char *f;char *fragment_id; };
+    };
+  };
+};//anonymous structs and unions are a C11 feature. the Makefile builds with -std=c11.
+
+//compares two parsed URIs piece by piece. returns 0 when every piece agrees, otherwise a mask where i is the index into A[]:
+//  bit i    - both have piece i but the strings differ
+//  bit i+8  - only a has piece i
+//  bit i+16 - only b has piece i
+unsigned int uricmp(struct uri *a,struct uri *b) {
+  int diff=0;
+  int i;
+  for(i=0;i<8;i++) {
+    char *x=a->A[i];
+    char *y=b->A[i];
+    if(x && y) { if(strcmp(x,y)) diff|=(1<<i); }
+    else if(x) diff|=(1<<(i+8));//we have a's but not b's
+    else if(y) diff|=(1<<(i+16));//we have b's but not a's. both empty is not a difference.
+  }
+  return diff;
+}
+
+/*
+ schemes are case insensitive; the canonical form is lower case, so urifromline() lowercases them.
+ domain is case insensitive. return it lowercased?
+ port is optional and in decimal
+ path
+ scheme://username:password@domain:port/path?query_string#fragment_id
+ mailto:username@domain
+
+ optional stuff:
+ scheme, username, password, port, path, query_string, fragment_id
+*/
+
+//splits line in place into the pieces of u: scheme://username:password@domain:port/path?query_string#fragment_id
+//line is modified: separators are overwritten with null bytes and the authority is shifted one byte to the left.
+//every piece stored in u points into line, so line has to stay alive and unchanged for as long as u is used.
+//fragment_id and query_string are always assigned (0 when absent). the other pieces are only assigned when they
+//are found, so zero *u before calling if you want the missing ones to read as 0. this function won't do it.
+//the authority (between // and the next /) is split at its first @ into user info and host, the user info at its
+//first : into username and password, and the host at its first : (right after the ] for a [v6] host) into domain and port.
+//the scheme is lowercased. nothing else is normalized or validated.
+//example: "http://a:b@c:80/d?e#f" fills in all eight pieces.
+//example: "/just/a/path" and "./rel" only set path (plus a 0 query_string and fragment_id).
+//the return value is always 1.
+int urifromline(struct uri *u,char *line) {
+  char *t;
+  char *colon;
+  //these first two are easy. the rest... not so much.
+  if((u->fragment_id=strchr(line,'#'))) {
+    *u->fragment_id=0;
+    u->fragment_id++;
+  }
+  if((u->query_string=strchr(line,'?'))) {
+    *u->query_string=0;
+    u->query_string++;
+  }
+
+  //now we have scheme, user, pass, domain, port, and path. maybe. what is left looks like one of:
+  //  scheme://user:pass@host:port/path
+  //  //user:pass@host:port/path
+  //  scheme:/path, scheme://host (empty path), or scheme:path/morepath
+  //  /path, ./path or ../path
+  //a scheme must start with a letter. after that letters, digits, '+', '-' and '.' are allowed up to the first ':'.
+  colon=strchr(line,':');
+  if(colon && ((*line >= 'a' && *line <= 'z') || (*line >= 'A' && *line <= 'Z'))) {
+    //walk backwards from the ':' with a scratch pointer so u->scheme is only assigned once the whole
+    //candidate has been checked. (it used to be left pointing into the middle of line when the check failed.)
+    for(t=colon-1;t > line;t--) {
+      if((*t >= 'a' && *t <= 'z') ||
+         (*t >= 'A' && *t <= 'Z') ||
+         (*t >= '0' && *t <= '9') ||
+         *t == '+' || *t == '-' || *t == '.') {
+        //this is still a scheme.
+      } else {
+        break;
+      }
+    }
+    if(t == line) {//we got through the for loop alright. line starts with a scheme.
+      u->scheme=line;
+      line=colon;
+      *line=0;
+      line++;
+      for(t=u->scheme;*t;t++) {
+        if(*t >= 'A' && *t <= 'Z') *t+=' ';//'A'+' ' == 'a'. the scheme is stored lowercased.
+      }
+    }
+  }
+
+  //with the scheme out of the way, line starts with a path, an authority, or something that isn't handled.
+  if(*line == '/' && *(line+1) != '/') { //a path that starts with a single /. there is no authority.
+    u->path=line;
+    return 1;//we're done. nothing else to do.
+  }
+  if(*line == '.') { //we have a relative path like: ./derp or ../merp
+    u->path=line;
+    return 1;//we're done here. nothing else to do.
+  }
+
+  if(*line == '/' && line[1] == '/') {//we have an authority section.
+    //shift the authority one byte to the left, over the second /, until the third / or the end of the string.
+    //that frees up one byte for a null terminator without destroying the / that starts the path.
+    for(t=line+1;*(t+1) && *(t+1) != '/';t++) {
+      *t=*(t+1);
+    }
+    *t=0;
+    u->path=t+1;//if there was a /, path points at it and the stuff after.
+    //if there wasn't a /, it points at a null byte. so "empty"
+    u->username=line+1;
+  } else {
+    //NOTE(review): anything still left in line here (for example the "username@domain" of a mailto:) is not
+    //stored in any piece. only the scheme (if any), query_string and fragment_id were set on this path.
+    return 1;
+  }
+
+  //u->username is never 0 here (it was just set to line+1). for now it holds the whole authority.
+  if(u->username) {
+    if((u->domain=strchr(u->username,'@'))) {//we have user@host at least.
+      *u->domain=0;
+      u->domain++;
+    } else {//this isn't really a username. it is the domain.
+      u->domain=u->username;
+      u->username=0;
+    }
+  }
+  //if we still have u->username we try to split to user and password
+  if(u->username) {
+    if((u->password=strchr(u->username,':'))) {
+      *u->password=0;
+      u->password++;
+    }
+  }
+  if(u->domain) {
+    if((u->port=strchr(u->domain,']')) && *u->domain == '[') {//this is an IPv6 host
+      *u->port=0;//the ] is overwritten, so domain keeps its leading [ but loses the closing ].
+      u->port++;//port now points at whatever follows the ]. normally that is "" or ":port".
+      //without a : the port piece is left pointing at that remainder (usually the empty string), not set to 0.
+      if(*u->port == ':') {
+        *u->port=0;
+        u->port++;//if it ends up being empty, whatever. that's a URI like: http://host:/path
+      }
+    } else { //we're safe to split port off at :
+      if((u->port=strchr(u->domain,':'))) {
+        *u->port=0;
+        u->port++;
+      } //there isn't a port. strchr already left u->port at 0.
+    }
+  }
+  //everything that could be split has been split.
+  return 1;
+}
+
+#endif
diff --git a/uricmp.c b/uricmp.c
new file mode 100644
index 0000000..9af0fed
--- /dev/null
+++ b/uricmp.c
@@ -0,0 +1,20 @@
+#include "uri.h"
+#include <stdio.h>
+
+//usage: uricmp uri1 uri2. parses both arguments in place, prints their pieces side by side, then the uricmp() mask in hex.
+//exit status: 0 when the two URIs match, 2 when they differ, 1 on bad usage.
+int main(int argc,char *argv[]) {
+  unsigned int ret;
+  struct uri a,b;//on the stack: nothing to allocate, check or free.
+  if(argc < 3) {
+    fprintf(stderr,"usage: uricmp uri1 uri2\n");//we didn't ask for usage so it goes to stderr
+    return 1;
+  }
+  memset(&a,0,sizeof(a));//urifromline only assigns the pieces it finds. the rest must already be 0 for uricmp.
+  memset(&b,0,sizeof(b));
+  urifromline(&a,argv[1]);
+  urifromline(&b,argv[2]);
+  for(int i=0;i<8;i++) printf("%s ? %s\n",a.A[i]?a.A[i]:"(null)",b.A[i]?b.A[i]:"(null)");//never hand printf a null pointer for %s.
+  printf("%08x\n",ret=uricmp(&a,&b));
+  return ret > 0 ? 2 : 0;
+}
diff --git a/cuturl.c b/uricut.c
index f1cbf42..5fe8764 100644
--- a/cuturl.c
+++ b/uricut.c
@@ -1,3 +1,5 @@
+#include "uri.h"
+
 #include <netdb.h>
 #include <stdio.h>
 #include <string.h>
@@ -6,10 +8,6 @@
 #include <unistd.h>
 #include <sys/wait.h>
 
-#include "url.h"
-
-#define MAGIC
-
 /*
  schemes are case sensitive but cononicals are lower case.
  domain is case insensitive. return it lowercased?
@@ -32,32 +30,30 @@
 #define F_PATH 1<<5
 #define F_QUERY_STRING 1<<6
 #define F_FRAGMENT_ID 1<<7
-#define F_WHOLE_URL 1<<8
+#define F_WHOLE_URI 1<<8
 
-char *long_opts[]={"scheme","username","password","domain","port","path","query_string","fragment_id","URL",0};
+char *long_opts[]={"scheme","username","password","domain","port","path","query_string","fragment_id","URI",0};
 char *short_opts[]={"s","u","k","d","P","p","q","f","U"};
 
 int main(int argc,char *argv[]) {
- char *url;
- char *name[2];
+ char *uri;
  char *line=0;
- short args[256];//this needs to be a short to make room for the F_WHOLE_URL
+ short args[256];//this needs to be a short to make room for the F_WHOLE_URI
  int i,j,c=0;
  int size=1024;
- int status;
  char fixme=0;
  char using_stdin=1;
  char malloced=0;
- struct url u;
+ struct uri u;
  if(argc > 1) {
   if(!strcmp(argv[1],"--help") || !strcmp(argv[1],"-h")) {
-   printf("usage: echo urls | cuturl [options]\n");
-   printf("usage: cuturl [options] url [options] [url]\n\n");
+   printf("usage: echo uris | uricut [options]\n");
+   printf("usage: uricut [options] uri [options] [uri]\n\n");
    printf("options: \n");
    for(i=0;long_opts[i];i++) {
     printf("        -%s|--%s\n",short_opts[i],long_opts[i]);
    }
-   printf("To set default values use environment variables like: CUTURL_[OPTION]\n");
+   printf("To set default values use environment variables like: CUTURI_[OPTION]\n");
    return 2;
   }
  }
@@ -120,51 +116,12 @@ int main(int argc,char *argv[]) {
   for(i=0;line[i] && line[i] != '\n' && line[i] != '\r';i++);
   line[i]=0;
 
-  url=strdup(line);
-  urlfromline(&u,line);
+  uri=strdup(line);
+  urifromline(&u,line);
 
   // printf("scheme://username:password@domain:port/path?query_string#fragment_id\n\n");
   //let's set them to what'll get printed now...
 
-#ifdef MAGIC
-  magic_and_defaults(&u);
-/*
-  u.scheme=AorB(u.scheme,AorB(getenv("CUTURL_SCHEME"),"DEFAULT"));
-  u.username=AorB(u.username,AorB(getenv("CUTURL_USERNAME"),"DEFAULT"));
-  u.password=AorB(u.password,AorB(getenv("CUTURL_PASSWORD"),"DEFAULT"));
-  u.domain=AorB(u.domain,AorB(getenv("CURURL_DOMAIN"),"DEFAULT"));
-  serv=getservbyname(u.scheme,strcmp(u.scheme,"udp")?"tcp":"udp");//gets default port for the scheme. http -> 80
-  if(serv) snprintf(sport,sizeof(sport)-1,"%d",ntohs(serv->s_port));
-  u.port=AorB(u.port,AorB(getenv("CUTURL_PORT"),(serv?sport:"DEFAULT")));
-  u.path=AorB(u.path,AorB(getenv("CUTURL_PATH"),"DEFAULT"));
-  u.query_string=AorB(u.query_string,AorB(getenv("CUTURL_QUERY_STRING"),"DEFAULT"));
-  u.fragment_id=AorB(u.fragment_id,AorB(getenv("CUTURL_FRAGMENT_ID"),"DEFAULT"));
-*/
-#endif
-
-  if((name[0]=getenv("CUTURL__"))) {
-   setenv("CUTURL__SCHEME",u.scheme,1);
-   setenv("CUTURL__USERNAME",u.username,1);
-   setenv("CUTURL__PASSWORD",u.password,1);
-   setenv("CUTURL__DOMAIN",u.domain,1);
-   setenv("CUTURL__PORT",u.port,1);
-   setenv("CUTURL__PATH",u.path,1);
-   setenv("CUTURL__QUERY_STRING",u.query_string,1);
-   setenv("CUTURL__FRAGMENT_ID",u.fragment_id,1);
-   name[1]=0;
-   switch(fork()) {
-    case 0:
-     execv(name[0],name);
-     perror("execv");
-     return errno;
-    case -1:
-     perror("fork");
-     return errno;
-    default:
-     break;
-   }
-   wait(&status);
-  } else {
    if(c) {
     for(i=0;i<c;i++) {
      if(args[i]&F_SCHEME) printf("%s\n",AorB(u.scheme,""));
@@ -175,7 +132,7 @@ int main(int argc,char *argv[]) {
      if(args[i]&F_PATH) printf("%s\n",AorB(u.path,""));
      if(args[i]&F_QUERY_STRING) printf("%s\n",AorB(u.query_string,""));
      if(args[i]&F_FRAGMENT_ID) printf("%s\n",AorB(u.fragment_id,""));
-     if(args[i]&F_WHOLE_URL) printf("%s\n",url);
+     if(args[i]&F_WHOLE_URI) printf("%s\n",uri);
     }
    } else {
     printf("scheme: %s\n",u.scheme);
@@ -186,10 +143,9 @@ int main(int argc,char *argv[]) {
     printf("path: %s\n",u.path);
     printf("query_string: %s\n",u.query_string);
     printf("fragment_id: %s\n",u.fragment_id);
-    printf("whole_url: %s\n",url);
+    printf("whole_uri: %s\n",uri);
    }
-  }
-  free(url);//this is definitely malloc()d
+  free(uri);//this is definitely malloc()d
   if(malloced) {
    free(line);
    malloced=0;
diff --git a/urlescape.c b/uriescape.c
index ce8e3e7..da3da7e 100644
--- a/urlescape.c
+++ b/uriescape.c
@@ -1,13 +1,13 @@
+#include "uri.h"
 #include <stdio.h>
-#include "url.h"
 
 int main(int argc,char *argv[]) {
   int len;
   char *out;
   if(argc < 2) return 1;
-  len=urlescapelength(argv[1],strlen(argv[1]));
+  len=uriescapelength(argv[1],strlen(argv[1]));
   out=malloc(len+1);
-  urlescape(argv[1],out,len);
+  uriescape(argv[1],out,len);
   out[len]=0;
   printf("%s\n",out);
   return 0;
diff --git a/matchurl.c b/urimatch.c
index 47a2a20..42ee0aa 100644
--- a/matchurl.c
+++ b/urimatch.c
@@ -1,7 +1,7 @@
+#include "uri.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include "url.h"
 
 #define LINE_LENGTH 1024
 
@@ -19,16 +19,16 @@ int match(char negate,char *part,char *arg) {
 int main(int argc,char *argv[]) {
  int i;
  int ret=1;
- struct url u;
+ struct uri u;
  char negate=0;
  char *line=malloc(LINE_LENGTH);
  char copy[LINE_LENGTH];
  if(argc < 2) {
-  printf("usage: matchurl [-][n][s|u|k|d|D|P|p|q|f] [string]\n");
+  printf("usage: urimatch [-][n][s|u|k|d|D|P|p|q|f] [string]\n");
   printf("scheme://username:password@domain:port/path?query_string#fragment_id\n");
   printf("s://u:k@d:P/p?q#f\n");
   printf("The D flag is special. it matches its argument against the last bytes of the input url's domain.\n");
-  printf("This allows matching of subdomains, like `echo epoch.ano | matchurl -D ano` would match.\n");
+  printf("This allows matching of subdomains, like `echo epoch.ano | urimatch -D ano` would match.\n");
   printf("the 'n' flag can be put before any of the other flags to check for a missing.\n");
   return 1;
  }
@@ -37,7 +37,7 @@ int main(int argc,char *argv[]) {
   if(strchr(line,'\n')) *strchr(line,'\n')=0;
   strcpy(copy,line);
   memset(&u,0,sizeof(u));
-  urlfromline(&u,line);
+  urifromline(&u,line);
   //use the character in argv[1] to match stdin against argv[2]. if match print whole line.
   for(i=1;i<argc;i+=2) {
    if(negate) {i--;}//we didn't really need to go that far.
@@ -54,9 +54,10 @@ int main(int argc,char *argv[]) {
     case 'q': if(match(negate,u.query_string,argv[i+1])) { printf("%s\n",copy); ret=0;} break;
     case 'f': if(match(negate,u.fragment_id,argv[i+1]))  { printf("%s\n",copy); ret=0;} break;
     case 'D': //not sure how to look for a missing one of these. it'd be like d.
-     if(u.domain && argv[i+1] && strlen(u.domain) >= strlen(argv[i+1]) && !strncmp(u.domain+strlen(u.domain)-strlen(argv[i+1]),argv[i+1],strlen(argv[i+1]))) 
+     if(u.domain && argv[i+1] && strlen(u.domain) >= strlen(argv[i+1]) && !strncmp(u.domain+strlen(u.domain)-strlen(argv[i+1]),argv[i+1],strlen(argv[i+1]))) {
       printf("%s\n",copy);
       ret=0;
+     }
      break;
     default:
      printf("unknown url part letter! '%c'\n",argv[i][0]);
diff --git a/printfurl b/uriprintf
index c141bc1..f79d067 100755
--- a/printfurl
+++ b/uriprintf
@@ -2,4 +2,4 @@
 cutargs="$(printf "%s\n" "$1" | sed 's/%[^sukdPpqfU]//g' | tr '%' '\n' | tail -n+2 | sed 's/^\(.\).*/-\1/g' | tr '\n' ' ')"
 count="$(echo $cutargs | tr '-' '\n' | grep -c .)"
 printfargs="$(printf "%s\n" "$1" | sed 's/%[sukdPpqfU]/%s/g')"
-cuturl "$cutargs" | tr '\n' '\0' | xargs -n $count -0 printf "$printfargs"
+uricut "$cutargs" | tr '\n' '\0' | xargs -n $count -0 printf "$printfargs"
diff --git a/uristart b/uristart
new file mode 100755
index 0000000..78454b7
--- /dev/null
+++ b/uristart
@@ -0,0 +1,4 @@
+#!/bin/sh
+scheme="$(printf "%s\n" "$1" | uricut -s)" # scheme of the URI passed as $1
+line="$(grep "^${scheme}:" ~/.config/uristart.conf | cut -d: -f2- | sed 's/^[[:blank:]]*//')" # command template for that scheme. [[:blank:]] because \t inside [] is only a tab in GNU sed; elsewhere it matched a leading 't'.
+eval "$(printf "%s\n" "$1" | sed 's/'\''/'\''\\'\'''\''/g' | uriprintf "$line")" # single quotes in the URI are escaped before uriprintf fills in the template. eval still runs whatever the config line says.
diff --git a/urlunescape.c b/uriunescape.c
index 618cd64..aafc6ea 100644
--- a/urlunescape.c
+++ b/uriunescape.c
@@ -1,10 +1,10 @@
+#include "uri.h"
 #include <unistd.h>
-#include "url.h"
 
 int main(int argc,char *argv[]) {
   int len;
   for(argv++,argc--;argc;argc--,argv++) {
-   len=urlunescape(*argv,*argv);
+   len=uriunescape(*argv,*argv);
    write(1,*argv,len);
    if(argc-1) write(1," ",1);
   }
diff --git a/url.h b/url.h
deleted file mode 100644
index 6304a48..0000000
--- a/url.h
+++ /dev/null
@@ -1,261 +0,0 @@
-#ifndef URL_H
-#define URL_H
-
-#include <netdb.h>
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-
-//reserved = gen-delims / sub-delims
-#define pe_gen_delims ":/?#[]@"
-#define pe_sub_delims "!$&'()*+,;="
-//char *pe_reserved[]=pe_gen_delims "" pe_sub_delims; 
-#define pe_ALPHA "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
-#define pe_DIGIT "0123456789"
-#define pe_HPUT "-._~"
-//char *pe_unreserved[]=pe_ALPHA "" pe_DIGIT "" pe_HPUT;
-
-unsigned char rfc3086_percent_encoding[256];
-
-#define isxdigit(a) ((a >= 'a' && a <= 'f') || (a >= '0' && a <= '9') || (a >= 'A' && a <= 'F'))
-#define toupper(a) ((a >= 'a' && a <= 'z')?a-' ':a)
-
-char *reserved={
-  pe_gen_delims
-  pe_sub_delims
-  pe_ALPHA
-  pe_DIGIT
-  pe_HPUT
-};
-
-int urlescapelength(char *in,int len) {
-  int rlen=0;//be sure to add one to this return value if you plan on putting a null byte at the end.
-  int i;
-  for(i=0;i<len;i++) {
-    rlen+=strchr(reserved,in[i])?1:3;
-  }
-  return rlen;
-}
-
-// make sure your out char * has enough space! use urlescapelength for it.
-void urlescape(char *in,char *out,int len) {
-  int i;
-  int j;
-  for(i=0,j=0;i<len;i++) {
-    if(strchr(reserved,in[i])) {
-      out[j]=in[i];
-      j++;
-    } else {
-      out[j]='%';
-      j++;
-      out[j]="0123456789ABCDEF"[(in[i] >> 4 & 0x15)];
-      j++;
-      out[j]="0123456789ABCDEF"[(in[i] % 16)];
-      j++;
-    }
-  }
-}
-
-int urlunescape(char *in,char *out) {
- char *o=out;
- char *t;
- char a,b;
- char *s=in;
- if(!strchr(s,'%')) memmove(out,in,strlen(in));
- while((t=strchr(s,'%'))) {
-  if(t-s) {//if there are actually bytes to copy.
-   memmove(o,s,t-s);
-   o+=(t-s);
-   s+=(t-s);
-  }
-  if(isxdigit(t[1]) && isxdigit(t[2])) {
-   s+=3;//skip the %XX
-   a=toupper(t[1]);
-   b=toupper(t[2]);
-   *o=((a-'0'<10 ? a-'0' : a-'A'+10) << 4) + (b-'0'<10 ? b-'0' : b-'A'+10); 
-   o++;
-  } else {
-   s++;//skip just the %. the next character might be a % //TODO: look up what the "right" thing to do here is.
-   *o='%';
-   o++;
-  }
- }
- //copy the last part.
- memmove(o,s,strlen(s));
- o[strlen(s)]=0;
- return o+strlen(s)-out;
-}
-
-struct url {
- char *scheme;
- char *username;
- char *password;
- char *domain;
- char *port;
- char *path;
- char *query_string;
- char *fragment_id;
-};
-
-
-/*
- schemes are case sensitive but cononicals are lower case.
- domain is case insensitive. return it lowercased?
- port is optional and in decimal
- path
- scheme://username:password@domain:port/path?query_string#fragment_id
- mailto:username@domain
-
- optional stuff:
- scheme, username, password, port, path, query_string, fragment_id
-*/
-
-void urlfromline(struct url *u,char *line) {
- int i;
- char hack=0;//we need to allow for // as host//path separator
-  //split at first single / into line and path
-  //this fails to split scheme://host//path into: scheme, host, /path. needs to be first single / or second double-or-more-/
-  for(i=0;line[i];i++) {
-   if(line[i] == '/' && line[i+1] == '/') {
-    if(!hack) {//only skip out on the first // because it is probably used in the scheme.
-     hack=1;
-     i++;
-     continue;
-    }
-   }
-   if(line[i] == '/') {
-    line[i]=0;
-    u->path=line+i+1;
-    break;
-   }
-  }
-  if(u->path) {
-   if(strchr(u->path,'?')) {
-    u->query_string=strchr(u->path,'?');
-    *u->query_string=0;
-    u->query_string++;
-   }
-  }
-
-  if(u->query_string) {
-   if(strchr(u->query_string,'#')) {
-    u->fragment_id=strchr(u->query_string,'#');
-    *u->fragment_id=0;
-    u->fragment_id++;
-   }
-  }
-
-  if(strstr(line,"://")) {
-   u->scheme=line;
-   u->domain=strstr(line,"://");
-   *u->domain=0;
-   u->domain+=3;
-  } else {
-   u->domain=line;
-  }
-
-  if(u->domain) {
-   if(strchr(u->domain,'@')) {
-    u->username=u->domain;
-    u->domain=strchr(u->domain,'@');
-    *u->domain=0;
-    u->domain++;
-   }
-  }
-
-  if(u->username) {
-   if(strchr(u->username,':')) {
-    u->password=strchr(u->username,':');
-    *u->password=0;
-    u->password++;
-   }
-  }
-
-  if(u->domain) {
-   if(strchr(u->domain,']')) {//the end of an IPv6 address
-    if(strchr(strchr(u->domain,']'),':')) {
-     u->port=strchr(strchr(u->domain,']'),':');
-     if(u->port[1] == '?') {//for magnet links
-      u->port=0;
-     } else {
-      *u->port=0;
-      u->port++;
-     }
-    }
-   } else {
-    if(strchr(u->domain,':')) {
-     u->port=strchr(u->domain,':');
-     if(u->port[1] == '?') {//for magnet links
-      u->port=0;
-     } else {
-      *u->port=0;
-      u->port++;
-     }
-    }
-   }
-  }
-  if(u->port) {
-   for(i=0;u->port[i];i++) {
-    if(u->port[i] < '0' || u->port[i] > '9') {
-     //this port number isn't a number!
-     //it is probably a different portion of the url then... and the domain is probably the scheme.
-     if(u->domain && !u->scheme) {
-      u->scheme=u->domain;
-      u->domain=0;
-     }
-     if(!u->path) {
-      u->path=u->port;
-      u->port=0;
-     }
-     break;
-    }
-   }
-  }
-
-  if(u->domain) {//for magnet links.
-   if(strchr(u->domain,'?')) {
-    u->query_string=strchr(u->domain,'?');
-    *u->query_string=0;
-    u->query_string++;
-   }
-  }
-
-  if(u->domain) {
-   if(strchr(u->domain,':') && !strchr(u->domain,']')) {//for scheme:?query_string
-    u->scheme=u->domain;
-    *strchr(u->scheme,':')=0;
-    u->domain=0;
-   }
-  }
-
-  if(!u->scheme && u->username) {//for mailto:
-   u->scheme=u->username;
-   u->username=u->password;
-   u->password=0;
-  }
-}
-
-#define AorB(a,b) ((a)?(a):(b))
-
-void magic_and_defaults(struct url *u) {
-  struct servent *serv;
-  char sport[10];
-  u->scheme=AorB(u->scheme,AorB(getenv("URL_SCHEME"),"DEFAULT"));
-  u->username=AorB(u->username,AorB(getenv("URL_USERNAME"),"DEFAULT"));
-  u->password=AorB(u->password,AorB(getenv("URL_PASSWORD"),"DEFAULT"));
-  u->domain=AorB(u->domain,AorB(getenv("URL_DOMAIN"),"DEFAULT"));
-  serv=getservbyname(u->scheme,strcmp(u->scheme,"udp")?"tcp":"udp");//gets default port for the scheme. http -> 80
-  if(serv) snprintf(sport,sizeof(sport)-1,"%d",ntohs(serv->s_port));
-//  else snprintf(sport,sizeof(sport)-1,"%d",serv);
-  u->port=AorB(u->port,AorB(getenv("URL_PORT"),(serv?strdup(sport):"DEFAULT")));
-
-//  if(!strcmp(u->port,"DEFAULT")) {
-   //this shouldn't happen most of the time. :/
-//   printf("serv: %d\nsport: %s\nu->scheme: %s\n",serv,sport,u->scheme);
-//  }
-
-  u->path=AorB(u->path,AorB(getenv("URL_PATH"),"DEFAULT"));
-  u->query_string=AorB(u->query_string,AorB(getenv("URL_QUERY_STRING"),"DEFAULT"));
-  u->fragment_id=AorB(u->fragment_id,AorB(getenv("URL_FRAGMENT_ID"),"DEFAULT"));
-}
-#endif
author	epoch <epoch@hacking.allowed.org>	2019-04-20 05:32:27 -0500
committer	epoch <epoch@hacking.allowed.org>	2019-04-20 05:32:27 -0500
commit	6f402e2d2f052972886712f60d592684c8671982 (patch)
tree	47a09324bd3c5e577ec5b7059bd6c8834bead115
parent	d42135919f480c8bba4ca1f043fbabf44dac708f (diff)
download	uritools-6f402e2d2f052972886712f60d592684c8671982.tar.gz uritools-6f402e2d2f052972886712f60d592684c8671982.zip