diff options
| -rw-r--r-- | .gitignore | 7 | ||||
| -rw-r--r-- | Makefile | 34 | ||||
| -rwxr-xr-x | start | 4 | ||||
| -rw-r--r-- | start.conf.example | 8 | ||||
| -rw-r--r-- | uri.h | 253 | ||||
| -rw-r--r-- | uricmp.c | 20 | ||||
| -rw-r--r-- | uricut.c (renamed from cuturl.c) | 74 | ||||
| -rw-r--r-- | uriescape.c (renamed from urlescape.c) | 6 | ||||
| -rw-r--r-- | urimatch.c (renamed from matchurl.c) | 13 | ||||
| -rwxr-xr-x | uriprintf (renamed from printfurl) | 2 | ||||
| -rwxr-xr-x | uristart | 4 | ||||
| -rw-r--r-- | uriunescape.c (renamed from urlunescape.c) | 4 | ||||
| -rw-r--r-- | url.h | 261 | 
13 files changed, 328 insertions, 362 deletions
| @@ -1,2 +1,5 @@ -matchurl -cuturl +uricmp +uricut +uriescape +urimatch +uriunescape @@ -1,26 +1,28 @@ -CFLAGS=-std=c99 -pedantic -Wall -PREFIX=/usr/local -CC=gcc +CFLAGS:=-std=c11 -pedantic -Wall +PREFIX:=/usr/local +CC:=gcc -all: cuturl matchurl urlunescape urlescape +all: uricut urimatch uriunescape uriescape uricmp -matchurl: matchurl.c url.h +urimatch: urimatch.c uri.h -cuturl: cuturl.c url.h +uricut: uricut.c uri.h -urlunescape: urlunescape.c url.h +uricmp: uricmp.c uri.h -urlescape: urlescape.c url.h +uriunescape: uriunescape.c uri.h + +uriescape: uriescape.c uri.h  clean: -	rm -f matchurl -	rm -f cuturl +	rm -f uricut urimatch uriunescape uriescape uricmp  	rm -f *.o  install: all -	install matchurl $(PREFIX)/bin/matchurl -	install cuturl $(PREFIX)/bin/cuturl -	install start $(PREFIX)/bin/start -	install printfurl $(PREFIX)/bin/printfurl -	install urlunescape $(PREFIX)/bin/urlunescape -	install urlescape $(PREFIX)/bin/urlescape +	install urimatch $(PREFIX)/bin/urimatch +	install uricut $(PREFIX)/bin/uricut +	install uricmp $(PREFIX)/bin/uricmp +	install uristart $(PREFIX)/bin/uristart +	install uriprintf $(PREFIX)/bin/uriprintf +	install uriunescape $(PREFIX)/bin/uriunescape +	install uriescape $(PREFIX)/bin/uriescape @@ -1,4 +0,0 @@ -#!/bin/sh -scheme="$(printf "%s\n" "$1" | cuturl -s)" -line="$(grep "^${scheme}:" ~/.config/start.conf | cut -d: -f2-)" -eval "$(printf "%s\n" "$1" | sed 's/'\''/'\''\\'\'''\''/g' | printfurl "$line")" diff --git a/start.conf.example b/start.conf.example deleted file mode 100644 index 6864da3..0000000 --- a/start.conf.example +++ /dev/null @@ -1,8 +0,0 @@ -### start single-quote escapes the url parts so place them inside single-quote or else! -### (if you're given a bad link someone might be able to run shell commands) -finger:printf "%%s\r\n" '%p' | ncat '%d' 79 | tr -d '\r' | xmessage -file - -### new! subshells works -whois:whois "$(printf '%%s\\\\n' '%d' | sed 's/^..*$/-h/')" '%d' '%p' | xmessage -file - -irc:x-terminal-emulator -e irssi -c '%d' -p '%P' -http:dillo '%U' -DEFAULT:xdg-open '%U' @@ -0,0 +1,253 @@ +#ifndef uri_H +#define uri_H + +#define _XOPEN_SOURCE 500 //for strdup +#include <string.h> +#include <netdb.h> +#include <stdio.h> +#include <stdlib.h> + +//uri_reserved = gen-delims / sub-delims +#define pe_gen_delims ":/?#[]@" +#define pe_sub_delims "!$&'()*+,;=" +//char *pe_reserved[]=pe_gen_delims "" pe_sub_delims;  +#define pe_ALPHA "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" +#define pe_DIGIT "0123456789" +#define pe_HPUT "-._~" +//char *pe_unreserved[]=pe_ALPHA "" pe_DIGIT "" pe_HPUT; + +unsigned char rfc3086_percent_encoding[256]; + +#define isxdigit(a) ((a >= 'a' && a <= 'f') || (a >= '0' && a <= '9') || (a >= 'A' && a <= 'F')) +#define toupper(a) ((a >= 'a' && a <= 'z')?a-' ':a) + +char *uri_reserved={ +  pe_gen_delims +  pe_sub_delims +  pe_ALPHA +  pe_DIGIT +  pe_HPUT +}; + +int uriescapelength(char *in,int len) { +  int rlen=0;//be sure to add one to this return value if you plan on putting a null byte at the end. +  int i; +  for(i=0;i<len;i++) { +    rlen+=strchr(uri_reserved,in[i])?1:3; +  } +  return rlen; +} + +// make sure your out char * has enough space! use uriescapelength for it. +void uriescape(char *in,char *out,int len) { +  int i; +  int j; +  for(i=0,j=0;i<len;i++) { +    if(strchr(uri_reserved,in[i])) { +      out[j]=in[i]; +      j++; +    } else { +      out[j]='%'; +      j++; +      out[j]="0123456789ABCDEF"[(in[i] >> 4 & 0x15)]; +      j++; +      out[j]="0123456789ABCDEF"[(in[i] % 16)]; +      j++; +    } +  } +} + +int uriunescape(char *in,char *out) { + char *o=out; + char *t; + char a,b; + char *s=in; + if(!strchr(s,'%')) memmove(out,in,strlen(in)); + while((t=strchr(s,'%'))) { +  if(t-s) {//if there are actually bytes to copy. +   memmove(o,s,t-s); +   o+=(t-s); +   s+=(t-s); +  } +  if(isxdigit(t[1]) && isxdigit(t[2])) { +   s+=3;//skip the %XX +   a=toupper(t[1]); +   b=toupper(t[2]); +   *o=((a-'0'<10 ? a-'0' : a-'A'+10) << 4) + (b-'0'<10 ? b-'0' : b-'A'+10);  +   o++; +  } else { +   s++;//skip just the %. the next character might be a % //TODO: look up what the "right" thing to do here is. +   *o='%'; +   o++; +  } + } + //copy the last part. + memmove(o,s,strlen(s)); + o[strlen(s)]=0; + return o+strlen(s)-out; +} + +struct uri {//warning. it is technically undefined behavior to set one half of a union then use the other half. +  union { +    char *A[8]; +    struct { +      union { char *s;char *scheme; }; +      union { char *u;char *username; }; +      union { char *k;char *password; }; +      union { char *d;char *domain; }; +      union { char *P;char *port; }; +      union { char *p;char *path; }; +      union { char *q;char *query_string; }; +      union { char *f;char *fragment_id; }; +    }; +  }; +}; + +//returns 0 on success, returns a byte with bits set for non-matching pieces. +unsigned int uricmp(struct uri *a,struct uri *b) { +  int i; +  int ret=0; +  for(i=0;i<8;i++) { +    if(a->A[i] && !b->A[i]) ret |=(1<<(i+8));//we have a's but not b's +    if(!a->A[i] && b->A[i]) ret |=(1<<(i+16)); +    //for testing if(!a->A[i] && !b->A[i]) ret |=(1<<(i+24));//no problem here. both empty. +    if(a->A[i] && b->A[i]) { +      if(strcmp(a->A[i],b->A[i])) { +        ret|=(1<<i); +      } +    } +  } +  return ret; +} + +/* + schemes are case sensitive but cononicals are lower case. + domain is case insensitive. return it lowercased? + port is optional and in decimal + path + scheme://username:password@domain:port/path?query_string#fragment_id + mailto:username@domain + + optional stuff: + scheme, username, password, port, path, query_string, fragment_id +*/ + +//should it be a dick about what characters are allowed? +//should it just try to ignore weird shit? + +//return 0 on fail //not sure what this means. +//return 1 on success +int urifromline(struct uri *u,char *line) { +  //these first two are easy. the rest... not so much. +  char *t; +//  memset(u,0,sizeof(struct uri)); //this function shouldn't do this. +  if((u->fragment_id=strchr(line,'#'))) { +    *u->fragment_id=0; +    u->fragment_id++; +  } +  if((u->query_string=strchr(line,'?'))) { +    *u->query_string=0; +    u->query_string++; +  } +  //now we have scheme, user, pass, domain, port, and path. maybe. +  //what character can we split on now? : is a terrible choice. +  // how about /? first / is either a separator between scheme +  //could find the first non-scheme character. +  //so we might have... scheme://user:pass@host:port/path +  //or... user:pass@host:port/path ? +  //we need to do this based on /s +  // we're either going to find the scheme and authority separator +  // or we're going to find the start of a path. +  //there: scheme:/path, scheme://host (empty path), or scheme:path/morepath +  //or...  should we do paths without +  //scheme must start with a-z +/*  if(*line == '/' && *(line+1) != '/') { //we have a relative path. /like:this.maybe +    u->path=line; +    return;//we're done. nothing else to do. +  } +  if(*line == '.') { //we have a relative path like: ./derp or ../merp +    u->path=line; +    return;//we're done here. nothing else to do. +  }*/ +  //let's see if this starts with a scheme +  if(strchr(line,':') && ((*line >= 'a' && *line <= 'z') || (*line >= 'A' && *line <= 'Z'))) { +    for(u->scheme=strchr(line,':')-1;u->scheme > line;u->scheme--) { +      if((*u->scheme >= 'a' && *u->scheme <= 'z') || +         (*u->scheme >= 'A' && *u->scheme <= 'Z') || +         (*u->scheme >= '0' && *u->scheme <= '9') || +         *u->scheme == '+' || *u->scheme == '-' || *u->scheme == '.') { +        //this is still a scheme. +      } else { +        break; +      } +    } +    if(u->scheme == line) {//we got through the for loop alright. line starts with a scheme. +      line=strchr(line,':'); +      *line=0; +      line++; +      for(t=u->scheme;*t;t++) { +        if(*t >= 'A' && *t <= 'Z') *t+=' '; +      } +    } +  } + +  //copy-pasted from above the scheme strip attempt. +  if(*line == '/' && *(line+1) != '/') { //we have a relative path. /like:this.maybe +    u->path=line; +    return 1;//we're done. nothing else to do. +  } +  if(*line == '.') { //we have a relative path like: ./derp or ../merp +    u->path=line; +    return 1;//we're done here. nothing else to do. +  } + +  if(*line == '/' && line[1] == '/') {//we have an authority section. +    //let's left-shift this shit over until the third / +    for(t=line+1;*(t+1) && *(t+1) != '/';t++) { +      *t=*(t+1); +    } +    *t=0; +    u->path=t+1;//if there was a /, path points at it and the stuff after. +    //if there wasn't a /, it points at a null byte. so "empty" +    u->username=line+1; +  } else { +    //we have all we need. +    return 1; +  } + +  if(u->username) {//this contains all of the authority. +    if((u->domain=strchr(u->username,'@'))) {//we have user@host at least. +      *u->domain=0; +      u->domain++; +    } else {//this isn't really a username. it is the domain. +      u->domain=u->username; +      u->username=0; +    } +  } +  //if we still have u->username we try to split to user and password +  if(u->username) { +    if((u->password=strchr(u->username,':'))) { +      *u->password=0; +      u->password++; +    } +  } +  if(u->domain) { +    if((u->port=strchr(u->domain,']')) && *u->domain == '[') {//this is an IPv6 host +      *u->port=0; +      u->port++; +      if(*u->port == ':') { +        *u->port=0; +        u->port++;//if it ends up being empty, whatever. that's a URI like: http://host:/path +      } +    } else { //we're safe to split port off at : +      if((u->port=strchr(u->domain,':'))) { +        *u->port=0; +        u->port++; +      } //there isn't a port. leave it unset. +    } +  } +  //I dunno. +  return 1; +} + +#endif diff --git a/uricmp.c b/uricmp.c new file mode 100644 index 0000000..9af0fed --- /dev/null +++ b/uricmp.c @@ -0,0 +1,20 @@ +#include "uri.h" +#include <stdio.h> + +int main(int argc,char *argv[]) { +  int i; +  int ret; +  struct uri *a=malloc(sizeof(struct uri)); +  struct uri *b=malloc(sizeof(struct uri)); +  if(argc < 3) { +    fprintf(stderr,"usage: uricmp uri1 uri2\n");//we didn't ask for usage so it goes to stderr +    return 1; +  } +  urifromline(a,argv[1]); +  urifromline(b,argv[2]); +  for(i=0;i<8;i++) { +    printf("%s ? %s\n",a->A[i],b->A[i]); +  } +  printf("%08x\n",ret=uricmp(a,b)); +  return ret > 0 ? 2 : 0; +} @@ -1,3 +1,5 @@ +#include "uri.h" +  #include <netdb.h>  #include <stdio.h>  #include <string.h> @@ -6,10 +8,6 @@  #include <unistd.h>  #include <sys/wait.h> -#include "url.h" - -#define MAGIC -  /*   schemes are case sensitive but cononicals are lower case.   domain is case insensitive. return it lowercased? @@ -32,32 +30,30 @@  #define F_PATH 1<<5  #define F_QUERY_STRING 1<<6  #define F_FRAGMENT_ID 1<<7 -#define F_WHOLE_URL 1<<8 +#define F_WHOLE_URI 1<<8 -char *long_opts[]={"scheme","username","password","domain","port","path","query_string","fragment_id","URL",0}; +char *long_opts[]={"scheme","username","password","domain","port","path","query_string","fragment_id","URI",0};  char *short_opts[]={"s","u","k","d","P","p","q","f","U"};  int main(int argc,char *argv[]) { - char *url; - char *name[2]; + char *uri;   char *line=0; - short args[256];//this needs to be a short to make room for the F_WHOLE_URL + short args[256];//this needs to be a short to make room for the F_WHOLE_URI   int i,j,c=0;   int size=1024; - int status;   char fixme=0;   char using_stdin=1;   char malloced=0; - struct url u; + struct uri u;   if(argc > 1) {    if(!strcmp(argv[1],"--help") || !strcmp(argv[1],"-h")) { -   printf("usage: echo urls | cuturl [options]\n"); -   printf("usage: cuturl [options] url [options] [url]\n\n"); +   printf("usage: echo uris | uricut [options]\n"); +   printf("usage: uricut [options] uri [options] [uri]\n\n");     printf("options: \n");     for(i=0;long_opts[i];i++) {      printf("        -%s|--%s\n",short_opts[i],long_opts[i]);     } -   printf("To set default values use environment variables like: CUTURL_[OPTION]\n"); +   printf("To set default values use environment variables like: CUTURI_[OPTION]\n");     return 2;    }   } @@ -120,51 +116,12 @@ int main(int argc,char *argv[]) {    for(i=0;line[i] && line[i] != '\n' && line[i] != '\r';i++);    line[i]=0; -  url=strdup(line); -  urlfromline(&u,line); +  uri=strdup(line); +  urifromline(&u,line);    // printf("scheme://username:password@domain:port/path?query_string#fragment_id\n\n");    //let's set them to what'll get printed now... -#ifdef MAGIC -  magic_and_defaults(&u); -/* -  u.scheme=AorB(u.scheme,AorB(getenv("CUTURL_SCHEME"),"DEFAULT")); -  u.username=AorB(u.username,AorB(getenv("CUTURL_USERNAME"),"DEFAULT")); -  u.password=AorB(u.password,AorB(getenv("CUTURL_PASSWORD"),"DEFAULT")); -  u.domain=AorB(u.domain,AorB(getenv("CURURL_DOMAIN"),"DEFAULT")); -  serv=getservbyname(u.scheme,strcmp(u.scheme,"udp")?"tcp":"udp");//gets default port for the scheme. http -> 80 -  if(serv) snprintf(sport,sizeof(sport)-1,"%d",ntohs(serv->s_port)); -  u.port=AorB(u.port,AorB(getenv("CUTURL_PORT"),(serv?sport:"DEFAULT"))); -  u.path=AorB(u.path,AorB(getenv("CUTURL_PATH"),"DEFAULT")); -  u.query_string=AorB(u.query_string,AorB(getenv("CUTURL_QUERY_STRING"),"DEFAULT")); -  u.fragment_id=AorB(u.fragment_id,AorB(getenv("CUTURL_FRAGMENT_ID"),"DEFAULT")); -*/ -#endif - -  if((name[0]=getenv("CUTURL__"))) { -   setenv("CUTURL__SCHEME",u.scheme,1); -   setenv("CUTURL__USERNAME",u.username,1); -   setenv("CUTURL__PASSWORD",u.password,1); -   setenv("CUTURL__DOMAIN",u.domain,1); -   setenv("CUTURL__PORT",u.port,1); -   setenv("CUTURL__PATH",u.path,1); -   setenv("CUTURL__QUERY_STRING",u.query_string,1); -   setenv("CUTURL__FRAGMENT_ID",u.fragment_id,1); -   name[1]=0; -   switch(fork()) { -    case 0: -     execv(name[0],name); -     perror("execv"); -     return errno; -    case -1: -     perror("fork"); -     return errno; -    default: -     break; -   } -   wait(&status); -  } else {     if(c) {      for(i=0;i<c;i++) {       if(args[i]&F_SCHEME) printf("%s\n",AorB(u.scheme,"")); @@ -175,7 +132,7 @@ int main(int argc,char *argv[]) {       if(args[i]&F_PATH) printf("%s\n",AorB(u.path,""));       if(args[i]&F_QUERY_STRING) printf("%s\n",AorB(u.query_string,""));       if(args[i]&F_FRAGMENT_ID) printf("%s\n",AorB(u.fragment_id,"")); -     if(args[i]&F_WHOLE_URL) printf("%s\n",url); +     if(args[i]&F_WHOLE_URI) printf("%s\n",uri);      }     } else {      printf("scheme: %s\n",u.scheme); @@ -186,10 +143,9 @@ int main(int argc,char *argv[]) {      printf("path: %s\n",u.path);      printf("query_string: %s\n",u.query_string);      printf("fragment_id: %s\n",u.fragment_id); -    printf("whole_url: %s\n",url); +    printf("whole_uri: %s\n",uri);     } -  } -  free(url);//this is definitely malloc()d +  free(uri);//this is definitely malloc()d    if(malloced) {     free(line);     malloced=0; diff --git a/urlescape.c b/uriescape.c index ce8e3e7..da3da7e 100644 --- a/urlescape.c +++ b/uriescape.c @@ -1,13 +1,13 @@ +#include "uri.h"  #include <stdio.h> -#include "url.h"  int main(int argc,char *argv[]) {    int len;    char *out;    if(argc < 2) return 1; -  len=urlescapelength(argv[1],strlen(argv[1])); +  len=uriescapelength(argv[1],strlen(argv[1]));    out=malloc(len+1); -  urlescape(argv[1],out,len); +  uriescape(argv[1],out,len);    out[len]=0;    printf("%s\n",out);    return 0; @@ -1,7 +1,7 @@ +#include "uri.h"  #include <stdio.h>  #include <stdlib.h>  #include <string.h> -#include "url.h"  #define LINE_LENGTH 1024 @@ -19,16 +19,16 @@ int match(char negate,char *part,char *arg) {  int main(int argc,char *argv[]) {   int i;   int ret=1; - struct url u; + struct uri u;   char negate=0;   char *line=malloc(LINE_LENGTH);   char copy[LINE_LENGTH];   if(argc < 2) { -  printf("usage: matchurl [-][n][s|u|k|d|D|P|p|q|f] [string]\n"); +  printf("usage: urimatch [-][n][s|u|k|d|D|P|p|q|f] [string]\n");    printf("scheme://username:password@domain:port/path?query_string#fragment_id\n");    printf("s://u:k@d:P/p?q#f\n");    printf("The D flag is special. it matches its argument against the last bytes of the input url's domain.\n"); -  printf("This allows matching of subdomains, like `echo epoch.ano | matchurl -D ano` would match.\n"); +  printf("This allows matching of subdomains, like `echo epoch.ano | urimatch -D ano` would match.\n");    printf("the 'n' flag can be put before any of the other flags to check for a missing.\n");    return 1;   } @@ -37,7 +37,7 @@ int main(int argc,char *argv[]) {    if(strchr(line,'\n')) *strchr(line,'\n')=0;    strcpy(copy,line);    memset(&u,0,sizeof(u)); -  urlfromline(&u,line); +  urifromline(&u,line);    //use the character in argv[1] to match stdin against argv[2]. if match print whole line.    for(i=1;i<argc;i+=2) {     if(negate) {i--;}//we didn't really need to go that far. @@ -54,9 +54,10 @@ int main(int argc,char *argv[]) {      case 'q': if(match(negate,u.query_string,argv[i+1])) { printf("%s\n",copy); ret=0;} break;      case 'f': if(match(negate,u.fragment_id,argv[i+1]))  { printf("%s\n",copy); ret=0;} break;      case 'D': //not sure how to look for a missing one of these. it'd be like d. -     if(u.domain && argv[i+1] && strlen(u.domain) >= strlen(argv[i+1]) && !strncmp(u.domain+strlen(u.domain)-strlen(argv[i+1]),argv[i+1],strlen(argv[i+1])))  +     if(u.domain && argv[i+1] && strlen(u.domain) >= strlen(argv[i+1]) && !strncmp(u.domain+strlen(u.domain)-strlen(argv[i+1]),argv[i+1],strlen(argv[i+1]))) {        printf("%s\n",copy);        ret=0; +     }       break;      default:       printf("unknown url part letter! '%c'\n",argv[i][0]); @@ -2,4 +2,4 @@  cutargs="$(printf "%s\n" "$1" | sed 's/%[^sukdPpqfU]//g' | tr '%' '\n' | tail -n+2 | sed 's/^\(.\).*/-\1/g' | tr '\n' ' ')"  count="$(echo $cutargs | tr '-' '\n' | grep -c .)"  printfargs="$(printf "%s\n" "$1" | sed 's/%[sukdPpqfU]/%s/g')" -cuturl "$cutargs" | tr '\n' '\0' | xargs -n $count -0 printf "$printfargs" +uricut "$cutargs" | tr '\n' '\0' | xargs -n $count -0 printf "$printfargs" diff --git a/uristart b/uristart new file mode 100755 index 0000000..78454b7 --- /dev/null +++ b/uristart @@ -0,0 +1,4 @@ +#!/bin/sh +scheme="$(printf "%s\n" "$1" | uricut -s)" +line="$(grep "^${scheme}:" ~/.config/uristart.conf | cut -d: -f2- | sed 's/^[ \t]//g')" +eval "$(printf "%s\n" "$1" | sed 's/'\''/'\''\\'\'''\''/g' | uriprintf "$line")" diff --git a/urlunescape.c b/uriunescape.c index 618cd64..aafc6ea 100644 --- a/urlunescape.c +++ b/uriunescape.c @@ -1,10 +1,10 @@ +#include "uri.h"  #include <unistd.h> -#include "url.h"  int main(int argc,char *argv[]) {    int len;    for(argv++,argc--;argc;argc--,argv++) { -   len=urlunescape(*argv,*argv); +   len=uriunescape(*argv,*argv);     write(1,*argv,len);     if(argc-1) write(1," ",1);    } @@ -1,261 +0,0 @@ -#ifndef URL_H -#define URL_H - -#include <netdb.h> -#include <stdio.h> -#include <string.h> -#include <stdlib.h> - -//reserved = gen-delims / sub-delims -#define pe_gen_delims ":/?#[]@" -#define pe_sub_delims "!$&'()*+,;=" -//char *pe_reserved[]=pe_gen_delims "" pe_sub_delims;  -#define pe_ALPHA "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" -#define pe_DIGIT "0123456789" -#define pe_HPUT "-._~" -//char *pe_unreserved[]=pe_ALPHA "" pe_DIGIT "" pe_HPUT; - -unsigned char rfc3086_percent_encoding[256]; - -#define isxdigit(a) ((a >= 'a' && a <= 'f') || (a >= '0' && a <= '9') || (a >= 'A' && a <= 'F')) -#define toupper(a) ((a >= 'a' && a <= 'z')?a-' ':a) - -char *reserved={ -  pe_gen_delims -  pe_sub_delims -  pe_ALPHA -  pe_DIGIT -  pe_HPUT -}; - -int urlescapelength(char *in,int len) { -  int rlen=0;//be sure to add one to this return value if you plan on putting a null byte at the end. -  int i; -  for(i=0;i<len;i++) { -    rlen+=strchr(reserved,in[i])?1:3; -  } -  return rlen; -} - -// make sure your out char * has enough space! use urlescapelength for it. -void urlescape(char *in,char *out,int len) { -  int i; -  int j; -  for(i=0,j=0;i<len;i++) { -    if(strchr(reserved,in[i])) { -      out[j]=in[i]; -      j++; -    } else { -      out[j]='%'; -      j++; -      out[j]="0123456789ABCDEF"[(in[i] >> 4 & 0x15)]; -      j++; -      out[j]="0123456789ABCDEF"[(in[i] % 16)]; -      j++; -    } -  } -} - -int urlunescape(char *in,char *out) { - char *o=out; - char *t; - char a,b; - char *s=in; - if(!strchr(s,'%')) memmove(out,in,strlen(in)); - while((t=strchr(s,'%'))) { -  if(t-s) {//if there are actually bytes to copy. -   memmove(o,s,t-s); -   o+=(t-s); -   s+=(t-s); -  } -  if(isxdigit(t[1]) && isxdigit(t[2])) { -   s+=3;//skip the %XX -   a=toupper(t[1]); -   b=toupper(t[2]); -   *o=((a-'0'<10 ? a-'0' : a-'A'+10) << 4) + (b-'0'<10 ? b-'0' : b-'A'+10);  -   o++; -  } else { -   s++;//skip just the %. the next character might be a % //TODO: look up what the "right" thing to do here is. -   *o='%'; -   o++; -  } - } - //copy the last part. - memmove(o,s,strlen(s)); - o[strlen(s)]=0; - return o+strlen(s)-out; -} - -struct url { - char *scheme; - char *username; - char *password; - char *domain; - char *port; - char *path; - char *query_string; - char *fragment_id; -}; - - -/* - schemes are case sensitive but cononicals are lower case. - domain is case insensitive. return it lowercased? - port is optional and in decimal - path - scheme://username:password@domain:port/path?query_string#fragment_id - mailto:username@domain - - optional stuff: - scheme, username, password, port, path, query_string, fragment_id -*/ - -void urlfromline(struct url *u,char *line) { - int i; - char hack=0;//we need to allow for // as host//path separator -  //split at first single / into line and path -  //this fails to split scheme://host//path into: scheme, host, /path. needs to be first single / or second double-or-more-/ -  for(i=0;line[i];i++) { -   if(line[i] == '/' && line[i+1] == '/') { -    if(!hack) {//only skip out on the first // because it is probably used in the scheme. -     hack=1; -     i++; -     continue; -    } -   } -   if(line[i] == '/') { -    line[i]=0; -    u->path=line+i+1; -    break; -   } -  } -  if(u->path) { -   if(strchr(u->path,'?')) { -    u->query_string=strchr(u->path,'?'); -    *u->query_string=0; -    u->query_string++; -   } -  } - -  if(u->query_string) { -   if(strchr(u->query_string,'#')) { -    u->fragment_id=strchr(u->query_string,'#'); -    *u->fragment_id=0; -    u->fragment_id++; -   } -  } - -  if(strstr(line,"://")) { -   u->scheme=line; -   u->domain=strstr(line,"://"); -   *u->domain=0; -   u->domain+=3; -  } else { -   u->domain=line; -  } - -  if(u->domain) { -   if(strchr(u->domain,'@')) { -    u->username=u->domain; -    u->domain=strchr(u->domain,'@'); -    *u->domain=0; -    u->domain++; -   } -  } - -  if(u->username) { -   if(strchr(u->username,':')) { -    u->password=strchr(u->username,':'); -    *u->password=0; -    u->password++; -   } -  } - -  if(u->domain) { -   if(strchr(u->domain,']')) {//the end of an IPv6 address -    if(strchr(strchr(u->domain,']'),':')) { -     u->port=strchr(strchr(u->domain,']'),':'); -     if(u->port[1] == '?') {//for magnet links -      u->port=0; -     } else { -      *u->port=0; -      u->port++; -     } -    } -   } else { -    if(strchr(u->domain,':')) { -     u->port=strchr(u->domain,':'); -     if(u->port[1] == '?') {//for magnet links -      u->port=0; -     } else { -      *u->port=0; -      u->port++; -     } -    } -   } -  } -  if(u->port) { -   for(i=0;u->port[i];i++) { -    if(u->port[i] < '0' || u->port[i] > '9') { -     //this port number isn't a number! -     //it is probably a different portion of the url then... and the domain is probably the scheme. -     if(u->domain && !u->scheme) { -      u->scheme=u->domain; -      u->domain=0; -     } -     if(!u->path) { -      u->path=u->port; -      u->port=0; -     } -     break; -    } -   } -  } - -  if(u->domain) {//for magnet links. -   if(strchr(u->domain,'?')) { -    u->query_string=strchr(u->domain,'?'); -    *u->query_string=0; -    u->query_string++; -   } -  } - -  if(u->domain) { -   if(strchr(u->domain,':') && !strchr(u->domain,']')) {//for scheme:?query_string -    u->scheme=u->domain; -    *strchr(u->scheme,':')=0; -    u->domain=0; -   } -  } - -  if(!u->scheme && u->username) {//for mailto: -   u->scheme=u->username; -   u->username=u->password; -   u->password=0; -  } -} - -#define AorB(a,b) ((a)?(a):(b)) - -void magic_and_defaults(struct url *u) { -  struct servent *serv; -  char sport[10]; -  u->scheme=AorB(u->scheme,AorB(getenv("URL_SCHEME"),"DEFAULT")); -  u->username=AorB(u->username,AorB(getenv("URL_USERNAME"),"DEFAULT")); -  u->password=AorB(u->password,AorB(getenv("URL_PASSWORD"),"DEFAULT")); -  u->domain=AorB(u->domain,AorB(getenv("URL_DOMAIN"),"DEFAULT")); -  serv=getservbyname(u->scheme,strcmp(u->scheme,"udp")?"tcp":"udp");//gets default port for the scheme. http -> 80 -  if(serv) snprintf(sport,sizeof(sport)-1,"%d",ntohs(serv->s_port)); -//  else snprintf(sport,sizeof(sport)-1,"%d",serv); -  u->port=AorB(u->port,AorB(getenv("URL_PORT"),(serv?strdup(sport):"DEFAULT"))); - -//  if(!strcmp(u->port,"DEFAULT")) { -   //this shouldn't happen most of the time. :/ -//   printf("serv: %d\nsport: %s\nu->scheme: %s\n",serv,sport,u->scheme); -//  } - -  u->path=AorB(u->path,AorB(getenv("URL_PATH"),"DEFAULT")); -  u->query_string=AorB(u->query_string,AorB(getenv("URL_QUERY_STRING"),"DEFAULT")); -  u->fragment_id=AorB(u->fragment_id,AorB(getenv("URL_FRAGMENT_ID"),"DEFAULT")); -} -#endif | 
