diff options
-rw-r--r-- | .gitignore | 7 | ||||
-rw-r--r-- | Makefile | 34 | ||||
-rwxr-xr-x | start | 4 | ||||
-rw-r--r-- | start.conf.example | 8 | ||||
-rw-r--r-- | uri.h | 253 | ||||
-rw-r--r-- | uricmp.c | 20 | ||||
-rw-r--r-- | uricut.c (renamed from cuturl.c) | 74 | ||||
-rw-r--r-- | uriescape.c (renamed from urlescape.c) | 6 | ||||
-rw-r--r-- | urimatch.c (renamed from matchurl.c) | 13 | ||||
-rwxr-xr-x | uriprintf (renamed from printfurl) | 2 | ||||
-rwxr-xr-x | uristart | 4 | ||||
-rw-r--r-- | uriunescape.c (renamed from urlunescape.c) | 4 | ||||
-rw-r--r-- | url.h | 261 |
13 files changed, 328 insertions, 362 deletions
@@ -1,2 +1,5 @@ -matchurl -cuturl +uricmp +uricut +uriescape +urimatch +uriunescape @@ -1,26 +1,28 @@ -CFLAGS=-std=c99 -pedantic -Wall -PREFIX=/usr/local -CC=gcc +CFLAGS:=-std=c11 -pedantic -Wall +PREFIX:=/usr/local +CC:=gcc -all: cuturl matchurl urlunescape urlescape +all: uricut urimatch uriunescape uriescape uricmp -matchurl: matchurl.c url.h +urimatch: urimatch.c uri.h -cuturl: cuturl.c url.h +uricut: uricut.c uri.h -urlunescape: urlunescape.c url.h +uricmp: uricmp.c uri.h -urlescape: urlescape.c url.h +uriunescape: uriunescape.c uri.h + +uriescape: uriescape.c uri.h clean: - rm -f matchurl - rm -f cuturl + rm -f uricut urimatch uriunescape uriescape uricmp rm -f *.o install: all - install matchurl $(PREFIX)/bin/matchurl - install cuturl $(PREFIX)/bin/cuturl - install start $(PREFIX)/bin/start - install printfurl $(PREFIX)/bin/printfurl - install urlunescape $(PREFIX)/bin/urlunescape - install urlescape $(PREFIX)/bin/urlescape + install urimatch $(PREFIX)/bin/urimatch + install uricut $(PREFIX)/bin/uricut + install uricmp $(PREFIX)/bin/uricmp + install uristart $(PREFIX)/bin/uristart + install uriprintf $(PREFIX)/bin/uriprintf + install uriunescape $(PREFIX)/bin/uriunescape + install uriescape $(PREFIX)/bin/uriescape @@ -1,4 +0,0 @@ -#!/bin/sh -scheme="$(printf "%s\n" "$1" | cuturl -s)" -line="$(grep "^${scheme}:" ~/.config/start.conf | cut -d: -f2-)" -eval "$(printf "%s\n" "$1" | sed 's/'\''/'\''\\'\'''\''/g' | printfurl "$line")" diff --git a/start.conf.example b/start.conf.example deleted file mode 100644 index 6864da3..0000000 --- a/start.conf.example +++ /dev/null @@ -1,8 +0,0 @@ -### start single-quote escapes the url parts so place them inside single-quote or else! -### (if you're given a bad link someone might be able to run shell commands) -finger:printf "%%s\r\n" '%p' | ncat '%d' 79 | tr -d '\r' | xmessage -file - -### new! subshells works -whois:whois "$(printf '%%s\\\\n' '%d' | sed 's/^..*$/-h/')" '%d' '%p' | xmessage -file - -irc:x-terminal-emulator -e irssi -c '%d' -p '%P' -http:dillo '%U' -DEFAULT:xdg-open '%U' @@ -0,0 +1,253 @@ +#ifndef uri_H +#define uri_H + +#define _XOPEN_SOURCE 500 //for strdup +#include <string.h> +#include <netdb.h> +#include <stdio.h> +#include <stdlib.h> + +//uri_reserved = gen-delims / sub-delims +#define pe_gen_delims ":/?#[]@" +#define pe_sub_delims "!$&'()*+,;=" +//char *pe_reserved[]=pe_gen_delims "" pe_sub_delims; +#define pe_ALPHA "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" +#define pe_DIGIT "0123456789" +#define pe_HPUT "-._~" +//char *pe_unreserved[]=pe_ALPHA "" pe_DIGIT "" pe_HPUT; + +unsigned char rfc3086_percent_encoding[256]; + +#define isxdigit(a) ((a >= 'a' && a <= 'f') || (a >= '0' && a <= '9') || (a >= 'A' && a <= 'F')) +#define toupper(a) ((a >= 'a' && a <= 'z')?a-' ':a) + +char *uri_reserved={ + pe_gen_delims + pe_sub_delims + pe_ALPHA + pe_DIGIT + pe_HPUT +}; + +int uriescapelength(char *in,int len) { + int rlen=0;//be sure to add one to this return value if you plan on putting a null byte at the end. + int i; + for(i=0;i<len;i++) { + rlen+=strchr(uri_reserved,in[i])?1:3; + } + return rlen; +} + +// make sure your out char * has enough space! use uriescapelength for it. +void uriescape(char *in,char *out,int len) { + int i; + int j; + for(i=0,j=0;i<len;i++) { + if(strchr(uri_reserved,in[i])) { + out[j]=in[i]; + j++; + } else { + out[j]='%'; + j++; + out[j]="0123456789ABCDEF"[(in[i] >> 4 & 0x15)]; + j++; + out[j]="0123456789ABCDEF"[(in[i] % 16)]; + j++; + } + } +} + +int uriunescape(char *in,char *out) { + char *o=out; + char *t; + char a,b; + char *s=in; + if(!strchr(s,'%')) memmove(out,in,strlen(in)); + while((t=strchr(s,'%'))) { + if(t-s) {//if there are actually bytes to copy. + memmove(o,s,t-s); + o+=(t-s); + s+=(t-s); + } + if(isxdigit(t[1]) && isxdigit(t[2])) { + s+=3;//skip the %XX + a=toupper(t[1]); + b=toupper(t[2]); + *o=((a-'0'<10 ? a-'0' : a-'A'+10) << 4) + (b-'0'<10 ? b-'0' : b-'A'+10); + o++; + } else { + s++;//skip just the %. the next character might be a % //TODO: look up what the "right" thing to do here is. + *o='%'; + o++; + } + } + //copy the last part. + memmove(o,s,strlen(s)); + o[strlen(s)]=0; + return o+strlen(s)-out; +} + +struct uri {//warning. it is technically undefined behavior to set one half of a union then use the other half. + union { + char *A[8]; + struct { + union { char *s;char *scheme; }; + union { char *u;char *username; }; + union { char *k;char *password; }; + union { char *d;char *domain; }; + union { char *P;char *port; }; + union { char *p;char *path; }; + union { char *q;char *query_string; }; + union { char *f;char *fragment_id; }; + }; + }; +}; + +//returns 0 on success, returns a byte with bits set for non-matching pieces. +unsigned int uricmp(struct uri *a,struct uri *b) { + int i; + int ret=0; + for(i=0;i<8;i++) { + if(a->A[i] && !b->A[i]) ret |=(1<<(i+8));//we have a's but not b's + if(!a->A[i] && b->A[i]) ret |=(1<<(i+16)); + //for testing if(!a->A[i] && !b->A[i]) ret |=(1<<(i+24));//no problem here. both empty. + if(a->A[i] && b->A[i]) { + if(strcmp(a->A[i],b->A[i])) { + ret|=(1<<i); + } + } + } + return ret; +} + +/* + schemes are case sensitive but cononicals are lower case. + domain is case insensitive. return it lowercased? + port is optional and in decimal + path + scheme://username:password@domain:port/path?query_string#fragment_id + mailto:username@domain + + optional stuff: + scheme, username, password, port, path, query_string, fragment_id +*/ + +//should it be a dick about what characters are allowed? +//should it just try to ignore weird shit? + +//return 0 on fail //not sure what this means. +//return 1 on success +int urifromline(struct uri *u,char *line) { + //these first two are easy. the rest... not so much. + char *t; +// memset(u,0,sizeof(struct uri)); //this function shouldn't do this. + if((u->fragment_id=strchr(line,'#'))) { + *u->fragment_id=0; + u->fragment_id++; + } + if((u->query_string=strchr(line,'?'))) { + *u->query_string=0; + u->query_string++; + } + //now we have scheme, user, pass, domain, port, and path. maybe. + //what character can we split on now? : is a terrible choice. + // how about /? first / is either a separator between scheme + //could find the first non-scheme character. + //so we might have... scheme://user:pass@host:port/path + //or... user:pass@host:port/path ? + //we need to do this based on /s + // we're either going to find the scheme and authority separator + // or we're going to find the start of a path. + //there: scheme:/path, scheme://host (empty path), or scheme:path/morepath + //or... should we do paths without + //scheme must start with a-z +/* if(*line == '/' && *(line+1) != '/') { //we have a relative path. /like:this.maybe + u->path=line; + return;//we're done. nothing else to do. + } + if(*line == '.') { //we have a relative path like: ./derp or ../merp + u->path=line; + return;//we're done here. nothing else to do. + }*/ + //let's see if this starts with a scheme + if(strchr(line,':') && ((*line >= 'a' && *line <= 'z') || (*line >= 'A' && *line <= 'Z'))) { + for(u->scheme=strchr(line,':')-1;u->scheme > line;u->scheme--) { + if((*u->scheme >= 'a' && *u->scheme <= 'z') || + (*u->scheme >= 'A' && *u->scheme <= 'Z') || + (*u->scheme >= '0' && *u->scheme <= '9') || + *u->scheme == '+' || *u->scheme == '-' || *u->scheme == '.') { + //this is still a scheme. + } else { + break; + } + } + if(u->scheme == line) {//we got through the for loop alright. line starts with a scheme. + line=strchr(line,':'); + *line=0; + line++; + for(t=u->scheme;*t;t++) { + if(*t >= 'A' && *t <= 'Z') *t+=' '; + } + } + } + + //copy-pasted from above the scheme strip attempt. + if(*line == '/' && *(line+1) != '/') { //we have a relative path. /like:this.maybe + u->path=line; + return 1;//we're done. nothing else to do. + } + if(*line == '.') { //we have a relative path like: ./derp or ../merp + u->path=line; + return 1;//we're done here. nothing else to do. + } + + if(*line == '/' && line[1] == '/') {//we have an authority section. + //let's left-shift this shit over until the third / + for(t=line+1;*(t+1) && *(t+1) != '/';t++) { + *t=*(t+1); + } + *t=0; + u->path=t+1;//if there was a /, path points at it and the stuff after. + //if there wasn't a /, it points at a null byte. so "empty" + u->username=line+1; + } else { + //we have all we need. + return 1; + } + + if(u->username) {//this contains all of the authority. + if((u->domain=strchr(u->username,'@'))) {//we have user@host at least. + *u->domain=0; + u->domain++; + } else {//this isn't really a username. it is the domain. + u->domain=u->username; + u->username=0; + } + } + //if we still have u->username we try to split to user and password + if(u->username) { + if((u->password=strchr(u->username,':'))) { + *u->password=0; + u->password++; + } + } + if(u->domain) { + if((u->port=strchr(u->domain,']')) && *u->domain == '[') {//this is an IPv6 host + *u->port=0; + u->port++; + if(*u->port == ':') { + *u->port=0; + u->port++;//if it ends up being empty, whatever. that's a URI like: http://host:/path + } + } else { //we're safe to split port off at : + if((u->port=strchr(u->domain,':'))) { + *u->port=0; + u->port++; + } //there isn't a port. leave it unset. + } + } + //I dunno. + return 1; +} + +#endif diff --git a/uricmp.c b/uricmp.c new file mode 100644 index 0000000..9af0fed --- /dev/null +++ b/uricmp.c @@ -0,0 +1,20 @@ +#include "uri.h" +#include <stdio.h> + +int main(int argc,char *argv[]) { + int i; + int ret; + struct uri *a=malloc(sizeof(struct uri)); + struct uri *b=malloc(sizeof(struct uri)); + if(argc < 3) { + fprintf(stderr,"usage: uricmp uri1 uri2\n");//we didn't ask for usage so it goes to stderr + return 1; + } + urifromline(a,argv[1]); + urifromline(b,argv[2]); + for(i=0;i<8;i++) { + printf("%s ? %s\n",a->A[i],b->A[i]); + } + printf("%08x\n",ret=uricmp(a,b)); + return ret > 0 ? 2 : 0; +} @@ -1,3 +1,5 @@ +#include "uri.h" + #include <netdb.h> #include <stdio.h> #include <string.h> @@ -6,10 +8,6 @@ #include <unistd.h> #include <sys/wait.h> -#include "url.h" - -#define MAGIC - /* schemes are case sensitive but cononicals are lower case. domain is case insensitive. return it lowercased? @@ -32,32 +30,30 @@ #define F_PATH 1<<5 #define F_QUERY_STRING 1<<6 #define F_FRAGMENT_ID 1<<7 -#define F_WHOLE_URL 1<<8 +#define F_WHOLE_URI 1<<8 -char *long_opts[]={"scheme","username","password","domain","port","path","query_string","fragment_id","URL",0}; +char *long_opts[]={"scheme","username","password","domain","port","path","query_string","fragment_id","URI",0}; char *short_opts[]={"s","u","k","d","P","p","q","f","U"}; int main(int argc,char *argv[]) { - char *url; - char *name[2]; + char *uri; char *line=0; - short args[256];//this needs to be a short to make room for the F_WHOLE_URL + short args[256];//this needs to be a short to make room for the F_WHOLE_URI int i,j,c=0; int size=1024; - int status; char fixme=0; char using_stdin=1; char malloced=0; - struct url u; + struct uri u; if(argc > 1) { if(!strcmp(argv[1],"--help") || !strcmp(argv[1],"-h")) { - printf("usage: echo urls | cuturl [options]\n"); - printf("usage: cuturl [options] url [options] [url]\n\n"); + printf("usage: echo uris | uricut [options]\n"); + printf("usage: uricut [options] uri [options] [uri]\n\n"); printf("options: \n"); for(i=0;long_opts[i];i++) { printf(" -%s|--%s\n",short_opts[i],long_opts[i]); } - printf("To set default values use environment variables like: CUTURL_[OPTION]\n"); + printf("To set default values use environment variables like: CUTURI_[OPTION]\n"); return 2; } } @@ -120,51 +116,12 @@ int main(int argc,char *argv[]) { for(i=0;line[i] && line[i] != '\n' && line[i] != '\r';i++); line[i]=0; - url=strdup(line); - urlfromline(&u,line); + uri=strdup(line); + urifromline(&u,line); // printf("scheme://username:password@domain:port/path?query_string#fragment_id\n\n"); //let's set them to what'll get printed now... -#ifdef MAGIC - magic_and_defaults(&u); -/* - u.scheme=AorB(u.scheme,AorB(getenv("CUTURL_SCHEME"),"DEFAULT")); - u.username=AorB(u.username,AorB(getenv("CUTURL_USERNAME"),"DEFAULT")); - u.password=AorB(u.password,AorB(getenv("CUTURL_PASSWORD"),"DEFAULT")); - u.domain=AorB(u.domain,AorB(getenv("CURURL_DOMAIN"),"DEFAULT")); - serv=getservbyname(u.scheme,strcmp(u.scheme,"udp")?"tcp":"udp");//gets default port for the scheme. http -> 80 - if(serv) snprintf(sport,sizeof(sport)-1,"%d",ntohs(serv->s_port)); - u.port=AorB(u.port,AorB(getenv("CUTURL_PORT"),(serv?sport:"DEFAULT"))); - u.path=AorB(u.path,AorB(getenv("CUTURL_PATH"),"DEFAULT")); - u.query_string=AorB(u.query_string,AorB(getenv("CUTURL_QUERY_STRING"),"DEFAULT")); - u.fragment_id=AorB(u.fragment_id,AorB(getenv("CUTURL_FRAGMENT_ID"),"DEFAULT")); -*/ -#endif - - if((name[0]=getenv("CUTURL__"))) { - setenv("CUTURL__SCHEME",u.scheme,1); - setenv("CUTURL__USERNAME",u.username,1); - setenv("CUTURL__PASSWORD",u.password,1); - setenv("CUTURL__DOMAIN",u.domain,1); - setenv("CUTURL__PORT",u.port,1); - setenv("CUTURL__PATH",u.path,1); - setenv("CUTURL__QUERY_STRING",u.query_string,1); - setenv("CUTURL__FRAGMENT_ID",u.fragment_id,1); - name[1]=0; - switch(fork()) { - case 0: - execv(name[0],name); - perror("execv"); - return errno; - case -1: - perror("fork"); - return errno; - default: - break; - } - wait(&status); - } else { if(c) { for(i=0;i<c;i++) { if(args[i]&F_SCHEME) printf("%s\n",AorB(u.scheme,"")); @@ -175,7 +132,7 @@ int main(int argc,char *argv[]) { if(args[i]&F_PATH) printf("%s\n",AorB(u.path,"")); if(args[i]&F_QUERY_STRING) printf("%s\n",AorB(u.query_string,"")); if(args[i]&F_FRAGMENT_ID) printf("%s\n",AorB(u.fragment_id,"")); - if(args[i]&F_WHOLE_URL) printf("%s\n",url); + if(args[i]&F_WHOLE_URI) printf("%s\n",uri); } } else { printf("scheme: %s\n",u.scheme); @@ -186,10 +143,9 @@ int main(int argc,char *argv[]) { printf("path: %s\n",u.path); printf("query_string: %s\n",u.query_string); printf("fragment_id: %s\n",u.fragment_id); - printf("whole_url: %s\n",url); + printf("whole_uri: %s\n",uri); } - } - free(url);//this is definitely malloc()d + free(uri);//this is definitely malloc()d if(malloced) { free(line); malloced=0; diff --git a/urlescape.c b/uriescape.c index ce8e3e7..da3da7e 100644 --- a/urlescape.c +++ b/uriescape.c @@ -1,13 +1,13 @@ +#include "uri.h" #include <stdio.h> -#include "url.h" int main(int argc,char *argv[]) { int len; char *out; if(argc < 2) return 1; - len=urlescapelength(argv[1],strlen(argv[1])); + len=uriescapelength(argv[1],strlen(argv[1])); out=malloc(len+1); - urlescape(argv[1],out,len); + uriescape(argv[1],out,len); out[len]=0; printf("%s\n",out); return 0; @@ -1,7 +1,7 @@ +#include "uri.h" #include <stdio.h> #include <stdlib.h> #include <string.h> -#include "url.h" #define LINE_LENGTH 1024 @@ -19,16 +19,16 @@ int match(char negate,char *part,char *arg) { int main(int argc,char *argv[]) { int i; int ret=1; - struct url u; + struct uri u; char negate=0; char *line=malloc(LINE_LENGTH); char copy[LINE_LENGTH]; if(argc < 2) { - printf("usage: matchurl [-][n][s|u|k|d|D|P|p|q|f] [string]\n"); + printf("usage: urimatch [-][n][s|u|k|d|D|P|p|q|f] [string]\n"); printf("scheme://username:password@domain:port/path?query_string#fragment_id\n"); printf("s://u:k@d:P/p?q#f\n"); printf("The D flag is special. it matches its argument against the last bytes of the input url's domain.\n"); - printf("This allows matching of subdomains, like `echo epoch.ano | matchurl -D ano` would match.\n"); + printf("This allows matching of subdomains, like `echo epoch.ano | urimatch -D ano` would match.\n"); printf("the 'n' flag can be put before any of the other flags to check for a missing.\n"); return 1; } @@ -37,7 +37,7 @@ int main(int argc,char *argv[]) { if(strchr(line,'\n')) *strchr(line,'\n')=0; strcpy(copy,line); memset(&u,0,sizeof(u)); - urlfromline(&u,line); + urifromline(&u,line); //use the character in argv[1] to match stdin against argv[2]. if match print whole line. for(i=1;i<argc;i+=2) { if(negate) {i--;}//we didn't really need to go that far. @@ -54,9 +54,10 @@ int main(int argc,char *argv[]) { case 'q': if(match(negate,u.query_string,argv[i+1])) { printf("%s\n",copy); ret=0;} break; case 'f': if(match(negate,u.fragment_id,argv[i+1])) { printf("%s\n",copy); ret=0;} break; case 'D': //not sure how to look for a missing one of these. it'd be like d. - if(u.domain && argv[i+1] && strlen(u.domain) >= strlen(argv[i+1]) && !strncmp(u.domain+strlen(u.domain)-strlen(argv[i+1]),argv[i+1],strlen(argv[i+1]))) + if(u.domain && argv[i+1] && strlen(u.domain) >= strlen(argv[i+1]) && !strncmp(u.domain+strlen(u.domain)-strlen(argv[i+1]),argv[i+1],strlen(argv[i+1]))) { printf("%s\n",copy); ret=0; + } break; default: printf("unknown url part letter! '%c'\n",argv[i][0]); @@ -2,4 +2,4 @@ cutargs="$(printf "%s\n" "$1" | sed 's/%[^sukdPpqfU]//g' | tr '%' '\n' | tail -n+2 | sed 's/^\(.\).*/-\1/g' | tr '\n' ' ')" count="$(echo $cutargs | tr '-' '\n' | grep -c .)" printfargs="$(printf "%s\n" "$1" | sed 's/%[sukdPpqfU]/%s/g')" -cuturl "$cutargs" | tr '\n' '\0' | xargs -n $count -0 printf "$printfargs" +uricut "$cutargs" | tr '\n' '\0' | xargs -n $count -0 printf "$printfargs" diff --git a/uristart b/uristart new file mode 100755 index 0000000..78454b7 --- /dev/null +++ b/uristart @@ -0,0 +1,4 @@ +#!/bin/sh +scheme="$(printf "%s\n" "$1" | uricut -s)" +line="$(grep "^${scheme}:" ~/.config/uristart.conf | cut -d: -f2- | sed 's/^[ \t]//g')" +eval "$(printf "%s\n" "$1" | sed 's/'\''/'\''\\'\'''\''/g' | uriprintf "$line")" diff --git a/urlunescape.c b/uriunescape.c index 618cd64..aafc6ea 100644 --- a/urlunescape.c +++ b/uriunescape.c @@ -1,10 +1,10 @@ +#include "uri.h" #include <unistd.h> -#include "url.h" int main(int argc,char *argv[]) { int len; for(argv++,argc--;argc;argc--,argv++) { - len=urlunescape(*argv,*argv); + len=uriunescape(*argv,*argv); write(1,*argv,len); if(argc-1) write(1," ",1); } @@ -1,261 +0,0 @@ -#ifndef URL_H -#define URL_H - -#include <netdb.h> -#include <stdio.h> -#include <string.h> -#include <stdlib.h> - -//reserved = gen-delims / sub-delims -#define pe_gen_delims ":/?#[]@" -#define pe_sub_delims "!$&'()*+,;=" -//char *pe_reserved[]=pe_gen_delims "" pe_sub_delims; -#define pe_ALPHA "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" -#define pe_DIGIT "0123456789" -#define pe_HPUT "-._~" -//char *pe_unreserved[]=pe_ALPHA "" pe_DIGIT "" pe_HPUT; - -unsigned char rfc3086_percent_encoding[256]; - -#define isxdigit(a) ((a >= 'a' && a <= 'f') || (a >= '0' && a <= '9') || (a >= 'A' && a <= 'F')) -#define toupper(a) ((a >= 'a' && a <= 'z')?a-' ':a) - -char *reserved={ - pe_gen_delims - pe_sub_delims - pe_ALPHA - pe_DIGIT - pe_HPUT -}; - -int urlescapelength(char *in,int len) { - int rlen=0;//be sure to add one to this return value if you plan on putting a null byte at the end. - int i; - for(i=0;i<len;i++) { - rlen+=strchr(reserved,in[i])?1:3; - } - return rlen; -} - -// make sure your out char * has enough space! use urlescapelength for it. -void urlescape(char *in,char *out,int len) { - int i; - int j; - for(i=0,j=0;i<len;i++) { - if(strchr(reserved,in[i])) { - out[j]=in[i]; - j++; - } else { - out[j]='%'; - j++; - out[j]="0123456789ABCDEF"[(in[i] >> 4 & 0x15)]; - j++; - out[j]="0123456789ABCDEF"[(in[i] % 16)]; - j++; - } - } -} - -int urlunescape(char *in,char *out) { - char *o=out; - char *t; - char a,b; - char *s=in; - if(!strchr(s,'%')) memmove(out,in,strlen(in)); - while((t=strchr(s,'%'))) { - if(t-s) {//if there are actually bytes to copy. - memmove(o,s,t-s); - o+=(t-s); - s+=(t-s); - } - if(isxdigit(t[1]) && isxdigit(t[2])) { - s+=3;//skip the %XX - a=toupper(t[1]); - b=toupper(t[2]); - *o=((a-'0'<10 ? a-'0' : a-'A'+10) << 4) + (b-'0'<10 ? b-'0' : b-'A'+10); - o++; - } else { - s++;//skip just the %. the next character might be a % //TODO: look up what the "right" thing to do here is. - *o='%'; - o++; - } - } - //copy the last part. - memmove(o,s,strlen(s)); - o[strlen(s)]=0; - return o+strlen(s)-out; -} - -struct url { - char *scheme; - char *username; - char *password; - char *domain; - char *port; - char *path; - char *query_string; - char *fragment_id; -}; - - -/* - schemes are case sensitive but cononicals are lower case. - domain is case insensitive. return it lowercased? - port is optional and in decimal - path - scheme://username:password@domain:port/path?query_string#fragment_id - mailto:username@domain - - optional stuff: - scheme, username, password, port, path, query_string, fragment_id -*/ - -void urlfromline(struct url *u,char *line) { - int i; - char hack=0;//we need to allow for // as host//path separator - //split at first single / into line and path - //this fails to split scheme://host//path into: scheme, host, /path. needs to be first single / or second double-or-more-/ - for(i=0;line[i];i++) { - if(line[i] == '/' && line[i+1] == '/') { - if(!hack) {//only skip out on the first // because it is probably used in the scheme. - hack=1; - i++; - continue; - } - } - if(line[i] == '/') { - line[i]=0; - u->path=line+i+1; - break; - } - } - if(u->path) { - if(strchr(u->path,'?')) { - u->query_string=strchr(u->path,'?'); - *u->query_string=0; - u->query_string++; - } - } - - if(u->query_string) { - if(strchr(u->query_string,'#')) { - u->fragment_id=strchr(u->query_string,'#'); - *u->fragment_id=0; - u->fragment_id++; - } - } - - if(strstr(line,"://")) { - u->scheme=line; - u->domain=strstr(line,"://"); - *u->domain=0; - u->domain+=3; - } else { - u->domain=line; - } - - if(u->domain) { - if(strchr(u->domain,'@')) { - u->username=u->domain; - u->domain=strchr(u->domain,'@'); - *u->domain=0; - u->domain++; - } - } - - if(u->username) { - if(strchr(u->username,':')) { - u->password=strchr(u->username,':'); - *u->password=0; - u->password++; - } - } - - if(u->domain) { - if(strchr(u->domain,']')) {//the end of an IPv6 address - if(strchr(strchr(u->domain,']'),':')) { - u->port=strchr(strchr(u->domain,']'),':'); - if(u->port[1] == '?') {//for magnet links - u->port=0; - } else { - *u->port=0; - u->port++; - } - } - } else { - if(strchr(u->domain,':')) { - u->port=strchr(u->domain,':'); - if(u->port[1] == '?') {//for magnet links - u->port=0; - } else { - *u->port=0; - u->port++; - } - } - } - } - if(u->port) { - for(i=0;u->port[i];i++) { - if(u->port[i] < '0' || u->port[i] > '9') { - //this port number isn't a number! - //it is probably a different portion of the url then... and the domain is probably the scheme. - if(u->domain && !u->scheme) { - u->scheme=u->domain; - u->domain=0; - } - if(!u->path) { - u->path=u->port; - u->port=0; - } - break; - } - } - } - - if(u->domain) {//for magnet links. - if(strchr(u->domain,'?')) { - u->query_string=strchr(u->domain,'?'); - *u->query_string=0; - u->query_string++; - } - } - - if(u->domain) { - if(strchr(u->domain,':') && !strchr(u->domain,']')) {//for scheme:?query_string - u->scheme=u->domain; - *strchr(u->scheme,':')=0; - u->domain=0; - } - } - - if(!u->scheme && u->username) {//for mailto: - u->scheme=u->username; - u->username=u->password; - u->password=0; - } -} - -#define AorB(a,b) ((a)?(a):(b)) - -void magic_and_defaults(struct url *u) { - struct servent *serv; - char sport[10]; - u->scheme=AorB(u->scheme,AorB(getenv("URL_SCHEME"),"DEFAULT")); - u->username=AorB(u->username,AorB(getenv("URL_USERNAME"),"DEFAULT")); - u->password=AorB(u->password,AorB(getenv("URL_PASSWORD"),"DEFAULT")); - u->domain=AorB(u->domain,AorB(getenv("URL_DOMAIN"),"DEFAULT")); - serv=getservbyname(u->scheme,strcmp(u->scheme,"udp")?"tcp":"udp");//gets default port for the scheme. http -> 80 - if(serv) snprintf(sport,sizeof(sport)-1,"%d",ntohs(serv->s_port)); -// else snprintf(sport,sizeof(sport)-1,"%d",serv); - u->port=AorB(u->port,AorB(getenv("URL_PORT"),(serv?strdup(sport):"DEFAULT"))); - -// if(!strcmp(u->port,"DEFAULT")) { - //this shouldn't happen most of the time. :/ -// printf("serv: %d\nsport: %s\nu->scheme: %s\n",serv,sport,u->scheme); -// } - - u->path=AorB(u->path,AorB(getenv("URL_PATH"),"DEFAULT")); - u->query_string=AorB(u->query_string,AorB(getenv("URL_QUERY_STRING"),"DEFAULT")); - u->fragment_id=AorB(u->fragment_id,AorB(getenv("URL_FRAGMENT_ID"),"DEFAULT")); -} -#endif |