summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore7
-rw-r--r--Makefile34
-rwxr-xr-xstart4
-rw-r--r--start.conf.example8
-rw-r--r--uri.h253
-rw-r--r--uricmp.c20
-rw-r--r--uricut.c (renamed from cuturl.c)74
-rw-r--r--uriescape.c (renamed from urlescape.c)6
-rw-r--r--urimatch.c (renamed from matchurl.c)13
-rwxr-xr-xuriprintf (renamed from printfurl)2
-rwxr-xr-xuristart4
-rw-r--r--uriunescape.c (renamed from urlunescape.c)4
-rw-r--r--url.h261
13 files changed, 328 insertions, 362 deletions
diff --git a/.gitignore b/.gitignore
index 805d9dc..499d9be 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,5 @@
-matchurl
-cuturl
+uricmp
+uricut
+uriescape
+urimatch
+uriunescape
diff --git a/Makefile b/Makefile
index 7edc560..b67a2d6 100644
--- a/Makefile
+++ b/Makefile
@@ -1,26 +1,28 @@
-CFLAGS=-std=c99 -pedantic -Wall
-PREFIX=/usr/local
-CC=gcc
+CFLAGS:=-std=c11 -pedantic -Wall
+PREFIX:=/usr/local
+CC:=gcc
-all: cuturl matchurl urlunescape urlescape
+all: uricut urimatch uriunescape uriescape uricmp
-matchurl: matchurl.c url.h
+urimatch: urimatch.c uri.h
-cuturl: cuturl.c url.h
+uricut: uricut.c uri.h
-urlunescape: urlunescape.c url.h
+uricmp: uricmp.c uri.h
-urlescape: urlescape.c url.h
+uriunescape: uriunescape.c uri.h
+
+uriescape: uriescape.c uri.h
clean:
- rm -f matchurl
- rm -f cuturl
+ rm -f uricut urimatch uriunescape uriescape uricmp
rm -f *.o
install: all
- install matchurl $(PREFIX)/bin/matchurl
- install cuturl $(PREFIX)/bin/cuturl
- install start $(PREFIX)/bin/start
- install printfurl $(PREFIX)/bin/printfurl
- install urlunescape $(PREFIX)/bin/urlunescape
- install urlescape $(PREFIX)/bin/urlescape
+ install urimatch $(PREFIX)/bin/urimatch
+ install uricut $(PREFIX)/bin/uricut
+ install uricmp $(PREFIX)/bin/uricmp
+ install uristart $(PREFIX)/bin/uristart
+ install uriprintf $(PREFIX)/bin/uriprintf
+ install uriunescape $(PREFIX)/bin/uriunescape
+ install uriescape $(PREFIX)/bin/uriescape
diff --git a/start b/start
deleted file mode 100755
index 9a3dca6..0000000
--- a/start
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-scheme="$(printf "%s\n" "$1" | cuturl -s)"
-line="$(grep "^${scheme}:" ~/.config/start.conf | cut -d: -f2-)"
-eval "$(printf "%s\n" "$1" | sed 's/'\''/'\''\\'\'''\''/g' | printfurl "$line")"
diff --git a/start.conf.example b/start.conf.example
deleted file mode 100644
index 6864da3..0000000
--- a/start.conf.example
+++ /dev/null
@@ -1,8 +0,0 @@
-### start single-quote escapes the url parts so place them inside single-quote or else!
-### (if you're given a bad link someone might be able to run shell commands)
-finger:printf "%%s\r\n" '%p' | ncat '%d' 79 | tr -d '\r' | xmessage -file -
-### new! subshells works
-whois:whois "$(printf '%%s\\\\n' '%d' | sed 's/^..*$/-h/')" '%d' '%p' | xmessage -file -
-irc:x-terminal-emulator -e irssi -c '%d' -p '%P'
-http:dillo '%U'
-DEFAULT:xdg-open '%U'
diff --git a/uri.h b/uri.h
new file mode 100644
index 0000000..97ce3c2
--- /dev/null
+++ b/uri.h
@@ -0,0 +1,253 @@
+#ifndef uri_H
+#define uri_H
+
+#define _XOPEN_SOURCE 500 //for strdup
+#include <string.h>
+#include <netdb.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+//uri_reserved = gen-delims / sub-delims
+#define pe_gen_delims ":/?#[]@"
+#define pe_sub_delims "!$&'()*+,;="
+//char *pe_reserved[]=pe_gen_delims "" pe_sub_delims;
+#define pe_ALPHA "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+#define pe_DIGIT "0123456789"
+#define pe_HPUT "-._~"
+//char *pe_unreserved[]=pe_ALPHA "" pe_DIGIT "" pe_HPUT;
+
+unsigned char rfc3086_percent_encoding[256];
+
+//ASCII-only, locale-independent versions of the <ctype.h> names.
+//arguments are parenthesized so expression arguments expand correctly. each argument is still
+//evaluated more than once, so do not pass expressions with side effects.
+#define isxdigit(a) (((a) >= 'a' && (a) <= 'f') || ((a) >= '0' && (a) <= '9') || ((a) >= 'A' && (a) <= 'F'))
+#define toupper(a) (((a) >= 'a' && (a) <= 'z')?(a)-' ':(a))
+
+char *uri_reserved={
+ pe_gen_delims
+ pe_sub_delims
+ pe_ALPHA
+ pe_DIGIT
+ pe_HPUT
+};
+
+//counts the bytes uriescape() writes for the first len bytes of in.
+//a byte found in uri_reserved counts once, any other byte counts three times ("%XX").
+//the result leaves no room for a terminating null byte. add one if you want to append it.
+int uriescapelength(char *in,int len) {
+	int total=0;
+	int left;
+	char *p=in;
+	for(left=len;left>0;left--,p++) {
+		if(strchr(uri_reserved,*p)) {
+			total+=1;
+		} else {
+			total+=3;
+		}
+	}
+	return total;
+}
+
+//percent-encodes the first len bytes of in into out.
+//bytes found in uri_reserved are copied unchanged. every other byte becomes "%XX" in uppercase hex.
+//len is the number of INPUT bytes to read.
+//out is not null-terminated here. it needs room for uriescapelength(in,len) bytes,
+//plus one more if the caller appends a null byte.
+void uriescape(char *in,char *out,int len) {
+	static const char hex[]="0123456789ABCDEF";
+	int i;
+	int j=0;
+	for(i=0;i<len;i++) {
+		//go through unsigned char so bytes >= 0x80 cannot produce a negative or too-large table index.
+		unsigned char c=(unsigned char)in[i];
+		if(strchr(uri_reserved,in[i])) {
+			out[j++]=in[i];
+		} else {
+			out[j++]='%';
+			out[j++]=hex[c >> 4];//high nibble. the old 0x15 mask lost bits 1 and 3, so a space (0x20) came out as %00.
+			out[j++]=hex[c & 0x0F];//low nibble.
+		}
+	}
+}
+
+//decodes %XX escapes from the null-terminated string in into out and null-terminates out.
+//a '%' that is not followed by two hex digits is copied through as a literal '%'.
+//copies are done with memmove, so out may be the same buffer as in.
+//returns the number of bytes written to out, not counting the terminating null byte.
+int uriunescape(char *in,char *out) {
+	char *src=in;
+	char *dst=out;
+	char *pct;
+	char hi,lo;
+	size_t rest;
+	for(pct=strchr(src,'%');pct;pct=strchr(src,'%')) {
+		size_t plain=pct-src;
+		memmove(dst,src,plain);//the literal run before the '%'. may be empty.
+		dst+=plain;
+		src=pct;
+		if(!isxdigit(pct[1]) || !isxdigit(pct[2])) {
+			//not a valid escape. emit the '%' itself and rescan from the next byte, which might be a '%'.
+			*dst++='%';
+			src++;
+			continue;
+		}
+		hi=toupper(pct[1]);
+		lo=toupper(pct[2]);
+		hi=(hi-'0'<10) ? hi-'0' : hi-'A'+10;
+		lo=(lo-'0'<10) ? lo-'0' : lo-'A'+10;
+		*dst++=(hi << 4) + lo;
+		src+=3;//skip the %XX
+	}
+	//copy whatever follows the last '%'.
+	rest=strlen(src);
+	memmove(dst,src,rest);
+	dst[rest]=0;
+	return dst+rest-out;
+}
+
+struct uri {//the parts of a parsed URI. A[] and the named members share storage so code can loop over the parts. NOTE(review): this relies on the eight char* members being laid out exactly like char *A[8], and on reading a union member other than the one last written - confirm this is acceptable for your compiler.
+ union {
+ char *A[8];//indexed view, in the order the members are declared below: 0=scheme 1=username 2=password 3=domain 4=port 5=path 6=query_string 7=fragment_id
+ struct {
+ union { char *s;char *scheme; };//each inner union gives one pointer a short and a long name.
+ union { char *u;char *username; };
+ union { char *k;char *password; };
+ union { char *d;char *domain; };
+ union { char *P;char *port; };//stored as a string, not a number.
+ union { char *p;char *path; };
+ union { char *q;char *query_string; };
+ union { char *f;char *fragment_id; };
+ };
+ };
+};
+
+//compares two parsed URIs part by part, in the order of uri.A.
+//returns 0 when every part agrees. otherwise, for each part index i:
+// bit i is set when both have the part but the strings differ,
+// bit i+8 is set when only a has the part,
+// bit i+16 is set when only b has the part.
+unsigned int uricmp(struct uri *a,struct uri *b) {
+	int flags=0;
+	int part;
+	for(part=0;part<8;part++) {
+		char *left=a->A[part];
+		char *right=b->A[part];
+		if(left && right) {
+			if(strcmp(left,right)!=0) flags|=(1<<part);
+		} else if(left) {
+			flags|=(1<<(part+8));//we have a's but not b's
+		} else if(right) {
+			flags|=(1<<(part+16));//we have b's but not a's
+		}
+		//both missing is fine. nothing to flag.
+	}
+	return flags;
+}
+
+/*
+ schemes are case insensitive but the canonical form is lower case.
+ domain is case insensitive. return it lowercased?
+ port is optional and in decimal
+ path
+ scheme://username:password@domain:port/path?query_string#fragment_id
+ mailto:username@domain
+
+ optional stuff:
+ scheme, username, password, port, path, query_string, fragment_id
+*/
+
+//should it be strict about what characters are allowed?
+//should it just try to ignore unexpected input?
+
+//parses line into u IN PLACE: separators in line are overwritten with null bytes and the
+//members of u are pointed at the pieces, so line must stay alive for as long as u is used.
+//fragment_id and query_string are always assigned (null when absent). scheme, username, password,
+//domain, port and path are only written on the code paths that reach them, and this function
+//does not clear u itself, so the caller should zero (or preset) *u first.
+//a scheme that is found is lowercased.
+//always returns 1.
+int urifromline(struct uri *u,char *line) {
+	char *t;
+	char *colon;
+	//these first two are easy: cut the fragment and then the query string off the end.
+	if((u->fragment_id=strchr(line,'#'))) {
+		*u->fragment_id=0;
+		u->fragment_id++;
+	}
+	if((u->query_string=strchr(line,'?'))) {
+		*u->query_string=0;
+		u->query_string++;
+	}
+	//now we have scheme, user, pass, domain, port, and path. maybe.
+	//a scheme starts with a letter and has only letters, digits, '+', '-' and '.' before the first ':'.
+	colon=strchr(line,':');
+	if(colon && ((*line >= 'a' && *line <= 'z') || (*line >= 'A' && *line <= 'Z'))) {
+		//walk backwards from the ':' with a local pointer. u->scheme is only written once the
+		//whole prefix is known to be a scheme, so a failed attempt (like "a/b:c") does not
+		//leave u->scheme pointing into the middle of line.
+		for(t=colon-1;t > line;t--) {
+			if(!((*t >= 'a' && *t <= 'z') ||
+			     (*t >= 'A' && *t <= 'Z') ||
+			     (*t >= '0' && *t <= '9') ||
+			     *t == '+' || *t == '-' || *t == '.')) {
+				break;
+			}
+		}
+		if(t == line) {//we got through the for loop alright. line starts with a scheme.
+			u->scheme=line;
+			*colon=0;
+			line=colon+1;
+			for(t=u->scheme;*t;t++) {
+				if(*t >= 'A' && *t <= 'Z') *t+=' ';//lowercase it.
+			}
+		}
+	}
+	if(*line == '/' && *(line+1) != '/') {//a path with no authority. /like:this.maybe
+		u->path=line;
+		return 1;//we're done. nothing else to do.
+	}
+	if(*line == '.') {//we have a relative path like: ./derp or ../merp
+		u->path=line;
+		return 1;//we're done here. nothing else to do.
+	}
+	if(*line == '/' && line[1] == '/') {//we have an authority section.
+		//shift the authority left by one byte, up to the next '/' or the end of the string.
+		//that frees a byte to null-terminate it without overwriting the '/' that starts the path.
+		for(t=line+1;*(t+1) && *(t+1) != '/';t++) {
+			*t=*(t+1);
+		}
+		*t=0;
+		u->path=t+1;//if there was a /, path points at it and the stuff after.
+		//if there wasn't a /, it points at a null byte. so "empty"
+		u->username=line+1;
+	} else {
+		//we have all we need.
+		return 1;
+	}
+	if(u->username) {//this contains all of the authority.
+		if((u->domain=strchr(u->username,'@'))) {//we have user@host at least.
+			*u->domain=0;
+			u->domain++;
+		} else {//this isn't really a username. it is the domain.
+			u->domain=u->username;
+			u->username=0;
+		}
+	}
+	//if we still have u->username we try to split to user and password
+	if(u->username) {
+		if((u->password=strchr(u->username,':'))) {
+			*u->password=0;
+			u->password++;
+		}
+	}
+	if(u->domain) {
+		if((u->port=strchr(u->domain,']')) && *u->domain == '[') {//this is an IPv6 host
+			*u->port=0;//note: this cuts the closing ']' off the domain.
+			u->port++;
+			if(*u->port == ':') {
+				*u->port=0;
+				u->port++;//if it ends up being empty, whatever. that's a URI like: http://host:/path
+			}
+		} else { //we're safe to split port off at :
+			if((u->port=strchr(u->domain,':'))) {
+				*u->port=0;
+				u->port++;
+			} //there isn't a port. port is left null by the assignment above.
+		}
+	}
+	return 1;
+}
+
+#endif
diff --git a/uricmp.c b/uricmp.c
new file mode 100644
index 0000000..9af0fed
--- /dev/null
+++ b/uricmp.c
@@ -0,0 +1,20 @@
+#include "uri.h"
+#include <stdio.h>
+
+//usage: uricmp uri1 uri2
+//prints each pair of parts, then the uricmp() bitmask as 8 hex digits.
+//exits 0 when the URIs match, 2 when they differ, 1 on a usage or allocation error.
+int main(int argc,char *argv[]) {
+	int i;
+	unsigned int ret;
+	struct uri *a;
+	struct uri *b;
+	if(argc < 3) {
+		fprintf(stderr,"usage: uricmp uri1 uri2\n");//we didn't ask for usage so it goes to stderr
+		return 1;
+	}
+	//urifromline does not write every member, so the structs must start out zeroed.
+	a=calloc(1,sizeof(*a));
+	b=calloc(1,sizeof(*b));
+	if(!a || !b) {
+		perror("calloc");
+		free(a);
+		free(b);
+		return 1;
+	}
+	urifromline(a,argv[1]);
+	urifromline(b,argv[2]);
+	for(i=0;i<8;i++) {
+		//passing a null pointer for %s is undefined behavior, so spell out missing parts.
+		printf("%s ? %s\n",a->A[i]?a->A[i]:"(null)",b->A[i]?b->A[i]:"(null)");
+	}
+	ret=uricmp(a,b);
+	printf("%08x\n",ret);
+	free(a);
+	free(b);
+	return ret ? 2 : 0;
+}
diff --git a/cuturl.c b/uricut.c
index f1cbf42..5fe8764 100644
--- a/cuturl.c
+++ b/uricut.c
@@ -1,3 +1,5 @@
+#include "uri.h"
+
#include <netdb.h>
#include <stdio.h>
#include <string.h>
@@ -6,10 +8,6 @@
#include <unistd.h>
#include <sys/wait.h>
-#include "url.h"
-
-#define MAGIC
-
/*
schemes are case sensitive but cononicals are lower case.
domain is case insensitive. return it lowercased?
@@ -32,32 +30,30 @@
#define F_PATH 1<<5
#define F_QUERY_STRING 1<<6
#define F_FRAGMENT_ID 1<<7
-#define F_WHOLE_URL 1<<8
+#define F_WHOLE_URI 1<<8
-char *long_opts[]={"scheme","username","password","domain","port","path","query_string","fragment_id","URL",0};
+char *long_opts[]={"scheme","username","password","domain","port","path","query_string","fragment_id","URI",0};
char *short_opts[]={"s","u","k","d","P","p","q","f","U"};
int main(int argc,char *argv[]) {
- char *url;
- char *name[2];
+ char *uri;
char *line=0;
- short args[256];//this needs to be a short to make room for the F_WHOLE_URL
+ short args[256];//this needs to be a short to make room for the F_WHOLE_URI
int i,j,c=0;
int size=1024;
- int status;
char fixme=0;
char using_stdin=1;
char malloced=0;
- struct url u;
+ struct uri u;
if(argc > 1) {
if(!strcmp(argv[1],"--help") || !strcmp(argv[1],"-h")) {
- printf("usage: echo urls | cuturl [options]\n");
- printf("usage: cuturl [options] url [options] [url]\n\n");
+ printf("usage: echo uris | uricut [options]\n");
+ printf("usage: uricut [options] uri [options] [uri]\n\n");
printf("options: \n");
for(i=0;long_opts[i];i++) {
printf(" -%s|--%s\n",short_opts[i],long_opts[i]);
}
- printf("To set default values use environment variables like: CUTURL_[OPTION]\n");
+ printf("To set default values use environment variables like: CUTURI_[OPTION]\n");
return 2;
}
}
@@ -120,51 +116,12 @@ int main(int argc,char *argv[]) {
for(i=0;line[i] && line[i] != '\n' && line[i] != '\r';i++);
line[i]=0;
- url=strdup(line);
- urlfromline(&u,line);
+ uri=strdup(line);
+ urifromline(&u,line);
// printf("scheme://username:password@domain:port/path?query_string#fragment_id\n\n");
//let's set them to what'll get printed now...
-#ifdef MAGIC
- magic_and_defaults(&u);
-/*
- u.scheme=AorB(u.scheme,AorB(getenv("CUTURL_SCHEME"),"DEFAULT"));
- u.username=AorB(u.username,AorB(getenv("CUTURL_USERNAME"),"DEFAULT"));
- u.password=AorB(u.password,AorB(getenv("CUTURL_PASSWORD"),"DEFAULT"));
- u.domain=AorB(u.domain,AorB(getenv("CURURL_DOMAIN"),"DEFAULT"));
- serv=getservbyname(u.scheme,strcmp(u.scheme,"udp")?"tcp":"udp");//gets default port for the scheme. http -> 80
- if(serv) snprintf(sport,sizeof(sport)-1,"%d",ntohs(serv->s_port));
- u.port=AorB(u.port,AorB(getenv("CUTURL_PORT"),(serv?sport:"DEFAULT")));
- u.path=AorB(u.path,AorB(getenv("CUTURL_PATH"),"DEFAULT"));
- u.query_string=AorB(u.query_string,AorB(getenv("CUTURL_QUERY_STRING"),"DEFAULT"));
- u.fragment_id=AorB(u.fragment_id,AorB(getenv("CUTURL_FRAGMENT_ID"),"DEFAULT"));
-*/
-#endif
-
- if((name[0]=getenv("CUTURL__"))) {
- setenv("CUTURL__SCHEME",u.scheme,1);
- setenv("CUTURL__USERNAME",u.username,1);
- setenv("CUTURL__PASSWORD",u.password,1);
- setenv("CUTURL__DOMAIN",u.domain,1);
- setenv("CUTURL__PORT",u.port,1);
- setenv("CUTURL__PATH",u.path,1);
- setenv("CUTURL__QUERY_STRING",u.query_string,1);
- setenv("CUTURL__FRAGMENT_ID",u.fragment_id,1);
- name[1]=0;
- switch(fork()) {
- case 0:
- execv(name[0],name);
- perror("execv");
- return errno;
- case -1:
- perror("fork");
- return errno;
- default:
- break;
- }
- wait(&status);
- } else {
if(c) {
for(i=0;i<c;i++) {
if(args[i]&F_SCHEME) printf("%s\n",AorB(u.scheme,""));
@@ -175,7 +132,7 @@ int main(int argc,char *argv[]) {
if(args[i]&F_PATH) printf("%s\n",AorB(u.path,""));
if(args[i]&F_QUERY_STRING) printf("%s\n",AorB(u.query_string,""));
if(args[i]&F_FRAGMENT_ID) printf("%s\n",AorB(u.fragment_id,""));
- if(args[i]&F_WHOLE_URL) printf("%s\n",url);
+ if(args[i]&F_WHOLE_URI) printf("%s\n",uri);
}
} else {
printf("scheme: %s\n",u.scheme);
@@ -186,10 +143,9 @@ int main(int argc,char *argv[]) {
printf("path: %s\n",u.path);
printf("query_string: %s\n",u.query_string);
printf("fragment_id: %s\n",u.fragment_id);
- printf("whole_url: %s\n",url);
+ printf("whole_uri: %s\n",uri);
}
- }
- free(url);//this is definitely malloc()d
+ free(uri);//this is definitely malloc()d
if(malloced) {
free(line);
malloced=0;
diff --git a/urlescape.c b/uriescape.c
index ce8e3e7..da3da7e 100644
--- a/urlescape.c
+++ b/uriescape.c
@@ -1,13 +1,13 @@
+#include "uri.h"
#include <stdio.h>
-#include "url.h"
int main(int argc,char *argv[]) {
int len;
char *out;
if(argc < 2) return 1;
- len=urlescapelength(argv[1],strlen(argv[1]));
+ len=uriescapelength(argv[1],strlen(argv[1]));
out=malloc(len+1);
- urlescape(argv[1],out,len);
+ uriescape(argv[1],out,len);
out[len]=0;
printf("%s\n",out);
return 0;
diff --git a/matchurl.c b/urimatch.c
index 47a2a20..42ee0aa 100644
--- a/matchurl.c
+++ b/urimatch.c
@@ -1,7 +1,7 @@
+#include "uri.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include "url.h"
#define LINE_LENGTH 1024
@@ -19,16 +19,16 @@ int match(char negate,char *part,char *arg) {
int main(int argc,char *argv[]) {
int i;
int ret=1;
- struct url u;
+ struct uri u;
char negate=0;
char *line=malloc(LINE_LENGTH);
char copy[LINE_LENGTH];
if(argc < 2) {
- printf("usage: matchurl [-][n][s|u|k|d|D|P|p|q|f] [string]\n");
+ printf("usage: urimatch [-][n][s|u|k|d|D|P|p|q|f] [string]\n");
printf("scheme://username:password@domain:port/path?query_string#fragment_id\n");
printf("s://u:k@d:P/p?q#f\n");
printf("The D flag is special. it matches its argument against the last bytes of the input url's domain.\n");
- printf("This allows matching of subdomains, like `echo epoch.ano | matchurl -D ano` would match.\n");
+ printf("This allows matching of subdomains, like `echo epoch.ano | urimatch -D ano` would match.\n");
printf("the 'n' flag can be put before any of the other flags to check for a missing.\n");
return 1;
}
@@ -37,7 +37,7 @@ int main(int argc,char *argv[]) {
if(strchr(line,'\n')) *strchr(line,'\n')=0;
strcpy(copy,line);
memset(&u,0,sizeof(u));
- urlfromline(&u,line);
+ urifromline(&u,line);
//use the character in argv[1] to match stdin against argv[2]. if match print whole line.
for(i=1;i<argc;i+=2) {
if(negate) {i--;}//we didn't really need to go that far.
@@ -54,9 +54,10 @@ int main(int argc,char *argv[]) {
case 'q': if(match(negate,u.query_string,argv[i+1])) { printf("%s\n",copy); ret=0;} break;
case 'f': if(match(negate,u.fragment_id,argv[i+1])) { printf("%s\n",copy); ret=0;} break;
case 'D': //not sure how to look for a missing one of these. it'd be like d.
- if(u.domain && argv[i+1] && strlen(u.domain) >= strlen(argv[i+1]) && !strncmp(u.domain+strlen(u.domain)-strlen(argv[i+1]),argv[i+1],strlen(argv[i+1])))
+ if(u.domain && argv[i+1] && strlen(u.domain) >= strlen(argv[i+1]) && !strncmp(u.domain+strlen(u.domain)-strlen(argv[i+1]),argv[i+1],strlen(argv[i+1]))) {
printf("%s\n",copy);
ret=0;
+ }
break;
default:
printf("unknown url part letter! '%c'\n",argv[i][0]);
diff --git a/printfurl b/uriprintf
index c141bc1..f79d067 100755
--- a/printfurl
+++ b/uriprintf
@@ -2,4 +2,4 @@
cutargs="$(printf "%s\n" "$1" | sed 's/%[^sukdPpqfU]//g' | tr '%' '\n' | tail -n+2 | sed 's/^\(.\).*/-\1/g' | tr '\n' ' ')"
count="$(echo $cutargs | tr '-' '\n' | grep -c .)"
printfargs="$(printf "%s\n" "$1" | sed 's/%[sukdPpqfU]/%s/g')"
-cuturl "$cutargs" | tr '\n' '\0' | xargs -n $count -0 printf "$printfargs"
+uricut "$cutargs" | tr '\n' '\0' | xargs -n $count -0 printf "$printfargs"
diff --git a/uristart b/uristart
new file mode 100755
index 0000000..78454b7
--- /dev/null
+++ b/uristart
@@ -0,0 +1,4 @@
+#!/bin/sh
+# usage: uristart uri
+# looks up the handler for the uri's scheme in ~/.config/uristart.conf ("scheme:command" lines),
+# fills the uri parts into the command with uriprintf and runs the result.
+scheme="$(printf "%s\n" "$1" | uricut -s)"
+# strip leading whitespace after "scheme:". a POSIX class is used because "\t" inside a bracket
+# expression only means tab in GNU sed; in POSIX sed it matches a backslash or the letter t.
+line="$(grep "^${scheme}:" ~/.config/uristart.conf | cut -d: -f2- | sed 's/^[[:space:]]*//')"
+# single quotes in the uri are escaped before eval, so the config must keep the % parts inside single quotes.
+eval "$(printf "%s\n" "$1" | sed 's/'\''/'\''\\'\'''\''/g' | uriprintf "$line")"
diff --git a/urlunescape.c b/uriunescape.c
index 618cd64..aafc6ea 100644
--- a/urlunescape.c
+++ b/uriunescape.c
@@ -1,10 +1,10 @@
+#include "uri.h"
#include <unistd.h>
-#include "url.h"
int main(int argc,char *argv[]) {
int len;
for(argv++,argc--;argc;argc--,argv++) {
- len=urlunescape(*argv,*argv);
+ len=uriunescape(*argv,*argv);
write(1,*argv,len);
if(argc-1) write(1," ",1);
}
diff --git a/url.h b/url.h
deleted file mode 100644
index 6304a48..0000000
--- a/url.h
+++ /dev/null
@@ -1,261 +0,0 @@
-#ifndef URL_H
-#define URL_H
-
-#include <netdb.h>
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-
-//reserved = gen-delims / sub-delims
-#define pe_gen_delims ":/?#[]@"
-#define pe_sub_delims "!$&'()*+,;="
-//char *pe_reserved[]=pe_gen_delims "" pe_sub_delims;
-#define pe_ALPHA "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
-#define pe_DIGIT "0123456789"
-#define pe_HPUT "-._~"
-//char *pe_unreserved[]=pe_ALPHA "" pe_DIGIT "" pe_HPUT;
-
-unsigned char rfc3086_percent_encoding[256];
-
-#define isxdigit(a) ((a >= 'a' && a <= 'f') || (a >= '0' && a <= '9') || (a >= 'A' && a <= 'F'))
-#define toupper(a) ((a >= 'a' && a <= 'z')?a-' ':a)
-
-char *reserved={
- pe_gen_delims
- pe_sub_delims
- pe_ALPHA
- pe_DIGIT
- pe_HPUT
-};
-
-int urlescapelength(char *in,int len) {
- int rlen=0;//be sure to add one to this return value if you plan on putting a null byte at the end.
- int i;
- for(i=0;i<len;i++) {
- rlen+=strchr(reserved,in[i])?1:3;
- }
- return rlen;
-}
-
-// make sure your out char * has enough space! use urlescapelength for it.
-void urlescape(char *in,char *out,int len) {
- int i;
- int j;
- for(i=0,j=0;i<len;i++) {
- if(strchr(reserved,in[i])) {
- out[j]=in[i];
- j++;
- } else {
- out[j]='%';
- j++;
- out[j]="0123456789ABCDEF"[(in[i] >> 4 & 0x15)];
- j++;
- out[j]="0123456789ABCDEF"[(in[i] % 16)];
- j++;
- }
- }
-}
-
-int urlunescape(char *in,char *out) {
- char *o=out;
- char *t;
- char a,b;
- char *s=in;
- if(!strchr(s,'%')) memmove(out,in,strlen(in));
- while((t=strchr(s,'%'))) {
- if(t-s) {//if there are actually bytes to copy.
- memmove(o,s,t-s);
- o+=(t-s);
- s+=(t-s);
- }
- if(isxdigit(t[1]) && isxdigit(t[2])) {
- s+=3;//skip the %XX
- a=toupper(t[1]);
- b=toupper(t[2]);
- *o=((a-'0'<10 ? a-'0' : a-'A'+10) << 4) + (b-'0'<10 ? b-'0' : b-'A'+10);
- o++;
- } else {
- s++;//skip just the %. the next character might be a % //TODO: look up what the "right" thing to do here is.
- *o='%';
- o++;
- }
- }
- //copy the last part.
- memmove(o,s,strlen(s));
- o[strlen(s)]=0;
- return o+strlen(s)-out;
-}
-
-struct url {
- char *scheme;
- char *username;
- char *password;
- char *domain;
- char *port;
- char *path;
- char *query_string;
- char *fragment_id;
-};
-
-
-/*
- schemes are case sensitive but cononicals are lower case.
- domain is case insensitive. return it lowercased?
- port is optional and in decimal
- path
- scheme://username:password@domain:port/path?query_string#fragment_id
- mailto:username@domain
-
- optional stuff:
- scheme, username, password, port, path, query_string, fragment_id
-*/
-
-void urlfromline(struct url *u,char *line) {
- int i;
- char hack=0;//we need to allow for // as host//path separator
- //split at first single / into line and path
- //this fails to split scheme://host//path into: scheme, host, /path. needs to be first single / or second double-or-more-/
- for(i=0;line[i];i++) {
- if(line[i] == '/' && line[i+1] == '/') {
- if(!hack) {//only skip out on the first // because it is probably used in the scheme.
- hack=1;
- i++;
- continue;
- }
- }
- if(line[i] == '/') {
- line[i]=0;
- u->path=line+i+1;
- break;
- }
- }
- if(u->path) {
- if(strchr(u->path,'?')) {
- u->query_string=strchr(u->path,'?');
- *u->query_string=0;
- u->query_string++;
- }
- }
-
- if(u->query_string) {
- if(strchr(u->query_string,'#')) {
- u->fragment_id=strchr(u->query_string,'#');
- *u->fragment_id=0;
- u->fragment_id++;
- }
- }
-
- if(strstr(line,"://")) {
- u->scheme=line;
- u->domain=strstr(line,"://");
- *u->domain=0;
- u->domain+=3;
- } else {
- u->domain=line;
- }
-
- if(u->domain) {
- if(strchr(u->domain,'@')) {
- u->username=u->domain;
- u->domain=strchr(u->domain,'@');
- *u->domain=0;
- u->domain++;
- }
- }
-
- if(u->username) {
- if(strchr(u->username,':')) {
- u->password=strchr(u->username,':');
- *u->password=0;
- u->password++;
- }
- }
-
- if(u->domain) {
- if(strchr(u->domain,']')) {//the end of an IPv6 address
- if(strchr(strchr(u->domain,']'),':')) {
- u->port=strchr(strchr(u->domain,']'),':');
- if(u->port[1] == '?') {//for magnet links
- u->port=0;
- } else {
- *u->port=0;
- u->port++;
- }
- }
- } else {
- if(strchr(u->domain,':')) {
- u->port=strchr(u->domain,':');
- if(u->port[1] == '?') {//for magnet links
- u->port=0;
- } else {
- *u->port=0;
- u->port++;
- }
- }
- }
- }
- if(u->port) {
- for(i=0;u->port[i];i++) {
- if(u->port[i] < '0' || u->port[i] > '9') {
- //this port number isn't a number!
- //it is probably a different portion of the url then... and the domain is probably the scheme.
- if(u->domain && !u->scheme) {
- u->scheme=u->domain;
- u->domain=0;
- }
- if(!u->path) {
- u->path=u->port;
- u->port=0;
- }
- break;
- }
- }
- }
-
- if(u->domain) {//for magnet links.
- if(strchr(u->domain,'?')) {
- u->query_string=strchr(u->domain,'?');
- *u->query_string=0;
- u->query_string++;
- }
- }
-
- if(u->domain) {
- if(strchr(u->domain,':') && !strchr(u->domain,']')) {//for scheme:?query_string
- u->scheme=u->domain;
- *strchr(u->scheme,':')=0;
- u->domain=0;
- }
- }
-
- if(!u->scheme && u->username) {//for mailto:
- u->scheme=u->username;
- u->username=u->password;
- u->password=0;
- }
-}
-
-#define AorB(a,b) ((a)?(a):(b))
-
-void magic_and_defaults(struct url *u) {
- struct servent *serv;
- char sport[10];
- u->scheme=AorB(u->scheme,AorB(getenv("URL_SCHEME"),"DEFAULT"));
- u->username=AorB(u->username,AorB(getenv("URL_USERNAME"),"DEFAULT"));
- u->password=AorB(u->password,AorB(getenv("URL_PASSWORD"),"DEFAULT"));
- u->domain=AorB(u->domain,AorB(getenv("URL_DOMAIN"),"DEFAULT"));
- serv=getservbyname(u->scheme,strcmp(u->scheme,"udp")?"tcp":"udp");//gets default port for the scheme. http -> 80
- if(serv) snprintf(sport,sizeof(sport)-1,"%d",ntohs(serv->s_port));
-// else snprintf(sport,sizeof(sport)-1,"%d",serv);
- u->port=AorB(u->port,AorB(getenv("URL_PORT"),(serv?strdup(sport):"DEFAULT")));
-
-// if(!strcmp(u->port,"DEFAULT")) {
- //this shouldn't happen most of the time. :/
-// printf("serv: %d\nsport: %s\nu->scheme: %s\n",serv,sport,u->scheme);
-// }
-
- u->path=AorB(u->path,AorB(getenv("URL_PATH"),"DEFAULT"));
- u->query_string=AorB(u->query_string,AorB(getenv("URL_QUERY_STRING"),"DEFAULT"));
- u->fragment_id=AorB(u->fragment_id,AorB(getenv("URL_FRAGMENT_ID"),"DEFAULT"));
-}
-#endif