diff options
-rw-r--r-- | cuturl.c | 191 | ||||
-rw-r--r-- | matchurl.c | 41 | ||||
-rw-r--r-- | url.c | 131 | ||||
-rw-r--r-- | url.h | 15 |
4 files changed, 378 insertions, 0 deletions
diff --git a/cuturl.c b/cuturl.c new file mode 100644 index 0000000..3f09020 --- /dev/null +++ b/cuturl.c @@ -0,0 +1,191 @@ +#include <netdb.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <unistd.h> +#include <sys/wait.h> + +#include "url.h" + +/* + schemes are case sensitive but cononicals are lower case. + domain is case insensitive. return it lowercased? + port is optional and in decimal + path + scheme://username:password@domain:port/path?query_string#fragment_id + mailto:username@domain + + optional stuff: + scheme, username, password, port, path, query_string, fragment_id +*/ + +#define AorB(a,b) ((a)?(a):(b)) + +#define F_SCHEME 1<<0 +#define F_USERNAME 1<<1 +#define F_PASSWORD 1<<2 +#define F_DOMAIN 1<<3 +#define F_PORT 1<<4 +#define F_PATH 1<<5 +#define F_QUERY_STRING 1<<6 +#define F_FRAGMENT_ID 1<<7 + +char *long_opts[]={"scheme","username","password","domain","port","path","query_string","fragment_id",0}; +char *short_opts[]={"s","u","k","d","P","p","q","f"}; + +int main(int argc,char *argv[]) { + char *name[2]; + char *line=0; + char sport[10]; + char args[256]; + struct servent *serv; + int i,j,c=0; + int size=1024; + int status; + char fixme=0; + char using_stdin=1; + char malloced=0; + struct url u; + if(argc > 1) { + if(!strcmp(argv[1],"--help") || !strcmp(argv[1],"-h")) { + printf("usage: echo urls | cuturl [options]\n"); + printf("usage: cuturl [options] url [options] [url]\n\n"); + printf("options: \n"); + for(i=0;long_opts[i];i++) { + printf(" -%s|--%s\n",short_opts[i],long_opts[i]); + } + printf("To set default values use environment variables like: CUTURL_[OPTION]\n"); + return 2; + } + } + argv++; + argc--; + while(1) { + u.scheme=0; + u.username=0; + u.password=0; + u.domain=0; + u.port=0; + u.path=0; + u.query_string=0; + u.fragment_id=0; + if(!using_stdin) c=0; + if(argc >= 1) { + for(;argc>0;argc--,argv++) { + for(i=0;long_opts[i];i++) { + if(!strncmp(*argv,"--",2)) { + 
if(!strcmp(*argv+2,long_opts[i])) { + args[c]=1<<i; + c++; + break; + } + } + } + fixme=0; + if(**argv=='-' && argv[0][1] != '-') { + for(j=1;argv[0][j];j++) { + for(i=0;short_opts[i];i++) { + if(argv[0][j]==*short_opts[i]) { + args[c]=1<<i; + c++; + fixme=1; + } + } + } + } + if(fixme) continue; + if(long_opts[i]) continue; + //if we get here we are at data instead of flags. work on it. + line=*argv; + using_stdin=0; + argc--; + argv++; + break; + } + } + if(!argc && !line) {//if we are out of arguments and it didn't include data + using_stdin=1; + } + if(using_stdin) { + line=malloc(size+1); + malloced=1; + if(!fgets(line,size,stdin)) { + return 0; + } + } + if(!line) return 0; + for(i=0;line[i] && line[i] != '\n' && line[i] != '\r';i++); + line[i]=0; + + urlfromline(&u,line); + + // printf("scheme://username:password@domain:port/path?query_string#fragment_id\n\n"); + //let's set them to what'll get printed now... + + u.scheme=AorB(u.scheme,AorB(getenv("CUTURL_SCHEME"),"DEFAULT")); + u.username=AorB(u.username,AorB(getenv("CUTURL_USERNAME"),"DEFAULT")); + u.password=AorB(u.password,AorB(getenv("CUTURL_PASSWORD"),"DEFAULT")); + u.domain=AorB(u.domain,AorB(getenv("CURURL_DOMAIN"),"DEFAULT")); + serv=getservbyname(u.scheme,strcmp(u.scheme,"udp")?"tcp":"udp");//gets default port for the scheme. 
http -> 80 + if(serv) snprintf(sport,sizeof(sport)-1,"%d",ntohs(serv->s_port)); + u.port=AorB(u.port,AorB(getenv("CUTURL_PORT"),(serv?sport:"DEFAULT"))); + u.path=AorB(u.path,AorB(getenv("CUTURL_PATH"),"DEFAULT")); + u.query_string=AorB(u.query_string,AorB(getenv("CUTURL_QUERY_STRING"),"DEFAULT")); + u.fragment_id=AorB(u.fragment_id,AorB(getenv("CUTURL_FRAGMENT_ID"),"DEFAULT")); + + if((name[0]=getenv("CUTURL__"))) { + setenv("CUTURL__SCHEME",u.scheme,1); + setenv("CUTURL__USERNAME",u.username,1); + setenv("CUTURL__PASSWORD",u.password,1); + setenv("CUTURL__DOMAIN",u.domain,1); + setenv("CUTURL__PORT",u.port,1); + setenv("CUTURL__PATH",u.path,1); + setenv("CUTURL__QUERY_STRING",u.query_string,1); + setenv("CUTURL__FRAGMENT_ID",u.fragment_id,1); + name[1]=0; + switch(fork()) { + case 0: + execv(name[0],name); + perror("execv"); + return errno; + case -1: + perror("fork"); + return errno; + default: + break; + } + wait(&status); + } else { + if(c) { + for(i=0;i<c;i++) { + if(args[i]&F_SCHEME) printf("%s\n",u.scheme); + if(args[i]&F_USERNAME) printf("%s\n",u.username); + if(args[i]&F_PASSWORD) printf("%s\n",u.password); + if(args[i]&F_DOMAIN) printf("%s\n",u.domain); + if(args[i]&F_PORT) printf("%s\n",u.port); + if(args[i]&F_PATH) printf("%s\n",u.path); + if(args[i]&F_QUERY_STRING) printf("%s\n",u.query_string); + if(args[i]&F_FRAGMENT_ID) printf("%s\n",u.fragment_id); + } + } else { + printf("scheme: %s\n",u.scheme); + printf("username: %s\n",u.username); + printf("password: %s\n",u.password); + printf("domain: %s\n",u.domain); + printf("port: %s\n",u.port); + printf("path: %s\n",u.path); + printf("query_string: %s\n",u.query_string); + printf("fragment_id: %s\n",u.fragment_id); + } + } + if(malloced) { + free(line); + malloced=0; + line=0; + } else { + line=0;//??? 
+ } + } + return 0; +} diff --git a/matchurl.c b/matchurl.c new file mode 100644 index 0000000..aeb4aa4 --- /dev/null +++ b/matchurl.c @@ -0,0 +1,41 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "url.h" + +#define LINE_LENGTH 1024 + +int main(int argc,char *argv[]) { + int i; + struct url u; + char *line=malloc(LINE_LENGTH); + char copy[LINE_LENGTH]; + if(argc < 3) { + printf("usage: matchurl [s|u|k|d|P|p|q|f] [string]\n"); + return 1; + } + while(fgets(line,LINE_LENGTH-1,stdin)) { + if(strchr(line,'\r')) *strchr(line,'\r')=0; + if(strchr(line,'\n')) *strchr(line,'\n')=0; + strcpy(copy,line); + urlfromline(&u,line); + //use the character in argv[1] to match stdin against argv[2]. if match print whole line. + for(i=1;i<argc;i+=2) { + switch(argv[i][0]) { + case 's': if(u.scheme && !strcmp(u.scheme,argv[i+1])) printf("%s\n",copy); + case 'u': if(u.username && !strcmp(u.username,argv[i+1])) printf("%s\n",copy); + case 'k': if(u.password && !strcmp(u.password,argv[i+1])) printf("%s\n",copy); + case 'd': if(u.domain && !strcmp(u.domain,argv[i+1])) printf("%s\n",copy); + case 'P': if(u.port && !strcmp(u.port,argv[i+1])) printf("%s\n",copy); + case 'p': if(u.path && !strcmp(u.path,argv[i+1])) printf("%s\n",copy); + case 'q': if(u.query_string && !strcmp(u.query_string,argv[i+1])) printf("%s\n",copy); + case 'f': if(u.fragment_id && !strcmp(u.fragment_id,argv[i+1])) printf("%s\n",copy); + break; + default: + printf("unknown url part letter! %c\n",argv[i][0]); + return 0; + } + } + } + return 0; +} @@ -0,0 +1,131 @@ +//#include <stdio.h> +#include <string.h> +//#include <stdlib.h> + +/* + schemes are case sensitive but cononicals are lower case. + domain is case insensitive. return it lowercased? 
/*
 URL format notes:
 port is optional and in decimal
 path
 scheme://username:password@domain:port/path?query_string#fragment_id
 mailto:username@domain

 optional stuff:
 scheme, username, password, port, path, query_string, fragment_id
*/

/* ==== url.h (new file) ==== */
/* Listed ahead of url.c so the declarations precede the definition below. */
#ifndef URL_H
#define URL_H
/*
 The parts of one URL. Every member is either null (part absent) or points
 into the line buffer that was handed to urlfromline().
*/
struct url {
    char *scheme;
    char *username;
    char *password;
    char *domain;
    char *port;
    char *path;
    char *query_string;
    char *fragment_id;
};

void urlfromline(struct url *u,char *line);
#endif

/* ==== url.c ==== */
/* As a separate file, url.c obtains the declarations above through #include "url.h". */

/*
 Splits `line` in place into the parts of a URL.

 u:    receives the result. It is reset first, so it need not be initialised
       by the caller. A null `u` is ignored.
 line: writable, NUL-terminated URL text. Separators inside it are overwritten
       with NUL and the members of *u point into it, so it must stay alive and
       unchanged for as long as *u is used. A null `line` leaves every member
       null.

 Besides scheme://username:password@domain:port/path?query_string#fragment_id
 this also accepts "scheme:?query_string" (magnet links) and
 "scheme:username@domain" (mailto).
*/
void urlfromline(struct url *u,char *line) {
    char *sep;
    char *from;
    size_t i;

    if(!u) return;
    *u=(struct url){0};
    if(!line) return;

    //split at first single / into line and path
    for(i=0;line[i];i++) {
        if(line[i] != '/') continue;
        if(line[i+1] == '/') {//the // after the scheme is not a path separator
            i++;
            continue;
        }
        line[i]=0;
        u->path=line+i+1;
        break;
    }

    if(u->path) {
        //the fragment starts at the first '#', whether or not a query precedes it
        if((sep=strchr(u->path,'#'))) {
            *sep=0;
            u->fragment_id=sep+1;
        }
        if((sep=strchr(u->path,'?'))) {
            *sep=0;
            u->query_string=sep+1;
        }
    }

    if((sep=strstr(line,"://"))) {
        u->scheme=line;
        *sep=0;
        u->domain=sep+3;
    } else {
        u->domain=line;
    }

    if((sep=strchr(u->domain,'@'))) {
        u->username=u->domain;
        *sep=0;
        u->domain=sep+1;
    }

    if(u->username && (sep=strchr(u->username,':'))) {
        *sep=0;
        u->password=sep+1;
    }

    //the port follows the first ':' -- searched after ']' when there is one,
    //because the colons inside an IPv6 address are not port separators
    from=strchr(u->domain,']');
    sep=strchr(from?from:u->domain,':');
    if(sep && sep[1] != '?') {//":?" belongs to a magnet link, not to a port
        *sep=0;
        u->port=sep+1;
    }

    //a query attached directly to the domain part (magnet links)
    if((sep=strchr(u->domain,'?'))) {
        *sep=0;
        u->query_string=sep+1;
        if(!u->fragment_id && (sep=strchr(u->query_string,'#'))) {
            *sep=0;
            u->fragment_id=sep+1;
        }
    }

    if(strchr(u->domain,':') && !strchr(u->domain,']')) {//for scheme:?query_string
        u->scheme=u->domain;
        *strchr(u->scheme,':')=0;
        u->domain=0;
    }

    if(!u->scheme && u->username) {//for mailto:
        u->scheme=u->username;
        u->username=u->password;
        u->password=0;
    }
}