summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorepochqwert <epoch@hacking.allowed.org>2015-08-22 03:36:15 -0500
committerepochqwert <epoch@hacking.allowed.org>2015-08-22 03:36:15 -0500
commitb7849c483393cf510cbe4bca137ce4520b636685 (patch)
tree28e6410126fac350d69c9f47a7588f7dfd639841
downloaduritools-b7849c483393cf510cbe4bca137ce4520b636685.tar.gz
uritools-b7849c483393cf510cbe4bca137ce4520b636685.zip
initial commit.
-rw-r--r--cuturl.c191
-rw-r--r--matchurl.c41
-rw-r--r--url.c131
-rw-r--r--url.h15
4 files changed, 378 insertions, 0 deletions
diff --git a/cuturl.c b/cuturl.c
new file mode 100644
index 0000000..3f09020
--- /dev/null
+++ b/cuturl.c
@@ -0,0 +1,191 @@
+#include <netdb.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+#include "url.h"
+
+/*
+ schemes are case sensitive but canonical forms are lower case.
+ domain is case insensitive. return it lowercased?
+ port is optional and in decimal
+ path
+ scheme://username:password@domain:port/path?query_string#fragment_id
+ mailto:username@domain
+
+ optional stuff:
+ scheme, username, password, port, path, query_string, fragment_id
+*/
+
+/* Yields a when it is non-null, otherwise b. Note that a is evaluated twice. */
+#define AorB(a,b) ((a)?(a):(b))
+
+/*
+ * One bit per URL component. Bit i corresponds to long_opts[i] and
+ * short_opts[i]. The expansions are parenthesized so the flags keep their
+ * value when used inside larger expressions.
+ */
+#define F_SCHEME (1<<0)
+#define F_USERNAME (1<<1)
+#define F_PASSWORD (1<<2)
+#define F_DOMAIN (1<<3)
+#define F_PORT (1<<4)
+#define F_PATH (1<<5)
+#define F_QUERY_STRING (1<<6)
+#define F_FRAGMENT_ID (1<<7)
+
+/*
+ * Option names indexed by bit position. Both tables end with a 0 sentinel so
+ * that code walking either of them can stop at the null entry.
+ */
+char *long_opts[]={"scheme","username","password","domain","port","path","query_string","fragment_id",0};
+char *short_opts[]={"s","u","k","d","P","p","q","f",0};
+
+/*
+ * cuturl: split URLs into their components and print the requested ones.
+ *
+ * Arguments are scanned left to right. "--name" and "-x" (letters may be
+ * combined, e.g. "-sd") select components to print, in the order given. The
+ * first argument that is not a recognised option is taken as a URL. If no
+ * component was selected, every component is printed with a label. Once the
+ * arguments run out, URLs are read from stdin, one per line, until EOF.
+ *
+ * A component missing from the URL is replaced by the CUTURL_<NAME>
+ * environment variable if it is set. A missing port otherwise falls back to
+ * the service port that getservbyname() reports for the scheme. Anything
+ * still missing is shown as the literal "DEFAULT".
+ *
+ * If the environment variable CUTURL__ is set, nothing is printed. Instead the
+ * components are exported as CUTURL__<NAME> and the program named by CUTURL__
+ * is run (with no arguments) once per URL.
+ *
+ * Returns 2 after printing the help text, 1 when too many options are given
+ * or memory cannot be allocated, the errno value when fork/execv fails, and
+ * 0 at end of input.
+ */
+int main(int argc,char *argv[]) {
+ char *name[2];
+ char *line=0;
+ char sport[10];
+ unsigned char args[256];//selected components in output order, one F_* bit each
+ struct servent *serv;
+ int i,j,c=0;
+ int size=1024;
+ int status;
+ int saved_errno;
+ char matched;
+ char using_stdin=1;
+ char malloced=0;
+ struct url u;
+ if(argc > 1) {
+  if(!strcmp(argv[1],"--help") || !strcmp(argv[1],"-h")) {
+   printf("usage: echo urls | cuturl [options]\n");
+   printf("usage: cuturl [options] url [options] [url]\n\n");
+   printf("options: \n");
+   for(i=0;long_opts[i];i++) {
+    printf(" -%s|--%s\n",short_opts[i],long_opts[i]);
+   }
+   printf("To set default values use environment variables like: CUTURL_[OPTION]\n");
+   return 2;
+  }
+ }
+ argv++;
+ argc--;
+ while(1) {
+  u.scheme=0;
+  u.username=0;
+  u.password=0;
+  u.domain=0;
+  u.port=0;
+  u.path=0;
+  u.query_string=0;
+  u.fragment_id=0;
+  //options given on the command line apply only to the URL that follows them
+  if(!using_stdin) c=0;
+  for(;argc>0;argc--,argv++) {
+   matched=0;
+   if(!strncmp(*argv,"--",2)) {
+    for(i=0;long_opts[i];i++) {
+     if(!strcmp(*argv+2,long_opts[i])) {
+      if(c >= (int)sizeof(args)) {
+       fprintf(stderr,"cuturl: too many field options\n");
+       return 1;
+      }
+      args[c++]=1<<i;
+      matched=1;
+      break;
+     }
+    }
+   } else if(**argv=='-') {
+    for(j=1;argv[0][j];j++) {
+     //long_opts is 0-terminated and has one entry per short option,
+     //so it bounds this walk without relying on short_opts' own end.
+     for(i=0;long_opts[i];i++) {
+      if(argv[0][j]==*short_opts[i]) {
+       if(c >= (int)sizeof(args)) {
+        fprintf(stderr,"cuturl: too many field options\n");
+        return 1;
+       }
+       args[c++]=1<<i;
+       matched=1;
+      }
+     }
+    }
+   }
+   if(matched) continue;
+   //if we get here we are at data instead of flags. work on it.
+   line=*argv;
+   using_stdin=0;
+   argc--;
+   argv++;
+   break;
+  }
+  //NOTE(review): this also switches to stdin after the last URL taken from
+  //the command line, so "cuturl url" goes on to read stdin. confirm intended.
+  if(!argc && !line) {//if we are out of arguments and it didn't include data
+   using_stdin=1;
+  }
+  if(using_stdin) {
+   line=malloc(size+1);
+   if(!line) {
+    perror("malloc");
+    return 1;
+   }
+   malloced=1;
+   if(!fgets(line,size,stdin)) {
+    free(line);
+    return 0;
+   }
+  }
+  if(!line) return 0;
+  line[strcspn(line,"\r\n")]=0;//cut at the first CR or LF
+
+  urlfromline(&u,line);
+
+  // printf("scheme://username:password@domain:port/path?query_string#fragment_id\n\n");
+  //let's set them to what'll get printed now...
+
+  u.scheme=AorB(u.scheme,AorB(getenv("CUTURL_SCHEME"),"DEFAULT"));
+  u.username=AorB(u.username,AorB(getenv("CUTURL_USERNAME"),"DEFAULT"));
+  u.password=AorB(u.password,AorB(getenv("CUTURL_PASSWORD"),"DEFAULT"));
+  u.domain=AorB(u.domain,AorB(getenv("CUTURL_DOMAIN"),"DEFAULT"));
+  serv=getservbyname(u.scheme,strcmp(u.scheme,"udp")?"tcp":"udp");//gets default port for the scheme. http -> 80
+  if(serv) snprintf(sport,sizeof(sport),"%d",ntohs(serv->s_port));
+  u.port=AorB(u.port,AorB(getenv("CUTURL_PORT"),(serv?sport:"DEFAULT")));
+  u.path=AorB(u.path,AorB(getenv("CUTURL_PATH"),"DEFAULT"));
+  u.query_string=AorB(u.query_string,AorB(getenv("CUTURL_QUERY_STRING"),"DEFAULT"));
+  u.fragment_id=AorB(u.fragment_id,AorB(getenv("CUTURL_FRAGMENT_ID"),"DEFAULT"));
+
+  if((name[0]=getenv("CUTURL__"))) {
+   //hand the components to an external program through its environment
+   setenv("CUTURL__SCHEME",u.scheme,1);
+   setenv("CUTURL__USERNAME",u.username,1);
+   setenv("CUTURL__PASSWORD",u.password,1);
+   setenv("CUTURL__DOMAIN",u.domain,1);
+   setenv("CUTURL__PORT",u.port,1);
+   setenv("CUTURL__PATH",u.path,1);
+   setenv("CUTURL__QUERY_STRING",u.query_string,1);
+   setenv("CUTURL__FRAGMENT_ID",u.fragment_id,1);
+   name[1]=0;
+   switch(fork()) {
+    case 0:
+     execv(name[0],name);
+     saved_errno=errno;//perror may change errno
+     perror("execv");
+     return saved_errno;
+    case -1:
+     saved_errno=errno;
+     perror("fork");
+     return saved_errno;
+    default:
+     break;
+   }
+   wait(&status);
+  } else {
+   if(c) {
+    for(i=0;i<c;i++) {
+     if(args[i]&F_SCHEME) printf("%s\n",u.scheme);
+     if(args[i]&F_USERNAME) printf("%s\n",u.username);
+     if(args[i]&F_PASSWORD) printf("%s\n",u.password);
+     if(args[i]&F_DOMAIN) printf("%s\n",u.domain);
+     if(args[i]&F_PORT) printf("%s\n",u.port);
+     if(args[i]&F_PATH) printf("%s\n",u.path);
+     if(args[i]&F_QUERY_STRING) printf("%s\n",u.query_string);
+     if(args[i]&F_FRAGMENT_ID) printf("%s\n",u.fragment_id);
+    }
+   } else {
+    printf("scheme: %s\n",u.scheme);
+    printf("username: %s\n",u.username);
+    printf("password: %s\n",u.password);
+    printf("domain: %s\n",u.domain);
+    printf("port: %s\n",u.port);
+    printf("path: %s\n",u.path);
+    printf("query_string: %s\n",u.query_string);
+    printf("fragment_id: %s\n",u.fragment_id);
+   }
+  }
+  //only buffers read from stdin are ours to free; argv strings are not
+  if(malloced) {
+   free(line);
+   malloced=0;
+  }
+  line=0;
+ }
+ return 0;
+}
diff --git a/matchurl.c b/matchurl.c
new file mode 100644
index 0000000..aeb4aa4
--- /dev/null
+++ b/matchurl.c
@@ -0,0 +1,41 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "url.h"
+
+#define LINE_LENGTH 1024
+
+/*
+ * matchurl: print the lines of stdin whose URL component equals a given string.
+ *
+ * Arguments come in pairs: a component letter (s scheme, u username,
+ * k password, d domain, P port, p path, q query_string, f fragment_id)
+ * followed by the string that component must equal. Each line of stdin is
+ * parsed as a URL and printed, unmodified, once for every pair that matches.
+ *
+ * Returns 1 after printing the usage text (too few arguments or a letter
+ * without a string), and 0 at end of input or on an unknown component letter.
+ */
+int main(int argc,char *argv[]) {
+ int i;
+ struct url u;
+ char line[LINE_LENGTH];
+ char copy[LINE_LENGTH];
+ const char *part;
+ //every letter needs its string, so the argument count must be even
+ if(argc < 3 || (argc-1)%2 != 0) {
+  printf("usage: matchurl [s|u|k|d|P|p|q|f] [string]\n");
+  return 1;
+ }
+ while(fgets(line,sizeof(line),stdin)) {
+  line[strcspn(line,"\r\n")]=0;//cut at the first CR or LF
+  strcpy(copy,line);//urlfromline() chops up line, keep the original for output
+  u=(struct url){0};//components not present in this line must read as null
+  urlfromline(&u,line);
+  //use the character in argv[i] to match stdin against argv[i+1]. if match print whole line.
+  for(i=1;i+1<argc;i+=2) {
+   switch(argv[i][0]) {
+    case 's': part=u.scheme; break;
+    case 'u': part=u.username; break;
+    case 'k': part=u.password; break;
+    case 'd': part=u.domain; break;
+    case 'P': part=u.port; break;
+    case 'p': part=u.path; break;
+    case 'q': part=u.query_string; break;
+    case 'f': part=u.fragment_id; break;
+    default:
+     printf("unknown url part letter! %c\n",argv[i][0]);
+     return 0;
+   }
+   if(part && !strcmp(part,argv[i+1])) printf("%s\n",copy);
+  }
+ }
+ return 0;
+}
diff --git a/url.c b/url.c
new file mode 100644
index 0000000..3fd2dae
--- /dev/null
+++ b/url.c
@@ -0,0 +1,131 @@
+//#include <stdio.h>
+#include <string.h>
+//#include <stdlib.h>
+
+/*
+ schemes are case sensitive but canonical forms are lower case.
+ domain is case insensitive. return it lowercased?
+ port is optional and in decimal
+ path
+ scheme://username:password@domain:port/path?query_string#fragment_id
+ mailto:username@domain
+
+ optional stuff:
+ scheme, username, password, port, path, query_string, fragment_id
+*/
+
+#include "url.h"
+
+/*
+struct url {
+ char *scheme;
+ char *username;
+ char *password;
+ char *domain;
+ char *port;
+ char *path;
+ char *query_string;
+ char *fragment_id;
+}
+*/
+
+/*
+ * Split a URL of the form
+ *  scheme://username:password@domain:port/path?query_string#fragment_id
+ * into its components. "mailto:user@domain" and "scheme:?query" (magnet
+ * style) inputs are also recognised.
+ *
+ * u    receives the result. Every member is reset first, so a component that
+ *      is not present in the input is left as a null pointer.
+ * line the text to parse. It is modified in place: separators are overwritten
+ *      with NUL and the members of u point into this buffer, so it must be
+ *      writable and must stay alive for as long as u is used.
+ *
+ * Nothing is allocated and nothing is returned. A null u or line is ignored
+ * (with u cleared when only line is null).
+ */
+void urlfromline(struct url *u,char *line) {
+ char *sep;
+ int i;
+
+ if(!u) return;
+ //start from a clean slate so callers do not have to pre-zero the struct
+ u->scheme=0;
+ u->username=0;
+ u->password=0;
+ u->domain=0;
+ u->port=0;
+ u->path=0;
+ u->query_string=0;
+ u->fragment_id=0;
+ if(!line) return;
+
+ //split at first single / into line and path
+ for(i=0;line[i];i++) {
+  if(line[i] == '/' && line[i+1] == '/') {
+   i++;
+   continue;
+  }
+  if(line[i] == '/') {
+   line[i]=0;
+   u->path=line+i+1;
+   break;
+  }
+ }
+
+ if(u->path) {
+  //the fragment runs from the first # to the end, so take it off before
+  //looking for ? - that way a path without a query still gets its fragment
+  //and a ? inside the fragment is not mistaken for a query.
+  if((sep=strchr(u->path,'#'))) {
+   *sep=0;
+   u->fragment_id=sep+1;
+  }
+  if((sep=strchr(u->path,'?'))) {
+   *sep=0;
+   u->query_string=sep+1;
+  }
+ }
+
+ if((sep=strstr(line,"://"))) {
+  u->scheme=line;
+  *sep=0;
+  u->domain=sep+3;
+ } else {
+  u->domain=line;
+ }
+
+ if((sep=strchr(u->domain,'@'))) {
+  u->username=u->domain;
+  *sep=0;
+  u->domain=sep+1;
+ }
+
+ if(u->username && (sep=strchr(u->username,':'))) {
+  *sep=0;
+  u->password=sep+1;
+ }
+
+ //the port follows the first : after the host. for a bracketed IPv6
+ //address only a : after the closing ] counts.
+ sep=strchr(u->domain,']');
+ sep=strchr(sep?sep:u->domain,':');
+ if(sep && sep[1] != '?') {//":?" is left alone, for magnet links
+  *sep=0;
+  u->port=sep+1;
+ }
+
+ if((sep=strchr(u->domain,'?'))) {//for magnet links.
+  *sep=0;
+  u->query_string=sep+1;
+ }
+
+ if(strchr(u->domain,':') && !strchr(u->domain,']')) {//for scheme:?query_string
+  u->scheme=u->domain;
+  *strchr(u->scheme,':')=0;
+  u->domain=0;
+ }
+
+ if(!u->scheme && u->username) {//for mailto:
+  //"mailto:user@domain" was split as username "mailto", password "user"
+  u->scheme=u->username;
+  u->username=u->password;
+  u->password=0;
+ }
+}
diff --git a/url.h b/url.h
new file mode 100644
index 0000000..5e9bea8
--- /dev/null
+++ b/url.h
@@ -0,0 +1,15 @@
+#ifndef URL_H
+#define URL_H
+/*
+ * The components of one parsed URL, laid out as
+ *  scheme://username:password@domain:port/path?query_string#fragment_id
+ *
+ * urlfromline() fills the members with pointers into the line buffer it was
+ * given, so they are only valid while that buffer is. Set every member to 0
+ * before the call (as cuturl.c does) so that a component missing from the
+ * URL reads as a null pointer.
+ */
+struct url {
+ char *scheme; /* text before "://", or before ':' for scheme:?query and mailto-style input */
+ char *username; /* text before '@', up to an optional ':' */
+ char *password; /* text between ':' and '@' */
+ char *domain; /* host part; set to 0 for the scheme:?query form */
+ char *port; /* text after the ':' that follows the host (after ']' for bracketed IPv6) */
+ char *path; /* text after the first single '/', without that leading '/' */
+ char *query_string; /* text after '?' */
+ char *fragment_id; /* text after '#' */
+};
+
+/*
+ * Split line into URL components and store them in u.
+ * line is modified in place (separators are overwritten with NUL) and the
+ * members of u point into it, so it must be writable.
+ */
+void urlfromline(struct url *u,char *line);
+#endif