[Toybox] Implement wget
Lipi C. H. Lee
lipisoft at gmail.com
Thu Feb 25 00:28:20 PST 2016
Thanks for your comment, Felix.
I am sorry my response is so late.
As your comments, I modified some code.
wget: clean up
- shorten error messages
- replace mk_rq with sprintf
- remove struct and defines
- change unsigned int to unsigned
On Fri, Feb 19, 2016 at 8:44 PM, Felix Janda <felix.janda at posteo.de> wrote:
> Lipi C. H. Lee wrote:
> > implement simple 'wget' and port name can be specified in URL if default
> > port 80 is not used.
> > It may be added to toys/pending directory.
>
> Thanks for your submission!
>
> Some comments below.
>
> In general, try simplifying the error messages:
>
>
> http://lists.landley.net/pipermail/toybox-landley.net/2013-April/000850.html
>
>
I changed them shortly.
> Also, generally functions are only added when they can be called at
> least twice. See
>
>
> http://lists.landley.net/pipermail/toybox-landley.net/2013-April/000919.html
>
>
I will check it later.
> (URLs taken from http://landley.net/toybox/cleanup.html)
>
> > /* wget.c - Simple downloader to get the resource file in HTTP server
> > *
> > * Copyright 2016 Lipi C.H. Lee <lipisoft at gmail.com>
> > *
> >
> > USE_WGET(NEWTOY(wget, "f:", TOYFLAG_USR|TOYFLAG_BIN))
> >
> > config WGET
> > bool "wget"
> > default n
> > help
> > usage: wget -f filename URL
> > -f filename: specify the filename to be saved
> > URL: HTTP uniform resource location and only HTTP, not HTTPS
> >
> > examples:
> > wget -f index.html http://www.example.com
> > wget -f sample.jpg http://www.example.com:8080/sample.jpg
> > */
> >
> > #define FOR_wget
> > #include "toys.h"
> >
> > GLOBALS(
> > char *filename;
> > )
> >
> > #define HN_LEN 128 // HOSTNAME MAX LENGTH
> > #define PATH_LEN 256 // PATH MAX LENGTH
> >
> > struct httpinfo {
> > char hostname[HN_LEN];
> > char port[6]; // MAX port value: 65535
> > char path[PATH_LEN];
> > };
>
> In the code base, global defines are usually avoided. If the functions
> were inlined, this struct would actually be unecessary and HN_LEN
> could be sizeof hostname.
>
>
I removed defines and struct.
>
> > // extract hostname from url
> > static unsigned int get_hn(char *url, char *hn) {
> > unsigned int i;
> >
> > for (i = 0; url[i] != '\0' && url[i] != ':' && url[i] != '/'; i++) {
> > if(i >= HN_LEN)
> > error_exit("The hostname's length is lower than %d.", HN_LEN);
>
> You mean "larger"
>
I shortened it.
>
> > hn[i] = url[i];
> > }
> > hn[i] = '\0';
> >
> > return i;
> > }
> >
> > // extract port number
> > static void get_port(char *url, char *port, unsigned int *url_i) {
> > unsigned int i;
> >
> > for (i = 0; url[i] != '\0' && url[i] != '/'; i++, (*url_i)++) {
> > if('0' <= url[i] && url[i] <= '9') port[i] = url[i];
> > else error_exit("Port is invalid");
> > }
> > if(i <= 6) port[i] = '\0';
> > else error_exit("Port number is too long");
> > }
> >
> > // get http infos in URL
> > static void get_info(struct httpinfo *hi, char *url) {
> > unsigned int i = 7, len;
> >
> > if (strncmp(url, "http://", i)) error_exit("Only HTTP can be
> supported.");
> > len = get_hn(url+i, hi->hostname);
> > i += len;
> >
> > // get port if exists
> > if (url[i] == ':') {
> > i++;
> > get_port(url+i, hi->port, &i);
> > } else strcpy(hi->port, "80");
> >
> > // get uri in URL
> > if (url[i] == '\0') strcpy(hi->path, "/");
> > else if (url[i] == '/') {
> > if (strlen(url+i) < PATH_LEN) strcpy(hi->path, url+i);
> > else error_exit("The URL path's length is less than %d.", PATH_LEN);
> > } else error_exit("The URL is NOT valid.");
> > }
> >
> > // connect to any IPv4 or IPv6 server
> > static int conn_svr(const char *hostname, const char *port) {
> > struct addrinfo hints, *result, *rp;
> > int sock;
> >
> > memset(&hints, 0, sizeof(struct addrinfo));
> > hints.ai_family = AF_UNSPEC;
> > hints.ai_socktype = SOCK_STREAM;
> > hints.ai_flags = 0;
> > hints.ai_protocol = 0;
> >
> > if ((errno = getaddrinfo(hostname, port, &hints, &result)))
> > error_exit("getaddrinfo: %s", gai_strerror(errno));
> >
> > // try all address list(IPv4 or IPv6) until success
> > for (rp = result; rp; rp = rp->ai_next) {
> > if ((sock = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol))
> > == -1) {
> > perror_msg("Socket Error");
> > continue;
> > }
> > if (connect(sock, rp->ai_addr, rp->ai_addrlen) != -1)
> > break; // succeed in connecting to any server IP
> > else perror_msg("Connect Error");
> > close(sock);
> > }
> > freeaddrinfo(result);
> > if(!rp) error_exit("Can not connect to HTTP server");
> >
> > return sock;
> > }
> >
> > // make HTTP request header field
> > static void mk_fld(char *name, char *value) {
> > strcat(toybuf, name);
> > strcat(toybuf, ": ");
> > strcat(toybuf, value);
> > strcat(toybuf, "\r\n");
> > }
> >
> > // make http request
> > static void mk_rq(char *path) {
> > strcpy(toybuf, "GET ");
> > strcat(toybuf, path);
> > strcat(toybuf, " HTTP/1.1\r\n");
> > }
>
> Why not sprintf() for these?
>
> I replaced mk_rq to sprintf.
> >
> > // get http response body starting address and its length
> > static char *get_body(size_t len, size_t *body_len) {
> > unsigned int i;
> >
> > for (i = 0; i < len-4; i++)
> > if (!strncmp(toybuf+i, "\r\n\r\n", 4)) break;
> >
> > *body_len = len-i-4;
> > return toybuf+i+4;
> > }
>
> Why not strstr()?
>
I need not only http body's address but also its length in toybuf.
> >
> > void wget_main(void)
> > {
> > int sock;
> > struct httpinfo hi;
> > FILE *fp;
> > size_t len, body_len;
> > char *body, *result, *rc, *r_str, ua[18] = "toybox wget/", ver[6];
> >
> > // TODO extract filename to be saved from URL
> > if (!(toys.optflags & FLAG_f))
> > help_exit("The filename to be saved should be needed.");
> > if (fopen(TT.filename, "r"))
> > error_exit("The file(%s) you specified already exists.",
> TT.filename);
>
> fopen can fail for other reasons. Why not use perror_exit?
>
I did.
>
> >
> > if(!toys.optargs[0]) help_exit("The URL should be specified.");
> > get_info(&hi, toys.optargs[0]);
> >
> > sock = conn_svr(hi.hostname, hi.port);
> >
> > // compose HTTP request
> > mk_rq(hi.path);
> > mk_fld("Host", hi.hostname);
> > strncpy(ver, TOYBOX_VERSION, 5);
> > strcat(ua, ver);
> > mk_fld("User-Agent", ua);
> > mk_fld("Connection", "close");
> > strcat(toybuf, "\r\n");
> >
> > // send the HTTP request
> > len = strlen(toybuf);
> > if (write(sock, toybuf, len) != len) perror_exit("HTTP GET failed.");
>
> This gives something like: "HTTP GET failed.: Broken pipe"
>
> I shortened it.
> >
> > // read HTTP response
> > if ((len = read(sock, toybuf, 4096)) == -1)
> > perror_exit("HTTP response failed.");
> > if (!strstr(toybuf, "\r\n\r\n"))
> > error_exit("HTTP response header is too long.");
> > body = get_body(len, &body_len);
> > result = strtok(toybuf, "\r");
> > strtok(result, " ");
> > rc = strtok(NULL, " ");
> > r_str = strtok(NULL, " ");
> >
> > // HTTP res code check
> > // TODO handle HTTP 302 Found(Redirection)
> > if (strcmp(rc, "200")) error_exit("HTTP response: %s(%s).", rc, r_str);
> >
> > if (!(fp = fopen(TT.filename, "w"))) perror_exit("File write error");
> > if (fwrite(body, sizeof(char), body_len, fp) != body_len)
>
> sizeof(char)=1
>
I did it.
>
> > error_exit("File write is not successful.");
> > while ((len = read(sock, toybuf, 4096)) > 0)
>
> len is unsigned. So the case when read returns -1 is not correctly handled.
>
I changed it to ssize_t type.
>
> > if (fwrite(toybuf, sizeof(char), len, fp) != len)
> > error_exit("File write is not successful.");
> > if (fclose(fp) == EOF) perror_exit("File Close Error");
> > }
>
>
> --Felix
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.landley.net/pipermail/toybox-landley.net/attachments/20160225/eced415f/attachment-0002.htm>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-wget_cleanup.patch
Type: application/octet-stream
Size: 6851 bytes
Desc: not available
URL: <http://lists.landley.net/pipermail/toybox-landley.net/attachments/20160225/eced415f/attachment-0002.obj>
More information about the Toybox
mailing list