470,849 Members | 1,634 Online
Bytes | Developer Community
New Post

Home Posts Topics Members FAQ

Post your question to a community of 470,849 developers. It's quick & easy.

Can this conversion code be simplified?

Hello, I have IPv4-numbers in the following format:
"\\x0A\\x11\\x8C\\x01"

Now I need each byte as an int. I wrote the following test program:
#include <stdio.h>
#include <stdlib.h>

static void to_ip(const char *, int *);
static void extract_substring(const char *, u_short, u_short, char *);

int
main(void)
{
const char *ip_as_string="\\x0A\\x11\\x8C\\x01";

int ip[4];

to_ip(ip_as_string, ip);

printf("IP is %i.%i.%i.%i\n", ip[0], ip[1], ip[2], ip[3]);

return 0;
}

static void
extract_substring(const char *s, u_short start, u_short end, char *out)
{
u_short out_index = 0;

for(; start <= end; ++start)
out[out_index++] = s[start];

out[++start] = '\0';
}

static void
to_ip(const char *ip_as_string, int *ip)
{
char sub[5] = "0";
char *ptr = sub;
u_short start_index = 1;
u_short end_index = 3;
u_short i = 0;

for(; i < 4; ++i)
{
extract_substring(ip_as_string, start_index, end_index, ptr + 1);

start_index += 4;
end_index += 4;

ip[i] = strtol(sub, NULL, 16);
}
}

It compiles cleanly and produces the following output:
$ ./convert_to_ip.exe
IP is 10.17.140.1
But since I am very poor on string manipulations in C, I'm guessing
there are alot cleaner ways to do this. Please help me improve the
code. It's a homework problem, but as you can see I attempted to solve
it myself first.

/ E

Apr 8 '06 #1
8 1512
Eric Lilja wrote:
Hello, I have IPv4-numbers in the following format:
"\\x0A\\x11\\x8C\\x01"

Now I need each byte as an int. [...]


Consider using sscanf():

unsigned int b0, b1, b2, b3;
if (sscanf(string, "\\x%x\\x%x\\x%x\\x%x",
&b0, &b1, &b2, &b3) == 4) ...

I say "consider" rather than "use" because sscanf() is
fairly tolerant in what it will accept as a number, and
it does not insist on converting the entire input string.
For example, the above format would accept all of

"\\x1\\x23\\x456\\x789A"
"\\x a\\x 11\\x 8c\\x -1"
"\\x0A\\x11\\x8C\\x01,Drink Billy Beer"

If you are concerned about detecting and rejecting such
departures from the expected format, sscanf() may not be
the right tool. If not, though, its ease of use may make
it the weapon of choice.

--
Eric Sosman
es*****@acm-dot-org.invalid
Apr 8 '06 #2
Eric Lilja wrote:
Hello, I have IPv4-numbers in the following format:
"\\x0A\\x11\\x8C\\x01"

Now I need each byte as an int. I wrote the following test program:
#include <stdio.h>
#include <stdlib.h>

static void to_ip(const char *, int *);
static void extract_substring(const char *, u_short, u_short, char *);

int
main(void)
{
const char *ip_as_string="\\x0A\\x11\\x8C\\x01";

int ip[4];

to_ip(ip_as_string, ip);

printf("IP is %i.%i.%i.%i\n", ip[0], ip[1], ip[2], ip[3]);

return 0;
}

static void
extract_substring(const char *s, u_short start, u_short end, char *out)
{
u_short out_index = 0;

for(; start <= end; ++start)
out[out_index++] = s[start];

out[++start] = '\0';
}

static void
to_ip(const char *ip_as_string, int *ip)
{
char sub[5] = "0";
char *ptr = sub;
u_short start_index = 1;
u_short end_index = 3;
u_short i = 0;

for(; i < 4; ++i)
{
extract_substring(ip_as_string, start_index, end_index, ptr + 1);

start_index += 4;
end_index += 4;

ip[i] = strtol(sub, NULL, 16);
}
}

It compiles cleanly and produces the following output:
$ ./convert_to_ip.exe
IP is 10.17.140.1
But since I am very poor on string manipulations in C, I'm guessing
there are alot cleaner ways to do this. Please help me improve the
code. It's a homework problem, but as you can see I attempted to solve
it myself first.

/ E

I don't know yet. When I compile the code you posted I get..

s2ip.c:5: parse error before "u_short"
s2ip.c:19: parse error before "u_short"
s2ip.c: In function `extract_substring':
s2ip.c:21: `u_short' undeclared (first use in this function)
s2ip.c:21: (Each undeclared identifier is reported only once
s2ip.c:21: for each function it appears in.)
s2ip.c:21: parse error before "out_index"
s2ip.c:22: `start' undeclared (first use in this function)
s2ip.c:22: `end' undeclared (first use in this function)
s2ip.c:23: `out' undeclared (first use in this function)
s2ip.c:23: `out_index' undeclared (first use in this function)
s2ip.c:23: `s' undeclared (first use in this function)
s2ip.c: In function `to_ip':
s2ip.c:32: `u_short' undeclared (first use in this function)
s2ip.c:32: parse error before "start_index"
s2ip.c:35: `i' undeclared (first use in this function)
s2ip.c:36: `start_index' undeclared (first use in this function)
s2ip.c:36: `end_index' undeclared (first use in this function)

Which puts me off a bit.

--
Joe Wright
"Everything should be made as simple as possible, but not simpler."
--- Albert Einstein ---
Apr 8 '06 #3

Joe Wright wrote:
Eric Lilja wrote:
Hello, I have IPv4-numbers in the following format:
"\\x0A\\x11\\x8C\\x01"

Now I need each byte as an int. I wrote the following test program:
#include <stdio.h>
#include <stdlib.h>

static void to_ip(const char *, int *);
static void extract_substring(const char *, u_short, u_short, char *);

int
main(void)
{
const char *ip_as_string="\\x0A\\x11\\x8C\\x01";

int ip[4];

to_ip(ip_as_string, ip);

printf("IP is %i.%i.%i.%i\n", ip[0], ip[1], ip[2], ip[3]);

return 0;
}

static void
extract_substring(const char *s, u_short start, u_short end, char *out)
{
u_short out_index = 0;

for(; start <= end; ++start)
out[out_index++] = s[start];

out[++start] = '\0';
}

static void
to_ip(const char *ip_as_string, int *ip)
{
char sub[5] = "0";
char *ptr = sub;
u_short start_index = 1;
u_short end_index = 3;
u_short i = 0;

for(; i < 4; ++i)
{
extract_substring(ip_as_string, start_index, end_index, ptr + 1);

start_index += 4;
end_index += 4;

ip[i] = strtol(sub, NULL, 16);
}
}

It compiles cleanly and produces the following output:
$ ./convert_to_ip.exe
IP is 10.17.140.1
But since I am very poor on string manipulations in C, I'm guessing
there are alot cleaner ways to do this. Please help me improve the
code. It's a homework problem, but as you can see I attempted to solve
it myself first.

/ E
I don't know yet. When I compile the code you posted I get..

s2ip.c:5: parse error before "u_short"
s2ip.c:19: parse error before "u_short"
s2ip.c: In function `extract_substring':
s2ip.c:21: `u_short' undeclared (first use in this function)
s2ip.c:21: (Each undeclared identifier is reported only once
s2ip.c:21: for each function it appears in.)
s2ip.c:21: parse error before "out_index"
s2ip.c:22: `start' undeclared (first use in this function)
s2ip.c:22: `end' undeclared (first use in this function)
s2ip.c:23: `out' undeclared (first use in this function)
s2ip.c:23: `out_index' undeclared (first use in this function)
s2ip.c:23: `s' undeclared (first use in this function)
s2ip.c: In function `to_ip':
s2ip.c:32: `u_short' undeclared (first use in this function)
s2ip.c:32: parse error before "start_index"
s2ip.c:35: `i' undeclared (first use in this function)
s2ip.c:36: `start_index' undeclared (first use in this function)
s2ip.c:36: `end_index' undeclared (first use in this function)

Which puts me off a bit.


Hmm, I'm compiling using gcc with the flags -ansi -pedantic and I
thought that would turn off any compiler extension. Anyway, u_short is
simply a typedef for unsigned short int.

--
Joe Wright
"Everything should be made as simple as possible, but not simpler."
--- Albert Einstein ---


/ E

Apr 8 '06 #4

Eric Sosman wrote:
Eric Lilja wrote:
Hello, I have IPv4-numbers in the following format:
"\\x0A\\x11\\x8C\\x01"

Now I need each byte as an int. [...]
Consider using sscanf():

unsigned int b0, b1, b2, b3;
if (sscanf(string, "\\x%x\\x%x\\x%x\\x%x",
&b0, &b1, &b2, &b3) == 4) ...

I say "consider" rather than "use" because sscanf() is
fairly tolerant in what it will accept as a number, and
it does not insist on converting the entire input string.
For example, the above format would accept all of

"\\x1\\x23\\x456\\x789A"
"\\x a\\x 11\\x 8c\\x -1"
"\\x0A\\x11\\x8C\\x01,Drink Billy Beer"

If you are concerned about detecting and rejecting such
departures from the expected format, sscanf() may not be
the right tool. If not, though, its ease of use may make
it the weapon of choice.


Thanks for the reply. sscanf() would sure make for a much shorter
program, but maybe a bit too fragile. Given my test program, do you see
any immediate improvements I could do it, while keeping the same
approach so to speak?

--
Eric Sosman
es*****@acm-dot-org.invalid


/ E

Apr 8 '06 #5
Eric Lilja wrote:
Eric Sosman wrote:
Eric Lilja wrote:
Hello, I have IPv4-numbers in the following format:
"\\x0A\\x11\\x8C\\x01"

Now I need each byte as an int. [...]


Consider using sscanf(): [...]


Thanks for the reply. sscanf() would sure make for a much shorter
program, but maybe a bit too fragile. Given my test program, do you see
any immediate improvements I could do it, while keeping the same
approach so to speak?


Please don't take the criticism personally, but your
code as it stands is already rather fragile, certainly no
paragon of robustness. It plucks and converts the target
substrings without checking what's between them, without
checking whether they're of the expected length, and without
even checking whether they're valid hexadecimal numbers!
Consider what your code would do with the strings

"\\x0A$y-4999999999999999999"
"abcdefghijklmnopqrstuvwxyz"
""

Since your code had nothing to detect or reject such departures
from the expected format, it seemed to me you might be confident
that the input would always be valid and that sscanf()'s "format
enforcement," limited as it is, would be more error-checking
than you started with.

If you want to extract the values "by hand" -- which gives
you more control of the format, if you choose to exercise it --
I'd suggest you check for the presence of each '\\' and 'x'
and check that the next two characters are in fact hexadecimal
digits (use isxdigit() from <ctype.h>). Most of all, don't
just assume that the string has the expected length, lest you
walk blindly off the end and into No Man's Land.

--
Eric Sosman
es*****@acm-dot-org.invalid
Apr 8 '06 #6
Eric Lilja wrote:
.... snip code and error listings ...
Hmm, I'm compiling using gcc with the flags -ansi -pedantic and
I thought that would turn off any compiler extension. Anyway,
u_short is simply a typedef for unsigned short int.


No it isn't. If it were there would be aline such as:

typedef unsigned short int u_short;

somewhere in your code. There isn't.

--
"If you want to post a followup via groups.google.com, don't use
the broken "Reply" link at the bottom of the article. Click on
"show options" at the top of the article, then click on the
"Reply" at the bottom of the article headers." - Keith Thompson
More details at: <http://cfaj.freeshell.org/google/>
Also see <http://www.safalra.com/special/googlegroupsreply/>
Apr 9 '06 #7

Eric Lilja wrote:
Hello, I have IPv4-numbers in the following format:
"\\x0A\\x11\\x8C\\x01"

Now I need each byte as an int. I wrote the following test program:
#include <stdio.h>
#include <stdlib.h>

static void to_ip(const char *, int *);
static void extract_substring(const char *, u_short, u_short, char *);

int
main(void)
{
const char *ip_as_string="\\x0A\\x11\\x8C\\x01";

int ip[4];

to_ip(ip_as_string, ip); you can try this looping as well instead of to_ip function call:
----
const char *pc=ip_as_string;
register i=2;

while(i<16){

ip[i/4] = (*(pc+i)-(*(pc+i) < 'A' ?'0':'A'-10)) * 16
+ (*(pc+i+1) - (*(pc+i+1) < 'A'
?'0':'A'-10));
i+=4;
}
----
printf("IP is %i.%i.%i.%i\n", ip[0], ip[1], ip[2], ip[3]);

return 0;
}

static void
extract_substring(const char *s, u_short start, u_short end, char *out)
{
u_short out_index = 0;

for(; start <= end; ++start)
out[out_index++] = s[start];

out[++start] = '\0';
}

static void
to_ip(const char *ip_as_string, int *ip)
{
char sub[5] = "0";
char *ptr = sub;
u_short start_index = 1;
u_short end_index = 3;
u_short i = 0;

for(; i < 4; ++i)
{
extract_substring(ip_as_string, start_index, end_index, ptr + 1);

start_index += 4;
end_index += 4;

ip[i] = strtol(sub, NULL, 16);
}
}

It compiles cleanly and produces the following output:
$ ./convert_to_ip.exe
IP is 10.17.140.1
But since I am very poor on string manipulations in C, I'm guessing
there are alot cleaner ways to do this. Please help me improve the
code. It's a homework problem, but as you can see I attempted to solve
it myself first.

/ E


Apr 9 '06 #8
Eric Lilja schrieb:
Hello, I have IPv4-numbers in the following format:
"\\x0A\\x11\\x8C\\x01"

Now I need each byte as an int. I wrote the following test program:
#include <stdio.h>
#include <stdlib.h>

static void to_ip(const char *, int *);
static void extract_substring(const char *, u_short, u_short, char *);
Apart from the missing typedef for u_short, prototypes without
parameter names make it hard to see which roles the parameters
have. In addition, some compilers warn if there are mismatches
in the parameter names of different prototypes (including the
function definition) -- this can be very useful if the meaning
of a parameter changes but not all uses have been adapted.
int
main(void)
{
const char *ip_as_string="\\x0A\\x11\\x8C\\x01";

int ip[4];

to_ip(ip_as_string, ip);
I'd rather have a consistent naming scheme:
extract_substring() <-> convert_string_to_ip()

Just stopping here and having a look:
- There is no way to tell whether to_ip() succeeded
- to_ip() has no way to tell whether the storage ip points to
is large enough
- I cannot tell here whether ip[x] will be be guaranteedly
initialised, so the following printf() may invoke undefined
behaviour.
printf("IP is %i.%i.%i.%i\n", ip[0], ip[1], ip[2], ip[3]);

return 0;
}

static void
extract_substring(const char *s, u_short start, u_short end, char *out)
{
u_short out_index = 0;
If you are using unsigned integer indices, then use the "natural"
type: size_t.
for(; start <= end; ++start)
out[out_index++] = s[start];
You do not respect string terminators here.
In fact, strncpy() may be for once a much better choice then this
loop.
out[++start] = '\0';
}

static void
to_ip(const char *ip_as_string, int *ip)
{
char sub[5] = "0";
char *ptr = sub;
u_short start_index = 1;
u_short end_index = 3;
u_short i = 0;

for(; i < 4; ++i)
{
extract_substring(ip_as_string, start_index, end_index, ptr + 1);
By restricting yourself to this substring by ignoring the '\\',
you effectively do not gain anything. Then you can omit all
checking and use start_index = 2 (6, 10, 14) instead.
start_index += 4;
end_index += 4;

ip[i] = strtol(sub, NULL, 16);
You do not use the error checking capabilities of strtol() --
then you can as well use sscanf() and get rid of the dangerous
extract_substring() and the loop. }
}

It compiles cleanly and produces the following output:
$ ./convert_to_ip.exe
IP is 10.17.140.1
But since I am very poor on string manipulations in C, I'm guessing
there are alot cleaner ways to do this. Please help me improve the
code. It's a homework problem, but as you can see I attempted to solve
it myself first.


Thank you for stating that :-)
Eric Sosman already gave you good advice in
<TZ********************@comcast.com>, so I just will give you
a version that will work but make the teacher suspicious, i.e.
there still remains something to do ;-)

,----
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

enum {
E_ConvNoConversion = -1,
E_ConvHexOk = 0,
E_ConvRangeErr,
E_ConvFormatErr
};
static int convertToIP (const char *pIPAsString, int *pIP);
static int convertHexStringToHexValues (const char *pHexString,
int *pHexValArray,
size_t number);
static int isHexValOctett (unsigned long HexVal);
static int retrieveHexVal(const char *pHexString,
size_t *pIndex,
unsigned long *pHexVal);

int
main (void)
{
const char *ip_as_string="\\x0A\\x11\\x8C\\x01";
int ip[4] = {0};
int ret;

ret = convertToIP(ip_as_string, ip);

if (ret != E_ConvHexOk) {
fprintf(stderr, "Conversion not successful (%d): %s\n",
ret, ip_as_string);
exit(EXIT_FAILURE);
}

printf("IP is %i.%i.%i.%i\n", ip[0], ip[1], ip[2], ip[3]);

return 0;
}

static int
convertToIP (const char *pIPAsString, int *pIP)
{
return convertHexStringToHexValues(pIPAsString, pIP, 4);
}

static int
convertHexStringToHexValues (const char *pHexString,
int *pHexValArray,
size_t number)
{
size_t i;
size_t index = 0;
int ret = E_ConvNoConversion;
unsigned long HexVal;

for (i = 0; i < number; i++) {
if (E_ConvHexOk != (ret = retrieveHexVal(pHexString,
&index,
&HexVal))
)
break;
if (!isHexValOctett(HexVal)) {
ret = E_ConvRangeErr;
break;
}
pHexValArray[i] = HexVal;
}

return ret;
}

static int isHexValOctett (unsigned long HexVal)
{
return HexVal < 256U;
}

static int retrieveHexVal(const char *pHexString,
size_t *pIndex,
unsigned long *pHexVal)
{
static const char startSequence[] = "\\x";
static const char allowedTermCharacters[] = {'\\', '\0'};
static const size_t startLen = sizeof startSequence - 1;
const char * substring = pHexString + *pIndex;
char *pEnd = NULL;
size_t index = 0;

if (0 != strncmp(substring, startSequence, startLen))
return E_ConvFormatErr;

index += startLen;
substring += index;

*pHexVal = strtoul(substring, &pEnd, 16);
if (pEnd == substring
|| !strchr(allowedTermCharacters, *pEnd)
)
return E_ConvFormatErr;

/*
if ((pEnd - substring) != 2)
return E_ConvFormatErr;
*/

index += pEnd - substring;

*pIndex += index;

return E_ConvHexOk;
}
`----

Note the difference in behaviour if you change the check
in retrieveHexVal() to the version in the comment; have
a look at "\\x0A\\x11\\x8C\\x01\\" and
"\\x0AB\\x11\\x8C\\x01", respectively.
Cheers
Michael
--
E-Mail: Mine is an /at/ gmx /dot/ de address.
Apr 9 '06 #9

This discussion thread is closed

Replies have been disabled for this discussion.

Similar topics

6 posts views Thread by New MSSQL DBA | last post: by
5 posts views Thread by Gord | last post: by
31 posts views Thread by Bjørn Augestad | last post: by
10 posts views Thread by junky_fellow | last post: by
5 posts views Thread by John J. Hughes II | last post: by
8 posts views Thread by Tom Smith | last post: by
3 posts views Thread by Philipp Brune | last post: by
By using this site, you agree to our Privacy Policy and Terms of Use.