473,385 Members | 1,720 Online
Bytes | Software Development & Data Engineering Community
Post Job

Home Posts Topics Members FAQ

Join Bytes to post your question to a community of 473,385 software developers and data experts.

XML Parser - check Starttag

Hi,
I'm trying to build an XML parser which should simply list all used
tokens and attributes including their values. So far, so good, this
works, but now I am trying to check for illegal constructs in the source
document regarding start tags. Here is my parser so far:

tokenlibrary.cpp:

#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
using namespace std;
#include "token7.h"

/**
 * Creates an empty token: no name, no child, no sibling, no parent and an
 * empty (zero-length) content string.
 */
ClToken::ClToken()
{
    // All fixed-size name buffers start out as empty C strings.
    tokenName[0] = '\0';
    endtagName[0] = '\0';
    starttagName[0] = '\0';

    // Nothing is linked into the tree yet.
    tokenChild = NULL;
    tokenSibling = NULL;
    tokenParent = NULL;

    // Give the remaining members a defined value so that no later read
    // sees an indeterminate pointer or level.
    speicheradresse = NULL;
    ebene = 0;

    // The content is always a heap-allocated C string (new char[]).
    tokenInhalt = new char[1];
    *tokenInhalt = '\0';
}

/**
 * Reads one element from `datei` into this token: its start tag (name and
 * attributes), its text content, any nested child elements and its end tag.
 *
 * Child elements are parsed recursively into new ClToken objects: the first
 * one is stored in tokenChild, later ones are appended to the end of that
 * child's tokenSibling chain. Newline characters are dropped from the input.
 *
 * @param datei  input stream positioned at the element to read
 * @param ebene  nesting level; it is incremented when the first child is
 *               created and the resulting value is handed to every child
 * @return 0 if the stream ended (or failed) before a name, a child or any
 *         content was stored, otherwise 1
 */
int ClToken::getToken(
    ifstream &datei, int ebene)
{
    int zaehler = 0;                     // characters currently held in puffer
    enum zustand zustand = istStartTag;  // only meaningful while tagOffen is true
    bool tagOffen = false;               // true between a '<' and its closing '>'
    char zeichen;
    char puffer[100];
    const int pufferMax = static_cast<int>(sizeof(puffer)) - 1;
    ClToken *child;

    cleanToken();

    for (;;)
    {
        // Stop on end of file AND on any other read failure (e.g. a file
        // that could not be opened); testing eof() alone would loop forever.
        if (!datei.get(zeichen))
        {
            if (*tokenName == '\0' && tokenChild == NULL && tokenInhalt == NULL)
                return fillToken(0);
            return fillToken(1);
        }

        switch (zeichen)
        {
        case '<':
            // Look at the next character without consuming it, so that at
            // most one character ever has to be put back.
            if (datei.peek() == '/')
            {
                datei.get(zeichen);      // consume the '/'
                zustand = istEndTag;
                tagOffen = true;
                if (zaehler != 0)
                {
                    // Everything collected so far is the element's content.
                    puffer[zaehler] = '\0';
                    delete[] tokenInhalt;
                    tokenInhalt = new char[zaehler + 1];
                    strcpy(tokenInhalt, puffer);
                }
            }
            else if (*tokenName != '\0')
            {
                // This token already has a name, so the new start tag
                // belongs to a child: hand the '<' back and recurse.
                datei.putback('<');
                if (tokenChild == NULL)
                {
                    ebene++;
                    tokenChild = new ClToken;
                    tokenChild->getToken(datei, ebene);
                }
                else
                {
                    // Append the new token at the end of the sibling chain.
                    child = tokenChild;
                    while (child->tokenSibling != NULL)
                        child = child->tokenSibling;
                    child->tokenSibling = new ClToken;
                    child->tokenSibling->getToken(datei, ebene);
                }
            }
            else
            {
                zustand = istStartTag;
                tagOffen = true;
            }
            zaehler = 0;
            break;

        case '>':
            if (!tagOffen)
            {
                // A '>' that does not close a tag is ordinary text.
                if (zaehler < pufferMax)
                {
                    puffer[zaehler] = zeichen;
                    zaehler++;
                }
                break;
            }
            tagOffen = false;
            puffer[zaehler] = '\0';
            if (zustand == istEndTag)
            {
                // Truncate so the copy always fits into endtagName.
                puffer[sizeof(endtagName) - 1] = '\0';
                strcpy(endtagName, puffer);
                checkEndtag();
                return fillToken(1);
            }
            // Start tag: getAttList() extracts the attributes and cuts
            // puffer off after the element name.
            att.getAttList(puffer);
            // Truncate so the copy always fits into tokenName.
            puffer[sizeof(tokenName) - 1] = '\0';
            strcpy(tokenName, puffer);
            checkStarttag();
            zaehler = 0;
            break;

        case '\n':
            break;

        default:
            // Characters beyond the buffer capacity are dropped instead of
            // being written past the end of puffer.
            if (zaehler < pufferMax)
            {
                puffer[zaehler] = zeichen;
                zaehler++;
            }
            break;
        }
    }
}

/**
 * Makes sure the token has a name and a content string before it is handed
 * back to the caller.
 *
 * A token without a name is labelled "Unbekanntes Element"; a missing
 * content pointer is replaced by an empty string.
 *
 * @param mode  value that is passed through unchanged
 * @return mode
 */
int ClToken::fillToken(
    int mode)
{
    const bool nameFehlt = (tokenName[0] == '\0');
    if (nameFehlt)
    {
        strcpy(tokenName, "Unbekanntes Element");
    }

    if (!tokenInhalt)
    {
        char *leer = new char[1];
        leer[0] = '\0';
        tokenInhalt = leer;
    }

    return mode;
}

/**
 * Resets the token before it is (re)filled: clears the name, deletes the
 * first child and releases the content string.
 */
void ClToken::cleanToken(void)
{
    *tokenName = '\0';

    // Deleting a null pointer is a no-op, so no explicit check is needed.
    delete tokenChild;
    tokenChild = NULL;

    // tokenInhalt is always allocated with new char[...], so it has to be
    // released with the array form of delete.
    delete[] tokenInhalt;
    tokenInhalt = NULL;
}

/**
 * Prints this token, its attributes, its children and its following
 * siblings to standard output.
 *
 * @param level  indentation depth; children are printed one level deeper,
 *               siblings on the same level
 */
void ClToken::druckeToken(
    int level)
{
    // Header line: name and content of this token.
    druckeTokenEbene(level);
    cout << "Token: " << name() << " - " << inhalt() << endl;

    // One line per attribute, indented like the token itself.
    int index = 0;
    while (index < att.zahlAtt())
    {
        druckeTokenEbene(level);
        cout << "Attribut " << att.zeigeAttName(index);
        cout << " hat den Wert " << att.zeigeAttWert(index) << endl;
        ++index;
    }

    if (tokenChild)
    {
        tokenChild->druckeToken(level + 1);
    }
    if (tokenSibling)
    {
        tokenSibling->druckeToken(level);
    }
}

/**
 * Prints the indentation prefix for one output line: the marker "| " once
 * per level. Nothing is printed for a level of zero or less.
 */
void ClToken::druckeTokenEbene(
    int level)
{
    for (int rest = level; rest > 0; --rest)
    {
        cout << "| ";
    }
}

/**
 * Checks that the name read from the end tag matches this token's name and
 * reports a mismatch on standard output.
 */
void ClToken::checkEndtag()
{
    // Compare the complete names; comparing only the first characters
    // would accept e.g. </person> as the end tag of <plz>.
    if (strcmp(tokenName, endtagName) != 0)
    {
        cout << "fehlendes Endtag: " << tokenName << endl;
    }
}

/**
 * Entry point for parsing: records level 1 for this token and reads the
 * token from the stream.
 *
 * @param datei  input stream to parse
 * @return the result of getToken()
 */
int ClToken::Init(ifstream &datei)
{
    const int startEbene = 1;
    ebene = startEbene;
    const int ergebnis = getToken(datei, startEbene);
    return ergebnis;
}

// Hook that getToken() calls right after a start tag's name has been copied
// into tokenName. The body is currently empty, so no start-tag validation
// takes place yet.
void ClToken::checkStarttag()
{

}

the header for this library:
#include "att.h"

/**
 * One node of the parsed document tree.
 *
 * A token stores its element name, its text content and its attributes, and
 * owns its first child and its next sibling. Destroying a token therefore
 * destroys the whole subtree hanging below it and all siblings after it.
 * Tokens cannot be copied, because a copy would share (and later free twice)
 * the owned pointers.
 */
class ClToken
{
public:
    ClToken();
    // Releases everything this token owns: child subtree, following
    // siblings and the content string (allocated with new char[]).
    ~ClToken()
    {
        delete tokenChild;
        delete tokenSibling;
        delete[] tokenInhalt;
    }
    char *name() { return tokenName; }
    ClToken *child() { return tokenChild; }
    char *inhalt() { return tokenInhalt; }
    void druckeToken(int ebene);
    int getToken(std::ifstream &datei, int ebene);
    ClattToken att;                 // attributes of the start tag
    int Init(std::ifstream &datei);

private:
    // Copying is disabled: declared but intentionally not defined.
    ClToken(const ClToken &);
    ClToken &operator=(const ClToken &);

    void cleanToken();
    void druckeTokenEbene(int ebene);
    int fillToken(int mode);
    char tokenName[64];             // element name taken from the start tag
    char endtagName[64];            // name taken from the end tag
    ClToken *tokenChild;            // first child element (owned)
    ClToken *tokenSibling;          // next element on the same level (owned)
    ClToken *tokenParent;           // not owned
    void checkEndtag();
    void checkStarttag();
    char *tokenInhalt;              // text content, allocated with new char[]
    int ebene;
    char starttagName[64];
    char *speicheradresse;
} ;

// Kind of tag the tokenizer is currently reading.
enum zustand
{
    istStartTag,  // inside an opening tag
    istEndTag     // inside a closing tag
};

and the main program:

#include <iostream>
using namespace std;
#include <fstream>
#include <string>

#include "token7.h"

int main()
{
ifstream eingabe;
ClToken *token;
char dateiname[50];
string adresse;

cout << "Bitte geben sie den Namen der Datei an, die eingelesen werden
soll!" << endl;
cin >> dateiname;

eingabe.open(dateiname);
token=new ClToken;
;

if (token->Init(eingabe)!=0) token->druckeToken(1);
eingabe.close();
{
/* bitte ignorieren; nur bis zu Ihrer Anmeldung notwendig */
int x;
cin >> x;
}

}

Here are also two more files that are necessary to run the program
but don't have anything to do with my problem (at least I hope so);
they're used to read the names and values of attributes:

header:
/**
 * Attribute list of one start tag: up to 10 name/value pairs, each stored
 * as a heap-allocated C string.
 */
class ClattToken
{
private:
    int anzahlAtt;          // number of complete name/value pairs stored
    char *attName[10];
    char *attValue[10];
public:
    // Starts with an empty list so that zahlAtt() is well defined even if
    // getAttList() is never called.
    ClattToken() : anzahlAtt(0)
    {
        for (int i = 0; i < 10; i++)
        {
            attName[i] = 0;
            attValue[i] = 0;
        }
    }
    int getAttList(char *eingabe);
    // Both accessors return a null pointer for an index outside
    // [0, zahlAtt()).
    char *zeigeAttName(int id) {return (id >= 0 && id < anzahlAtt) ? attName[id] : 0;}
    char *zeigeAttWert(int id) {return (id >= 0 && id < anzahlAtt) ? attValue[id] : 0;}
    int zahlAtt() {return anzahlAtt;}
};

and the library for this header:

#include <iostream>
using namespace std;
#include <fstream>
#include <string>
#include "att.h"

/**
 * Splits the text of a start tag (everything between '<' and '>') into the
 * element name and its attributes.
 *
 * The input is modified: the first space is overwritten with '\0', so that
 * afterwards `eingabe` holds only the element name. Each `name="value"`
 * pair that follows is stored in attName / attValue and counted in
 * anzahlAtt. Once the attribute table is full, the remaining attributes are
 * ignored.
 *
 * @param eingabe  zero-terminated, writable start tag text
 * @return always 1
 */
int ClattToken::getAttList(
    char *eingabe)
{
    enum zustand { zwischenTags, inNamen, erwarteAttributNamen,
                   erwarteAttributWert, verarbeiteAttributWert };

    char puffer[100];
    const int pufferMax = static_cast<int>(sizeof(puffer)) - 1;
    const int maxAtt = static_cast<int>(sizeof(attName) / sizeof(attName[0]));

    int zaehler = 0;
    enum zustand zustand = inNamen;
    anzahlAtt = 0;

    // The loop ends at the string terminator. The first space is replaced
    // by '\0' below, but by then the pointer is advanced past it before the
    // condition is tested again, so scanning continues into the attributes.
    for (; *eingabe != '\0'; eingabe = eingabe + 1)
    {
        switch (*eingabe)
        {
        case ' ':
            if (zustand == inNamen)
            {
                // End of the element name: cut the input off here.
                zustand = erwarteAttributNamen;
                *eingabe = '\0';
                zaehler = 0;
            }
            else if (zustand == verarbeiteAttributWert)
            {
                // Spaces inside a quoted value belong to the value.
                if (zaehler < pufferMax)
                {
                    puffer[zaehler] = *eingabe;
                    zaehler++;
                }
            }
            break;

        case '=':
            if (zustand == erwarteAttributNamen)
            {
                // No free slot left: stop instead of writing past the
                // end of the attribute tables.
                if (anzahlAtt >= maxAtt)
                    return 1;
                zustand = erwarteAttributWert;
                puffer[zaehler] = '\0';
                attName[anzahlAtt] = new char[zaehler + 1];
                strcpy(attName[anzahlAtt], puffer);
                zaehler = 0;
            }
            else if (zustand == verarbeiteAttributWert)
            {
                if (zaehler < pufferMax)
                {
                    puffer[zaehler] = *eingabe;
                    zaehler++;
                }
            }
            else cout << "Fehlerhaftes Zeichen! '='" << endl;
            break;

        case '"':
            if (zustand == erwarteAttributWert)
            {
                // Opening quote: the value starts with the next character.
                zustand = verarbeiteAttributWert;
                zaehler = 0;
            }
            else if (zustand == verarbeiteAttributWert)
            {
                // Closing quote: the name/value pair is complete.
                zustand = erwarteAttributNamen;
                puffer[zaehler] = '\0';
                attValue[anzahlAtt] = new char[zaehler + 1];
                strcpy(attValue[anzahlAtt], puffer);
                zaehler = 0;
                anzahlAtt++;
            }
            else cout << "Fehlerhaftes Zeichen! '\"'" << endl;
            break;

        default:
            // Characters of the element name itself are not collected;
            // characters beyond the buffer capacity are dropped.
            if (zustand >= erwarteAttributNamen && zaehler < pufferMax)
            {
                puffer[zaehler] = *eingabe;
                zaehler++;
            }
            break;
        }
    }

    return 1;
}

When it's done, the parser should be able to recognize when a
start tag isn't allowed. Here's an example of a file the parser
shouldn't accept:
<kurs>
<person>
<vorname attr1="value1">Margarita</vorname>
<famname attr1="value1" attr2="value2">weber
</person>
<person>
</kurs>

it should only accept a structure like
<kurs>
<person>
<vorname attr1="value1">Margarita</vorname>
<famname attr1="value1" attr2="value2">weber</famname>
</person>
</kurs>
with no new start tags within the structure. I already tried several
things, but I don't have any idea how to compare the names of new
start tags with the already existing names of parent or sibling tokens,
which is necessary for my plan!
Can anyone help me? I don't have any more ideas how to solve this
problem!
Thank you,
Patrick Gunia
Jul 22 '05 #1
1 1998
Did you look into freely available XML parsers available in C++?

I would suggest looking at:
- MSXML from Microsoft
- Xerces XML parser from Apache

Deepa
--
http://www.EventHelix.com/EventStudio
EventStudio 2.5 - Generate sequence diagrams from plain text input

Jul 22 '05 #2

This thread has been closed and replies have been disabled. Please start a new discussion.

Similar topics

2
by: Sylvain Thenault | last post by:
Hi there ! I've noticed the following problem with python >= 2.3 (actually 2.3.4 and 2.4): syt@musca:test$ python Python 2.3.4 (#2, Sep 24 2004, 08:39:09) on linux2 Type "help", "copyright",...
2
by: Magnus Heino | last post by:
Hi. Are there any patterns or other design techniques that could be used when implementing a xml parser that needs to be able to handle different versions of a schema? Let's say that I write...
4
by: billcoumbe | last post by:
any recommendations? I'm looking for something that will just run from the unix command line to validate large (20-50Mb) XML files against an XML DTD. Ideally something that is actively...
2
by: Marshall | last post by:
Hi All, I am building an asp.net web app using Visual Studio 2003. Today I created a new folder called 'Secured' at the root of my web app so I could partition off all of the restricted...
7
by: (Jamie Andrews) | last post by:
For a research project, we're looking for a reliable parser for C that will take an ANSI C program and yield a tree representation of the program (as a Java or C++ object). Of course a grammar...
8
by: Andrew Robert | last post by:
Hi Everyone. I tried the following to get input into optionparser from either a file or command line. The code below detects the passed file argument and prints the file contents but the...
6
by: Herby | last post by:
Hi, Im interested in Reverse Engineering C++ source code into a form more comprehensible than the source itself. I want to write a basic one myself, obviously i need to write a parser for the...
12
abdoelmasry
by: abdoelmasry | last post by:
HI men im trying To get xml file conetent To insert to database xml parser functions couldn't get single element from xml file it's return all start elements , end elements and data elements...
3
by: ups_genius | last post by:
Hi everyone! I created an error parser using the existing CDT stuff by basically copying some of the GNU / make / ... error parsers' code. I also added the extension point for the new error...
0
by: taylorcarr | last post by:
A Canon printer is a smart device known for being advanced, efficient, and reliable. It is designed for home, office, and hybrid workspace use and can also be used for a variety of purposes. However,...
0
by: aa123db | last post by:
Variable and constants Use var or let for variables and const fror constants. Var foo ='bar'; Let foo ='bar';const baz ='bar'; Functions function $name$ ($parameters$) { } ...
0
by: ryjfgjl | last post by:
If we have dozens or hundreds of excel to import into the database, if we use the excel import function provided by database editors such as navicat, it will be extremely tedious and time-consuming...
0
by: ryjfgjl | last post by:
In our work, we often receive Excel tables with data in the same format. If we want to analyze these data, it can be difficult to analyze them because the data is spread across multiple Excel files...
0
by: emmanuelkatto | last post by:
Hi All, I am Emmanuel katto from Uganda. I want to ask what challenges you've faced while migrating a website to cloud. Please let me know. Thanks! Emmanuel
1
by: Sonnysonu | last post by:
This is the data of csv file 1 2 3 1 2 3 1 2 3 1 2 3 2 3 2 3 3 the lengths should be different i have to store the data by column-wise with in the specific length. suppose the i have to...
0
by: Hystou | last post by:
There are some requirements for setting up RAID: 1. The motherboard and BIOS support RAID configuration. 2. The motherboard has 2 or more available SATA protocol SSD/HDD slots (including MSATA, M.2...
0
marktang
by: marktang | last post by:
ONU (Optical Network Unit) is one of the key components for providing high-speed Internet services. Its primary function is to act as an endpoint device located at the user's premises. However,...
0
Oralloy
by: Oralloy | last post by:
Hello folks, I am unable to find appropriate documentation on the type promotion of bit-fields when using the generalised comparison operator "<=>". The problem is that using the GNU compilers,...

By using Bytes.com and it's services, you agree to our Privacy Policy and Terms of Use.

To disable or enable advertisements and analytics tracking please visit the manage ads & tracking page.