XML Parser - check Starttag

Patrick Gunia

Hi,
I'm trying to build an XML parser which should simply list all used
tokens and attributes, including their values. So far, so good; this
works, but now I am trying to check for illegal constructs in the source
document regarding start tags. Here is my parser so far:

tokenlibrary.cpp:

#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
using namespace std;
#include "token7.h"

/// Creates an empty token: empty name buffers, no child, sibling or parent,
/// and an allocated but empty content string.
ClToken::ClToken()
{
    *tokenName = '\0';
    *endtagName = '\0';
    *starttagName = '\0';
    tokenChild = NULL;
    tokenSibling = NULL;
    tokenParent = NULL;
    // Content is always a heap array so that it can be released with delete[].
    tokenInhalt = new char[1];
    *tokenInhalt = '\0';
    ebene = 0;
    speicheradresse = NULL;
}

/// Reads one element from `datei` into this token: its start tag (name and
/// attributes), its text content, nested elements (stored as child and
/// sibling tokens, read recursively) and its end tag.
///
/// Line feeds are skipped. Text longer than the internal buffer and names
/// longer than the name arrays are truncated instead of overflowing.
///
/// @param datei  stream to read from
/// @param ebene  nesting level; incremented before the first child is read
/// @return 0 if the input ended and this token has neither a name, a child
///         nor content; otherwise 1
int ClToken::getToken(
    ifstream &datei, int ebene)
{
    char puffer[100];
    const int maxZeichen = static_cast<int>(sizeof(puffer)) - 1;
    int zaehler = 0;
    enum zustand zustand = istStartTag;
    bool imTag = false;   // true between '<' and the matching '>'
    char zeichen;

    cleanToken();

    for (;;)
    {
        // Testing the stream itself (not only eof()) also ends the loop when
        // the stream is in a failed state, e.g. because it was never opened.
        if (!datei.get(zeichen))
        {
            if (*tokenName == '\0' && tokenChild == NULL && tokenInhalt == NULL)
                return fillToken(0);
            return fillToken(1);
        }

        switch (zeichen)
        {
        case '<':
            if (!datei.get(zeichen))
                continue;   // input ended right after '<'; handled at the loop head

            if (zeichen == '/')
            {
                zustand = istEndTag;
                imTag = true;
                if (zaehler != 0)
                {
                    // Text collected so far is this token's content.
                    puffer[zaehler] = '\0';
                    delete[] tokenInhalt;   // release any earlier content
                    tokenInhalt = new char[zaehler + 1];
                    strcpy(tokenInhalt, puffer);
                }
            }
            else
            {
                datei.putback(zeichen);
                if (*tokenName != '\0')
                {
                    // This token already has a name, so the tag opens a
                    // nested element: hand "<..." back to the stream and let
                    // a new token read it.
                    datei.putback('<');
                    if (tokenChild == NULL)
                    {
                        ebene++;
                        tokenChild = new ClToken;
                        tokenChild->getToken(datei, ebene);
                    }
                    else
                    {
                        // Append behind the last existing sibling.
                        ClToken *letztes = tokenChild;
                        while (letztes->tokenSibling != NULL)
                            letztes = letztes->tokenSibling;
                        letztes->tokenSibling = new ClToken;
                        letztes->tokenSibling->getToken(datei, ebene);
                    }
                }
                else
                {
                    zustand = istStartTag;
                    imTag = true;
                }
            }
            zaehler = 0;
            break;

        case '>':
            if (!imTag)
            {
                // A '>' outside of a tag is ordinary character data.
                if (zaehler < maxZeichen)
                    puffer[zaehler++] = zeichen;
                break;
            }
            imTag = false;
            puffer[zaehler] = '\0';
            if (zustand == istEndTag)
            {
                strncpy(endtagName, puffer, sizeof(endtagName) - 1);
                endtagName[sizeof(endtagName) - 1] = '\0';
                checkEndtag();
                return fillToken(1);
            }
            // Start tag: getAttList() cuts the buffer off after the element
            // name, so the name must be copied after that call.
            att.getAttList(puffer);
            strncpy(tokenName, puffer, sizeof(tokenName) - 1);
            tokenName[sizeof(tokenName) - 1] = '\0';
            checkStarttag();
            zaehler = 0;
            break;

        case '\n':
            break;

        default:
            if (zaehler < maxZeichen)
                puffer[zaehler++] = zeichen;
            break;
        }
    }
}

/// Gives a token without a name the placeholder name and makes sure the
/// content pointer refers to a string (empty if nothing was stored).
///
/// @param mode  value handed straight back to the caller
/// @return `mode`, unchanged
int ClToken::fillToken(
    int mode)
{
    const bool nameFehlt = (tokenName[0] == '\0');
    if (nameFehlt)
    {
        strcpy(tokenName, "Unbekanntes Element");
    }

    if (!tokenInhalt)
    {
        char *leer = new char[1];
        leer[0] = '\0';
        tokenInhalt = leer;
    }

    return mode;
}

/// Resets the token before it is (re)filled: clears the name, deletes the
/// child token and releases the content string.
void ClToken::cleanToken(void)
{
    *tokenName = '\0';

    // Deleting a null pointer is a no-op, so no explicit checks are needed.
    delete tokenChild;
    tokenChild = NULL;

    // tokenInhalt is always allocated with new char[...], so it has to be
    // released with the array form of delete.
    delete[] tokenInhalt;
    tokenInhalt = NULL;
}

/// Prints this token (name and content), then one line per attribute, then
/// recursively its child tokens one level deeper and its sibling tokens on
/// the same level.
///
/// @param level  indentation depth used for the output lines
void ClToken::druckeToken(
    int level)
{
    druckeTokenEbene(level);
    cout << "Token: " << name() << " - " << inhalt() << endl;

    for (int nr = 0; nr < att.zahlAtt(); ++nr)
    {
        druckeTokenEbene(level);
        cout << "Attribut " << att.zeigeAttName(nr) << " hat den Wert "
             << att.zeigeAttWert(nr) << endl;
    }

    if (tokenChild)
        tokenChild->druckeToken(level + 1);
    if (tokenSibling)
        tokenSibling->druckeToken(level);
}

/// Writes the indentation prefix: one "| " per level.
///
/// @param level  number of prefixes to print; nothing is printed for
///               values of zero or below
void ClToken::druckeTokenEbene(
    int level)
{
    for (int rest = level; rest > 0; --rest)
        cout << "| ";
}

/// Reports a mismatch between the stored start tag name and the end tag name
/// that was just read.
///
/// The complete names are compared; comparing only the first character would
/// accept e.g. </price> as the end of <person>.
void ClToken::checkEndtag()
{
    if (strcmp(tokenName, endtagName) != 0)
    {
        cout << "fehlendes Endtag: " << tokenName << endl;
    }
}

/// Entry point for parsing: sets the nesting level to 1 and reads the first
/// element from `datei` into this token.
///
/// @param datei  stream to parse
/// @return 0 if the stream is not usable (for example the file could not be
///         opened), otherwise the result of getToken()
int ClToken::Init(ifstream &datei)
{
    ebene = 1;
    // Refuse a stream that is already in a failed state instead of handing
    // it to the read loop.
    if (!datei)
        return 0;
    return getToken(datei, ebene);
}

// Hook that getToken() calls right after the name of a start tag has been
// copied into tokenName. It is currently empty: no start tag validation is
// implemented yet.
void ClToken::checkStarttag()
{

}

the header for this library:
#include "att.h"

/// One node of the parsed document tree. A token stores its name, its text
/// content and its attributes, and links to its first child and its next
/// sibling. The token owns its content string, its child and its sibling.
class ClToken
{
public:
    ClToken();
    /// Releases the content string and, recursively, all child and sibling
    /// tokens.
    ~ClToken()
    {
        delete tokenChild;
        delete tokenSibling;
        delete[] tokenInhalt;
    }

    char *name() { return tokenName; }       ///< element name
    ClToken *child() { return tokenChild; }  ///< first child, or null
    char *inhalt() { return tokenInhalt; }   ///< text content
    void druckeToken(int ebene);
    int getToken(std::ifstream &datei, int ebene);
    ClattToken att;                          ///< attributes of the start tag
    int Init(std::ifstream &datei);

private:
    // The token owns raw pointers, so copying is disabled (declared, but
    // intentionally never defined).
    ClToken(const ClToken &);
    ClToken &operator=(const ClToken &);

    void cleanToken();
    void druckeTokenEbene(int ebene);
    int fillToken(int mode);
    char tokenName[64];
    char endtagName[64];
    ClToken *tokenChild;
    ClToken *tokenSibling;
    ClToken *tokenParent;
    void checkEndtag();
    void checkStarttag();
    char *tokenInhalt;
    int ebene;
    char starttagName[64];
    char *speicheradresse;
} ;

/// Kind of tag the tokenizer is currently reading.
enum zustand
{
    istStartTag,  ///< inside an opening tag
    istEndTag     ///< inside a closing tag
};

and the main program:

#include <iostream>
using namespace std;
#include <fstream>
#include <string>

#include "token7.h"

int main()
{
ifstream eingabe;
ClToken *token;
char dateiname[50];
string adresse;

cout << "Bitte geben sie den Namen der Datei an, die eingelesen werden
soll!" << endl;
cin >> dateiname;

eingabe.open(dateiname);
token=new ClToken;
;

if (token->Init(eingabe)!=0) token->druckeToken(1);
eingabe.close();
{
/* bitte ignorieren; nur bis zu Ihrer Anmeldung notwendig */
int x;
cin >> x;
}

}

Here are also two more files that are necessary to run the program,
but they don't have anything to do with my problem (at least I hope so);
they're used to read the names and values of attributes:

header:
/// Attribute list of one start tag: up to 10 name/value pairs stored as
/// heap-allocated C strings that this object owns.
class ClattToken
{
private:
    int anzahlAtt;
    char *attName[10];
    char *attValue[10];

    // The object owns raw pointers, so copying is disabled (declared, but
    // intentionally never defined).
    ClattToken(const ClattToken &);
    ClattToken &operator=(const ClattToken &);
public:
    /// Starts with an empty list so that zahlAtt() is well defined even if
    /// getAttList() is never called.
    ClattToken() : anzahlAtt(0)
    {
        for (int i = 0; i < 10; i++)
        {
            attName[i] = 0;
            attValue[i] = 0;
        }
    }
    /// Releases every stored name and value string.
    ~ClattToken()
    {
        for (int i = 0; i < 10; i++)
        {
            delete[] attName[i];
            delete[] attValue[i];
        }
    }
    int getAttList(char *eingabe);
    /// Name of attribute `id`; `id` must be in [0, zahlAtt()).
    char *zeigeAttName(int id) {return attName[id];}
    /// Value of attribute `id`; `id` must be in [0, zahlAtt()).
    char *zeigeAttWert(int id) {return attValue[id];}
    /// Number of complete name/value pairs currently stored.
    int zahlAtt() {return anzahlAtt;}
};

and the library for this header:

#include <iostream>
using namespace std;
#include <fstream>
#include <string>
#include "att.h"

/// Splits the text of a start tag ("name attr1="v1" attr2="v2" ...") into
/// the element name and its attributes.
///
/// The first blank in `eingabe` is overwritten with '\0', so afterwards
/// `eingabe` holds only the element name. Every name="value" pair that
/// follows is copied into attName / attValue. Parsing stops once the
/// attribute arrays are full; overlong names and values are truncated to
/// the size of the internal buffer.
///
/// @param eingabe  writable, null-terminated tag text; modified in place
/// @return always 1
int ClattToken::getAttList(
    char *eingabe)
{
    enum zustand { zwischenTags, inNamen, erwarteAttributNamen,
                   erwarteAttributWert,
                   verarbeiteAttributWert} ;
    const int maxAttribute =
        static_cast<int>(sizeof(attName) / sizeof(attName[0]));
    char puffer[100];
    const int maxZeichen = static_cast<int>(sizeof(puffer)) - 1;
    int zaehler = 0;
    enum zustand zustand = inNamen;

    anzahlAtt = 0;
    // The loop must stop at the string terminator '\0'. The blank that is
    // replaced by '\0' below does not end it, because the pointer has already
    // moved on before the condition is tested again.
    for (; *eingabe != '\0'; eingabe = eingabe + 1)
    {
        switch (*eingabe)
        {
        case ' ':
            if (zustand == inNamen)
            {
                // End of the element name: cut the caller's string here.
                zustand = erwarteAttributNamen;
                *eingabe = '\0';
                zaehler = 0;
            }
            else if (zustand == verarbeiteAttributWert)
            {
                if (zaehler < maxZeichen)
                    puffer[zaehler++] = *eingabe;
            }
            break;

        case '=':
            if (zustand == erwarteAttributNamen)
            {
                zustand = erwarteAttributWert;
                puffer[zaehler] = '\0';
                attName[anzahlAtt] = new char[zaehler + 1];
                strcpy(attName[anzahlAtt], puffer);
                zaehler = 0;
            }
            else if (zustand == verarbeiteAttributWert)
            {
                if (zaehler < maxZeichen)
                    puffer[zaehler++] = *eingabe;
            }
            else cout << "Fehlerhaftes Zeichen! '='" << endl;
            break;

        case '"':
            if (zustand == erwarteAttributWert)
            {
                zustand = verarbeiteAttributWert;
                zaehler = 0;
            }
            else if (zustand == verarbeiteAttributWert)
            {
                zustand = erwarteAttributNamen;
                puffer[zaehler] = '\0';
                attValue[anzahlAtt] = new char[zaehler + 1];
                strcpy(attValue[anzahlAtt], puffer);
                zaehler = 0;
                anzahlAtt++;
                // No free slot left: ignore any further attributes rather
                // than writing past the end of the arrays.
                if (anzahlAtt >= maxAttribute)
                    return 1;
            }
            else cout << "Fehlerhaftes Zeichen! '\"'" << endl;
            break;

        default:
            if (zustand >= erwarteAttributNamen)
            {
                if (zaehler < maxZeichen)
                    puffer[zaehler++] = *eingabe;
            }
            break;
        }
    }

    return 1;
}

When it's done, the parser should be able to recognize when a
start tag isn't allowed. Here's an example of a file the parser
shouldn't accept:
<kurs>
<person>
<vorname attr1="value1">Margarita</vorname>
<famname attr1="value1" attr2="value2">weber
</person>
<person>
</kurs>

it should only accept a structure like
<kurs>
<person>
<vorname attr1="value1">Margarita</vorname>
<famname attr1="value1" attr2="value2">weber</famname>
</person>
</kurs>
with no new start tags within the structure. I already tried several
things, but I don't have any idea how to compare the names of new
start tags with the already existing names of parent or sibling tokens,
which is necessary for my plan!
Can anyone help me? I don't have any more ideas how to solve this
problem!
Thank you,
Patrick Gunia

Jul 22 '05 #1

Subscribe Post Reply

1998

EventHelix.com

Did you look into freely available XML parsers available in C++?

I would suggest looking at:
- MSXML from Microsoft
- Xerces XML parser from Apache

Deepa
--
http://www.EventHelix.com/EventStudio
EventStudio 2.5 - Generate sequence diagrams from plain text input

Jul 22 '05 #2

by: Sylvain Thenault | last post by:

Hi there ! I've noticed the following problem with python >= 2.3 (actually 2.3.4 and 2.4): syt@musca:test$ python Python 2.3.4 (#2, Sep 24 2004, 08:39:09) on linux2 Type "help", "copyright",...

Python

XML parser patterns

by: Magnus Heino | last post by:

Hi. Are there any patterns or other design techniques that could be used when implementing a xml parser that needs to be able to handle different versions of a schema? Let's say that I write...

.NET Framework

standalone validating XML parser for Solaris?

by: billcoumbe | last post by:

any recommendations? I'm looking for something that will just run from the unix command line to validate large (20-50Mb) XML files against an XML DTD. Ideally something that is actively...

.NET Framework

Parser Error

by: Marshall | last post by:

Hi All, I am building an asp.net web app using Visual Studio 2003. Today I created a new folder called 'Secured' at the root of my web app so I could partition off all of the restricted...

ASP.NET

C parser yielding syntax tree data structure?

by: (Jamie Andrews) | last post by:

For a research project, we're looking for a reliable parser for C that will take an ANSI C program and yield a tree representation of the program (as a Java or C++ object). Of course a grammar...

C / C++

Option parser question - reading options from file as well as commandline

by: Andrew Robert | last post by:

Hi Everyone. I tried the following to get input into optionparser from either a file or command line. The code below detects the passed file argument and prints the file contents but the...

Python

C++ Source Reverse Engineer - How to write a parser ?

by: Herby | last post by:

Hi, Im interested in Reverse Engineering C++ source code into a form more comprehensible than the source itself. I want to write a basic one myself, obviously i need to write a parser for the...

C / C++

XML parser will not return a single element from my XML code

by: abdoelmasry | last post by:

HI men im trying To get xml file conetent To insert to database xml parser functions couldn't get single element from xml file it's return all start elements , end elements and data elements...

PHP

Eclipse CDT error parser for external tool

by: ups_genius | last post by:

Hi everyone! I created an error parser using the existing CDT stuff by basically copying some of the GNU / make / ... error parsers' code. I also added the extension point for the new error...

C / C++

Easy Steps to Fix "Canon Printer Won't Connect to WiFi Network"

by: taylorcarr | last post by:

A Canon printer is a smart device known for being advanced, efficient, and reliable. It is designed for home, office, and hybrid workspace use and can also be used for a variety of purposes. However,...

General

Basic Javascript concepts

by: aa123db | last post by:

Variable and constants Use var or let for variables and const fror constants. Var foo ='bar'; Let foo ='bar';const baz ='bar'; Functions function $name$ ($parameters$) { } ...

Javascript

Batch import of multiple excel files into the database

by: ryjfgjl | last post by:

If we have dozens or hundreds of excel to import into the database, if we use the excel import function provided by database editors such as navicat, it will be extremely tedious and time-consuming...

Data Management

Merging data from multiple Excel files

by: ryjfgjl | last post by:

In our work, we often receive Excel tables with data in the same format. If we want to analyze these data, it can be difficult to analyze them because the data is spread across multiple Excel files...

Data Management

Migrating Website to Cloud - Emmanuel Katto

by: emmanuelkatto | last post by:

Hi All, I am Emmanuel katto from Uganda. I want to ask what challenges you've faced while migrating a website to cloud. Please let me know. Thanks! Emmanuel

General

Is that possible of reading the .csv file in column wise and the column have different lengths ?

by: Sonnysonu | last post by:

This is the data of csv file 1 2 3 1 2 3 1 2 3 1 2 3 2 3 2 3 3 the lengths should be different i have to store the data by column-wise with in the specific length. suppose the i have to...

C / C++

How to build RAID in BIOS?

by: Hystou | last post by:

There are some requirements for setting up RAID: 1. The motherboard and BIOS support RAID configuration. 2. The motherboard has 2 or more available SATA protocol SSD/HDD slots (including MSATA, M.2...

Computer Hardware

What is ONU?

by: marktang | last post by:

ONU (Optical Network Unit) is one of the key components for providing high-speed Internet services. Its primary function is to act as an endpoint device located at the user's premises. However,...

General

Problem With Comparison Operator <=> in G++

by: Oralloy | last post by:

Hello folks, I am unable to find appropriate documentation on the type promotion of bit-fields when using the generalised comparison operator "<=>". The problem is that using the GNU compilers,...

C / C++

XML Parser - check Starttag

Similar topics