any other best way of reading the file

440 256MB

Hi,

I am posting my sample code for reading a file, together with the input data it reads.

Is there a better way of reading the file?

Thanks in advance
PSB

Expand|Select|Wrap|Line Numbers

 Sampple1.txt

Rect    1       1       2       7       6

Rect    2       2       3       8       7

Rect    3       3       4       9       8

Tria    4       4       5       9

Pnt     1       0.      0.      0.

Pnt     2       5.      0.      0.

Pnt     3       10.     0.      0.

Pnt     4       15.     0.      0.

Pnt     5       20.     0.      0.

Pnt     6       0.      5.      0.

Pnt     7       5.      5.      0.

Pnt     8       10.     5.      0.

Pnt     9       15.     5.      0.

Expand|Select|Wrap|Line Numbers

 Sample.py

def read_file_data(strFile):
    """Parse a fixed-width geometry file into point and wire dictionaries.

    Every record is laid out in 8-character columns: a keyword in columns
    0-7 (``Pnt``, ``Rect`` or ``Tria``), an integer ID in columns 8-15 and
    the payload fields in the following columns. Lines that start with any
    other keyword (including blank lines) are ignored.

    Args:
        strFile: Path of the text file to read.

    Returns:
        A ``(pntIDDict, wireIDDict)`` tuple. ``pntIDDict`` maps a point ID
        to its three float coordinates; ``wireIDDict`` maps a wire ID to
        its list of point IDs (four for ``Rect``, three for ``Tria``).

    Raises:
        ValueError: If a numeric column of a recognised record is empty or
            cannot be converted.
    """
    pntIDDict = {}
    wireIDDict = {}

    # 'with' closes the file even when one of the conversions below raises.
    with open(strFile, 'r') as f:
        for strTemp in f:
            # Remove only the line terminator. Chopping the last character
            # unconditionally would eat real data on a final line that has
            # no trailing newline.
            strTemp = strTemp.rstrip('\r\n')
            keyword = strTemp[:4]

            if strTemp[:3] == 'Pnt':
                pntID = int(strTemp[8:16])
                pntIDDict[pntID] = [float(strTemp[start:start + 8])
                                    for start in (16, 24, 32)]
            elif keyword == 'Rect' or keyword == 'Tria':
                wireID = int(strTemp[8:16])
                # A rectangle references four points, a triangle three.
                starts = (16, 24, 32, 40) if keyword == 'Rect' else (16, 24, 32)
                wireIDDict[wireID] = [int(strTemp[start:start + 8])
                                      for start in starts]

    return pntIDDict, wireIDDict
 
# Script entry point: parse the sample file and print both dictionaries.
if __name__ == '__main__':
 
    # NOTE(review): neither initialisation below is read before the call
    # rebinds wireIDDict; the points are stored under the name ntIDDict,
    # which looks like a typo for pntIDDict -- confirm.
    pntIDDict = {}

    wireIDDict = {}

    ntIDDict,wireIDDict = read_file_data ("c:\\Sample1.txt")

    # Python 2 print statement.
    print ntIDDict,wireIDDict

Mar 18 '07 #1

Subscribe Post Reply

4042

bartonc

6,596

Expert 4TB

Hi,

I am posting my sample code for reading a file, together with the input data it reads.

Is there a better way of reading the file?

Thanks in advance
PSB

Expand|Select|Wrap|Line Numbers

Sampple1.txt

Rect    1       1       2       7       6

Rect    2       2       3       8       7

Rect    3       3       4       9       8

Tria    4       4       5       9

Pnt     1       0.      0.      0.

Pnt     2       5.      0.      0.

Pnt     3       10.     0.      0.

Pnt     4       15.     0.      0.

Pnt     5       20.     0.      0.

Pnt     6       0.      5.      0.

Pnt     7       5.      5.      0.

Pnt     8       10.     5.      0.

Pnt     9       15.     5.      0.

Expand|Select|Wrap|Line Numbers

Sample.py

def read_file_data(strFile):
    """Parse a fixed-width geometry file into point and wire dictionaries.

    Every record is laid out in 8-character columns: a keyword in columns
    0-7 (``Pnt``, ``Rect`` or ``Tria``), an integer ID in columns 8-15 and
    the payload fields in the following columns. Lines that start with any
    other keyword (including blank lines) are ignored.

    Args:
        strFile: Path of the text file to read.

    Returns:
        A ``(pntIDDict, wireIDDict)`` tuple. ``pntIDDict`` maps a point ID
        to its three float coordinates; ``wireIDDict`` maps a wire ID to
        its list of point IDs (four for ``Rect``, three for ``Tria``).

    Raises:
        ValueError: If a numeric column of a recognised record is empty or
            cannot be converted.
    """
    pntIDDict = {}
    wireIDDict = {}

    # 'with' closes the file even when one of the conversions below raises.
    with open(strFile, 'r') as f:
        for strTemp in f:
            # Remove only the line terminator. Chopping the last character
            # unconditionally would eat real data on a final line that has
            # no trailing newline.
            strTemp = strTemp.rstrip('\r\n')
            keyword = strTemp[:4]

            if strTemp[:3] == 'Pnt':
                pntID = int(strTemp[8:16])
                pntIDDict[pntID] = [float(strTemp[start:start + 8])
                                    for start in (16, 24, 32)]
            elif keyword == 'Rect' or keyword == 'Tria':
                wireID = int(strTemp[8:16])
                # A rectangle references four points, a triangle three.
                starts = (16, 24, 32, 40) if keyword == 'Rect' else (16, 24, 32)
                wireIDDict[wireID] = [int(strTemp[start:start + 8])
                                      for start in starts]

    return pntIDDict, wireIDDict

# Script entry point: parse the sample file and print both dictionaries.
if __name__ == '__main__':

    # NOTE(review): neither initialisation below is read before the call
    # rebinds wireIDDict; the points are stored under the name ntIDDict,
    # which looks like a typo for pntIDDict -- confirm.
    pntIDDict = {}

    wireIDDict = {}

    ntIDDict,wireIDDict = read_file_data ("c:\\Sample1.txt")

    # Python 2 print statement.
    print ntIDDict,wireIDDict

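It is easiest to iterate over a file with a 'for ... in' loop.
To cope with a varying number of spaces (or tabs) between fields, splitting each line into a list with split() is safer than fixed-position slicing.
This technique is more likely to raise an IndexError (for example on a short line), but it is also more likely to read the values out of the file correctly: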

Expand|Select|Wrap|Line Numbers

 def read_file_data(strFile):
     """Parse a whitespace-delimited geometry file into two dictionaries.

     Each non-blank line is split on runs of whitespace (spaces or tabs).
     The first token selects the record type (``Pnt``, ``Rect`` or
     ``Tria``), the second is the integer ID and the rest is the payload.
     Lines with any other first token are ignored.

     Args:
         strFile: Path of the text file to read.

     Returns:
         A ``(pntIDDict, wireIDDict)`` tuple. ``pntIDDict`` maps a point ID
         to its three float coordinates; ``wireIDDict`` maps a wire ID to
         its list of point IDs (four for ``Rect``, three for ``Tria``).

     Raises:
         IndexError: If a recognised record has too few fields.
         ValueError: If a field cannot be converted to int/float.
     """
     pntIDDict = {}
     wireIDDict = {}

     # 'with' closes the file even when a conversion below raises.
     with open(strFile, 'r') as f:
         for strTemp in f:
             tmpList = strTemp.split()
             if not tmpList:
                 # Blank line: nothing to index, so skip it instead of
                 # failing on tmpList[0].
                 continue

             keyword = tmpList[0]
             if keyword == 'Pnt':
                 pntID = int(tmpList[1])
                 pntIDDict[pntID] = [float(tmpList[i]) for i in (2, 3, 4)]
             elif keyword == 'Rect' or keyword == 'Tria':
                 wireID = int(tmpList[1])
                 # Decide on the parsed keyword, not on the raw line, so an
                 # indented 'Rect' record still gets its fourth point.
                 nPoints = 4 if keyword == 'Rect' else 3
                 wireIDDict[wireID] = [int(tmpList[i])
                                       for i in range(2, 2 + nPoints)]

     return pntIDDict, wireIDDict
 
# Script entry point: parse "text1.txt" and print both dictionaries.
if __name__ == '__main__':
 
    # NOTE(review): neither initialisation below is read before the call
    # rebinds wireIDDict; the points are stored under the name ntIDDict,
    # which looks like a typo for pntIDDict -- confirm.
    pntIDDict = {}

    wireIDDict = {}

    ntIDDict,wireIDDict = read_file_data ("text1.txt")

    # Python 2 print statement.
    print ntIDDict,wireIDDict

You should also guard against ValueError from float() and int() by wrapping the conversions in try blocks.

Mar 18 '07 #2

psbasha

440

256MB

Thanks for the suggestion.

Which place I have to place the "try" and "catch" blocks

-PSB

Mar 18 '07 #3

psbasha

440

256MB

BV, any comments from your side on reading the above input file data? Is it possible to reduce the number of lines of code and read the data in a more precise way?

Thanks
PSB

Mar 18 '07 #4

bartonc

6,596

Expert 4TB

Thanks for the suggestion.

Which place I have to place the "try" and "catch" blocks

-PSB

If you don't care which field is wrong (just want to handle errors gracefully) then wrap all 3 (or 4) conversions in a single block:

Expand|Select|Wrap|Line Numbers

 
>>> float('abc')

  File "<console>", line 1, in ?

''' exceptions.ValueError : invalid literal for float(): abc '''
 
>>> try:

...     float('abc')

... except ValueError: # Try not to use 'naked' excepts EVER

...     print "not a float"

...     

not a float

>>>

The same goes for the ints.

Mar 18 '07 #5

bvdet

2,851

Expert Mod 2GB

BV, any comments from your side on reading the above input file data? Is it possible to reduce the number of lines of code and read the data in a more precise way?

Thanks
PSB

Shameless! You know I'm a sucker for file parsing problems :)

Use the convert data function I showed you. Initialize your dictionaries. Read all the lines from the file into a list. No file object is created. Iterate on the list. Create a word list from the line with a list comprehension using strip() and split(' '), skipping the blank strings. Check for keywords in the word list to decide which dictionary to add to using another list comprehension. If the data conversion fails, you have a string.

Expand|Select|Wrap|Line Numbers

 def read_file_data(f):
     """Read point and wire records from a whitespace-delimited text file.

     Each line is lower-cased and split into words. A line containing
     ``rect`` or ``tria`` is stored in the wire dictionary, a line
     containing ``pnt`` in the point dictionary; in both cases the second
     word is the key and the remaining words are the values. Other lines
     are ignored.

     ``convert_data`` is defined outside this snippet; per the accompanying
     post it presumably returns a number when the text converts and the
     original string otherwise -- confirm against its definition.

     Args:
         f: Path of the file to read.

     Returns:
         A ``(ptDict, wireDict)`` tuple of dictionaries.
     """
     ptDict = {}
     wireDict = {}

     # The context manager closes the handle deterministically; a bare
     # open(f).readlines() leaves that to the garbage collector.
     with open(f) as fh:
         for line in fh:
             # split() with no argument breaks on any run of whitespace, so
             # tab-separated fields are handled as well as spaces.
             lineList = [x.lower() for x in line.split()]
             if 'rect' in lineList or 'tria' in lineList:
                 wireDict[convert_data(lineList[1])] = [convert_data(x) for x in lineList[2:]]
             elif 'pnt' in lineList:
                 ptDict[convert_data(lineList[1])] = [convert_data(x) for x in lineList[2:]]

     return ptDict,wireDict

Mar 19 '07 #6

psbasha

440

256MB

Shameless! You know I'm a sucker for file parsing problems :)

Use the convert data function I showed you. Initialize your dictionaries. Read all the lines from the file into a list. No file object is created. Iterate on the list. Create a word list from the line with a list comprehension using strip() and split(' '), skipping the blank strings. Check for keywords in the word list to decide which dictionary to add to using another list comprehension. If the data conversion fails, you have a string.

Expand|Select|Wrap|Line Numbers

def read_file_data(f):
    """Read point and wire records from a whitespace-delimited text file.

    Each line is lower-cased and split into words. A line containing
    ``rect`` or ``tria`` is stored in the wire dictionary, a line
    containing ``pnt`` in the point dictionary; in both cases the second
    word is the key and the remaining words are the values. Other lines
    are ignored.

    ``convert_data`` is defined outside this snippet; per the accompanying
    post it presumably returns a number when the text converts and the
    original string otherwise -- confirm against its definition.

    Args:
        f: Path of the file to read.

    Returns:
        A ``(ptDict, wireDict)`` tuple of dictionaries.
    """
    ptDict = {}
    wireDict = {}

    # The context manager closes the handle deterministically; a bare
    # open(f).readlines() leaves that to the garbage collector.
    with open(f) as fh:
        for line in fh:
            # split() with no argument breaks on any run of whitespace, so
            # tab-separated fields are handled as well as spaces.
            lineList = [x.lower() for x in line.split()]
            if 'rect' in lineList or 'tria' in lineList:
                wireDict[convert_data(lineList[1])] = [convert_data(x) for x in lineList[2:]]
            elif 'pnt' in lineList:
                ptDict[convert_data(lineList[1])] = [convert_data(x) for x in lineList[2:]]

    return ptDict,wireDict

BV,

If the Point and Wire IDs are 8-digit numbers, then I am not able to get the details with the above piece of code, since there are no spaces between the data fields.

How to resolve this issue?

Expand|Select|Wrap|Line Numbers

 Sample.txt

Rect    1000000010000000200000007000000060000000

Rect    2000000020000000300000008000000070000000

Rect    3000000030000000400000009000000080000000

Tria     40000000400000005000000090000000

Pnt     100000000.      0.      0.

Pnt     200000005.      0.      0.

Pnt     3000000010.     0.      0.

Pnt     4000000015.     0.      0.

Pnt     5000000020.     0.      0.

Pnt     600000000.      5.      0.

Pnt     700000005.      5.      0.

Pnt     8000000010.     5.      0.

Pnt     9000000015.     5.      0.

Thanks in advance
PSB

Mar 19 '07 #7

bvdet

2,851

Expert Mod 2GB

BV,

If the Point and Wire IDs are 8-digit numbers, then I am not able to get the details with the above piece of code, since there are no spaces between the data fields.

How to resolve this issue?

Expand|Select|Wrap|Line Numbers

Sample.txt

Rect    1000000010000000200000007000000060000000

Rect    2000000020000000300000008000000070000000

Rect    3000000030000000400000009000000080000000

Tria     40000000400000005000000090000000

Pnt     100000000.      0.      0.

Pnt     200000005.      0.      0.

Pnt     3000000010.     0.      0.

Pnt     4000000015.     0.      0.

Pnt     5000000020.     0.      0.

Pnt     600000000.      5.      0.

Pnt     700000005.      5.      0.

Pnt     8000000010.     5.      0.

Pnt     9000000015.     5.      0.

Thanks in advance
PSB

Expand|Select|Wrap|Line Numbers

 import re

>>> lineList = [x.lower() for x in re.split('[ 0]', line.strip()) if x != '']

>>> lineList

['rect', '1', '1', '2', '7', '6']

>>>

Mar 19 '07 #8

psbasha

440

256MB

Expand|Select|Wrap|Line Numbers

import re

>>> lineList = [x.lower() for x in re.split('[ 0]', line.strip()) if x != '']

>>> lineList

['rect', '1', '1', '2', '7', '6']

>>>

Sorry BV,the numbers will not be zero for all.It will be 8-digit number and maximum value will be 99999999

Expand|Select|Wrap|Line Numbers

 Sample.txt

Rect    1000007110000101200000227000000060000055

Rect    2000009220000105300000048000400071111167

Rect    3000008830000208400000029000500080003000

Tria     40000094400003045000007190000600

Pnt      100100123.      0.      0.

Pnt      200200035.      0.      0.

Pnt      3040000010.     0.      0.

Pnt      4000000015.     0.      0.

Pnt      5005000020.     0.      0.

Pnt      600008000.      5.      0.

Pnt      700000005.      5.      0.

Pnt      8000900010.     5.      0.

Pnt      9000900015.     5.      0.

Mar 19 '07 #9

bvdet

2,851

Expert Mod 2GB

Sorry BV,the numbers will not be zero for all.It will be 8-digit number and maximum value will be 99999999

Expand|Select|Wrap|Line Numbers

Sample.txt

Rect    1000007110000101200000227000000060000055

Rect    2000009220000105300000048000400071111167

Rect    3000008830000208400000029000500080003000

Tria     40000094400003045000007190000600

Pnt      100100123.      0.      0.

Pnt      200200035.      0.      0.

Pnt      3040000010.     0.      0.

Pnt      4000000015.     0.      0.

Pnt      5005000020.     0.      0.

Pnt      600008000.      5.      0.

Pnt      700000005.      5.      0.

Pnt      8000900010.     5.      0.

Pnt      9000900015.     5.      0.

You have lost me now. What numbers do you want to extract from 'Rect' and 'Tria'? Your data files need to be in a consistent format with predictable delimiters to parse in this manner.

Mar 19 '07 #10

psbasha

440

256MB

You have lost me now. What numbers do you want to extract from 'Rect' and 'Tria'? Your data files need to be in a consistent format with predictable delimiters to parse in this manner.

The output should look like this
o/p should be :

WireDict
{10000071:[110000101,20000022,70000000,60000055],
20000092:[2000010,53000000,480004000,71111167],
3000008:[830000208,40000002,90005000,80003000],
40000094:[40000304,50000071,90000600]}

pntDict
{ 100100123:[0.0,0.0,0.0],20020003:[5.0,0.0,0.0],30400000:[10.0,0.0,0.0],
40000000:[15.0,0.0,0.0],50050000:[20.0,0.0,0.0],60000800:[0.0,5.0,0.0],70000000:[5.0,5.0,0.],
80009000:[10.0,5.0,0.0],90009000:[15.0,5.0,0.]
}

Mar 19 '07 #11

psbasha

440

256MB

The output should look like this
o/p should be :

WireDict
{10000071:[110000101,20000022,70000000,60000055],
20000092:[2000010,53000000,480004000,71111167],
3000008:[830000208,40000002,90005000,80003000],
40000094:[40000304,50000071,90000600]}

pntDict
{ 100100123:[0.0,0.0,0.0],20020003:[5.0,0.0,0.0],30400000:[10.0,0.0,0.0],
40000000:[15.0,0.0,0.0],50050000:[20.0,0.0,0.0],60000800:[0.0,5.0,0.0],70000000:[5.0,5.0,0.],
80009000:[10.0,5.0,0.0],90009000:[15.0,5.0,0.]
}

We have to break at every 8-fields of the number or string.So How can I split it ,without using slicing mechanism.

Mar 19 '07 #12

bvdet

2,851

Expert Mod 2GB

We have to break at every 8-fields of the number or string.So How can I split it ,without using slicing mechanism.

Why not use slices? If your data will be in 8 character fields, it seems to me that would be a good method.

Expand|Select|Wrap|Line Numbers

 def each8(item):
     """Yield successive 8-character fields of *item*, left to right.

     The last field is yielded even when it is shorter than eight
     characters (for example when trailing padding was stripped), so no
     data is silently dropped. An empty *item* yields nothing.

     Args:
         item: The string (or other sliceable sequence) to cut up.

     Yields:
         Consecutive slices of *item*, each at most eight items long.
     """
     # Stepping the start offset avoids the integer-division field count,
     # which ignored a trailing partial field.
     for start in range(0, len(item), 8):
         yield item[start:start + 8]
 
def read_file_data(f):
    """Read point and wire records whose IDs occupy 8-character fields.

    Each line is stripped, lower-cased and split once at the first space
    into a keyword and the remaining text. The first eight characters of
    that text are the record ID. For ``rect``/``tria`` records the rest is
    cut into 8-character fields with ``each8``; for ``pnt`` records the
    rest is split on whitespace. Other lines are ignored.

    ``convert_data`` is defined outside this snippet; presumably it turns
    numeric text into a number -- confirm against its definition.

    Args:
        f: Path of the file to read.

    Returns:
        A ``(ptDict, wireDict)`` tuple of dictionaries, matching the other
        versions of this function in the thread.
    """
    ptDict = {}
    wireDict = {}

    # Context manager closes the handle that open(f).readlines() leaked.
    with open(f) as fh:
        for line in fh:
            lineList = [x.lower().strip() for x in line.strip().split(' ', 1) if x != '']
            if len(lineList) < 2:
                # Blank line or a keyword with no data: there is nothing
                # to slice, so skip it instead of raising IndexError.
                continue

            data = [lineList[0], lineList[1][:8], lineList[1][8:]]
            if 'rect' in lineList or 'tria' in lineList:
                wireDict[convert_data(data[1])] = [convert_data(x) for x in each8(data[2])]
            elif 'pnt' in lineList:
                ptDict[convert_data(data[1])] = [convert_data(x) for x in data[2].split() if x != '']

    # The dictionaries were built but never handed back to the caller.
    return ptDict, wireDict

Expand|Select|Wrap|Line Numbers

 import re

..................................

    for line in fList:

        lineList = [x.lower().strip() for x in line.strip().split(' ', 1) if x != '']
 
        if 'rect' in lineList or 'tria' in lineList:

            wireDict[convert_data(lineList[1][:8])] = \

                [convert_data(x) for x in re.findall(r"\d{8}", lineList[1])]
 
        elif 'pnt' in lineList:

            ptDict[convert_data(lineList[1][:8])] = \

                [convert_data(y.strip()) for y in [x for x in re.split(r"\d{8}", \

                    lineList[1]) if x != ''][0].split(' ') if y != '']

Take your choice. I'm no expert at regex!

Mar 19 '07 #13

bvdet

2,851

Expert Mod 2GB

I like this version of each8() better:

Expand|Select|Wrap|Line Numbers

 def each8(s):
     """Yield *s* in consecutive slices of at most eight items.

     The final slice is shorter when ``len(s)`` is not a multiple of
     eight; an empty *s* yields nothing.
     """
     for offset in range(0, len(s), 8):
         yield s[offset:offset + 8]

Mar 19 '07 #14

psbasha

440

256MB

Expand|Select|Wrap|Line Numbers

 Sample1.txt
 
Sample.txt

Pnt      100100123.      0.      0.

Pnt      200200035.      0.      0.

Pnt      3040000010.     0.      0.

Pnt      4000000015.     0.      0.

Pnt      5005000020.     0.      0.

Pnt      600008000.      5.      0.

Pnt      700000005.      5.      0.

Pnt      8000900010.     5.      0.

Pnt      9000900015.     5.      0.

Expand|Select|Wrap|Line Numbers

 Sample2.txt

Pnt    *         3280311       0          1.36567432E+03 -3.71226532E+02

*         2.01031464E+02       0

Pnt     *         3280502       0          1.25433850E+03 -1.42613068E+02

*         1.80202667E+02       0

Pnt     *         3280503       0          1.27057288E+03 -1.75843582E+02

*         1.84236084E+02       0

Pnt    *         3280504       0          1.28286145E+03 -2.01004501E+02

*         1.87218460E+02       0

Expand|Select|Wrap|Line Numbers

 Sample3.txt

Pnt*     10260209                       1156.26599      313.992828

*       155.018463

Pnt*     10270106                       1097.15002      250.676315

*       140.789337

Pnt*     10270107                       1115.47864      271.83374

*       144.698837

Mar 21 '07 #15

psbasha

440

256MB

Expand|Select|Wrap|Line Numbers

Sample1.txt

Sample.txt

Pnt      100100123.      0.      0.

Pnt      200200035.      0.      0.

Pnt      3040000010.     0.      0.

Pnt      4000000015.     0.      0.

Pnt      5005000020.     0.      0.

Pnt      600008000.      5.      0.

Pnt      700000005.      5.      0.

Pnt      8000900010.     5.      0.

Pnt      9000900015.     5.      0.

Expand|Select|Wrap|Line Numbers

Sample2.txt

Pnt    *         3280311       0          1.36567432E+03 -3.71226532E+02

*         2.01031464E+02       0

Pnt     *         3280502       0          1.25433850E+03 -1.42613068E+02

*         1.80202667E+02       0

Pnt     *         3280503       0          1.27057288E+03 -1.75843582E+02

*         1.84236084E+02       0

Pnt    *         3280504       0          1.28286145E+03 -2.01004501E+02

*         1.87218460E+02       0

Expand|Select|Wrap|Line Numbers

Sample3.txt

Pnt*     10260209                       1156.26599      313.992828

*       155.018463

Pnt*     10270106                       1097.15002      250.676315

*       140.789337

Pnt*     10270107                       1115.47864      271.83374

*       144.698837

I am getting inconsistent input data from different software packages, but I have to write generic Python code that can read any of the input data formats shown in the above examples.

Mar 21 '07 #16

psbasha

440

256MB

I am getting inconsistent input data from different software packages, but I have to write generic Python code that can read any of the input data formats shown in the above examples.

Could any body help me in resolving this issue of handling the generic format data.

Thanks in advance
PSB

Mar 21 '07 #17

bvdet

2,851

Expert Mod 2GB

Expand|Select|Wrap|Line Numbers

Sample1.txt

Sample.txt

Pnt      100100123.      0.      0.

Pnt      200200035.      0.      0.

Pnt      3040000010.     0.      0.

Pnt      4000000015.     0.      0.

Pnt      5005000020.     0.      0.

Pnt      600008000.      5.      0.

Pnt      700000005.      5.      0.

Pnt      8000900010.     5.      0.

Pnt      9000900015.     5.      0.

Expand|Select|Wrap|Line Numbers

Sample2.txt

Pnt    *         3280311       0          1.36567432E+03 -3.71226532E+02

*         2.01031464E+02       0

Pnt     *         3280502       0          1.25433850E+03 -1.42613068E+02

*         1.80202667E+02       0

Pnt     *         3280503       0          1.27057288E+03 -1.75843582E+02

*         1.84236084E+02       0

Pnt    *         3280504       0          1.28286145E+03 -2.01004501E+02

*         1.87218460E+02       0

Expand|Select|Wrap|Line Numbers

Sample3.txt

Pnt*     10260209                       1156.26599      313.992828

*       155.018463

Pnt*     10270106                       1097.15002      250.676315

*       140.789337

Pnt*     10270107                       1115.47864      271.83374

*       144.698837

I think we have taken care of Sample1, have we not? Can you explain Sample2 and Sample3 format? Is the point data really on two separate lines? What is the significance of the asterisk? Why are there zeros mixed in with the numbers in scientific notation? Help us help you.

Mar 22 '07 #18

psbasha

440

256MB

I think we have taken care of Sample1, have we not? Can you explain Sample2 and Sample3 format? Is the point data really on two separate lines? What is the significance of the asterisk? Why are there zeros mixed in with the numbers in scientific notation? Help us help you.

a) "I think we have taken care of Sample1, have we not?"

Yes

b) "Can you explain Sample2 and Sample3 format?"

This format is some what different with Sample-1

The X, Y, Z coordinates are not written on a single line; they are split across two lines. Each string/number occupies a 16-character field.
The maximum length of a line is 79 characters.
c)Is the point data really on two separate lines?
Yes
d)What is the significance of the asterisk?
The "*" in the second line may be used as continuation of the fields

e) Why are there zeros mixed in with the numbers in scientific notation?
Pnt * 3280504 0 1.28286145E+03 -2.01004501E+02
* 1.87218460E+02 0
Currently I dont need of this zero's.It is also one of the ID which may be refering to some number later

Thanks in advacne
PSB

Mar 22 '07 #19

bvdet

2,851

Expert Mod 2GB

a) "I think we have taken care of Sample1, have we not?"

Yes

b) "Can you explain Sample2 and Sample3 format?"

This format is some what different with Sample-1

The X, Y, Z coordinates are not written on a single line; they are split across two lines. Each string/number occupies a 16-character field.
The maximum length of a line is 79 characters.
c)Is the point data really on two separate lines?
Yes
d)What is the significance of the asterisk?
The "*" in the second line may be used as continuation of the fields

e) Why are there zeros mixed in with the numbers in scientific notation?
Pnt * 3280504 0 1.28286145E+03 -2.01004501E+02
* 1.87218460E+02 0
Currently I dont need of this zero's.It is also one of the ID which may be refering to some number later

Thanks in advacne
PSB

Here's one way of adding the data in this format to your point dictionary:

Expand|Select|Wrap|Line Numbers

 >>> patt = re.compile(r'''\d+\.\d+E\+\d+|

... \d+\.\d+E\+\d+|

... -\d+\.\d+E\+\d+|

... -\d+\.\d+E-\d+|

... \d+\.\d+E-\d+|

... \d+\.\d+|

... -\d+\.\d+|

... \d+''', re.X

... )

>>> patt

<_sre.SRE_Pattern object at 0x00DE68D0>

>>> s = 'Pnt    *         3280311       0          +1.36567432E+03 -3.71226532E+02'

>>> re.findall(patt,s)

['3280311', '0', '1.36567432E+03', '-3.71226532E+02']

>>> dd = {}

>>> lst = re.findall(patt,s)

>>> dd[int(lst[0])] = [float(i) for i in lst[1:] if i != '0']

>>> dd

{3280311: [1365.6743200000001, -371.22653200000002]}

>>> s1 = '*       155.018463'

>>> lst1 = re.findall(patt,s1)

>>> dd[int(lst[0])] = dd[int(lst[0])]+[float(i) for i in lst1 if i != '0']

>>> dd

{3280311: [1365.6743200000001, -371.22653200000002, 155.018463]}

>>>

You can add an elif for the word 'pnt' in combination with '*'. Whoever designed the output for this data ought to be ..............

Mar 22 '07 #20

psbasha

440

256MB

Hi BV,

Is there any other simple approach available?.It looks like we have to do the formating of the values for readiing it.

Thanks
PSB

Mar 23 '07 #21

bvdet

2,851

Expert Mod 2GB

Hi BV,

Is there any other simple approach available?.It looks like we have to do the formating of the values for readiing it.

Thanks
PSB

The code I showed you works. I guess you could do splits, strips, slices. etc., but I don't think it would be simpler. After incorporating that code into the other code I showed you, you should get output like this:

Expand|Select|Wrap|Line Numbers

 >>> Point dictionary:

30400000 = [10.0, 0.0, 0.0]

40000000 = [15.0, 0.0, 0.0]

2 = [2, 5.0, 0.0, 0.0]

3 = [3, 10.0, 0.0, 0.0]

4 = [4, 15.0, 0.0, 0.0]

5 = [5, 20.0, 0.0, 0.0]

6 = [6, 0.0, 5.0, 0.0]

1 = [1, 0.0, 0.0, 0.0]

8 = [8, 10.0, 5.0, 0.0]

9 = [9, 15.0, 5.0, 0.0]

10270106 = [1097.15002, 250.67631499999999, 140.78933699999999]

10270107 = [1115.47864, 271.83373999999998, 144.698837]

10010012 = [3.0, 0.0, 0.0]

60000800 = [0.0, 5.0, 0.0]

20020003 = [5.0, 0.0, 0.0]

10260209 = [1156.2659900000001, 313.99282799999997, 155.018463]

80009000 = [10.0, 5.0, 0.0]

7 = [7, 5.0, 5.0, 0.0]

3280311 = [1365.6743200000001, -371.22653200000002, 201.031464]

50050000 = [20.0, 0.0, 0.0]

90009000 = [15.0, 5.0, 0.0]

70000000 = [5.0, 5.0, 0.0]

3280502 = [1254.3385000000001, -142.613068, 180.20266699999999]

3280503 = [1270.5728799999999, -175.843582, 184.23608400000001]

3280504 = [1282.8614500000001, -201.004501, 187.21845999999999]
 
Wire dictionary:

10000000 = [10000000, 20000000, 70000000]

20000000 = [20000000, 30000000, 80000000]

30000000 = [30000000, 40000000, 90000000]

10000071 = [10000101, 20000022, 70000000, 60000055]

40000000 = [40000000, 50000000]

30000088 = [30000208, 40000002, 90005000, 80003000]

20000092 = [20000105, 30000004, 80004000, 71111167]

40000094 = [40000304, 50000071, 90000600]

from data like this:

Expand|Select|Wrap|Line Numbers

 Rect    1000007110000101200000227000000060000055

Rect    2000009220000105300000048000400071111167

Rect    3000008830000208400000029000500080003000

Tria     40000094400003045000007190000600

Pnt      100100123.      0.      0.

Pnt      200200035.      0.      0.

Pnt      3040000010.     0.      0.

Pnt      4000000015.     0.      0.

Pnt      5005000020.     0.      0.

Pnt      600008000.      5.      0.

Pnt      700000005.      5.      0.

Pnt      8000900010.     5.      0.

Pnt      9000900015.     5.      0.

Pnt      100100123.      0.      0.

Pnt      200200035.      0.      0.

Pnt      3040000010.     0.      0.

Pnt      4000000015.     0.      0.

Pnt      5005000020.     0.      0.

Pnt      600008000.      5.      0.

Pnt      700000005.      5.      0.

Pnt      8000900010.     5.      0.

Pnt      9000900015.     5.      0.

Rect    100000001000000020000000700000006

Rect    200000002000000030000000800000007

Rect    300000003000000040000000900000008

Tria    4000000040000000500000009

Pnt     1       0.      0.      0.

Pnt     2       5.      0.      0.

Pnt     3       10.     0.      0.

Pnt     4       15.     0.      0.

Pnt     5       20.     0.      0.

Pnt     6       0.      5.      0.

Pnt     7       5.      5.      0.

Pnt     8       10.     5.      0.

Pnt     9       15.     5.      0.
 
Pnt    *         3280311       0          1.36567432E+03 -3.71226532E+02

*         2.01031464E+02       0

Pnt     *         3280502       0          1.25433850E+03 -1.42613068E+02

*         1.80202667E+02       0

Pnt     *         3280503       0          1.27057288E+03 -1.75843582E+02

*         1.84236084E+02       0

Pnt    *         3280504       0          1.28286145E+03 -2.01004501E+02

*         1.87218460E+02       0
 
Pnt*     10260209                       1156.26599      313.992828

*       155.018463

Pnt*     10270106                       1097.15002      250.676315

*       140.789337

Pnt*     10270107                       1115.47864      271.83374

*       144.698837

The data files were not formatted in the best manner for reading.

Mar 23 '07 #22

bvdet

2,851

Expert Mod 2GB

Maybe this will be easier to follow:

Expand|Select|Wrap|Line Numbers

 def read_file_data(f):

    ptDict = {}

    wireDict = {}

    fList = open(f).readlines()
 
    in_pnt = False

    patt = re.compile(r'''\d+\.\d+E\+\d+|           # engineering notation ++

                          -\d+\.\d+E\+\d+|          # engineering notation -+

                          -\d+\.\d+E-\d+|           # engineering notation --

                          \d+\.\d+E-\d+|            # engineering notation +-

                          \d+\.\d+|                 # positive float format

                          -\d+\.\d+|                # negative float format

                          \d+                       # positive integer

                          ''', re.X

                      )
 
    for line in fList:

        lineList = [x.lower().strip() for x in line.strip().split(' ', 1) if x != '']

Mar 24 '07 #23

psbasha

440

256MB

Expand|Select|Wrap|Line Numbers

 Sample.txt

$$$$$

START

COLOR RED

LINETYPE SOLID

END

$$$$$$$

PLine    1        6      1.5     9.375   .001    .001

$ Line Details

Line*    1               1                1              2

*        .002952         .992547         .121827

$

Rect     2        1       2       3       7       6

Rect     3        1       3       4       8       7

PRect*   4               11              15              16

*        10              11              0.3

Rect*    4               1               5               6

*        10              11              0.

Othr*    1               1               5               6

*        10              11              0.              0.

*        10              11              0.              1.0

$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

Tria     5        1       7       2       11

$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

Point    1               0.0     0.0     0.0

Point    2               1.0     0.0     0.0

Point    3               2.0     0.0     0.0

Point    4               3.0     0.0     0.0

Point    5               0.0     1.0     0.0

Point    6               1.0     1.0     0.0

Point    7               2.0     1.0     0.0

Point    8               4.0     1.0     0.0

Point*   9                              0.0             2.0

*          0.0

Point  *3280504         0               1.28286145E+03  1.28286145E+03

*       -2.01004501E+02

$

END

Expand|Select|Wrap|Line Numbers

 Sample.py
 
def read_file_data(strFile):
    """Read point and curve cards from a fixed-width text file and print them.

    Short-format cards use 8-character fields.  Large-format cards carry a
    ``*`` in the 8-character keyword field, use 16-character fields and are
    completed by a following line that starts with ``*``.

    Four dictionaries are built and printed, in this order:
      * point ID -> [x, y, z]
      * curve ID -> point IDs, for every Line/Rect/Tria card
      * the same mapping restricted to Line cards
      * the same mapping restricted to Rect/Tria cards

    Lines that match none of the recognised keywords are ignored.

    Args:
        strFile: path of the text file to read.

    Raises:
        ValueError: if a field of a recognised card is not a valid number.
    """
    # Read everything up front so the handle is released even when a card
    # further down fails to parse.
    with open(strFile, 'r') as f:
        strTemp = f.readlines()

    pointID = 0
    curvetID = 0
    pointIDDict = {}
    pointList = []
    coordList = []
    curveIDDict = {}
    curveOneDimIDDict = {}
    curveTwoDimIDPointIDDict = {}

    largeFieldFlag = False           # a Point* header is waiting for its '*' line
    curveCardLargeFieldFlag = False  # a Rect*/Tria*/Line* header is waiting for its '*' line
    bTriaFlag = False
    bRectFlag = False
    bOnlyPointCoord = True           # a bare '*' line is routed to the point branch
    b1DCurveFlag = False

    for line in strTemp:
        # A purely alphabetic keyword field (no '*') marks a short-format card.
        bShortKeyword = line[:8].strip().isalpha()

        if line.startswith('Point') or (line.startswith('*') and bOnlyPointCoord):
            if line.startswith('Point') and bShortKeyword:
                pointID = int(line[8:16])
                coordList.append(float(line[24:32]))
                coordList.append(float(line[32:40]))
                coordList.append(float(line[40:48]))
                largeFieldFlag = False
            elif line.startswith(('Point*', 'Point  *')):
                # Large-format header: ID plus the first two coordinates.
                pointID = int(line[8:24])
                coordList.append(float(line[40:56]))
                coordList.append(float(line[56:72]))
                largeFieldFlag = True
                bOnlyPointCoord = True
            elif line.startswith('*') and largeFieldFlag:
                # Continuation line: third coordinate.
                coordList.append(float(line[8:24]))
                largeFieldFlag = False

            if pointID and not largeFieldFlag:
                pointIDDict[pointID] = coordList
                pointID = 0
                coordList = []

            bOnlyPointCoord = True

        elif (line.startswith(('Rect', 'Tria', 'Line*', '*')) or
              (line.startswith('Line') and bShortKeyword)):

            # 'Tria  ' is accepted here as well: without it short-format Tria
            # cards never reached the Tria handling below and were dropped.
            if (line.startswith(('Rect  ', 'Tria  ')) or
                    (line.startswith('Line') and bShortKeyword)):
                curvetID = int(line[8:16])
                pointList.append(int(line[24:32]))
                pointList.append(int(line[32:40]))
                b1DCurveFlag = True
                if line[:4] == 'Tria' or line[:4] == 'Rect':
                    pointList.append(int(line[40:48]))
                    b1DCurveFlag = False
                    if line[:4] == 'Rect':
                        pointList.append(int(line[48:56]))
                curveCardLargeFieldFlag = False

            elif line.startswith(('Rect*', 'Tria*', 'Line*')):
                # Large-format header: ID plus the first two point IDs.
                curvetID = int(line[8:24])
                pointList.append(int(line[40:56]))
                pointList.append(int(line[56:72]))
                curveCardLargeFieldFlag = True
                bOnlyPointCoord = False
                b1DCurveFlag = True
                if line.startswith('Rect*'):
                    bRectFlag = True
                    bTriaFlag = False
                elif line.startswith('Tria*'):
                    bTriaFlag = True
                    bRectFlag = False

            elif line.startswith('*') and curveCardLargeFieldFlag:
                # Continuation line: remaining point IDs of a Tria*/Rect* card.
                if bTriaFlag or bRectFlag:
                    pointList.append(int(line[8:24]))
                    b1DCurveFlag = False
                    if bRectFlag:
                        pointList.append(int(line[24:40]))
                bTriaFlag = False
                bRectFlag = False
                curveCardLargeFieldFlag = False

            if curvetID and not curveCardLargeFieldFlag:
                # Map the element ID to the point IDs of that element.
                curveIDDict[curvetID] = pointList
                if b1DCurveFlag:
                    curveOneDimIDDict[curvetID] = pointList
                else:
                    curveTwoDimIDPointIDDict[curvetID] = pointList
                b1DCurveFlag = False
                curveCardLargeFieldFlag = False
                bOnlyPointCoord = False
                curvetID = 0
                pointList = []

    # Single-argument print(...) behaves the same under Python 2 and 3.
    print(pointIDDict)
    print(curveIDDict)
    print(curveOneDimIDDict)
    print(curveTwoDimIDPointIDDict)


if __name__ == '__main__':
    read_file_data("C:\\ReadFile\\SampleData.txt")

Above are the sample text file and the sample code that reads it. I would like to avoid using the flags and defining so many variables. Is it possible to use regular expressions to reduce the amount of code?

Thanks
PSB

Dec 25 '07 #24

psbasha

440

256MB

Expand|Select|Wrap|Line Numbers

Sample.txt

$$$$$

START

COLOR RED

LINETYPE SOLID

END

$$$$$$$

PLine    1        6      1.5     9.375   .001    .001

$ Line Details

Line*    1               1                1              2

*        .002952         .992547         .121827

$

Rect     2        1       2       3       7       6

Rect     3        1       3       4       8       7

PRect*   4               11              15              16

*        10              11              0.3

Rect*    4               1               5               6

*        10              11              0.

Othr*    1               1               5               6

*        10              11              0.              0.

*        10              11              0.              1.0

$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

Tria     5        1       7       2       11

$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

Point    1               0.0     0.0     0.0

Point    2               1.0     0.0     0.0

Point    3               2.0     0.0     0.0

Point    4               3.0     0.0     0.0

Point    5               0.0     1.0     0.0

Point    6               1.0     1.0     0.0

Point    7               2.0     1.0     0.0

Point    8               4.0     1.0     0.0

Point*   9                              0.0             2.0

*          0.0

Point  *3280504         0               1.28286145E+03  1.28286145E+03

*       -2.01004501E+02

$

END

Expand|Select|Wrap|Line Numbers

Sample.py

def read_file_data(strFile):
    """Read point and curve cards from a fixed-width text file and print them.

    Short-format cards use 8-character fields.  Large-format cards carry a
    ``*`` in the 8-character keyword field, use 16-character fields and are
    completed by a following line that starts with ``*``.

    Four dictionaries are built and printed, in this order:
      * point ID -> [x, y, z]
      * curve ID -> point IDs, for every Line/Rect/Tria card
      * the same mapping restricted to Line cards
      * the same mapping restricted to Rect/Tria cards

    Lines that match none of the recognised keywords are ignored.

    Args:
        strFile: path of the text file to read.

    Raises:
        ValueError: if a field of a recognised card is not a valid number.
    """
    # Read everything up front so the handle is released even when a card
    # further down fails to parse.
    with open(strFile, 'r') as f:
        strTemp = f.readlines()

    pointID = 0
    curvetID = 0
    pointIDDict = {}
    pointList = []
    coordList = []
    curveIDDict = {}
    curveOneDimIDDict = {}
    curveTwoDimIDPointIDDict = {}

    largeFieldFlag = False           # a Point* header is waiting for its '*' line
    curveCardLargeFieldFlag = False  # a Rect*/Tria*/Line* header is waiting for its '*' line
    bTriaFlag = False
    bRectFlag = False
    bOnlyPointCoord = True           # a bare '*' line is routed to the point branch
    b1DCurveFlag = False

    for line in strTemp:
        # A purely alphabetic keyword field (no '*') marks a short-format card.
        bShortKeyword = line[:8].strip().isalpha()

        if line.startswith('Point') or (line.startswith('*') and bOnlyPointCoord):
            if line.startswith('Point') and bShortKeyword:
                pointID = int(line[8:16])
                coordList.append(float(line[24:32]))
                coordList.append(float(line[32:40]))
                coordList.append(float(line[40:48]))
                largeFieldFlag = False
            elif line.startswith(('Point*', 'Point  *')):
                # Large-format header: ID plus the first two coordinates.
                pointID = int(line[8:24])
                coordList.append(float(line[40:56]))
                coordList.append(float(line[56:72]))
                largeFieldFlag = True
                bOnlyPointCoord = True
            elif line.startswith('*') and largeFieldFlag:
                # Continuation line: third coordinate.
                coordList.append(float(line[8:24]))
                largeFieldFlag = False

            if pointID and not largeFieldFlag:
                pointIDDict[pointID] = coordList
                pointID = 0
                coordList = []

            bOnlyPointCoord = True

        elif (line.startswith(('Rect', 'Tria', 'Line*', '*')) or
              (line.startswith('Line') and bShortKeyword)):

            # 'Tria  ' is accepted here as well: without it short-format Tria
            # cards never reached the Tria handling below and were dropped.
            if (line.startswith(('Rect  ', 'Tria  ')) or
                    (line.startswith('Line') and bShortKeyword)):
                curvetID = int(line[8:16])
                pointList.append(int(line[24:32]))
                pointList.append(int(line[32:40]))
                b1DCurveFlag = True
                if line[:4] == 'Tria' or line[:4] == 'Rect':
                    pointList.append(int(line[40:48]))
                    b1DCurveFlag = False
                    if line[:4] == 'Rect':
                        pointList.append(int(line[48:56]))
                curveCardLargeFieldFlag = False

            elif line.startswith(('Rect*', 'Tria*', 'Line*')):
                # Large-format header: ID plus the first two point IDs.
                curvetID = int(line[8:24])
                pointList.append(int(line[40:56]))
                pointList.append(int(line[56:72]))
                curveCardLargeFieldFlag = True
                bOnlyPointCoord = False
                b1DCurveFlag = True
                if line.startswith('Rect*'):
                    bRectFlag = True
                    bTriaFlag = False
                elif line.startswith('Tria*'):
                    bTriaFlag = True
                    bRectFlag = False

            elif line.startswith('*') and curveCardLargeFieldFlag:
                # Continuation line: remaining point IDs of a Tria*/Rect* card.
                if bTriaFlag or bRectFlag:
                    pointList.append(int(line[8:24]))
                    b1DCurveFlag = False
                    if bRectFlag:
                        pointList.append(int(line[24:40]))
                bTriaFlag = False
                bRectFlag = False
                curveCardLargeFieldFlag = False

            if curvetID and not curveCardLargeFieldFlag:
                # Map the element ID to the point IDs of that element.
                curveIDDict[curvetID] = pointList
                if b1DCurveFlag:
                    curveOneDimIDDict[curvetID] = pointList
                else:
                    curveTwoDimIDPointIDDict[curvetID] = pointList
                b1DCurveFlag = False
                curveCardLargeFieldFlag = False
                bOnlyPointCoord = False
                curvetID = 0
                pointList = []

    # Single-argument print(...) behaves the same under Python 2 and 3.
    print(pointIDDict)
    print(curveIDDict)
    print(curveOneDimIDDict)
    print(curveTwoDimIDPointIDDict)


if __name__ == '__main__':
    read_file_data("C:\\Shakil\\ReadFile\\SampleData.txt")

Above are the sample text file and the sample code that reads it. I would like to avoid using the flags and defining so many variables. Is it possible to use regular expressions to reduce the amount of code?

Thanks
PSB

In some scenarios I have to read following data in the file

PLine 1 6 1.5 9.375 .001 .001
PRect* 4 11 15 16
* 10 11 0.3
Othr* 1 1 5 6
* 10 11 0. 0.
* 10 11 0. 1.0

In Some scenarios the Point data will be defined as below

Point *3280505 0 1.28286145+03 1.28286145-03
* -2.01004501+02

1.28286145+03 is same as 1.28286145E+03
1.28286145-03 is same as 1.28286145E-03

How to handle the above scenarios while reading the file

Thanks
PSB

Dec 25 '07 #25

psbasha

440

256MB

PLine 1 6 1.5 9.375 .001 .001
PRect* 4 11 15 16
* 10 11 0.3
Othr* 1 1 5 6
* 10 11 0. 0.
* 10 11 0. 1.0

I have not written a code for the above Card lines to store the properties of the curves.

In some cases the Point coordinates are represented as shown below

Point *3280505 0 1.28286145+03 1.28286145-03
* -2.01004501+02

1.28286145+03 is same as 1.28286145E+03
1.28286145-03 is same as 1.28286145E-03

Can anybody suggest how to store and print the data?

Thanks
PSB

Dec 25 '07 #26

psbasha

440

256MB

PLine 1 6 1.5 9.375 .001 .001
PRect* 4 11 15 16
* 10 11 0.3
Othr* 1 1 5 6
* 10 11 0. 0.
* 10 11 0. 1.0

I have not written a code for the above Card lines to store the properties of the curves.

In some cases the Point coordinates are represented as shown below

Point *3280505 0 1.28286145+03 1.28286145-03
* -2.01004501+02

1.28286145+03 is same as 1.28286145E+03
1.28286145-03 is same as 1.28286145E-03

Can anybody suggest how to store and print the data?

Thanks
PSB

Any suggestions to the above queries ?

Dec 25 '07 #27

psbasha

440

256MB

Hi BV,

Any suggestions on the above code.

Thanks
PSB

Dec 27 '07 #28

bvdet

2,851

Expert Mod 2GB

Try this:

Expand|Select|Wrap|Line Numbers

 import re
 
def convert_data(s):
    """Convert a numeric token to a number, preferring ``int`` over ``float``.

    Args:
        s: the token to convert, normally a string such as ``'12'`` or
            ``'1.5E+03'``.

    Returns:
        ``int(s)`` if that succeeds, otherwise ``float(s)`` if that succeeds,
        otherwise *s* itself, unchanged.
    """
    for func in (int, float):
        try:
            return func(s)
        except (ValueError, TypeError, OverflowError):
            # Only a failed conversion moves on to the next converter;
            # unrelated errors are no longer silently swallowed.
            continue
    return s
 
# Matches one numeric token of a card line.  The float alternative is tried
# before the integer one so that "1.5" is not split into "1." and "5".
# Negative integers keep their sign, and an exponent is accepted after any
# mantissa form (1.5, 1. or .5).
pattnum = re.compile(r'''
    -?(?:\d+\.\d*|\.\d+)        # mantissa: 1.5, 1. or .5, optional minus sign
    (?:[Ee][+-]?\d+)?           # optional exponent: E+03, E-03, e3
    |
    -?\d+                       # integer, optional minus sign
    ''', re.X)
 
def parseData(fn, *kargs):
    """Collect the numeric fields of every keyword card in a data file.

    Lines starting with ``$`` are skipped.  A card starts on a line that
    begins with one of the keywords in *kargs*.  When that line contains
    ``*`` the card is large-format: the following lines that start with
    ``*`` are continuations and their numbers are appended to the same record.

    Args:
        fn: path of the data file.
        *kargs: card keywords to collect, e.g. ``'Point', 'Rect'``.

    Returns:
        dict mapping each keyword to a list of records; each record is the
        list of numeric tokens found on the card, converted with
        ``convert_data``.
    """
    with open(fn) as f:
        fileList = [item.strip() for item in f if not item.startswith('$')]

    pattkey = re.compile('|'.join([r'\b(%s)' % item for item in kargs]))

    # One independent list of records per keyword.
    masterDict = dict((keyword, []) for keyword in kargs)

    def store(keyword, tokens):
        masterDict[keyword].append([convert_data(item) for item in tokens])

    pendingKey = None   # keyword of the large-format card still being read
    data = []
    for line in fileList:
        if pendingKey is not None:
            if line.startswith('*'):
                data.extend(pattnum.findall(line))
                continue
            # Any other line ends the large-format card.
            store(pendingKey, data)
            pendingKey = None

        m = pattkey.match(line)
        if not m:
            continue
        data = pattnum.findall(line)
        # Test the whole line, not only its first whitespace-separated word:
        # in 'Point  *3280504 ...' the marker is detached from the keyword.
        if '*' in line:
            pendingKey = m.group(0)
        else:
            store(m.group(0), data)

    # A large-format card that is the last thing in the file has no
    # following line to trigger its storage.
    if pendingKey is not None:
        store(pendingKey, data)

    return masterDict
 
# Driver: parse the sample file and print every record grouped by keyword.
fn = 'H:\\TEMP\\temsys\\sample_points8.txt'
keywords = ['Point', 'Othr', 'Rect', 'PRect', 'PLine', 'Line', 'Tria']
dd = parseData(fn, *keywords)
for key in dd:
    # Single-argument print(...) behaves the same under Python 2 and 3.
    print(key)
    for item in dd[key]:
        print('    %s' % item)

Output:

Expand|Select|Wrap|Line Numbers

 >>> Point

    [1, 0.0, 0.0, 0.0]

    [2, 1.0, 0.0, 0.0]

    [3, 2.0, 0.0, 0.0]

    [4, 3.0, 0.0, 0.0]

    [5, 0.0, 1.0, 0.0]

    [6, 1.0, 1.0, 0.0]

    [7, 2.0, 1.0, 0.0]

    [8, 4.0, 1.0, 0.0]

    [9, 0.0, 2.0, 0.0]

    [3280504, 0, 1282.8614500000001, 1282.8614500000001]

PLine

    [1, 6, 1.5, 9.375, 0.001, 0.001]

Tria

    [5, 1, 7, 2, 11]

PRect

    [4, 11, 15, 16, 10, 11, 0.29999999999999999]

Line

    [1, 1, 1, 2, 0.0029520000000000002, 0.99254699999999996, 0.121827]

Rect

    [2, 1, 2, 3, 7, 6]

    [3, 1, 3, 4, 8, 7]

    [4, 1, 5, 6, 10, 11, 0.0]

Othr

    [1, 1, 5, 6, 10, 11, 0.0, 0.0, 10, 11, 0.0, 1.0]

Dec 27 '07 #29

bvdet

2,851

Expert Mod 2GB

I made a few modifications so it would work properly. It probably needs some more work, but I will leave it up to you. Let us know how it turns out.

Expand|Select|Wrap|Line Numbers

 import re
 
def convert_data(s):
    """Convert a numeric token to a number, preferring ``int`` over ``float``.

    Args:
        s: the token to convert, normally a string such as ``'12'`` or
            ``'1.5E+03'``.

    Returns:
        ``int(s)`` if that succeeds, otherwise ``float(s)`` if that succeeds,
        otherwise *s* itself, unchanged.
    """
    for func in (int, float):
        try:
            return func(s)
        except (ValueError, TypeError, OverflowError):
            # Only a failed conversion moves on to the next converter;
            # unrelated errors are no longer silently swallowed.
            continue
    return s
 
# Matches one numeric token of a card line.  The float alternative is tried
# before the integer one so that "1.5" is not split into "1." and "5".
# Negative integers keep their sign, and an exponent is accepted after any
# mantissa form (1.5, 1. or .5).
pattnum = re.compile(r'''
    -?(?:\d+\.\d*|\.\d+)        # mantissa: 1.5, 1. or .5, optional minus sign
    (?:[Ee][+-]?\d+)?           # optional exponent: E+03, E-03, e3
    |
    -?\d+                       # integer, optional minus sign
    ''', re.X)
 
def parseData(fn, *kargs):
    """Collect the numeric fields of every keyword card in a data file.

    Lines starting with ``$`` are skipped.  A card starts on a line that
    begins with one of the keywords in *kargs*.  When that line contains
    ``*`` the card is large-format: the following lines that start with
    ``*`` are continuations and their numbers are appended to the same record.

    Args:
        fn: path of the data file.
        *kargs: card keywords to collect, e.g. ``'Point', 'Rect'``.

    Returns:
        dict mapping each keyword to a list of records; each record is the
        list of numeric tokens found on the card, converted with
        ``convert_data``.
    """
    with open(fn) as f:
        fileList = [item.strip() for item in f if not item.startswith('$')]

    pattkey = re.compile('|'.join([r'\b(%s)' % item for item in kargs]))

    # One independent list of records per keyword.
    masterDict = dict((keyword, []) for keyword in kargs)

    def store(keyword, tokens):
        masterDict[keyword].append([convert_data(item) for item in tokens])

    pendingKey = None   # keyword of the large-format card still being read
    data = []
    for line in fileList:
        if pendingKey is not None:
            if line.startswith('*'):
                data.extend(pattnum.findall(line))
                continue
            # Any other line ends the large-format card.
            store(pendingKey, data)
            pendingKey = None

        m = pattkey.match(line)
        if not m:
            continue
        data = pattnum.findall(line)
        if '*' in line:
            pendingKey = m.group(0)
        else:
            store(m.group(0), data)

    # A large-format card that is the last thing in the file has no
    # following line to trigger its storage.
    if pendingKey is not None:
        store(pendingKey, data)

    return masterDict
 
# Driver: parse the sample file and print every record grouped by keyword.
fn = 'sample.txt'
keywords = ['Point', 'Othr', 'Rect', 'PRect', 'PLine', 'Line', 'Tria']
dd = parseData(fn, *keywords)
for key in dd:
    # Single-argument print(...) behaves the same under Python 2 and 3.
    print(key)
    for item in dd[key]:
        print('    %s' % item)

Dec 28 '07 #30

psbasha

440

256MB

Thanks BV. You are really great; you are very good at regular expressions and file parsing.

If the file contains the Point data as shown below

Expand|Select|Wrap|Line Numbers

 Sample

Point  *3280505         0               1.28286145-03  1.28286145E+03

*       -2.01004501+02

The output should be
[3280505, 0, 0.00128286145, 1282.8614500000001, -201.00450099999998]

But we are getting the output as
[3280505, 0, 1.28286145, 3, 1282.8614500000001, -2.0100450099999998, 2]

How to fix the above exponent data?.

-PSB

Dec 28 '07 #31

bvdet

2,851

Expert Mod 2GB

[Thanks BV..You are really great..you are too good in regular expressions and file parsing

If the file contains the Point data as shown below

Expand|Select|Wrap|Line Numbers

Sample

Point *3280505 0 1.28286145-03 1.28286145E+03

* -2.01004501+02

The output should be
[3280505, 0, 0.00128286145, 1282.8614500000001, -201.00450099999998]

But we are getting the output as
[3280505, 0, 1.28286145, 3, 1282.8614500000001, -2.0100450099999998, 2]

How to fix the above exponent data?.

-PSB

You are welcome. :)

Your data is invalid, because there is no 'E' indicating exponential notation. You will need to correct the data before processing it so it can be converted to a floating point number. This pattern matches the invalid data:

Expand|Select|Wrap|Line Numbers

 # Matches a float whose exponent is written without the 'E'
 # (e.g. 1.28286145+03 or 1.28286145-03), so that such a token can be
 # repaired before the numbers on the line are extracted.
 pattinvalid = re.compile(r'''

                          \d+\.\d+\+\d+|           # invalid eng notation +

                          \d+\.\d+-\d+             # invalid eng notation -

                          ''', re.X

                         )

This code corrects the data:

Expand|Select|Wrap|Line Numbers

 ........if pattinvalid.search(line):

            for item in pattinvalid.findall(line):

                line = line.replace(item, item.replace('-', 'E-').replace('+', 'E+'))

Dec 28 '07 #32

psbasha

440

256MB

Hi BV,

Could you please help me in understanding the below piece of code in simpler way.

fileList = [item.strip() for item in open(fn).readlines()\
if not item.startswith('$')]

I mean that after the 'for' loop and the 'if' condition we are not using ':' to begin the block.
How is it different from the ordinary usage of 'for' and 'if' with ':'? Are both the same, or do we follow the above approach to reduce the lines of code and improve readability? What is this style of writing called in Python?

Can you provide the links for learning the above concepts.

I was trying to add the invalid-data-format code to the main code you provided, but I have not been able to succeed. If I understand the above concept, I hope I can implement the invalid-data logic very easily.
Thanks
PSB

Dec 29 '07 #33

alijannaty52

This is the best way I could find for you. Go through the link; I hope it will be helpful.

BestFileReadingMethod

Dec 29 '07 #34

bvdet

2,851

Expert Mod 2GB

Hi BV,

Could you please help me in understanding the below piece of code in simpler way.

fileList = [item.strip() for item in open(fn).readlines()\
if not item.startswith('$')]

I mean after 'for' loop and 'if' condition we are not using the ':' for the block begin.
How is it different from ordinary 'for' and 'if' with ':' ussage?.Whether both are same or to reduce the lines of code and better readability of code we will follow the above approach.What is the above approach of writing is called in Python?

Can you provide the links for learning the above concepts.

I was trying to implement the invalid data format code in the main code provided by you,but I am not able to succeeded in it.If I understand the above concept I hope I can implement the invalid logic very easily
Thanks
PSB

The code assigned to fileList creates a list as the variable name implies and is called a list comprehension. This list comprehension is equivalent to:

Expand|Select|Wrap|Line Numbers

 f = open(fn)

    fileList = []

    for line in f:

        if not line.startswith('$'):

            fileList.append(line.strip())

    f.close()

To read more about list comprehensions - LINK
For more links, do a web search on 'list comprehension python'.

The full source code for parsing your sample data file:

Expand|Select|Wrap|Line Numbers

 import re
 
def convert_data(s):
    """Convert a numeric token to a number, preferring ``int`` over ``float``.

    Args:
        s: the token to convert, normally a string such as ``'12'`` or
            ``'1.5E+03'``.

    Returns:
        ``int(s)`` if that succeeds, otherwise ``float(s)`` if that succeeds,
        otherwise *s* itself, unchanged.
    """
    for func in (int, float):
        try:
            return func(s)
        except (ValueError, TypeError, OverflowError):
            # Only a failed conversion moves on to the next converter;
            # unrelated errors are no longer silently swallowed.
            continue
    return s
 
# Matches one numeric token of a card line.  The float alternative is tried
# before the integer one so that "1.5" is not split into "1." and "5".
# Negative integers keep their sign, and an exponent is accepted after any
# mantissa form (1.5, 1. or .5).
pattnum = re.compile(r'''
    -?(?:\d+\.\d*|\.\d+)        # mantissa: 1.5, 1. or .5, optional minus sign
    (?:[Ee][+-]?\d+)?           # optional exponent: E+03, E-03, e3
    |
    -?\d+                       # integer, optional minus sign
    ''', re.X)
 
# Matches a float whose exponent is written without the 'E'
# (e.g. 1.28286145+03 or 1.28286145-03), so that such a token can be
# repaired before the numbers on the line are extracted.
pattinvalid = re.compile(r'''

                          \d+\.\d+\+\d+|           # invalid eng notation +

                          \d+\.\d+-\d+             # invalid eng notation -

                          ''', re.X

                         )                          
 
def parseData(fn, *kargs):
    """Collect the numeric fields of every keyword card in a data file.

    Lines starting with ``$`` are skipped.  Before a line is examined, every
    token matched by ``pattinvalid`` (a float whose exponent lacks the ``E``)
    gets the ``E`` inserted in front of the exponent sign.

    A card starts on a line that begins with one of the keywords in *kargs*.
    When that line contains ``*`` the card is large-format: the following
    lines that start with ``*`` are continuations and their numbers are
    appended to the same record.

    Args:
        fn: path of the data file.
        *kargs: card keywords to collect, e.g. ``'Point', 'Rect'``.

    Returns:
        dict mapping each keyword to a list of records; each record is the
        list of numeric tokens found on the card, converted with
        ``convert_data``.
    """
    with open(fn) as f:
        fileList = [item.strip() for item in f if not item.startswith('$')]

    pattkey = re.compile('|'.join([r'\b(%s)' % item for item in kargs]))

    # One independent list of records per keyword.
    masterDict = dict((keyword, []) for keyword in kargs)

    def store(keyword, tokens):
        masterDict[keyword].append([convert_data(item) for item in tokens])

    def addExponentMarker(mo):
        # The matched token holds exactly one sign: the exponent sign.
        return mo.group(0).replace('-', 'E-').replace('+', 'E+')

    pendingKey = None   # keyword of the large-format card still being read
    data = []
    for line in fileList:
        # Repair invalid exponents in a single pass over the line.
        line = pattinvalid.sub(addExponentMarker, line)

        if pendingKey is not None:
            if line.startswith('*'):
                data.extend(pattnum.findall(line))
                continue
            # Any other line ends the large-format card.
            store(pendingKey, data)
            pendingKey = None

        m = pattkey.match(line)
        if not m:
            continue
        data = pattnum.findall(line)
        if '*' in line:
            pendingKey = m.group(0)
        else:
            store(m.group(0), data)

    # A large-format card that is the last thing in the file has no
    # following line to trigger its storage.
    if pendingKey is not None:
        store(pendingKey, data)

    return masterDict
 
if __name__ == '__main__':
    # Parse the sample file and print every record grouped by keyword.
    fn = 'sample_points.txt'
    keywords = ['Point', 'Othr', 'Rect', 'PRect', 'PLine', 'Line', 'Tria']
    dd = parseData(fn, *keywords)
    for key in dd:
        # Single-argument print(...) behaves the same under Python 2 and 3.
        print(key)
        for item in dd[key]:
            print('    %s' % item)
 
''' Output

>>> Point

    [1, 0.0, 0.0, 0.0]

    [2, 1.0, 0.0, 0.0]

    [3, 2.0, 0.0, 0.0]

    [4, -3.0, 0.0, 0.0]

    [5, 0.0, 1.0, 0.0]

    [6, 1.0, 1.0, 0.0]

    [7, 2.0, 1.0, 0.0]

    [8, 4.0, 1.0, 0.0]

    [9, 0.0, -2.0, 0.0]

    [3280504, 0, 1282.8614500000001, 1282.8614500000001, -201.004501]

    [3280606, 0, 0.0069264000650000003, -1282.8614500000001, -10100.4501, -0.014385767359999999]

PLine

    [1, 6, 1.5, 9.375, 0.001, -0.001]

Tria

    [5, 1, 7, 2, 11]

PRect

    [4, 11, 15, 16, 10, 11, 0.29999999999999999]

Line

    [1, 1, 1, 2, 0.0029520000000000002, 0.99254699999999996, 0.121827]

Rect

    [2, 1, 2, 3, 7, 6]

    [3, 1, 3, 4, 8, 7]

    [4, 1, 5, 6, 10, 11, 0.0]

Othr

    [1, 1, 5, 6, 10, 11, 0.0, 0.0, 10, 11, 0.0, 1.0]

>>> 

'''
 
''' Data File Contents

$$$$$

START

COLOR RED

LINETYPE SOLID

END

$$$$$$$

PLine    1        6      1.5     9.375   .001   -.001

$ Line Details

Line*    1               1                1              2

*        .002952         .992547         .121827

$

Rect     2        1       2       3       7       6

Rect     3        1       3       4       8       7

PRect*   4               11              15              16

*        10              11              0.3

Rect*    4               1               5               6

*        10              11              0.

Othr*    1               1               5               6

*        10              11              0.              0.

*        10              11              0.              1.0

$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

Tria     5        1       7       2       11

$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

Point    1               0.0     0.0     0.0

Point    2               1.0     0.0     0.0

Point    3               2.0     0.0     0.0

Point    4              -3.0     0.0     0.0

Point    5               0.0     1.0     0.0

Point    6               1.0     1.0     0.0

Point    7               2.0     1.0     0.0

Point    8               4.0     1.0     0.0

Point*   9                               0.0            -2.0

*          0.0

Point  *3280504         0               1.28286145E+03  1.28286145+03

*       -2.01004501E+02

#

Point  *3280606         0               6.926400065-03  -1.28286145+03

*       -1.01004501+04  -1.438576736-02

$

END

'''

You should test this on real data for valid results. I cannot guarantee that this is a final solution for you.

Dec 29 '07 #35

psbasha

440

256MB

Expand|Select|Wrap|Line Numbers

 SampleFile

$$$$$

START

COLOR RED

LINETYPE SOLID

END

$$$$$$$

PLine   1        6      1.5     9.375   .001    .001

$ Line Details

Line*   1               1                1              2

*       .002952         .992547         .121827

$
 
Rect    2        1       2       3       7       6

Rect    3        1       3       4       8       7

PRect*  4               11              15              16

*       10              11              0.3

Rect*   4               1               5               6

*       10              11              0.

Othr*   1               1               5               6

*       10              11              0.              0.

*       10              11              0.              1.0

Oth1*   1               1               5               6

*       10              11              0.              0.

*       10              11              0.              1.0

*       10              11              0.              1.0

*       10              11              0.              1.0

Rect*   5               1               5               6

*       10              11              0.

Rect    1000000010000000200000007000000060000000

Rect    2000000020000000300000008000000070000000

Rect    3000000030000000400000009000000080000000

Tria    40000000400000005000000090000000

$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$  $

Tria     5        1       7       2       11

$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$  

Point   1               0.0     0.0     0.0

Point   2               1.0     0.0     0.0

Point   3               2.0     0.0     0.0

Point   4               3.0     0.0     0.0

Point   5               0.0     1.0     0.0

Point   6               1.0     1.0     0.0

Point   7               2.0     1.0     0.0

Point   8               4.0     1.0     0.0

Point*  9                              0.0             2.0

*       0.0

Point  *3280504         0               1.28286145E+03  1.28286145E+03

*       -2.01004501E+02

Point  *3280505         0               1.28286145-03  1.28286145+03

*       -2.01004501+02

Point   100000000.      0.      0.

Point   200000005.      0.      0.

Point   3000000010.     0.      0.

Point   4000000015.     0.      0.

Point   5000000020.     0.      0.

Point   600000000.      5.      0.

Point   700000005.      5.      0.

Point   8000000010.     5.      0.

Point   9000000015.     5.      0.

$

END

Dec 29 '07 #36

psbasha

440

256MB

In the above format (i.e. 8-digit and 16-digit fields), if a column is completely filled with 8 digits, the output is incorrect.

Find the output below:

Expand|Select|Wrap|Line Numbers

 Output

>>> Point

    [1, 0.0, 0.0, 0.0]

    [2, 1.0, 0.0, 0.0]

    [3, 2.0, 0.0, 0.0]

    [4, 3.0, 0.0, 0.0]

    [5, 0.0, 1.0, 0.0]

    [6, 1.0, 1.0, 0.0]

    [7, 2.0, 1.0, 0.0]

    [8, 4.0, 1.0, 0.0]

    [9, 0.0, 2.0, 0.0]

    [3280504, 0, 1282.8614500000001, 1282.8614500000001, -201.004501]

    [3280505, 0, 0.0012828614500000001, 1282.8614500000001, -201.004501]

    [100000000.0, 0.0, 0.0]

    [200000005.0, 0.0, 0.0]

    [3000000010.0, 0.0, 0.0]

    [4000000015.0, 0.0, 0.0]

    [5000000020.0, 0.0, 0.0]

    [600000000.0, 5.0, 0.0]

    [700000005.0, 5.0, 0.0]

    [8000000010.0, 5.0, 0.0]

    [9000000015.0, 5.0, 0.0]

PLine

    [1, 6, 1.5, 9.375, 0.001, 0.001]

Tria

    [40000000400000005000000090000000L]

    [5, 1, 7, 2, 11]

PRect

    [4, 11, 15, 16, 10, 11, 0.29999999999999999]

Line

    [1, 1, 1, 2, 0.0029520000000000002, 0.99254699999999996, 0.121827]

Rect

    [2, 1, 2, 3, 7, 6]

    [3, 1, 3, 4, 8, 7]

    [4, 1, 5, 6, 10, 11, 0.0]

    [5, 1, 5, 6, 10, 11, 0.0]

    [1000000010000000200000007000000060000000L]

    [2000000020000000300000008000000070000000L]

    [3000000030000000400000009000000080000000L]

Othr

    [1, 1, 5, 6, 10, 11, 0.0, 0.0, 10, 11, 0.0, 1.0]

Incorrect output data are

Expand|Select|Wrap|Line Numbers

 Incorrect
 
    [100000000.0, 0.0, 0.0]

    [200000005.0, 0.0, 0.0]

    [3000000010.0, 0.0, 0.0]

    [4000000015.0, 0.0, 0.0]

    [5000000020.0, 0.0, 0.0]

    [600000000.0, 5.0, 0.0]

    [700000005.0, 5.0, 0.0]

    [8000000010.0, 5.0, 0.0]

    [9000000015.0, 5.0, 0.0]

Tria

    [40000000400000005000000090000000L]
 
Rect

    [1000000010000000200000007000000060000000L]

    [2000000020000000300000008000000070000000L]

    [3000000030000000400000009000000080000000L]

The above incorrect output data has to be separated by commas. How can this be fixed when a field is completely filled with 8 or 16 digits?

Thanks
PSB

Dec 29 '07 #37

psbasha

440

256MB

Shown below is the input data file, corrected into the proper format.

Thanks
PSB

Dec 30 '07 #38

psbasha

440

256MB

Expand|Select|Wrap|Line Numbers

 Correct Formated Input file

$$$$$

START

COLOR RED

LINETYPE SOLID

END

$$$$$$$

PLine   1        6      1.5     9.375   .001    .001

$ Line Details

Line*   1               1                1              2

*       .002952         .992547         .121827

$
 
Rect    2        1      2       3       7       6

Rect    3        1      3       4       8       7

PRect*  4               11              15              16

*       10              11              0.3

Rect*   4               1               5               6

*       10              11              0.

Othr*   1               1               5               6

*       10              11              0.              0.

*       10              11              0.              1.0

Oth1*   1               1               5               6

*       10              11              0.              0.

*       10              11              0.              1.0

*       10              11              0.              1.0

*       10              11              0.              1.0

Rect*   5               1               5               6

*       10              11              0.

Rect    10000000    10000000200000007000000060000000

Rect    20000000    20000000300000008000000070000000

Rect    30000000    30000000400000009000000080000000

Tria    40000000    400000005000000090000000

$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$  $

Tria    5        1      7       2       11

$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$  

Point   1               0.0     0.0     0.0

Point   2               1.0     0.0     0.0

Point   3               2.0     0.0     0.0

Point   4               3.0     0.0     0.0

Point   5               0.0     1.0     0.0

Point   6               1.0     1.0     0.0

Point   7               2.0     1.0     0.0

Point   8               4.0     1.0     0.0

Point*  9                               0.0             2.0

*       0.0

Point  *3280504         0               1.28286145E+03  1.28286145E+03

*       -2.01004501E+02

Point  *3280505         0               1.28286145-03  1.28286145+03

*       -2.01004501+02

Point   10000000    0.      0.      0.

Point   20000000    5.      0.      0.

Point   30000000    10.     0.      0.

Point   40000000    15.     0.      0.

Point   50000000    20.     0.      0.

Point   60000000    0.      5.      0.

Point   70000000    5.      5.      0.

Point   80000000    10.     5.      0.

Point   90000000    15.     5.      0.

$

END

We can use the input data above for testing.
Thanks
PSB

Dec 30 '07 #39

bvdet

2,851

Expert Mod 2GB

You will need to make a small change to regex pattern pattnum:

Expand|Select|Wrap|Line Numbers

 
# Tokeniser for the numeric fields of one data line.  The alternatives are
# tried left to right at each position, so every float form is listed before
# the bare integer; the integer alternative takes at most 8 digits per match,
# which splits a run of back-to-back 8-digit fields into separate numbers.
pattnum = re.compile(
    pattern=r'''

                      -\d+\.\d+E\+\d+|          # engineering notation -+

                      \d+\.\d+E\+\d+|           # engineering notation ++

                      -\d+\.\d+E-\d+|           # engineering notation --

                      \d+\.\d+E-\d+|            # engineering notation +-

                      -\d+\.\d+|                # negative float format

                      \d+\.\d+|                 # positive float format

                      -\d+\.|                   # negative float format

                      \d+\.|                    # positive float format

                      -\.\d+|                   # negative float format

                      \.\d+|                    # positive float format

                      \d{1,8}                   # positive integer

                      ''',
    flags=re.VERBOSE
)

This will prevent the matching of more than 8 digits at a time. Further adjustments may be required.

Dec 30 '07 #40

psbasha

440

256MB

Thanks BV for your suggestion.

I tried to play around with the pattern you suggested. I am still getting incorrect data.

Tria
[40000000, 400000005000000090000000L]

Rect
[10000000, 10000000200000007000000060000000L]
[20000000, 20000000300000008000000070000000L]
[30000000, 30000000400000009000000080000000L]

-PSB

Dec 30 '07 #41

bvdet

2,851

Expert Mod 2GB

Look carefully at the suggested pattern. That pattern produces the following output from your corrected sample data:

Expand|Select|Wrap|Line Numbers

 >>> Point

    [1, 0.0, 0.0, 0.0]

    [2, 1.0, 0.0, 0.0]

    [3, 2.0, 0.0, 0.0]

    [4, 3.0, 0.0, 0.0]

    [5, 0.0, 1.0, 0.0]

    [6, 1.0, 1.0, 0.0]

    [7, 2.0, 1.0, 0.0]

    [8, 4.0, 1.0, 0.0]

    [9, 0.0, 2.0, 0.0]

    [3280504, 0, 1282.8614500000001, 1282.8614500000001, -201.004501]

    [3280505, 0, 0.0012828614500000001, 1282.8614500000001, -201.004501]

    [10000000, 0.0, 0.0, 0.0]

    [20000000, 5.0, 0.0, 0.0]

    [30000000, 10.0, 0.0, 0.0]

    [40000000, 15.0, 0.0, 0.0]

    [50000000, 20.0, 0.0, 0.0]

    [60000000, 0.0, 5.0, 0.0]

    [70000000, 5.0, 5.0, 0.0]

    [80000000, 10.0, 5.0, 0.0]

    [90000000, 15.0, 5.0, 0.0]

PLine

    [1, 6, 1.5, 9.375, 0.001, 0.001]

Tria

    [40000000, 40000000, 50000000, 90000000]

    [5, 1, 7, 2, 11]

PRect

    [4, 11, 15, 16, 10, 11, 0.29999999999999999]

Line

    [1, 1, 1, 2, 0.0029520000000000002, 0.99254699999999996, 0.121827]

Oth1

    [1, 1, 1, 5, 6, 10, 11, 0.0, 0.0, 10, 11, 0.0, 1.0, 10, 11, 0.0, 1.0, 10, 11, 0.0, 1.0]

Rect

    [2, 1, 2, 3, 7, 6]

    [3, 1, 3, 4, 8, 7]

    [4, 1, 5, 6, 10, 11, 0.0]

    [5, 1, 5, 6, 10, 11, 0.0]

    [10000000, 10000000, 20000000, 70000000, 60000000]

    [20000000, 20000000, 30000000, 80000000, 70000000]

    [30000000, 30000000, 40000000, 90000000, 80000000]

Othr

    [1, 1, 5, 6, 10, 11, 0.0, 0.0, 10, 11, 0.0, 1.0]

>>>

Dec 30 '07 #42

psbasha

440

256MB

You are right, BV. Sorry, I did not copy the entire pattern you suggested; I copied only the last statement of the pattern into my code, so I missed one statement of the pattern.

Thanks for your suggestion and help BV.

-PSB

Dec 30 '07 #43

psbasha

440

256MB

BV,

Please suggest books and links on regular expressions, starting with the basics and moving on to advanced concepts.

Thanks
PSB

Dec 30 '07 #44

bvdet

2,851

Expert Mod 2GB

BV,

suggest me books and links for the regular expression to start with Basics and later for advance concepts

Thanks
PSB

This link has some good introductory and intermediate information on regular expressions - LINK

I have been using Kodos for experimenting and testing regular expressions and mostly learned by practicing with and incorporating into my scripts when needed. I do not consider myself an expert on re. Trial and error may be the hard way, but that's the way I learned what I know about Python.

Dec 30 '07 #45

psbasha

440

256MB

This link has some good introductory and intermediate information on regular expressions - LINK

I have been using Kodos for experimenting and testing regular expressions and mostly learned by practicing with and incorporating into my scripts when needed. I do not consider myself an expert on re. Trial and error may be the hard way, but that's the way I learned what I know about Python.

Expand|Select|Wrap|Line Numbers

 Re

# Numeric-field tokeniser.  Alternatives are tried left to right, so the
# float forms come before the bare integer.  In the inline notes the two
# signs after "engineering notation" are the sign of the mantissa followed
# by the sign of the exponent.  The final alternative takes at most 8 digits
# per match, so adjacent full-width 8-digit fields are split apart.
pattnum = re.compile(r'''

                      -\d+\.\d+E\+\d+|          # engineering notation -+

                      \d+\.\d+E\+\d+|           # engineering notation ++

                      -\d+\.\d+E-\d+|           # engineering notation --

                      \d+\.\d+E-\d+|            # engineering notation +-

                      -\d+\.\d+|                # negative float format

                      \d+\.\d+|                 # positive float format

                      -\d+\.|                   # negative float format

                      \d+\.|                    # positive float format

                      -\.\d+|                   # negative float format

                      \.\d+|                    # positive float format

                      \d{1,8}                   # positive integer

                      ''', re.X
                     )  # closing parenthesis was missing from the snippet

# Captures a run of letters, underscores and hyphens enclosed in slashes,
# e.g. '/ABC_abc-def/' -> 'ABC_abc-def'.
key_patt = re.compile(r'/([A-Za-z_-]+)/')

# Matches, in order of preference: a decimal number with digits on both
# sides of the point, a plain run of digits, or a run of word characters.
data_patt = re.compile(r'\d+\.\d+|\d+|\w+')

Hi BV,

Can you please explain the above patterns with simple examples? What does each pattern line stand for? How do we decide to use these types of pattern?

Thanks
PSB

Jan 5 '08 #46

bvdet

2,851

Expert Mod 2GB

Expand|Select|Wrap|Line Numbers

Re

# Numeric-field tokeniser.  Alternatives are tried left to right, so the
# float forms come before the bare integer.  In the inline notes the two
# signs after "engineering notation" are the sign of the mantissa followed
# by the sign of the exponent.  The final alternative takes at most 8 digits
# per match, so adjacent full-width 8-digit fields are split apart.
pattnum = re.compile(r'''

                      -\d+\.\d+E\+\d+|          # engineering notation -+

                      \d+\.\d+E\+\d+|           # engineering notation ++

                      -\d+\.\d+E-\d+|           # engineering notation --

                      \d+\.\d+E-\d+|            # engineering notation +-

                      -\d+\.\d+|                # negative float format

                      \d+\.\d+|                 # positive float format

                      -\d+\.|                   # negative float format

                      \d+\.|                    # positive float format

                      -\.\d+|                   # negative float format

                      \.\d+|                    # positive float format

                      \d{1,8}                   # positive integer

                      ''', re.X
                     )  # closing parenthesis was missing from the snippet

# Captures a run of letters, underscores and hyphens enclosed in slashes,
# e.g. '/ABC_abc-def/' -> 'ABC_abc-def'.
key_patt = re.compile(r'/([A-Za-z_-]+)/')

# Matches, in order of preference: a decimal number with digits on both
# sides of the point, a plain run of digits, or a run of word characters.
data_patt = re.compile(r'\d+\.\d+|\d+|\w+')

Hi BV,

Can you please elaborate the explanation for the above pattern ,with simple examples.What each pattern line stands for?. How are we deciding to go for this types of pattern.

Thanks
PSB

Each line in pattnum matches a slightly different number format, as noted in the comments. The last line (''', re.X) contains the VERBOSE flag, which tells the compiler to ignore unescaped whitespace and comments. The next-to-last line (\d{1,8}) greedily matches between one and eight digits at a time. That is what we fixed earlier to work with your formatted data.

key_patt matches words like this:
/ABC_abc-def/
The brackets '[......]' tell the compiler to match the set of characters enclosed. Since the slash characters are outside the brackets, they must enclose the word in a given string to match. That's how we matched your keywords.

data_patt matches a floating point number, an integer, or a run of word characters (letters, digits and underscores). The character '|' tells the compiler to match the pattern to the left OR the pattern to the right in a given string.

Jan 5 '08 #47

psbasha

440

256MB

You will need to make a small change to regex pattern pattnum:

Expand|Select|Wrap|Line Numbers

# Numeric-field tokeniser, compiled in verbose mode (re.X) so the pattern can
# be laid out one alternative per line with inline notes.  Alternatives are
# tried in the order written: floats with an explicit E exponent first, then
# plain floats (digits on both sides of the point, trailing point, leading
# point), and finally an unsigned integer limited to 8 digits per match.
# In the "engineering notation" notes, the two signs are the sign of the
# mantissa followed by the sign of the exponent.
pattnum = re.compile(r'''

                      -\d+\.\d+E\+\d+|          # engineering notation -+

                      \d+\.\d+E\+\d+|           # engineering notation ++

                      -\d+\.\d+E-\d+|           # engineering notation --

                      \d+\.\d+E-\d+|            # engineering notation +-

                      -\d+\.\d+|                # negative float format

                      \d+\.\d+|                 # positive float format

                      -\d+\.|                   # negative float format

                      \d+\.|                    # positive float format

                      -\.\d+|                   # negative float format

                      \.\d+|                    # positive float format

                      \d{1,8}                   # positive integer

                      ''', re.X

                     )

This will prevent the matching of more than 8 digits at a time. Further adjustments may be required.

Expand|Select|Wrap|Line Numbers

 SampleData

Line1*  1               1                1              2

*       .002952         .992547         .121827

$
 
Rect2   2        1      2       3       7       6

Rect    3        1      3       4       8       7

PRect2* 4               11              15              16

*       10              11              0.3

Rect2*   4               1               5               6

*       10              11              0.

Othr*   1               1               5               6

*       10              11              0.              0.

*       10              11              0.              1.0

Oth1*   1               1               5               6

*       10              11              0.              0.

*       10              11              0.              1.0

*       10              11              0.              1.0

*       10              11              0.              1.0

Rect*   5               1               5               6

*       10              11              0.

Rect    10000000    10000000200000007000000060000000

Rect    20000000    20000000300000008000000070000000

Rect    30000000    30000000400000009000000080000000

Tria3   40000000    400000005000000090000000

$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$  $

Tria    6        1      7       2       11

$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$  

Point   1               0.0     0.0     0.0

Point   2               1.0     0.0     0.0

Point   3               2.0     0.0     0.0

Point   4               3.0     0.0     0.0

Point   5               0.0     1.0     0.0

Point   6               1.0     1.0     0.0

Point   7               2.0     1.0     0.0

Point   8               4.0     1.0     0.0

Point*  9                               0.0             2.0

*       0.0

Point  *3280504         0               1.28286145E+03  1.28286145E+03

*       -2.01004501E+02

Point  *3280505         0               1.28286145-03  1.28286145+03

*       -2.01004501+02

Point   10000000    0.      0.      0.

Point   20000000    5.      0.      0.

Point   30000000    10.     0.      0.

Point   40000000    15.     0.      0.

Point   50000000    20.     0.      0.

Point   60000000    0.      5.      0.

Point   70000000    5.      5.      0.

Point   80000000    10.     5.      0.

Point   90000000    15.     5.      0.

$

END

if the keywords are defined as below
keywords = ['Point', 'Othr', 'Rect2', 'Rect', 'PRect', 'PLine', 'Line1', 'Tria3', 'Oth1']
The output we are getting is Incorrect.

Expand|Select|Wrap|Line Numbers

 Output

$Incorrect output

Line1

    [1, 1, 1, 1, 2, 0.0029520000000000002, 0.99254699999999996, 0.121827]

Tria3

    [3,40000000, 40000000, 50000000, 90000000]
 
Oth1

    [1, 1, 1, 5, 6, 10, 11, 0.0, 0.0, 10, 11, 0.0, 1.0, 10, 11, 0.0, 1.0, 10, 11, 0.0, 1.0]
 
Rect2

    [2, 2, 1, 2, 3, 7, 6]

    [2, 4, 1, 5, 6, 10, 11, 0.0]

$Correct output is 

Line1

    [1, 1, 1, 2, 0.0029520000000000002, 0.99254699999999996, 0.121827]

Tria3

    [40000000, 40000000, 50000000, 90000000]
 
Oth1

    [ 1, 1, 5, 6, 10, 11, 0.0, 0.0, 10, 11, 0.0, 1.0, 10, 11, 0.0, 1.0, 10, 11, 0.0, 1.0]
 
Rect2

    [ 2, 1, 2, 3, 7, 6]

    [ 4, 1, 5, 6, 10, 11, 0.0]

The source code is also picking up the digit '2' from the 'Rect2' keyword as data; similarly for Line1 and Tria3.

Jan 6 '08 #48

bvdet

2,851

Expert Mod 2GB

Try making adjustments to pattnum and pattkey:

Expand|Select|Wrap|Line Numbers

 
# last line in pattnum

# matches integers of length between 1 and 8 digits,

# if not preceded by an alpha character

# matching as many repetitions possible

................(?<![a-zA-Z])\d{1,8}  # positive integer
 
# matches keywords listed in kargs

# may or may not have a trailing asterisk

# there must be a word boundary both ends

....pattkey = re.compile('|'.join([r'\b(%s)\*?\b' % item for item in kargs]))

Jan 6 '08 #49

psbasha

440

256MB

Try making adjustments to pattnum and pattkey:

Expand|Select|Wrap|Line Numbers

# last line in pattnum

# matches integers of length between 1 and 8 digits,

# if not preceded by an alpha character

# matching as many repetitions possible

................(?<![a-zA-Z])\d{1,8} # positive integer

# matches keywords listed in kargs

# may or may not have a trailing asterisk

# there must be a word boundary both ends

....pattkey = re.compile('|'.join([r'\b(%s)\*?\b' % item for item in kargs]))

I tried to add the pattern as suggested, as the last alternative:

Expand|Select|Wrap|Line Numbers

 Pat

# NOTE(review): this version does not compile.  In the last alternative the
# leading "\(" is an escaped, literal parenthesis, so no group is opened and
# the ")" that follows the character class has nothing to close -- hence the
# "unbalanced parenthesis" error.  The lookbehind suggested earlier in the
# thread is written "(?<![a-zA-Z])\d{1,8}" (no backslash before the "(").
# It was also meant to replace the plain "\d{1,8}" alternative, not follow
# it: with "\d{1,8}|" listed first, that alternative always matches before
# the lookbehind version is tried.
pattnum = re.compile(r'''

                      -\d+\.\d+E\+\d+|          # engineering notation -+

                      \d+\.\d+E\+\d+|           # engineering notation ++

                      -\d+\.\d+E-\d+|           # engineering notation --

                      \d+\.\d+E-\d+|            # engineering notation +-

                      -\d+\.\d+|                # negative float format

                      \d+\.\d+|                 # positive float format

                      -\d+\.|                   # negative float format

                      \d+\.|                    # positive float format

                      -\.\d+|                   # negative float format

                      \.\d+|                    # positive float format

                      \d{1,8}|

                      \(?<![a-zA-Z])\d{1,8}  # positive integer

                      ''', re.X

                     )

and the line

pattkey = re.compile('|'.join([r'\b(%s)\*?\b' % item for item in kargs]))

I think there is a syntax error in the pattern

\(?<![a-zA-Z])\d{1,8} # positive integer.

I am getting the following error

Expand|Select|Wrap|Line Numbers

 Error

  File "C:\\Sample.py", line 12, in ?

    pattnum = re.compile(r'''

  File "C:\Python24\lib\sre.py", line 180, in compile

    return _compile(pattern, flags)

  File "C:\Python24\lib\sre.py", line 227, in _compile

    raise error, v # invalid expression

error: unbalanced parenthesis

Jan 6 '08 #50

any other best way of reading the file

Similar topics