Connecting Tech Pros Worldwide Forums | Help | Site Map

Read pdf file

Newbie
 
Join Date: Aug 2009
Posts: 11
#1: Sep 24 '09
Hi All

How can I read the content of an existing PDF file using ASP.NET?

Thanks
Anitha

ssnaik84's Avatar
Member
 
Join Date: Aug 2009
Location: Bengaluru, India
Posts: 124
#2: Sep 24 '09

re: Read pdf file


What exactly do you want to do?

take a look at iTextSharp
Frinavale's Avatar
Site Moderator
 
Join Date: Oct 2006
Location: The Great White North
Posts: 5,156
#3: Sep 24 '09

re: Read pdf file


Read? What do you mean when you say "read"? Do you mean how do you read the file into your code? Do you mean how does the user get to Read the pdf file?
Newbie
 
Join Date: Oct 2008
Location: Islamabad, Pakistan
Posts: 9
#4: Sep 25 '09

re: Read pdf file


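Use "iTextSharp" with the following code to read the contents of a PDF in your code. Note that the code refers to a class-level integer field named _numberOfCharsToKeep that is not declared in the listing; you must declare it yourself, and it has to be at least 4 because CheckToken looks back four characters.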


Expand|Select|Wrap|Line Numbers
  1. using iTextSharp.text.pdf;
  2. using iTextSharp.text;
  3.  
  /// <summary>
  /// Opens the PDF at a hard-coded path, walks every page and concatenates the
  /// text that <c>ExtractTextFromPDFBytes</c> recovers from each page's content bytes.
  /// </summary>
  /// <remarks>
  /// The method returns nothing: the collected text ends up in the local
  /// <c>str</c>, exactly as in the original sample. Adapt the end of the method
  /// to hand the text to whatever needs it.
  /// </remarks>
  private void openPDF()
  {
      string newFile = "c:\\New Document.pdf";

      // Fully qualified so the snippet needs no extra using directive.
      System.Text.StringBuilder text = new System.Text.StringBuilder();

      PdfReader reader = new PdfReader(newFile);
      try
      {
          // The loop covers pages 1 through NumberOfPages inclusive.
          for (int i = 1; i <= reader.NumberOfPages; i++)
          {
              byte[] bt = reader.GetPageContent(i);
              text.Append(ExtractTextFromPDFBytes(bt));
          }
      }
      finally
      {
          // Always release the reader, even if reading a page throws.
          reader.Close();
      }

      string str = text.ToString();
  }
  19.  
  20.  
  /// <summary>
  /// Scans the bytes of a page content stream and collects the characters that
  /// appear inside parenthesised strings between the "BT" and "ET" tokens.
  /// </summary>
  /// <param name="input">Raw content bytes of one page; may be null or empty.</param>
  /// <returns>
  /// The collected text, or an empty string when <paramref name="input"/> is
  /// null/empty or when any exception occurs while scanning.
  /// </returns>
  /// <remarks>
  /// Relies on <c>_numberOfCharsToKeep</c>, a field declared outside this method,
  /// as the size of the look-back buffer handed to <c>CheckToken</c>. That helper
  /// looks back four characters, so the field must be at least 4.
  /// Backslash escapes are not interpreted: the character following an
  /// unescaped backslash is copied verbatim (subject to the printable filter).
  /// </remarks>
  private string ExtractTextFromPDFBytes(byte[] input)
  {
      if (input == null || input.Length == 0) return "";

      try
      {
          // Fully qualified so no extra using directive is required.
          System.Text.StringBuilder resultString = new System.Text.StringBuilder();

          // True while we are between a "BT" token and the matching "ET".
          bool inTextObject = false;

          // True when the previous character was an unescaped backslash, so the
          // current character must be taken literally.
          bool nextLiteral = false;

          // Nesting level of ( ) pairs. Text lives at depth >= 1. Balanced,
          // unescaped parentheses inside a string raise the depth instead of
          // terminating the string early.
          int bracketDepth = 0;

          // Token sets are loop-invariant, so build them once rather than once
          // per input byte.
          string[] positionTokens = new string[] { "TD", "Td" };
          string[] newLineTokens = new string[] { "'", "T*", "\"" };
          string[] showTextTokens = new string[] { "Tj" };
          string[] endTextTokens = new string[] { "ET" };
          string[] beginTextTokens = new string[] { "BT" };

          // Sliding window over the most recently seen characters; the newest
          // character sits in the last slot.
          char[] previousCharacters = new char[_numberOfCharsToKeep];
          for (int j = 0; j < _numberOfCharsToKeep; j++) previousCharacters[j] = ' ';

          for (int i = 0; i < input.Length; i++)
          {
              char c = (char)input[i];

              if (inTextObject)
              {
                  // Outside a string: translate positioning / show operators
                  // that have just been completed into separators.
                  if (bracketDepth == 0)
                  {
                      if (CheckToken(positionTokens, previousCharacters))
                      {
                          // NOTE(review): LF followed by CR, kept exactly as in the original.
                          resultString.Append("\n\r");
                      }
                      else if (CheckToken(newLineTokens, previousCharacters))
                      {
                          resultString.Append("\n");
                      }
                      else if (CheckToken(showTextTokens, previousCharacters))
                      {
                          resultString.Append(" ");
                      }
                  }

                  if (bracketDepth == 0 && CheckToken(endTextTokens, previousCharacters))
                  {
                      // End of the text object.
                      inTextObject = false;
                      resultString.Append(" ");
                  }
                  else if (bracketDepth == 0)
                  {
                      // Not inside a string yet: only an opening parenthesis matters.
                      if (c == '(' && !nextLiteral)
                      {
                          bracketDepth = 1;
                      }
                  }
                  else
                  {
                      // Inside a string.
                      bool escaped = nextLiteral;
                      nextLiteral = false;

                      if (!escaped && c == '\\')
                      {
                          // Remember the escape; the backslash itself is not output.
                          nextLiteral = true;
                      }
                      else if (!escaped && c == '(')
                      {
                          // Nested opening parenthesis is part of the text.
                          bracketDepth++;
                          resultString.Append(c);
                      }
                      else if (!escaped && c == ')')
                      {
                          // Only the outermost closing parenthesis ends the string;
                          // inner ones are part of the text.
                          bracketDepth--;
                          if (bracketDepth > 0)
                          {
                              resultString.Append(c);
                          }
                      }
                      else if (((c >= ' ') && (c <= '~')) ||
                               ((c >= 128) && (c < 255)))
                      {
                          // Keep printable ASCII and the 128..254 range; drop the rest.
                          resultString.Append(c);
                      }
                  }
              }

              // Slide the look-back window one position and store the current
              // character in the last slot.
              for (int j = 0; j < _numberOfCharsToKeep - 1; j++)
              {
                  previousCharacters[j] = previousCharacters[j + 1];
              }
              previousCharacters[_numberOfCharsToKeep - 1] = c;

              // Start of a text object.
              if (!inTextObject && CheckToken(beginTextTokens, previousCharacters))
              {
                  inTextObject = true;
              }
          }

          return resultString.ToString();
      }
      catch
      {
          // Deliberate best-effort: any failure while scanning yields no text
          // for this page instead of propagating to the caller.
          return "";
      }
  }
  143.  
  /// <summary>
  /// Reports whether the most recent characters end with one of the given
  /// tokens, delimited on both sides by a space, carriage return or line feed.
  /// </summary>
  /// <param name="tokens">Candidate tokens; empty entries are ignored.</param>
  /// <param name="recent">
  /// Look-back buffer whose last element is the most recently read character.
  /// </param>
  /// <returns>
  /// True when the last character is a delimiter, the characters immediately
  /// before it spell one of the tokens, and the character before the token is
  /// also a delimiter; otherwise false.
  /// </returns>
  /// <remarks>
  /// Tokens of any length are supported. One-character tokens such as
  /// <c>'</c> and <c>"</c> used to be indexed at position 1 and could raise
  /// <see cref="System.IndexOutOfRangeException"/>. Positions are derived from
  /// <c>recent.Length</c>, so a buffer too short to hold a token plus both
  /// delimiters simply does not match.
  /// </remarks>
  private bool CheckToken(string[] tokens, char[] recent)
  {
      int last = recent.Length - 1;
      if (last < 0)
      {
          return false;
      }

      // The trailing delimiter is common to every token, so test it once.
      char after = recent[last];
      if (after != ' ' && after != (char)0x0d && after != (char)0x0a)
      {
          return false;
      }

      foreach (string token in tokens)
      {
          if (string.IsNullOrEmpty(token))
          {
              continue;
          }

          // Index of the token's first character inside the buffer.
          int start = last - token.Length;
          if (start < 1)
          {
              // No room for the token plus its leading delimiter.
              continue;
          }

          char before = recent[start - 1];
          if (before != ' ' && before != (char)0x0d && before != (char)0x0a)
          {
              continue;
          }

          bool matches = true;
          for (int k = 0; k < token.Length; k++)
          {
              if (recent[start + k] != token[k])
              {
                  matches = false;
                  break;
              }
          }

          if (matches)
          {
              return true;
          }
      }

      return false;
  }
Reply