xmlparser.cpp
Go to the documentation of this file.
00001 /***************************************************************************
00002   file : $URL: http://svn.code.sf.net/p/frepple/code/trunk/src/utils/xmlparser.cpp $
00003   version : $LastChangedRevision: 1713 $  $LastChangedBy: jdetaeye $
00004   date : $LastChangedDate: 2012-07-18 11:46:01 +0200 (Wed, 18 Jul 2012) $
00005  ***************************************************************************/
00006 
00007 /***************************************************************************
00008  *                                                                         *
00009  * Copyright (C) 2007-2012 by Johan De Taeye, frePPLe bvba                 *
00010  *                                                                         *
00011  * This library is free software; you can redistribute it and/or modify it *
00012  * under the terms of the GNU Affero General Public License as published   *
00013  * by the Free Software Foundation; either version 3 of the License, or    *
00014  * (at your option) any later version.                                     *
00015  *                                                                         *
00016  * This library is distributed in the hope that it will be useful,         *
00017  * but WITHOUT ANY WARRANTY; without even the implied warranty of          *
00018  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the            *
00019  * GNU Affero General Public License for more details.                     *
00020  *                                                                         *
00021  * You should have received a copy of the GNU Affero General Public        *
00022  * License along with this program.                                        *
00023  * If not, see <http://www.gnu.org/licenses/>.                             *
00024  *                                                                         *
00025  ***************************************************************************/
00026 
00027 #define FREPPLE_CORE
00028 #include "frepple/utils.h"
00029 #include <sys/stat.h>
00030 
00031 /* Uncomment the next line to create a lot of debugging messages during
00032  * the parsing of XML-data. */
00033 //#define PARSE_DEBUG
00034 
00035 // With VC++ we use the Win32 functions to browse a directory
00036 #ifdef _MSC_VER
00037 #define WIN32_LEAN_AND_MEAN
00038 #include <windows.h>
00039 #else
00040 // With Unix-like systems we use a check suggested by the autoconf tools
00041 #if HAVE_DIRENT_H
00042 # include <dirent.h>
00043 # define NAMLEN(dirent) strlen((dirent)->d_name)
00044 #else
00045 # define dirent direct
00046 # define NAMLEN(dirent) (dirent)->d_namlen
00047 # if HAVE_SYS_NDIR_H
00048 #  include <sys/ndir.h>
00049 # endif
00050 # if HAVE_SYS_DIR_H
00051 #  include <sys/dir.h>
00052 # endif
00053 # if HAVE_NDIR_H
00054 #  include <ndir.h>
00055 # endif
00056 #endif
00057 #endif
00058 
00059 
00060 namespace frepple
00061 {
00062 namespace utils
00063 {
00064 
00065 DECLARE_EXPORT const XMLOutput::content_type XMLOutput::STANDARD = 1;
00066 DECLARE_EXPORT const XMLOutput::content_type XMLOutput::PLAN = 2;
00067 DECLARE_EXPORT const XMLOutput::content_type XMLOutput::PLANDETAIL = 4;
00068 
00069 
00070 void  XMLInput::processingInstruction
00071 (const XMLCh *const target, const XMLCh *const data)
00072 {
00073   char* type = xercesc::XMLString::transcode(target);
00074   char* value = xercesc::XMLString::transcode(data);
00075   try
00076   {
00077     if (!strcmp(type,"python"))
00078     {
00079       // "python" is the only processing instruction which we process.
00080       // Others will be silently ignored
00081       try
00082       {
00083         // Execute the processing instruction
00084         PythonInterpreter::execute(value);
00085       }
00086       catch (const DataException& e)
00087       {
00088         if (abortOnDataException)
00089         {
00090           xercesc::XMLString::release(&type);
00091           xercesc::XMLString::release(&value);
00092           throw;
00093         }
00094         else logger << "Continuing after data error: " << e.what() << endl;
00095       }
00096     }
00097     xercesc::XMLString::release(&type);
00098     xercesc::XMLString::release(&value);
00099   }
00100   catch (...)
00101   {
00102     xercesc::XMLString::release(&type);
00103     xercesc::XMLString::release(&value);
00104     throw;
00105   }
00106 }
00107 
00108 
00109 void XMLInput::startElement(const XMLCh* const uri, const XMLCh* const n,
00110     const XMLCh* const qname, const xercesc::Attributes& atts)
00111 {
00112   // Validate the state
00113   assert(!states.empty());
00114 
00115   // Check for excessive number of open objects
00116   if (numElements >= maxdepth)
00117     throw DataException("XML-document with elements nested excessively deep");
00118 
00119   // Push the element on the stack
00120   datapair *pElement = &m_EStack[numElements+1];
00121   pElement->first.reset(n);
00122   pElement->second.reset();
00123 
00124   // Store a pointer to the attributes
00125   attributes = &atts;
00126 
00127   switch (states.top())
00128   {
00129     case SHUTDOWN:
00130       // STATE: Parser is shutting down, and we can ignore all input that
00131       // is still coming
00132       return;
00133 
00134     case IGNOREINPUT:
00135       // STATE: Parser is ignoring a part of the input
00136       if (pElement->first.getHash() == endingHashes.top())
00137         // Increase the count of occurences before the ignore section ends
00138         ++ignore;
00139       ++numElements;
00140       return;
00141 
00142     case INIT:
00143       // STATE: The only time the parser comes in this state is when we read
00144       // opening tag of the ROOT tag.
00145 #ifdef PARSE_DEBUG
00146       if (!m_EHStack.empty())
00147         logger << "Initialize root tag for reading object "
00148             << getCurrentObject() << " ("
00149             << typeid(*getCurrentObject()).name() << ")" << endl;
00150       else
00151         logger << "Initialize root tag for reading object NULL" << endl;
00152 #endif
00153       states.top() = READOBJECT;
00154       endingHashes.push(pElement->first.getHash());
00155       // Note that there is no break or return here. We also execute the
00156       // statements of the following switch-case.
00157 
00158     case READOBJECT:
00159       // STATE: Parser is reading data elements of an object
00160       // Debug
00161 #ifdef PARSE_DEBUG
00162       logger << "   Start element " << pElement->first.getName()
00163           << " - object " << getCurrentObject() << endl;
00164 #endif
00165 
00166       // Call the handler of the object
00167       assert(!m_EHStack.empty());
00168       try {getCurrentObject()->beginElement(*this, pElement->first);}
00169       catch (const DataException& e)
00170       {
00171         if (abortOnDataException) throw;
00172         else logger << "Continuing after data error: " << e.what() << endl;
00173       }
00174 
00175       // Now process all attributes. For attributes we only call the
00176       // endElement() member and skip the beginElement() method.
00177       numElements += 1;
00178       if (states.top() != IGNOREINPUT)
00179         for (unsigned int i=0, cnt=atts.getLength(); i<cnt; i++)
00180         {
00181           char* val = xercesc::XMLString::transcode(atts.getValue(i));
00182           m_EStack[numElements+1].first.reset(atts.getLocalName(i));
00183           m_EStack[numElements+1].second.setData(val);
00184 #ifdef PARSE_DEBUG
00185           char* attname = xercesc::XMLString::transcode(atts.getQName(i));
00186           logger << "   Processing attribute " << attname
00187               << " - object " << getCurrentObject() << endl;
00188           xercesc::XMLString::release(&attname);
00189 #endif
00190           try {getCurrentObject()->endElement(*this, m_EStack[numElements+1].first, m_EStack[numElements+1].second);}
00191           catch (const DataException& e)
00192           {
00193             if (abortOnDataException) throw;
00194             else logger << "Continuing after data error: " << e.what() << endl;
00195           }
00196           xercesc::XMLString::release(&val);
00197           // Stop processing attributes if we are now in the ignore mode
00198           if (states.top() == IGNOREINPUT) break;
00199         }
00200   }  // End of switch statement
00201 
00202   // Outside of this handler, no attributes are available
00203   attributes = NULL;
00204 }
00205 
00206 
00207 void XMLInput::endElement(const XMLCh* const uri,
00208     const XMLCh* const s,
00209     const XMLCh* const qname)
00210 {
00211   // Validate the state
00212   assert(numElements >= 0);
00213   assert(!states.empty());
00214   assert(numElements < maxdepth);
00215 
00216   // Remove an element from the stack
00217   datapair *pElement = &(m_EStack[numElements--]);
00218 
00219   switch (states.top())
00220   {
00221     case INIT:
00222       // This should never happen!
00223       throw LogicException("Unreachable code reached");
00224 
00225     case SHUTDOWN:
00226       // STATE: Parser is shutting down, and we can ignore all input that is
00227       // still coming
00228       return;
00229 
00230     case IGNOREINPUT:
00231       // STATE: Parser is ignoring a part of the input
00232 #ifdef PARSE_DEBUG
00233       logger << "   End element " << pElement->first.getName()
00234           << " - IGNOREINPUT state" << endl;
00235 #endif
00236       // Continue if we aren't dealing with the tag being ignored
00237       if (pElement->first.getHash() != endingHashes.top()) return;
00238       if (ignore == 0)
00239       {
00240         // Finished ignoring now
00241         states.pop();
00242         endingHashes.pop();
00243 #ifdef PARSE_DEBUG
00244         logger << "Finish IGNOREINPUT state" << endl;
00245 #endif
00246       }
00247       else
00248         --ignore;
00249       break;
00250 
00251     case READOBJECT:
00252       // STATE: Parser is reading data elements of an object
00253 #ifdef PARSE_DEBUG
00254       logger << "   End element " << pElement->first.getName()
00255           << " - object " << getCurrentObject() << endl;
00256 #endif
00257 
00258       // Check if we finished with the current handler
00259       assert(!m_EHStack.empty());
00260       if (pElement->first.getHash() == endingHashes.top())
00261       {
00262         // Call the ending handler of the Object, with a special
00263         // flag to specify that this object is now ended
00264         objectEnded = true;
00265         try {getCurrentObject()->endElement(*this, pElement->first, pElement->second);}
00266         catch (const DataException& e)
00267         {
00268           if (abortOnDataException) throw;
00269           else logger << "Continuing after data error: " << e.what() << endl;
00270         }
00271         objectEnded = false;
00272 #ifdef PARSE_DEBUG
00273         logger << "Finish reading object " << getCurrentObject() << endl;
00274 #endif
00275         // Pop from the handler object stack
00276         prev = getCurrentObject();
00277         m_EHStack.pop_back();
00278         endingHashes.pop();
00279 
00280         // Pop from the state stack
00281         states.pop();
00282         if (m_EHStack.empty())
00283           shutdown();
00284         else
00285         {
00286           // Call also the endElement function on the owning object
00287           try {getCurrentObject()->endElement(*this, pElement->first, pElement->second);}
00288           catch (const DataException& e)
00289           {
00290             if (abortOnDataException) throw;
00291             else logger << "Continuing after data error: " << e.what() << endl;
00292           }
00293 #ifdef PARSE_DEBUG
00294           logger << "   End element " << pElement->first.getName()
00295               << " - object " << getCurrentObject() << endl;
00296 #endif
00297         }
00298       }
00299       else
00300         // This tag is not the ending tag of an object
00301         // Call the function of the Object
00302         try {getCurrentObject()->endElement(*this, pElement->first, pElement->second);}
00303         catch (const DataException& e)
00304         {
00305           if (abortOnDataException) throw;
00306           else logger << "Continuing after data error: " << e.what() << endl;
00307         }
00308   }
00309 }
00310 
00311 
00312 // Unfortunately the prototype for this handler function differs between
00313 // Xerces-c 2.x and 3.x
00314 #if XERCES_VERSION_MAJOR==2
00315 void XMLInput::characters(const XMLCh *const c, const unsigned int n)
00316 #else
00317 void XMLInput::characters(const XMLCh *const c, const XMLSize_t n)
00318 #endif
00319 {
00320   // No data capture during the ignore state
00321   if (states.top()==IGNOREINPUT) return;
00322 
00323   // Process the data
00324   char* name = xercesc::XMLString::transcode(c);
00325   m_EStack[numElements].second.addData(name, strlen(name));
00326   xercesc::XMLString::release(&name);
00327 }
00328 
00329 
00330 void XMLInput::warning(const xercesc::SAXParseException& exception)
00331 {
00332   char* message = xercesc::XMLString::transcode(exception.getMessage());
00333   logger << "Warning: " << message
00334       << " at line: " << exception.getLineNumber() << endl;
00335   xercesc::XMLString::release(&message);
00336 }
00337 
00338 
00339 DECLARE_EXPORT void XMLInput::readto(Object * pPI)
00340 {
00341   // Keep track of the tag where this object will end
00342   assert(numElements >= -1);
00343   endingHashes.push(m_EStack[numElements+1].first.getHash());
00344   if (pPI)
00345   {
00346     // Push a new object on the handler stack
00347 #ifdef PARSE_DEBUG
00348     logger << "Start reading object " << pPI
00349         << " (" << typeid(*pPI).name() << ")" << endl;
00350 #endif
00351     prev = getCurrentObject();
00352     m_EHStack.push_back(make_pair(pPI,static_cast<void*>(NULL)));
00353     states.push(READOBJECT);
00354   }
00355   else
00356   {
00357     // Ignore the complete content of this element
00358 #ifdef PARSE_DEBUG
00359     logger << "Start ignoring input" << endl;
00360 #endif
00361     states.push(IGNOREINPUT);
00362   }
00363 }
00364 
00365 
00366 void XMLInput::shutdown()
00367 {
00368   // Already shutting down...
00369   if (states.empty() || states.top() == SHUTDOWN) return;
00370 
00371   // Message
00372 #ifdef PARSE_DEBUG
00373   logger << "   Forcing a shutdown - SHUTDOWN state" << endl;
00374 #endif
00375 
00376   // Change the state
00377   states.push(SHUTDOWN);
00378 
00379   // Done if we have no elements on the stack, i.e. a normal end.
00380   if (numElements<0) return;
00381 
00382   // Call the ending handling of all objects on the stack
00383   // This allows them to finish off in a valid state, and delete any temporary
00384   // objects they may have allocated.
00385   objectEnded = true;
00386   m_EStack[numElements].first.reset("Not a real tag");
00387   m_EStack[numElements].second.reset();
00388   while (!m_EHStack.empty())
00389   {
00390     try {getCurrentObject()->endElement(*this, m_EStack[numElements].first, m_EStack[numElements].second);}
00391     catch (const DataException& e)
00392     {
00393       if (abortOnDataException) throw;
00394       else logger << "Continuing after data error: " << e.what() << endl;
00395     }
00396     m_EHStack.pop_back();
00397   }
00398 }
00399 
00400 
00401 void XMLInput::reset()
00402 {
00403   // Delete the xerces parser object
00404   delete parser;
00405   parser = NULL;
00406 
00407   // Call the ending handling of all objects on the stack
00408   // This allows them to finish off in a valid state, and delete any temporary
00409   // objects they may have allocated.
00410   if (!m_EHStack.empty())
00411   {
00412     // The next line is to avoid calling the endElement handler twice for the
00413     // last object. E.g. endElement handler causes and exception, and as part
00414     // of the exception handling we call the reset method.
00415     if (objectEnded) m_EHStack.pop_back();
00416     objectEnded = true;
00417     m_EStack[++numElements].first.reset("Not a real tag");
00418     m_EStack[++numElements].second.reset();
00419     while (!m_EHStack.empty())
00420     {
00421       try {getCurrentObject()->endElement(*this, m_EStack[numElements].first, m_EStack[numElements].second);}
00422       catch (const DataException& e)
00423       {
00424         if (abortOnDataException) throw;
00425         else logger << "Continuing after data error: " << e.what() << endl;
00426       }
00427       m_EHStack.pop_back();
00428     }
00429   }
00430 
00431   // Cleanup of stacks
00432   while (!states.empty()) states.pop();
00433   while (!endingHashes.empty()) endingHashes.pop();
00434 
00435   // Set all variables back to their starting values
00436   numElements = -1;
00437   ignore = 0;
00438   objectEnded = false;
00439   attributes = NULL;
00440 }
00441 
00442 
00443 void XMLInput::parse(xercesc::InputSource &in, Object *pRoot, bool validate)
00444 {
00445   try
00446   {
00447     // Create a Xerces parser
00448     parser = xercesc::XMLReaderFactory::createXMLReader();
00449 
00450     // Set the features of the parser. A bunch of the options are dependent
00451     // on whether we want to validate the input or not.
00452     parser->setProperty(xercesc::XMLUni::fgXercesScannerName, const_cast<XMLCh*>
00453         (validate ? xercesc::XMLUni::fgSGXMLScanner : xercesc::XMLUni::fgWFXMLScanner));
00454     parser->setFeature(xercesc::XMLUni::fgSAX2CoreValidation, validate);
00455     parser->setFeature(xercesc::XMLUni::fgSAX2CoreNameSpacePrefixes, false);
00456     parser->setFeature(xercesc::XMLUni::fgXercesIdentityConstraintChecking, false);
00457     parser->setFeature(xercesc::XMLUni::fgXercesDynamic, false);
00458     parser->setFeature(xercesc::XMLUni::fgXercesSchema, validate);
00459     parser->setFeature(xercesc::XMLUni::fgXercesSchemaFullChecking, false);
00460     parser->setFeature(xercesc::XMLUni::fgXercesValidationErrorAsFatal,true);
00461     parser->setFeature(xercesc::XMLUni::fgXercesIgnoreAnnotations,true);
00462 
00463     if (validate)
00464     {
00465       // Specify the no-namespace schema file
00466       string schema = Environment::searchFile("frepple.xsd");
00467       if (schema.empty())
00468         throw RuntimeException("Can't find XML schema file 'frepple.xsd'");
00469       XMLCh *c = xercesc::XMLString::transcode(schema.c_str());
00470       parser->setProperty(
00471         xercesc::XMLUni::fgXercesSchemaExternalNoNameSpaceSchemaLocation, c
00472       );
00473       xercesc::XMLString::release(&c);
00474     }
00475 
00476     // If we are reading into a NULL object, there is no need to use a
00477     // content handler or a handler stack.
00478     if (pRoot)
00479     {
00480       // Set the event handler. If we are reading into a NULL object, there is
00481       // no need to use a content handler.
00482       parser->setContentHandler(this);
00483 
00484       // Get the parser to read data into the object pRoot.
00485       m_EHStack.push_back(make_pair(pRoot,static_cast<void*>(NULL)));
00486       states.push(INIT);
00487     }
00488 
00489     // Set the error handler
00490     parser->setErrorHandler(this);
00491 
00492     // Parse the input
00493     parser->parse(in);
00494   }
00495   // Note: the reset() method needs to be called in all circumstances. The
00496   // reset method allows all objects to finish in a valid state and clean up
00497   // any memory they may have allocated.
00498   catch (const xercesc::XMLException& toCatch)
00499   {
00500     char* message = xercesc::XMLString::transcode(toCatch.getMessage());
00501     string msg(message);
00502     xercesc::XMLString::release(&message);
00503     reset();
00504     throw RuntimeException("Parsing error: " + msg);
00505   }
00506   catch (const xercesc::SAXParseException& toCatch)
00507   {
00508     char* message = xercesc::XMLString::transcode(toCatch.getMessage());
00509     ostringstream msg;
00510     if (toCatch.getLineNumber() > 0)
00511       msg << "Parsing error: " << message << " at line " << toCatch.getLineNumber();
00512     else
00513       msg << "Parsing error: " << message;
00514     xercesc::XMLString::release(&message);
00515     reset();
00516     throw RuntimeException(msg.str());
00517   }
00518   catch (const exception& toCatch)
00519   {
00520     reset();
00521     ostringstream msg;
00522     msg << "Error during XML parsing: " << toCatch.what();
00523     throw RuntimeException(msg.str());
00524   }
00525   catch (...)
00526   {
00527     reset();
00528     throw RuntimeException(
00529       "Parsing error: Unexpected exception during XML parsing");
00530   }
00531   reset();
00532 }
00533 
00534 
00535 DECLARE_EXPORT ostream& operator << (ostream& os, const XMLEscape& x)
00536 {
00537   for (const char* p = x.data; *p; ++p)
00538   {
00539     switch (*p)
00540     {
00541       case '&': os << "&amp;"; break;
00542       case '<': os << "&lt;"; break;
00543       case '>': os << "&gt;"; break;
00544       case '"': os << "&quot;"; break;
00545       case '\'': os << "&apos;"; break;
00546       default: os << *p;
00547     }
00548   }
00549   return os;
00550 }
00551 
00552 
00553 DECLARE_EXPORT void XMLOutput::incIndent()
00554 {
00555   indentstring[m_nIndent++] = '\t';
00556   if (m_nIndent > 40) m_nIndent = 40;
00557   indentstring[m_nIndent] = '\0';
00558 }
00559 
00560 
00561 DECLARE_EXPORT void XMLOutput::decIndent()
00562 {
00563   if (--m_nIndent < 0) m_nIndent = 0;
00564   indentstring[m_nIndent] = '\0';
00565 }
00566 
00567 
00568 DECLARE_EXPORT void XMLOutput::writeElement
00569 (const Keyword& tag, const Object* object, mode m)
00570 {
00571   // Avoid NULL pointers and skip hidden objects
00572   if (!object || object->getHidden()) return;
00573 
00574   // Adjust current and parent object pointer
00575   const Object *previousParent = parentObject;
00576   parentObject = currentObject;
00577   currentObject = object;
00578   ++numObjects;
00579   ++numParents;
00580 
00581   // Call the write method on the object
00582   if (m != DEFAULT)
00583     // Mode is overwritten
00584     object->writeElement(this, tag, m);
00585   else
00586     // Choose wether to save a reference of the object.
00587     // The root object can't be saved as a reference.
00588     object->writeElement(this, tag, numParents>2 ? REFERENCE : DEFAULT);
00589 
00590   // Adjust current and parent object pointer
00591   --numParents;
00592   currentObject = parentObject;
00593   parentObject = previousParent;
00594 }
00595 
00596 
00597 DECLARE_EXPORT void XMLOutput::writeElementWithHeader(const Keyword& tag, const Object* object)
00598 {
00599   // Root object can't be null...
00600   if (!object)
00601     throw RuntimeException("Can't accept a NULL object as XML root");
00602 
00603   // There should not be any saved objects yet
00604   if (numObjects > 0)
00605     throw LogicException("Can't have multiple headers in a document");
00606   assert(!parentObject);
00607   assert(!currentObject);
00608 
00609   // Write the first line for the xml document
00610   writeString(getHeaderStart());
00611 
00612   // Adjust current object pointer
00613   currentObject = object;
00614 
00615   // Write the object
00616   ++numObjects;
00617   ++numParents;
00618   BeginObject(tag, getHeaderAtts());
00619   object->writeElement(this, tag, NOHEADER);
00620 
00621   // Adjust current and parent object pointer
00622   currentObject = NULL;
00623   parentObject = NULL;
00624 }
00625 
00626 
00627 DECLARE_EXPORT void XMLOutput::writeHeader(const Keyword& tag)
00628 {
00629   // There should not be any saved objects yet
00630   if (numObjects > 0 || !parentObject || !currentObject)
00631     throw LogicException("Writing invalid header to XML document");
00632 
00633   // Write the first line and the opening tag
00634   writeString(getHeaderStart());
00635   BeginObject(tag, getHeaderAtts());
00636 
00637   // Fake a dummy parent
00638   numParents += 2;
00639 }
00640 
00641 
00642 DECLARE_EXPORT bool XMLElement::getBool() const
00643 {
00644   switch (getData()[0])
00645   {
00646     case 'T':
00647     case 't':
00648     case '1':
00649       return true;
00650     case 'F':
00651     case 'f':
00652     case '0':
00653       return false;
00654   }
00655   throw DataException("Invalid boolean value: " + string(getData()));
00656 }
00657 
00658 
00659 DECLARE_EXPORT const char* Attribute::getName() const
00660 {
00661   if (ch) return ch;
00662   Keyword::tagtable::const_iterator i = Keyword::getTags().find(hash);
00663   if (i == Keyword::getTags().end())
00664     throw LogicException("Undefined element keyword");
00665   return i->second->getName().c_str();
00666 }
00667 
00668 
00669 DECLARE_EXPORT Keyword::Keyword(const string& name) : strName(name)
00670 {
00671   // Error condition: name is empty
00672   if (name.empty()) throw LogicException("Creating keyword without name");
00673 
00674   // Create a number of variations of the tag name
00675   strStartElement = string("<") + name;
00676   strEndElement = string("</") + name + ">\n";
00677   strElement = string("<") + name + ">";
00678   strAttribute = string(" ") + name + "=\"";
00679 
00680   // Compute the hash value
00681   dw = hash(name.c_str());
00682 
00683   // Create a properly encoded Xerces string
00684   xercesc::XMLPlatformUtils::Initialize();
00685   xmlname = xercesc::XMLString::transcode(name.c_str());
00686 
00687   // Verify that the hash is "perfect".
00688   check();
00689 }
00690 
00691 
00692 DECLARE_EXPORT Keyword::Keyword(const string& name, const string& nspace)
00693   : strName(name)
00694 {
00695   // Error condition: name is empty
00696   if (name.empty())
00697     throw LogicException("Creating keyword without name");
00698   if (nspace.empty())
00699     throw LogicException("Creating keyword with empty namespace");
00700 
00701   // Create a number of variations of the tag name
00702   strStartElement = string("<") + nspace + ":" + name;
00703   strEndElement = string("</") + nspace + ":" + name + ">\n";
00704   strElement = string("<") + nspace + ":" + name + ">";
00705   strAttribute = string(" ") + nspace + ":" + name + "=\"";
00706 
00707   // Compute the hash value
00708   dw = hash(name);
00709 
00710   // Create a properly encoded Xerces string
00711   xercesc::XMLPlatformUtils::Initialize();
00712   xmlname = xercesc::XMLString::transcode(string(nspace + ":" + name).c_str());
00713 
00714   // Verify that the hash is "perfect".
00715   check();
00716 }
00717 
00718 
00719 void Keyword::check()
00720 {
00721   // To be thread-safe we make sure only a single thread at a time
00722   // can execute this check.
00723   static Mutex dd;
00724   {
00725     ScopeMutexLock l(dd);
00726     tagtable::const_iterator i = getTags().find(dw);
00727     if (i!=getTags().end() && i->second->getName()!=strName)
00728       throw LogicException("Tag XML-tag hash function clashes for "
00729           + i->second->getName() + " and " + strName);
00730     getTags().insert(make_pair(dw,this));
00731   }
00732 }
00733 
00734 
00735 DECLARE_EXPORT Keyword::~Keyword()
00736 {
00737   // Remove from the tag list
00738   tagtable::iterator i = getTags().find(dw);
00739   if (i!=getTags().end()) getTags().erase(i);
00740 
00741   // Destroy the xerces string
00742   xercesc::XMLString::release(&xmlname);
00743   xercesc::XMLPlatformUtils::Terminate();
00744 }
00745 
00746 
00747 DECLARE_EXPORT const Keyword& Keyword::find(const char* name)
00748 {
00749   tagtable::const_iterator i = getTags().find(hash(name));
00750   return *(i!=getTags().end() ? i->second : new Keyword(name));
00751 }
00752 
00753 
00754 DECLARE_EXPORT Keyword::tagtable& Keyword::getTags()
00755 {
00756   static tagtable alltags;
00757   return alltags;
00758 }
00759 
00760 
00761 DECLARE_EXPORT hashtype Keyword::hash(const char* c)
00762 {
00763   if (c == 0 || *c == 0) return 0;
00764 
00765   // Compute hash
00766   const char* curCh = c;
00767   hashtype hashVal = *curCh++;
00768   while (*curCh)
00769     hashVal = (hashVal * 38) + (hashVal >> 24) + *curCh++;
00770 
00771   // Divide by modulus
00772   return hashVal % 954991;
00773 }
00774 
00775 
00776 DECLARE_EXPORT hashtype Keyword::hash(const XMLCh* t)
00777 {
00778   char* c = xercesc::XMLString::transcode(t);
00779   if (c == 0 || *c == 0)
00780   {
00781     xercesc::XMLString::release(&c);
00782     return 0;
00783   }
00784 
00785   // Compute hash
00786   const char* curCh = c;
00787   hashtype hashVal = *curCh++;
00788   while (*curCh)
00789     hashVal = (hashVal * 38) + (hashVal >> 24) + *curCh++;
00790 
00791   // Divide by modulus
00792   xercesc::XMLString::release(&c);
00793   return hashVal % 954991;
00794 }
00795 
00796 
00797 DECLARE_EXPORT void Keyword::printTags()
00798 {
00799   for (tagtable::iterator i = getTags().begin(); i != getTags().end(); ++i)
00800     logger << i->second->getName() << "   " << i->second->dw << endl;
00801 }
00802 
00803 
00804 void XMLInputFile::parse(Object *pRoot, bool validate)
00805 {
00806   // Check if string has been set
00807   if (filename.empty())
00808     throw DataException("Missing input file or directory");
00809 
00810   // Check if the parameter is the name of a directory
00811   struct stat stat_p;
00812   if (stat(filename.c_str(), &stat_p))
00813     // Can't verify the status
00814     throw RuntimeException("Couldn't open input file '" + filename + "'");
00815   else if (stat_p.st_mode & S_IFDIR)
00816   {
00817     // Data is a directory: loop through all *.xml files now. No recursion in
00818     // subdirectories is done.
00819     // The code is unfortunately different for Windows & Linux. Sigh...
00820 #ifdef _MSC_VER
00821     string f = filename + "\\*.xml";
00822     WIN32_FIND_DATA dir_entry_p;
00823     HANDLE h = FindFirstFile(f.c_str(), &dir_entry_p);
00824     if (h == INVALID_HANDLE_VALUE)
00825       throw RuntimeException("Couldn't open input file '" + f + "'");
00826     do
00827     {
00828       f = filename + '/' + dir_entry_p.cFileName;
00829       XMLInputFile(f.c_str()).parse(pRoot);
00830     }
00831     while (FindNextFile(h, &dir_entry_p));
00832     FindClose(h);
00833 #elif HAVE_DIRENT_H
00834     struct dirent *dir_entry_p;
00835     DIR *dir_p = opendir(filename.c_str());
00836     while (NULL != (dir_entry_p = readdir(dir_p)))
00837     {
00838       int n = NAMLEN(dir_entry_p);
00839       if (n > 4 && !strcmp(".xml", dir_entry_p->d_name + n - 4))
00840       {
00841         string f = filename + '/' + dir_entry_p->d_name;
00842         XMLInputFile(f.c_str()).parse(pRoot, validate);
00843       }
00844     }
00845     closedir(dir_p);
00846 #else
00847     throw RuntimeException("Can't process a directory on your platform");
00848 #endif
00849   }
00850   else
00851   {
00852     // Normal file
00853     // Parse the file
00854     XMLCh *f = xercesc::XMLString::transcode(filename.c_str());
00855     xercesc::LocalFileInputSource in(f);
00856     xercesc::XMLString::release(&f);
00857     XMLInput::parse(in, pRoot, validate);
00858   }
00859 }
00860 
00861 } // end namespace
00862 } // end namespace

Documentation generated for frePPLe by  doxygen