00001 /* 00002 * Copyright 2006-2008 The FLWOR Foundation. 00003 * 00004 * Licensed under the Apache License, Version 2.0 (the "License"); 00005 * you may not use this file except in compliance with the License. 00006 * You may obtain a copy of the License at 00007 * 00008 * http://www.apache.org/licenses/LICENSE-2.0 00009 * 00010 * Unless required by applicable law or agreed to in writing, software 00011 * distributed under the License is distributed on an "AS IS" BASIS, 00012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00013 * See the License for the specific language governing permissions and 00014 * limitations under the License. 00015 */ 00016 00017 #ifndef ZORBA_XQUERY_FUNCTIONS_API_H 00018 #define ZORBA_XQUERY_FUNCTIONS_API_H 00019 00020 #include <zorba/config.h> 00021 #include <zorba/internal/unique_ptr.h> 00022 #include <zorba/zorba_string.h> 00023 00024 namespace zorba { 00025 00026 /////////////////////////////////////////////////////////////////////////////// 00027 00028 /** 00029 * Provides a way for a function to return a sequence of some type \c T that 00030 * can be iterated over. 00031 * 00032 * @tparam T The type of sequence. 00033 */ 00034 template<typename T> 00035 class Sequence { 00036 public: 00037 typedef T value_type; 00038 00039 struct iterator { 00040 virtual ~iterator() { } 00041 virtual bool next( value_type *result ) = 0; 00042 }; 00043 00044 /** 00045 * Constructs a new %Sequence. 00046 * This constructor is intended only for function implementors. 00047 * 00048 * @param i The iterator that provides the elements of the sequence. 00049 * Ownership of the iterator is taken. 00050 */ 00051 Sequence( iterator *i ) : i_( i ) { } 00052 00053 /** 00054 * Copy constructs a %Sequence. 00055 * 00056 * @param s The %Sequence to copy from. Note that it is a destructive copy 00057 * in that the sequence of \a s is 'i' moved. 00058 */ 00059 Sequence( Sequence const &s ) : i_( std::move( s.i_ ) ) { } 00060 00061 /** 00062 * Gets the next element in the sequence. 00063 * 00064 * @param result A pointer to the variable to receive the next element. 00065 * @return \c true only if there is a next element. 00066 */ 00067 bool next( value_type *result ) { 00068 return i_->next( result ); 00069 } 00070 00071 private: 00072 mutable std::unique_ptr<iterator> i_; 00073 00074 // forbid 00075 Sequence& operator=( Sequence const& ); 00076 }; 00077 00078 /////////////////////////////////////////////////////////////////////////////// 00079 00080 namespace fn { 00081 00082 ////////// 5.4 Functions on string values ///////////////////////////////////// 00083 00084 /** 00085 * Translates every character to its upper-case correspondent as defined in the 00086 * appropriate case mappings section in the Unicode standard. 00087 * 00088 * Every lower-case character that does not have an upper-case correspondent, 00089 * as well as every upper-case character, is included in the returned value in 00090 * its original form. 00091 * 00092 * @param arg The string to translate. 00093 * @return \a arg translated to upper-case. 00094 */ 00095 ZORBA_DLL_PUBLIC 00096 String upper_case( String const &arg ); 00097 00098 /** 00099 * Translates every character to its lower-case correspondent as defined in the 00100 * appropriate case mappings section in the Unicode standard. 00101 * 00102 * Every upper-case character that does not have a lower-case correspondent, as 00103 * well as every lower-case character, is included in the returned value in its 00104 * original form. 00105 * 00106 * @param arg The string to translate. 00107 * @return \a arg translated to lower-case. 00108 */ 00109 ZORBA_DLL_PUBLIC 00110 String lower_case( String const &arg ); 00111 00112 ////////// 5.5 Functions based on substring matching ////////////////////////// 00113 00114 /** 00115 * Tests whether or not the value of \a arg1 ends with a sequence of 00116 * collation units that provides a match to the collation units of \a arg2. 00117 * 00118 * @param arg1 The string to test. 00119 * @param arg2 The substring. 00120 * @return \c true only if \a arg1 ends with \a arg2 or \a arg2 is the 00121 * zero-length string. 00122 */ 00123 ZORBA_DLL_PUBLIC 00124 bool ends_with( String const &arg1, String const &arg2 ); 00125 00126 /** 00127 * Tests whether or not the value of \a arg1 ends with a sequence of 00128 * collation units that provides a match to the collation units of \a arg2. 00129 * 00130 * @param arg1 The string to test. 00131 * @param arg2 The substring. 00132 * @return \c true only if \a arg1 ends with \a arg2 or \a arg2 is the 00133 * zero-length string. 00134 */ 00135 ZORBA_DLL_PUBLIC 00136 bool ends_with( String const &arg1, char const *arg2 ); 00137 00138 /** 00139 * Tests whether or not the value of \a arg1 starts with a sequence of 00140 * collation units that provides a match to the collation units of \a arg2. 00141 * 00142 * @param arg1 The string to test. 00143 * @param arg2 The substring. 00144 * @return \c true only if \a arg1 starts with \a arg2 or \a arg2 is the 00145 * zero-length string. 00146 */ 00147 ZORBA_DLL_PUBLIC 00148 bool starts_with( String const &arg1, String const &arg2 ); 00149 00150 /** 00151 * Tests whether or not the value of \a arg1 starts with a sequence of 00152 * collation units that provides a match to the collation units of \a arg2. 00153 * 00154 * @param arg1 The string to test. 00155 * @param arg2 The substring. 00156 * @return \c true only if \a arg1 starts with \a arg2 or \a arg2 is the 00157 * zero-length string. 00158 */ 00159 ZORBA_DLL_PUBLIC 00160 bool starts_with( String const &arg1, char const *arg2 ); 00161 00162 //////// 6 Functions that manipulate URIs ///////////////////////////////////// 00163 00164 /** 00165 * Encodes reserved characters in an xs:string that is intended to be used in 00166 * the path segment of a URI. It is invertible but not idempotent. 00167 * 00168 * This function applies the URI escaping rules defined in section 2 of [RFC 00169 * 3986] to the xs:string supplied as \a uri_part. The effect of the function 00170 * is to escape reserved characters. Each such character in the string is 00171 * replaced with its percent-encoded form as described in [RFC 3986]. 00172 * 00173 * All characters are escaped except those identified as "unreserved" by [RFC 00174 * 3986], that is the upper- and lower-case letters A-Z, the digits 0-9, 00175 * HYPHEN-MINUS ("-"), LOW LINE ("_"), FULL STOP ".", and TILDE "~". 00176 * 00177 * @param uri_part The URI to be encoded. 00178 * @return the encoded string. 00179 */ 00180 ZORBA_DLL_PUBLIC 00181 String encode_for_uri( String const &uri_part ); 00182 00183 ////////// 7.6 String Functions that Use Pattern Matching ///////////////////// 00184 00185 /** 00186 * This function breaks the \a input string into a sequence of strings, 00187 * treating any substring that matches \a pattern as a separator. The 00188 * separators themselves are not returned. 00189 * 00190 * Performance note: if \a pattern is a simple string (not a regular expression 00191 * with meta-characers), it is more efficient to use String::find(). 00192 * 00193 * @param input The string to be split into tokens. If \a input is the empty 00194 * sequence, or if \a input is the zero-length string, the result is the empty 00195 * sequence. 00196 * @param pattern The regular expression. If it matches a zero-length string, 00197 * then an error is raised: [err:FORX0003]. 00198 * @param flags The regular expression flags, if any. 00199 * @return a sequence of strings for the tokens. 00200 */ 00201 ZORBA_DLL_PUBLIC 00202 Sequence<String> tokenize( String const &input, char const *pattern, 00203 char const *flags = "" ); 00204 00205 /** 00206 * This function breaks the \a input string into a sequence of strings, 00207 * treating any substring that matches \a pattern as a separator. The 00208 * separators themselves are not returned. 00209 * 00210 * Performance note: if \a pattern is a simple string (not a regular expression 00211 * with meta-characers), it is more efficient to use String::find(). 00212 * 00213 * @param input The string to be split into tokens. If \a input is the empty 00214 * sequence, or if \a input is the zero-length string, the result is the empty 00215 * sequence. 00216 * @param pattern The regular expression. If it matches a zero-length string, 00217 * then an error is raised: [err:FORX0003]. 00218 * @param flags The regular expression flags, if any. 00219 * @return a sequence of strings for the tokens. 00220 */ 00221 inline 00222 Sequence<String> tokenize( String const &input, String const &pattern, 00223 char const *flags = "" ) { 00224 return tokenize( input, pattern.c_str(), flags ); 00225 } 00226 00227 /////////////////////////////////////////////////////////////////////////////// 00228 00229 } // namespace fn 00230 } // namespace zorba 00231 00232 #endif /* ZORBA_XQUERY_FUNCTIONS_API_H */ 00233 /* vim:set et sw=2 ts=2: */