vector<TParse_rule>
const& rules,
71vector<TFun_transform_other>
const& range_rules,
106pair<string, string>
result=
132 string const& regex,
152 Swap(*
this, temp);
171shared_ptr<CRegexp> temp(lhs.
m_Regexp);
194 return "eAmbiguousDate";
205vector<TParse_rule>
const& rules,
206vector<TFun_transform_other>
const& range_rules,
211 for(
autorule = rules.begin(); rule != rules.end(); ++rule ) {
212 CRegexp& re = rule->GetRegexp();
216 returnmake_pair(rule->GetTag(), rule->MakeTransform(
match));
222pair<string, string>
result= (* transform)(
value);
223 if( !
result.second.empty() ) {
230 returntransform_ambiguous_date_fun(
value);
247{
"december",
"12"},
249{
"february",
"02"},
251{
"january",
"01"},
260{
"november",
"11"},
262{
"october",
"10"},
264{
"september",
"09"},
269 autoit = s_MonthLookupTable.find(
month_name.c_str());
270 if( it == s_MonthLookupTable.end() ) {
281 char const* annot_tag;
288 "^((?:1\\d{3}|2\\d{3}))$",
292 "(?i)^([a-z]+(?:\\s[a-z]+)*)$",
296 "(?i)^((?:na|n[.]a[.]|n/a))$",
300 "^([123]\\d{3}\\-(?:[0][1-9]|[1][012])\\-(?:[0][1-9]|[12][0-9]|[3][01])(?:T(?:[01][0-9]|2[0123])(?:[:][0-5][0-9]){1,2})Z)$",
304 "^([123]\\d{3}\\-(?:[0][1-9]|[1][012])\\-(?:[0][1-9]|[12][0-9]|[3][01]))(?:[T ](?:[01][0-9]|2[0123])(?:[:][0-5][0-9]){1,2})?$",
308 "^([123]\\d{3}\\-(?:[0][1-9]|[1][012]))$",
312 "^([123]\\d{3}/(?:0?[1-9]|[1][012])/(?:0?[1-9]|[12][0-9]|[3][01]))$",
316 "^([123]\\d{3}\\-(?:0?[1-9]|[1][012])\\-(?:0?[1-9]|[12][0-9]|[3][01]))$",
321 "(?i)^((?:[1][3-9]|[2][0-9]|[3][012])([-./])(?:0?[1-9]|[1][012])\\2(?:[123]\\d{3}|\\d{2}))(?: (?:0[1-9]|1[012])(?:[:][0-5][0-9]){1,2}(?:[ ]?[AP]M|[ ]?[AP][.]M[.]))?$",
326 "(?i)^((?:0?[1-9]|[1][012])([-/.])(?:0?[1-9]|[12][0-9]|[3][01])\\2(?:[123]\\d{3}|\\d{2}))(?: (?:0[1-9]|1[012])(?:[:][0-5][0-9]){1,2}(?:[ ]?[AP]M|[ ]?[AP][.]M[.]))?$",
330 "(?i)^((?:Jan(uary)?|Feb(ruary)?|Mar(ch)?|Apr(il)?|May|June?|July?|Aug(ust)?|Sep(tember)?|Oct(ober)?|Nov(ember)?|Dec(ember)?)\\s(?:0?[1-9]|[12][0-9]|[3][01]),?[ ](?:[123]\\d{3}|\\d{2}))$",
334 "(?i)^((?:0?[1-9]|[12][0-9]|[3][01])([- ])(?:Jan(uary)?|Feb(ruary)?|Mar(ch)?|Apr(il)?|May|June?|July?|Aug(ust)?|Sep(tember)?|Oct(ober)?|Nov(ember)?|Dec(ember)?)\\2(?:[123]\\d{3}|\\d{2}))$",
338 "(?i)^((?:0?[1-9]|[12][0-9]|[3][01])[ ](?:Jan(uary)?|Feb(ruary)?|Mar(ch)?|Apr(il)?|May|June?|July?|Aug(ust)?|Sep(tember)?|Oct(ober)?|Nov(ember)?|Dec(ember)?),[ ](?:[123]\\d{3}|\\d{2}))$",
342 "(?i)^((?:Jan(uary)?|Feb(ruary)?|Mar(ch)?|Apr(il)?|May|June?|July?|Aug(ust)?|Sep(tember)?|Oct(ober)?|Nov(ember)?|Dec(ember)?)[-./ ](?:[123]\\d{3}|\\d{2}))$",
346 "(?i)^((?:[12]\\d{3}|\\d{2})[-./ ](?:Jan(uary)?|Feb(ruary)?|Mar(ch)?|Apr(il)?|May|June?|July?|Aug(ust)?|Sep(tember)?|Oct(ober)?|Nov(ember)?|Dec(ember)?))$",
350 "^((?:19\\d{2}|2\\d{3})[-/. ](?:0?[1-9]|1[012]))$",
355 "^((?:0?[1-9]|1[012])[-/. ](?:19\\d{2}|2\\d{3}))$",
360 "^((?:19\\d{2}|2\\d{3})\\-(?:0[1-9]|1[012])\\-(?:[0][1-9]|[12][0-9]|[3][01])\\/(?:19\\d{2}|2\\d{3})\\-(?:0[1-9]|1[012])\\-(?:[0][1-9]|[12][0-9]|[3][01]))$",
364 "^((?:19\\d{2}|2\\d{3})\\-(?:0[1-9]|1[012])\\/(?:19\\d{2}|2\\d{3})\\-(?:0[1-9]|1[012]))$",
368 "^((?:19\\d{2}|2\\d{3})\\/(?:19\\d{2}|2\\d{3}))$",
372 "^((?:19[0-9]0|2\\d{2}0))s$",
376 "^.*?(?<=before[ ])((?:19\\d{2}|2\\d{3}))$",
380 "^.*?(?<=pre[-])((?:19\\d{2}|2\\d{3}))$",
388 if(parse_rules->empty()) {
389 for(
structTRules* entry = &rules_table[0]; entry->annot_tag != 0; ++entry ) {
390parse_rules->push_back(
TParse_rule(entry->annot_tag, entry->regexp, entry->transform) );
393 returnparse_rules.
Get();
405 if(range_rules->empty()) {
407range_rules->push_back(*entry);
410 returnrange_rules.
Get();
426 static CRegexpre(
"^(?:0?[1-9]|1[012])([-.\\/])(?:0?[1-9]|[12][0-9]|3[01])\\1((?:19\\d{2}|2\\d{3}|\\d{2}))$");
430 intyear = NStr::StringToNumeric<int>(
match);
432year = 1900 + ((year > 70) ? year : year + 100);
443 CRegexpre(
"(?i)(?:between(.+?)and(.+?)|^(.+?)\\/(.+?))$");
463 for( vector<TParse_rule>::const_iterator rule = rules.begin(); rule != rules.end(); ++rule ) {
465 if( rule->GetTag().find(
"RANGE") == 0 ) {
469 CRegexp& re_rule = rule->GetRegexp();
470 if( re_rule.
IsMatch(lhs) ) {
472 stringmatch_lhs = re_rule.
GetSub(lhs, 1);
473 if( re_rule.
IsMatch(rhs) ) {
474 stringmatch_rhs = re_rule.
GetSub(rhs, 1);
475 stringresult_lhs = rule->MakeTransform(match_lhs);
476 stringresult_rhs = rule->MakeTransform(match_rhs);
478 stringprefix =
"RANGE|";
479 if( rule->GetTag().find(
"CAST") == string::npos ) {
482 stringrange = result_lhs +
"/"+ result_rhs;
483 returnmake_pair(prefix + rule->GetTag(), range);
504vector<string> tokens;
510<< setfill(
'0') << setw(2)
511<< NStr::StringToNumeric<int>(tokens[1])
514<< NStr::StringToNumeric<int>(tokens[2]);
521vector<string> tokens;
524 intmonth = NStr::StringToNumeric<int>(tokens[0]);
525 intday = NStr::StringToNumeric<int>(tokens[1]);
526 intyear = NStr::StringToNumeric<int>(tokens[2]);
528 if( day < 13 && day != month ) {
533year = 1900 + ( ( year > 70 ) ? year : 100 + year );
539<< setfill(
'0') << setw(2)
550vector<string> tokens;
553 intday = NStr::StringToNumeric<int>(tokens[0]);
554 intmonth = NStr::StringToNumeric<int>(tokens[1]);
555 intyear = NStr::StringToNumeric<int>(tokens[2]);
557 if( day < 13 && day != month ) {
562year = 1900 + ( ( year > 70 ) ? year : 100 + year );
568<< setfill(
'0') << setw(2)
579vector<string> tokens;
582 intday = NStr::StringToNumeric<int>(tokens[0]);
583 intyear = NStr::StringToNumeric<int>(tokens[2]);
585year = 1900 + ( ( year > 70 ) ? year : 100 + year );
593<< setfill(
'0') << setw(2)
600vector<string> tokens;
603 stringmonth = tokens[1];
604 size_tpos = month.find_last_of(
",");
607 intday = NStr::StringToNumeric<int>(tokens[0]);
608 intyear = NStr::StringToNumeric<int>(tokens[2]);
610year = 1900 + ( ( year > 70 ) ? year : 100 + year );
618<< setfill(
'0') << setw(2)
627vector<string> tokens;
632 string& day = tokens[1];
633 size_tpos = day.find_last_of(
",");
634 if( pos != std::string::npos ) {
638 intday = NStr::StringToNumeric<int>(tokens[1]);
639 intyear = NStr::StringToNumeric<int>(tokens[2]);
641year = 1900 + ( ( year > 70 ) ? year : 100 + year );
649<< setfill(
'0') << setw(2)
657vector<string> tokens;
660 intyear = NStr::StringToNumeric<int>(tokens[1]);
662year = 1900 + ( ( year > 70 ) ? year : 100 + year );
674vector<string> tokens;
677 intyear = NStr::StringToNumeric<int>(tokens[0]);
679year = 1900 + ( ( year > 70 ) ? year : 100 + year );
692vector<string> tokens;
695 intmonth = NStr::StringToNumeric<int>(tokens[1]);
700<< setfill(
'0') << setw(2)
709vector<string> tokens;
712 intmonth = NStr::StringToNumeric<int>(tokens[0]);
717<< setfill(
'0') << setw(2)
725 intyear = NStr::StringToNumeric<int>(
value);
737 intyear = NStr::StringToNumeric<int>(
value);
void transform(Container &c, UnaryFunction *op)
virtual const char * GetErrCodeString(void) const override
Get error code interpreted as text.
NCBI_EXCEPTION_DEFAULT(CAmbiguousDateException, CException)
T & Get(void)
Create the variable if not created yet, return the reference.
class CStaticArrayMap<> is an array adaptor that provides an STLish interface to statically-defined a...
TParse_rule & operator=(TParse_rule const &other)
void Swap(TParse_rule &lhs, TParse_rule &rhs)
CRegexp & GetRegexp() const
string const & GetRegexpStr() const
string MakeTransform(string const &value) const
string const & GetTag() const
TParse_rule(TParse_rule const &rhs)
TParse_rule(string const &tag, string const &regex, TFun_transform transform)
shared_ptr< CRegexp > m_Regexp
TFun_transform m_Transform
static vector< TParse_rule > const & get_date_rule_collection()
static string transform_month_DD_YYYY(string const &value)
static string transform_identity(string const &value)
static vector< TFun_transform_other > const & get_date_range_rule_collection()
static string transform_MM_YYYY(string const &value)
static string transform_DD_mm_YYYY(string const &value)
static const char * kTransform_code_iso8601
static string transform_range_before(string const &value)
static const char * transfrom_code_range_iso8601
static const char * kTransform_code_cast_iso8601
static pair< string, string > extract_date_iso8601(string const &value, vector< TParse_rule > const &rules, vector< TFun_transform_other > const &range_rules, TFun_transform_other ambig_rule)
static string transform_YYYY_MM(string const &value)
static string transform_DD_month_comma_YYYY(string const &value)
static string transform_YYYY_month(string const &value)
string(* TFun_transform)(string const &)
static const char * kTransform_code_cast_na
pair< string, string >(* TFun_transform_other)(string const &)
const char * get_month_code_by_name(string const &month_name)
static const char * kTransform_code_no_date
static string transform_missing(string const &value)
static string transform_range_decade(string const &value)
static string transform_mm_DD_YYYY(string const &value)
static string transform_month_YYYY(string const &value)
static const char * kTransform_code_cast_ambig
static pair< string, string > transform_ambiguous_date(string const &value)
static string transform_DD_month_YYYY(string const &value)
static const char * kTransform_code_range_cast_iso8601
static string transform_YYYY_mm_DD(string const &value)
static TFun_transform_other get_transform_for_ambiguous_date()
static pair< string, string > transform_range(string const &value)
Convert dates from an arbitrary format to corresponding ISO 8601.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
TErrCode GetErrCode(void) const
Get error code.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
EErrCode
Error types that an application can generate.
virtual const char * GetErrCodeString(void) const
Get error code interpreted as text.
bool IsMatch(CTempString str, TMatch flags=fMatch_default)
Check existence substring which match a specified pattern.
pair< string, string > ConvertDateTo_iso8601_and_annotate(string const &value)
Convert dates from an arbitrary format to corresponding ISO 8601, with annotation.
string ConvertDateTo_iso8601(string const &value)
Convert dates from an arbitrary format to corresponding ISO 8601.
CTempString GetSub(CTempString str, size_t idx=0) const
Get pattern/subpattern from previous GetMatch().
CTempString GetMatch(CTempString str, size_t offset=0, size_t idx=0, TMatch flags=fMatch_default, bool noreturn=false)
Get matching pattern and subpatterns.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
bool empty(void) const
Return true if the represented string is empty (i.e., the length is zero)
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
static const char * month_name[]
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
const GenericPointer< typename T::ValueType > T2 value
static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
#define DEFINE_STATIC_ARRAY_MAP(Type, Var, Array)
Template structure SStaticPair is a simplified replacement of STL pair<>. Main reason of introducing this...
C++ wrappers for the Perl-compatible regular expression (PCRE) library.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4