fromEnv =
env.Get(
"NCBI_GEO_LOC_NAME_FOR_COUNTRY");
74 if(fromEnv ==
"true") {
76}
else if(fromEnv ==
"false") {
81 stringfromConfig = reg.
GetString(
"OrgSubSource",
"UseGeoLocNameForCountry",
"off");
83 if(fromConfig ==
"1"|| fromConfig ==
"on"|| fromConfig ==
"true"|| fromConfig ==
"yes") {
131replace(name.begin(), name.end(),
'_',
'-');
132replace(name.begin(), name.end(),
' ',
'-');
141 if(name ==
"insertion-seq") {
143}
else if(name ==
"plasmid") {
145}
else if(name ==
"transposon") {
147}
else if(name ==
"sub-clone") {
161replace(name.begin(), name.end(),
'_',
'-');
162replace(name.begin(), name.end(),
' ',
'-');
172 if(name ==
"insertion-seq"||
174name ==
"transposon"||
175name ==
"sub-clone") {
266 if(month < 1 || month > 12 || day < 1) {
272}
else if(year > 3000) {
274}
else if(year < 1538) {
277 CTimemonth_o(year, month, 1);
289 "collection-date string is blank");
293 if(IsISOFormatDate(
str)) {
294 returnGetDateFromISODate(
str);
307month =
str.substr(0, pos);
308year =
str.substr(pos + 1);
311 "collection-date string is improperly formatted");
314day =
str.substr(0, pos);
315month =
str.substr(pos + 1, pos2 - pos - 1);
316year =
str.substr(pos2 + 1);
319 "collection-date string is improperly formatted");
330 "collection-date string has invalid month");
340 "collection-date string has invalid day value");
342}
catch(
constexception& ) {
345 "collection-date string is improperly formatted");
351 "collection-date string is improperly formatted");
357}
catch(
constexception& ) {
360 "collection-date string is improperly formatted");
370 if(year_val < 1000) {
372 "collection-date year is out of range");
375 if(year_val >= 2100) {
377 "collection-date year is out of range");
380 if(day_val > 0 && month_val > 0 && !IsDayValueOkForMonth(day_val, month_val, year_val)) {
382 "collection-date day is greater than monthly maximum");
415 boolin_future =
false;
417vector<string> pieces;
419 if(pieces.size() > 2) {
422 ITERATE(vector<string>, it, pieces) {
458vector<string> pieces;
460 if(pieces.size() > 2) {
463}
else if(pieces.size() == 2) {
464 boolfirst_bad =
false;
465 boolfirst_future =
false;
466 boolsecond_bad =
false;
467 boolsecond_future =
false;
470bad_format = first_bad || second_bad;
472in_future = first_future || second_future;
485 size_tpos2 =
NStr::Find(date_string,
"-", pos + 1);
486 if(pos2 !=
NPOS&& pos != 2) {
507vector<string> pieces;
509 if(pieces.size() > 2) {
511}
else if(pieces.size() == 2) {
536 size_tpos2 =
NStr::Find(date_string,
"-", pos + 1);
537 if(pos2 !=
NPOS&& pos != 2) {
563 "missing: control sample",
564 "missing: data agreement established pre-2023",
565 "missing: endangered species",
566 "missing: human-identifiable",
567 "missing: lab stock",
568 "missing: sample group",
569 "missing: synthetic construct",
570 "missing: third party data",
574 "restricted access",
580 if(s_Null_CollectionDatesSet.find(date_string.c_str()) != s_Null_CollectionDatesSet.end()) {
581 if(is_influenza_or_Sars2) {
582is_null_and_virus =
true;
583problem = date_string;
587 size_trval = CheckDateFormat(date_string);
588 if(rval & eDateFormatFlag_bad_format) {
589problem =
"Collection_date format is not in DD-Mmm-YYYY format";
590}
else if(rval & eDateFormatFlag_in_future) {
591problem =
"Collection_date is in the future";
592}
else if(rval & eDateFormatFlag_out_of_order) {
593problem =
"Collection_dates are out of order";
601 boolis_null_and_virus =
false;
613 if(second_pos !=
NPOS) {
616 boolmonth_ambig =
false;
617 stringfirst_date =
FixDateFormat(orig_date.substr(0, pos),
true, month_ambig);
621 stringsecond_date =
FixDateFormat(orig_date.substr(pos + delim.
length()),
true, month_ambig);
625 stringfix = first_date +
"/"+ second_date;
632 boolmonth_ambiguous =
false;
634 stringfix =
FixDateFormat(orig_date,
true, month_ambiguous);
635 if(month_ambiguous) {
638 static const char* delimiters[] = {
"/",
" to ",
" and ",
"-",
"_"};
659 if(suffix ==
NPOS) {
661 if(suffix ==
NPOS) {
662 if(require_time_zone) {
665suffix = orig_time.length();
668 if(orig_time.substr(suffix).length() != 6 ||
669!
isdigit((
unsigned char)orig_time[suffix + 1]) ||
670!
isdigit((
unsigned char)orig_time[suffix + 2]) ||
671orig_time[suffix + 3] !=
':'||
672!
isdigit((
unsigned char)orig_time[suffix + 4]) ||
673!
isdigit((
unsigned char)orig_time[suffix + 5])) {
684 if(suffix != 2 && suffix != 5 && suffix != 8) {
688 if(!
isdigit((
unsigned char)orig_time[0]) || !
isdigit((
unsigned char)orig_time[1])) {
696 if(hour < 0 || hour > 23) {
704 if(!
isdigit((
unsigned char)orig_time[3]) || !
isdigit((
unsigned char)orig_time[4])) {
709 if(min < 0 || min > 59) {
718 if(!
isdigit((
unsigned char)orig_time[6]) || !
isdigit((
unsigned char)orig_time[7])) {
726}
else if(sec > 59) {
751 if(cpy.length() != 10 && cpy.length() != 7) {
756string::const_iterator it = cpy.begin();
757 while(it != cpy.end() && rval) {
758 if(pos == 4 || pos == 7) {
772 if(month < 1 || month > 12) {
775 if(cpy.length() == 10) {
791 stringcpy = orig_date;
795 if(time_pos ==
NPOS) {
816 stringcpy = orig_date;
819 if(time_pos !=
NPOS) {
820cpy = cpy.substr(0, time_pos);
828 stringcpy = orig_date;
831 if(time_pos ==
NPOS) {
844 stringcpy = orig_date;
851 if(cpy.length() > 7) {
864vector<string> tokens;
865 stringtoken_delimiters =
" ,-/=_.";
867 stringcpy = orig_date;
871 boolis_chars =
false;
873 if(token_delimiters.find(*s) !=
NPOS) {
875tokens.push_back(curr_token);
879}
else if(is_chars && !
isalpha((
unsigned char)(*s))) {
882tokens.push_back(curr_token);
888tokens.push_back(curr_token);
899tokens.push_back(curr_token);
903 if(tokens.size() > 3) {
904vector<string>::iterator p = tokens.begin();
905 boolprev_is_number =
isdigit((
unsigned char)(*p)[0]);
906vector<string>::iterator s = p;
908 while(s != tokens.end()) {
909 if(prev_is_number &&
916prev_is_number =
false;
920prev_is_number =
isdigit((
unsigned char)(*p)[0]);
929 bool s_ChooseMonthAndDay(
const string& token1,
const string& token2,
boolmonth_first,
string& month,
int& day,
bool& month_ambiguous)
931month_ambiguous =
false;
936 if(val1 > 12 && val2 > 12) {
939}
else if(val1 < 13 && val2 < 13) {
946month_ambiguous =
true;
955}
else if(val1 < 13) {
972 if(orig_date.find_first_not_of(
" ,-/=_.0123456789") !=
NPOS) {
979 stringdelims =
" ,-/=_.";
981 size_t next= orig_date.find_first_of(delims,
prev);
982vector<string> tokens;
984 while(
next!= string::npos) {
987 if(
str.length() == 1) {
991tokens.push_back(
str);
994 next= orig_date.find_first_of(delims,
prev);
997 if(
prev< orig_date.length()) {
998 string str= orig_date.substr(
prev, string::npos);
999 if(
str.length() == 1) {
1003tokens.push_back(
str);
1014month_ambiguous =
false;
1016 stringorig_date =
test;
1027 stringreformatted_date;
1029 intyear = 0, day = 0;
1031 size_tnum_original_tokens = 0;
1035num_original_tokens = tokens.size();
1036 if(tokens.size() < 1 || tokens.size() > 3) {
1042vector<string>::iterator it = tokens.begin();
1043 while(it != tokens.end()) {
1045 boolfound =
false;
1055}
else if(one_token.length() > 0
1056&&
isdigit((
unsigned char)one_token[0])
1065}
else if(
isalpha((
unsigned char)one_token[0])) {
1070 if(one_token.length() > 3) {
1071one_token = one_token.substr(0, 3);
1084 if(this_val <
min) {
1086}
else if(this_val >
max) {
1100it = tokens.erase(it);
1106 if(tokens.size() == 0) {
1108}
else if(tokens.size() > 2) {
1115 if(!
s_ChooseMonthAndDay(tokens[0], tokens[1], month_first, month, day, month_ambiguous)) {
1119month_ambiguous =
true;
1124}
else if(tokens.size() == 1) {
1131 if(
val> 0 &&
val< 13) {
1146 if(tokens.size() == 2) {
1159 if(val1 < 10 && !zero_pad_1 && (val2 > 10 || zero_pad_2)) {
1165}
else if(val2 < 10 && !zero_pad_2 && (val1 > 10 || zero_pad_1)) {
1185 if(!
s_ChooseMonthAndDay(tokens[0], tokens[1], month_first, month, day, month_ambiguous)) {
1202 if(year > 0 && year < 100 && num_original_tokens > 1) {
1207 boolformat_bad =
false;
1208 boolin_future =
false;
1216 if(year >= 1000 && year < 2100) {
1219reformatted_date = month +
"-"+ reformatted_date;
1222 if(day_str.length() < 2) {
1223day_str =
"0"+ day_str;
1225reformatted_date = day_str +
"-"+ reformatted_date;
1230 returnreformatted_date;
1240 if(tokens.size() != 3) {
1249 ITERATE(vector<string>, it, tokens) {
1258vector<int> positions;
1259positions.push_back(0);
1260positions.push_back(0);
1261positions.push_back(0);
1264 ITERATE(vector<int>, it, nums) {
1266 if(positions[
eYear] > 0) {
1271positions[
eYear] = token_pos;
1272}
else if(*it > 12) {
1273 if(positions[
eDay] > 0) {
1278positions[
eDay] = token_pos;
1279}
else if(positions[
eMonth] > 0) {
1284positions[
eMonth] = token_pos;
1288 if(positions[
eDay] < positions[
eMonth]) {
1297 bool& lat_in_range,
bool& lon_in_range,
1298 double& lat_value,
double& lon_value)
1300format_correct =
false;
1301lat_in_range =
false;
1302lon_in_range =
false;
1303precision_correct =
false;
1313}
else if(sscanf (lat_lon.c_str(),
"%lf %c %lf %c%n", &ns, &lat, &ew, &lon, &processed) != 4
1314||
size_t(processed) != lat_lon.length()) {
1316}
else if((lat !=
'N'&& lat !=
'S') || (lon !=
'E'&& lon !=
'W')) {
1323lat_value = 0.0 - ns;
1328lon_value = 0.0 - ew;
1332vector<string> pieces;
1334 if(pieces.size() > 3) {
1338 charreformatted[1000];
1339sprintf (reformatted,
"%.*lf %c %.*lf %c", precision_lat, ns, lat,
1340precision_lon, ew, lon);
1342 size_t len= strlen (reformatted);
1344&& (
len== lat_lon.length()
1345|| (
len< lat_lon.length()
1346&& lat_lon[
len] ==
';'))) {
1347format_correct =
true;
1348 if(ns <= 90 && ns >= 0) {
1349lat_in_range =
true;
1351 if(ew <= 180 && ew >= 0) {
1352lon_in_range =
true;
1354 if(precision_lat < 3 && precision_lon < 3) {
1355precision_correct =
true;
1365 boolformat_correct =
false;
1366 boolprecision_correct =
false;
1367 boollat_in_range =
false;
1368 boollon_in_range =
false;
1369 doublelat_value = 0.0;
1370 doublelon_value = 0.0;
1372lat_in_range, lon_in_range,
1373lat_value, lon_value);
1374 if(!format_correct || !lat_in_range || !lon_in_range || precision_correct) {
1377vector<string> pieces;
1379 if(pieces.size() > 3) {
1382 if(precision_lat > 4) {
1385 if(precision_lon > 4) {
1389 charreformatted[1000];
1390sprintf(reformatted,
"%.*lf %c %.*lf %c", precision_lat,
fabs(lat_value), pieces[1].c_str()[0],
1391precision_lon,
fabs(lon_value), pieces[3].c_str()[0]);
1392 stringnew_val = reformatted;
1412 for(string::const_iterator
i= old_str.begin();
i!= old_str.end(); ++
i)
1417 charc =
static_cast<char>(sym);
1418 if(!
isalpha(c) && !
isdigit(c) && c !=
'.'&& c !=
'-'&& c !=
'+')
1422 else if(!new_str.empty() &&
1429 if(!
isalpha(c) && !
isdigit(c) && c !=
'.'&& c !=
'-'&& c !=
'+')
1445 boolis_number =
true;
1446 for(string::const_iterator
i= old_str.begin();
i!= old_str.end(); ++
i)
1451 charc =
static_cast<char>(sym);
1452 size_tj = new_str.size();
1453 if(j >= 4 && new_str[j-1] ==
' '&& new_str[j-2] ==
'.'&& new_str[j-3] ==
' '&&
isdigit(new_str[j-4]) &&
isdigit(c))
1461 if(!
isdigit(c) && c !=
'+'&& c !=
'-'&& c !=
'.'&& !
isspace(c)) {
1492 static string s_NormalizeTokens(vector<string> &tokens, vector<double> &numbers, vector<string> &anum, vector<int> &
precision, vector<string> &lat_long, vector<string> &nsew)
1494vector<string> pattern;
1495 for(
size_t i= 0;
i< tokens.size();
i++)
1497 string&token = tokens[
i];
1502numbers.push_back(num);
1503anum.push_back(token);
1504pattern.push_back(
"1");
1509=
static_cast<int>(token.length() - token.find(
'.') - 1);
1515vector<string>
tmp;
1517 doublenum0, num1, num2;
1520numbers.push_back(num0);
1521anum.push_back(
tmp[0]);
1522pattern.push_back(
"1");
1524numbers.push_back(num1);
1525anum.push_back(
tmp[1]);
1526pattern.push_back(
"1");
1528numbers.push_back(num2);
1529anum.push_back(
tmp[2]);
1530pattern.push_back(
"1");
1536 if(token ==
"\'"&&
i>= 3 &&
s_IsNumber(tokens[
i- 1]) && tokens[
i- 2] ==
"\'"&&
s_IsNumber(tokens[
i- 3]))
1544pattern.push_back(
"degrees");
1549pattern.push_back(
"\'");
1554pattern.push_back(
"\"");
1556 else if(token ==
","|| token ==
":"|| token ==
"_"|| token ==
"&"|| token ==
"."|| token ==
";"|| token ==
"#"||
NStr::EqualNocase(token,
"and"))
1561pattern.push_back(
"lat");
1562lat_long.push_back(
"lat");
1567pattern.push_back(
"lat");
1568lat_long.push_back(
"long");
1572pattern.push_back(
"N");
1573nsew.push_back(
"N");
1577pattern.push_back(
"N");
1578nsew.push_back(
"S");
1582pattern.push_back(
"N");
1583nsew.push_back(
"E");
1585 else if(token ==
"W"||
NStr::EqualNocase(token,
"west") || token ==
"Wdeg")
1587pattern.push_back(
"N");
1588nsew.push_back(
"W");
1590 else if(token ==
"NW")
1592nsew.push_back(
"N");
1593nsew.push_back(
"W");
1595 else if(token ==
"NE")
1597nsew.push_back(
"N");
1598nsew.push_back(
"E");
1600 else if(token ==
"SW")
1602nsew.push_back(
"S");
1603nsew.push_back(
"W");
1605 else if(token ==
"SE")
1607nsew.push_back(
"S");
1608nsew.push_back(
"E");
1623 if(numbers.size() != 2)
1628 if(lat_long.size() == 2)
1630 if(lat_long.front() ==
"long")
1632 swap(numbers[0], numbers[1]);
1634 if(nsew.size() == 2) {
1635 swap(nsew[0], nsew[1]);
1639 else if(!lat_long.empty())
1644 if(nsew.size() == 2)
1646 if((nsew[0] ==
"E"|| nsew[0] ==
"W") &&
1647(nsew[1] ==
"N"|| nsew[1] ==
"S"))
1649 swap(numbers[0], numbers[1]);
1651 swap(nsew[0], nsew[1]);
1653 if(nsew[0] ==
"N")
1655numbers[0] =
fabs(numbers[0]);
1657 else if(nsew[0] ==
"S")
1659 if(numbers[0] != 0)
1660numbers[0] = -
fabs(numbers[0]);
1667 if(nsew[1] ==
"E")
1669numbers[1] =
fabs(numbers[1]);
1671 else if(nsew[1] ==
"W")
1673 if(numbers[1] != 0)
1674numbers[1] = -
fabs(numbers[1]);
1683 else if(!nsew.empty())
1688 if(lat_long.empty() && nsew.empty() &&
fabs(numbers[0]) > 90 &&
fabs(numbers[1]) < 90)
1690 swap(numbers[0], numbers[1]);
1693 if(
fabs(numbers[0]) > 90 ||
fabs(numbers[1]) > 180)
1702vector<string> tokens;
1704vector<string> lat_long;
1705vector<string> nsew;
1706vector<string> anum;
1708 if(pattern.empty())
1713vector<double> degrees(2, 0);
1714vector<int> prec(2, 0);
1717 if( pattern ==
"1 1"||
1718pattern ==
"1 N 1 N"||
1719pattern ==
"N 1 N 1"||
1720pattern ==
"1 degrees N 1 degrees N"||
1721pattern ==
"lat 1 lat 1"||
1722pattern ==
"1 N lat 1 N lat"||
1723pattern ==
"1 degrees N lat 1 degrees N lat")
1725degrees[0] = numbers[0];
1726degrees[1] = numbers[1];
1730 else if((pattern ==
"1 1 \" 1 1 '"||
1731pattern ==
"1 degrees 1 \" N 1 degrees 1 ' N")
1732&& numbers[1] < 60 && numbers[3] < 60
1733&& numbers[1] >= 0 && numbers[3] >= 0)
1735sign1 = anum[0][0] ==
'-'? -1 : 1;
1736sign2 = anum[2][0] ==
'-'? -1 : 1;
1737degrees[0] = sign1*(
fabs(numbers[0]) + numbers[1] / 3600);
1738degrees[1] = sign2*(
fabs(numbers[2]) + numbers[3] / 60);
1742 else if( (pattern ==
"1 1 ' 1"||
1743pattern ==
"1 degrees 1 ' N 1 degrees N")
1747sign1 = anum[0][0] ==
'-'? -1 : 1;
1748degrees[0] = sign1*(
fabs(numbers[0]) + numbers[1] / 60);
1749degrees[1] = numbers[2];
1753 else if(pattern ==
"1 1 ' 1 \" 1" 1754&& numbers[1] < 60 && numbers[2] < 60
1755&& numbers[1] >= 0 && numbers[2] >= 0)
1757sign1 = anum[0][0] ==
'-'? -1 : 1;
1758degrees[0] = sign1*(
fabs(numbers[0]) + numbers[1] / 60 + numbers[2] / 3600);
1759degrees[1] = numbers[3];
1763 else if((pattern ==
"1 1 ' 1 \" 1 1 '"||
1764pattern ==
"1 1 1 N 1 1 N"||
1765pattern ==
"1 degrees 1 ' 1 \" N 1 degrees 1 ' N")
1766&& numbers[1] < 60 && numbers[2] < 60 && numbers[4] < 60
1767&& numbers[1] >= 0 && numbers[2] >= 0 && numbers[4] >= 0)
1769sign1 = anum[0][0] ==
'-'? -1 : 1;
1770sign2 = anum[3][0] ==
'-'? -1 : 1;
1771degrees[0] = sign1*(
fabs(numbers[0]) + numbers[1] / 60 + numbers[2] / 3600);
1772degrees[1] = sign2*(
fabs(numbers[3]) + numbers[4] / 60);
1776 else if(( pattern ==
"1 1 ' 1 \" 1 1 ' 1 \""||
1777pattern ==
"1 1 ' 1 \" N 1 1 ' 1 \" N"||
1778pattern ==
"1 degrees 1 ' 1 \" 1 degrees 1 ' 1 \""||
1779pattern ==
"1 degrees 1 ' 1 \" N 1 degrees 1 ' 1 \" N"||
1780pattern ==
"N 1 degrees 1 ' 1 \" N 1 degrees 1 ' 1 \""||
1781pattern ==
"1 degrees 1 ' 1 N 1 degrees 1 ' 1 N"||
1782pattern ==
"1 degrees 1 1 N 1 degrees 1 1 N"||
1783pattern ==
"1 1 1 N 1 1 1 N")
1784&& numbers[1] < 60 && numbers[2] < 60 && numbers[4] < 60 && numbers[5] < 60
1785&& numbers[1] >= 0 && numbers[2] >= 0 && numbers[4] >= 0 && numbers[5] >= 0)
1787sign1 = anum[0][0] ==
'-'? -1 : 1;
1788sign2 = anum[3][0] ==
'-'? -1 : 1;
1789degrees[0] = sign1*(
fabs(numbers[0]) + numbers[1] / 60 + numbers[2] / 3600);
1790degrees[1] = sign2*(
fabs(numbers[3]) + numbers[4] / 60 + numbers[5] / 3600);
1794 else if(( pattern ==
"1 1 ' 1 1 '"||
1795pattern ==
"1 1 N 1 1 N"||
1796pattern ==
"1 1 ' N 1 1 ' N"||
1797pattern ==
"1 degrees 1 ' N 1 degrees 1 ' N"||
1798pattern ==
"lat 1 degrees 1 ' N lat 1 degrees 1 ' N"||
1799pattern ==
"1 degrees 1 N 1 degrees 1 N"||
1800pattern ==
"1 degrees 1 N 1 degrees 1 ' N"||
1801pattern ==
"1 degrees 1 ' N 1 degrees 1 N"||
1802pattern ==
"N 1 degrees 1 ' N 1 degrees 1"||
1803pattern ==
"N 1 degrees 1 ' N 1 degrees 1 '"||
1804pattern ==
"N 1 degrees 1 ' N 1 1 '")
1805&& numbers[1] < 60 && numbers[3] < 60
1806&& numbers[1] >= 0 && numbers[3] >= 0)
1808sign1 = anum[0][0] ==
'-'? -1 : 1;
1809sign2 = anum[2][0] ==
'-'? -1 : 1;
1810degrees[0] = sign1*(
fabs(numbers[0]) + numbers[1] / 60);
1811degrees[1] = sign2*(
fabs(numbers[2]) + numbers[3] / 60);
1815 else if((pattern ==
"1 N 1 1 N"||
1816pattern ==
"1 degrees N 1 degrees 1 ' N")
1820sign2 = anum[1][0] ==
'-'? -1 : 1;
1821degrees[0] = numbers[0];
1822degrees[1] = sign2*(
fabs(numbers[1]) + numbers[2] / 60);
1826 else if((pattern ==
"1 degrees 1 ' 1 degrees 1 ' 1 \""||
1827pattern ==
"N 1 1 N 1 1 1")
1828&& numbers[1] < 60 && numbers[3] < 60 && numbers[4] < 60
1829&& numbers[1] >= 0 && numbers[3] >= 0 && numbers[4] >= 0)
1831sign1 = anum[0][0] ==
'-'? -1 : 1;
1832sign2 = anum[2][0] ==
'-'? -1 : 1;
1833degrees[0] = sign1*(
fabs(numbers[0]) + numbers[1] / 60);
1834degrees[1] = sign2*(
fabs(numbers[2]) + numbers[3] / 60 + numbers[4] / 3600);
1838 else if(pattern ==
"1 degrees 1 degrees 1 ' 1 \"" 1839&& numbers[2] < 60 && numbers[3] < 60
1840&& numbers[2] >= 0 && numbers[3] >= 0)
1842sign2 = anum[1][0] ==
'-'? -1 : 1;
1843degrees[0] = numbers[0];
1844degrees[1] = sign2*(
fabs(numbers[1]) + numbers[2] / 60 + numbers[3] / 3600);
1848 else if(pattern ==
"1 degrees 1 ' 1 \" N 1 degrees 1 \" N" 1849&& numbers[1] < 60 && numbers[2] < 60 && numbers[4] < 60
1850&& numbers[1] >= 0 && numbers[2] >= 0 && numbers[4] >= 0)
1852sign1 = anum[0][0] ==
'-'? -1 : 1;
1853sign2 = anum[3][0] ==
'-'? -1 : 1;
1854degrees[0] = sign1*(
fabs(numbers[0]) + numbers[1] / 60 + numbers[2] / 3600);
1855degrees[1] = sign2*(
fabs(numbers[3]) + numbers[4] / 3600);
1864 swap(degrees, numbers);
1872 stringnorth_or_south;
1874 stringeast_or_west;
1880 if(ch < '0' || ch >
'9') {
1886lat_lon_stream >> lat;
1887lat_lon_stream >> north_or_south;
1888lat_lon_stream >> lon;
1889lat_lon_stream >> east_or_west;
1890 if( lat_lon_stream.bad() ) {
1894 if( north_or_south !=
"N"&& north_or_south !=
"S") {
1898 if( east_or_west !=
"E"&& east_or_west !=
"W") {
1904 size_t len= lat.length();
1905 if(pos + 9 <
len) {
1912 size_t len= lon.length();
1913 if(pos + 9 <
len) {
1918 returnlat +
" "+ north_or_south +
" "+ lon +
" "+ east_or_west;
1935vector<double> numbers;
1939 if(!numbers.empty())
1952 if(lat_value < 0) {
1954lat_value = -lat_value;
1957 if(lon_value < 0) {
1959lon_value = -lon_value;
1966 stringres = lat +
" "+ ns +
" "+ lon +
" "+ ew;
1975 boolgoodmatch =
false;
1981 id->SetGuessCountry(guess->
GetLevel0());
1982 id->SetGuessProvince(guess->
GetLevel1());
1999 doublelanddistance = 0.0;
2003 id->SetClosestCountry(guess->
GetLevel0());
2004 id->SetClosestProvince(guess->
GetLevel1());
2013 doublelanddistance = 0.0;
2017 id->SetClosestCountry(guess->
GetLevel0());
2018 id->SetClosestProvince(guess->
GetLevel1());
2026 doublewaterdistance = 0.0;
2029 id->SetClosestWater(guess->
GetLevel0());
2040 doubledistance = 0.0;
2045 id->SetGuessCountry(country);
2046 id->SetGuessProvince(province);
2068{
"Adriatic Sea",
"Mediterranean Sea"},
2069{
"Aegean Sea",
"Mediterranean Sea"},
2070{
"Alboran Sea",
"Mediterranean Sea"},
2071{
"Andaman Sea",
"Indian Ocean"},
2072{
"Arabian Sea",
"Indian Ocean"},
2073{
"Argentine Sea",
"Atlantic Ocean"},
2074{
"Ariake Sea",
"Pacific Ocean"},
2075{
"Baffin Bay",
"Atlantic Ocean"},
2076{
"Balearic Sea",
"Mediterranean Sea"},
2077{
"Baltic Sea",
"Atlantic Ocean"},
2078{
"Barents Sea",
"Arctic Ocean"},
2079{
"Bay of Bengal",
"Indian Ocean"},
2080{
"Beaufort Sea",
"Arctic Ocean"},
2081{
"Bering Sea",
"Pacific Ocean"},
2082{
"Bismarck Sea",
"Pacific Ocean"},
2083{
"Black Sea",
"Mediterranean Sea"},
2084{
"Bohai Sea",
"Pacific Ocean"},
2085{
"Caribbean Sea",
"Atlantic Ocean"},
2086{
"Celebes Sea",
"Pacific Ocean"},
2087{
"Champlain Sea",
"Atlantic Ocean"},
2088{
"Chilean Sea",
"Pacific Ocean"},
2089{
"China Seas",
"Pacific Ocean"},
2090{
"Chukchi Sea",
"Arctic Ocean"},
2091{
"Coral Sea",
"Pacific Ocean"},
2092{
"Davis Strait",
"Atlantic Ocean"},
2093{
"East China Sea",
"Pacific Ocean"},
2094{
"East Siberian Sea",
"Arctic Ocean"},
2095{
"English Channel",
"Atlantic Ocean"},
2096{
"Erythraean Sea",
"Indian Ocean"},
2097{
"Golfo de California",
"Pacific Ocean"},
2098{
"Greenland Sea",
"Arctic Ocean"},
2099{
"Gulf of Mexico",
"Atlantic Ocean"},
2100{
"Gulf of Thailand",
"Pacific Ocean"},
2101{
"Gulf of Tonkin",
"Pacific Ocean"},
2102{
"Hudson Bay",
"Arctic Ocean"},
2103{
"Ionian Sea",
"Mediterranean Sea"},
2104{
"Irish Sea",
"Atlantic Ocean"},
2105{
"Irminger Sea",
"Atlantic Ocean"},
2106{
"James Bay",
"Atlantic Ocean"},
2107{
"Java Sea",
"Indian Ocean"},
2108{
"Kara Sea",
"Arctic Ocean"},
2109{
"Koro Sea",
"Pacific Ocean"},
2110{
"Labrador Sea",
"Atlantic Ocean"},
2111{
"Laccadive Sea",
"Indian Ocean"},
2112{
"Laptev Sea",
"Arctic Ocean"},
2113{
"Ligurian Sea",
"Mediterranean Sea"},
2114{
"Lincoln Sea",
"Arctic Ocean"},
2115{
"Myrtoan Sea",
"Mediterranean Sea"},
2116{
"North Sea",
"Atlantic Ocean"},
2117{
"Norwegian Sea",
"Atlantic Ocean"},
2118{
"Pechora Sea",
"Arctic Ocean"},
2119{
"Persian Gulf",
"Indian Ocean"},
2120{
"Philippine Sea",
"Pacific Ocean"},
2121{
"Red Sea",
"Indian Ocean"},
2122{
"Salish Sea",
"Pacific Ocean"},
2123{
"Sargasso Sea",
"Atlantic Ocean"},
2124{
"Scotia Sea",
"Southern Ocean"},
2125{
"Sea of Azov",
"Black Sea"},
2126{
"Sea of Chiloe",
"Pacific Ocean"},
2127{
"Sea of Crete",
"Mediterranean Sea"},
2128{
"Sea of Japan",
"Pacific Ocean"},
2129{
"Sea of Okhotsk",
"Pacific Ocean"},
2130{
"Sea of the Hebrides",
"Atlantic Ocean"},
2131{
"Sea of Zanj",
"Indian Ocean"},
2132{
"Seas of Greenland",
"Atlantic Ocean"},
2133{
"Sethusamudram",
"Indian Ocean"},
2134{
"Sibutu Passage",
"Pacific Ocean"},
2135{
"Solomon Sea",
"Pacific Ocean"},
2136{
"South China Sea",
"Pacific Ocean"},
2137{
"Sulu Sea",
"Pacific Ocean"},
2138{
"Tasman Sea",
"Pacific Ocean"},
2139{
"Thracian Sea",
"Mediterranean Sea"},
2140{
"Timor Sea",
"Indian Ocean"},
2141{
"Tyrrhenian Sea",
"Mediterranean Sea"},
2142{
"Wandel Sea",
"Arctic Ocean"},
2143{
"White Sea",
"Arctic Ocean"},
2144{
"Yellow Sea",
"Pacific Ocean"}
2153 if( new_water_pair_iter != sc_WaterPairMap.end() ) {
2154 returnnew_water_pair_iter->second;
2163 stringcountryname = input_countryname;
2168 boolformat_correct, lat_in_range, lon_in_range, precision_correct;
2169 doublelat_value = 0.0, lon_value = 0.0;
2171lat_in_range, lon_in_range,
2172lat_value, lon_value);
2173 if(!format_correct) {
2177lat_lon = lat_lon.substr(0, pos);
2179lat_in_range, lon_in_range,
2180lat_value, lon_value);
2185 if(!format_correct || !lat_in_range || !lon_in_range) {
2195countryname = countryname.substr(0, pos);
2199countryname = countryname.substr(0, pos);
2204countryname =
"Svalbard";
2207 stringcountry = countryname;
2213province = country.substr(pos + 1);
2216country = country.substr(0, pos);
2247 stringwguess =
id->GetGuessWater();
2248 stringcguess =
id->GetGuessCountry();
2252 if(province.empty() &&
NStr::Equal(cguess, country)) {
2272 doubleneardist = 0.0;
2277 id->SetGuessCountry (country);
2279 flags=
id->Classify(country, province);
2283&& !
x_GetLatLonWaterMap().IsNearLatLon(lat_value, lon_value, 20.0, neardist, country)) {
2286adjusted_flags = adjust_id ==
NULL? 0 : adjust_id->
Classify(country, province);
2287 if(adjusted_flags) {
2293 flags= adjusted_flags;
2301adjusted_flags = adjust_id ==
NULL? 0 : adjust_id->
Classify(country, province);
2302 if(adjusted_flags) {
2308 flags= adjusted_flags;
2316adjusted_flags = adjust_id ==
NULL? 0 : adjust_id->
Classify(country, province);
2317 if(adjusted_flags) {
2323 flags= adjusted_flags;
2340 error=
"Latitude and longitude values appear to be exchanged";
2344 if(lat_value < 0.0) {
2345 error=
"Latitude should be set to N (northern hemisphere)";
2347 error=
"Latitude should be set to S (southern hemisphere)";
2349lat_lon =
MakeLatLon(-lat_value, lon_value);
2352 if(lon_value < 0.0) {
2353 error=
"Longitude should be set to E (eastern hemisphere)";
2355 error=
"Longitude should be set to W (western hemisphere)";
2357lat_lon =
MakeLatLon(lat_value, -lon_value);
2365 stringfull_guess =
id->GetFullGuess();
2368 error=
"Lat_lon "+ lat_lon +
" is in "+
id->GetFullGuess()
2369+
" (more specific than "+ country +
")";
2374 boolsuppress =
false;
2375 stringreportregion;
2377 stringdesphrase =
"designated subregion ";
2378 stringsubphrase =
"another subregion ";
2379 stringphrase = nosubphrase;
2380 boolshow_claimed =
false;
2392reportregion = countryname;
2398reportregion =
id->GetClosestFull();
2400reportregion =
id->GetClosestCountry();
2403show_claimed =
true;
2406 stringwater =
id->GetGuessWater();
2419}
else if(!suppress) {
2422 error=
"Lat_lon '"+ lat_lon +
"' is closest to "+ phrase +
"'"+ reportregion +
"' at distance " 2424+
" km, but in water '"+
id->GetGuessWater()
2425+
"' - claimed region '"+
id->GetClaimedFull()
2428 error=
"Lat_lon '"+ lat_lon +
"' is closest to "+ phrase +
"'"+ reportregion
2430+
id->GetGuessWater() +
"'";
2433}
else if(neardist > 0.0) {
2435 error=
"Lat_lon '"+ lat_lon +
"' is in water '"+
id->GetGuessWater() +
"', '" 2439 error=
"Lat_lon '"+ lat_lon +
"' is in water '"+
id->GetGuessWater() +
"'";
2442 stringfull_guess =
id->GetFullGuess();
2451 error=
"Lat_lon '"+ lat_lon +
"' maps to '"+
id->GetFullGuess() +
"' instead of '" 2452+ countryname +
"'";
2456 error=
"Lat_lon '"+ lat_lon +
"' maps to '"+
id->GetFullGuess() +
"' instead of '" 2457+ country +
"' - claimed region '"+
id->GetClaimedFull()
2465 error=
"Lat_lon '"+ lat_lon +
"' maps to '"+
id->GetFullGuess() +
"' instead of '" 2466+ countryname +
"' - claimed region '"+
id->GetClaimedFull()
2475 error=
"Lat_lon '"+ lat_lon +
"' is closest to '"+
id->GetClosestCountry() +
"' instead of '" 2476+ countryname +
"'";
2479 error=
"Lat_lon '"+ lat_lon +
"' is closest to '"+
id->GetClosestWater() +
"' instead of '" 2480+ countryname +
"'";
2483 error=
"Unable to determine mapping for lat_lon '"+ lat_lon +
"' and country '"+ countryname +
"'";
2513 "pooled males and females",
2514 "pooled male and female",
2525 if(find(begin, end,
value) != end) {
2543vector<string> words;
2545 if(words.size() == 0) {
2554 boolis_good =
false;
2556 ITERATE(vector<string>, w, words) {
2560 if(find(begin, end, *w) != end) {
2581vector<string> words;
2584 if(words.size() == 0) {
2592vector<string> good_values;
2593 boolpooled =
false;
2595 ITERATE(vector<string>, w, words) {
2602 if(find(begin, end, *w) != end) {
2604good_values.push_back(
"male");
2606good_values.push_back(
"female");
2608good_values.push_back(*w);
2616 if(good_values.size() == 0) {
2621 stringfixed = good_values[0];
2622 for(
size_t i= 1;
i< good_values.size();
i++) {
2623 if(good_values.size() > 2) {
2626 if(
i== good_values.size() - 1) {
2629fixed +=
" "+ good_values[
i];
2632fixed =
"pooled "+ fixed;
2647string::const_iterator it =
value.begin();
2648 if(*it ==
'+'|| *it ==
'-') {
2653 boolany_digit =
false;
2654 boolskip_comma =
true;
2655 while(it !=
value.end() && (
isdigit(*it) || *it ==
',')) {
2659skip_comma =
false;
2670 if(it ==
value.end()) {
2685 if(it ==
value.end() || *it !=
' '|| !any_digit) {
2691 while(it !=
value.end()) {
2729 charreformatted[1000];
2731 stringrval = reformatted;
2760rval =
number+
" "+
"m";
2778}
else if(
value.length() > 240) {
2782 for(
autoit :
value) {
2831 stringgenus = taxname.substr(0, pos);
2836 stringspecies = taxname.substr(pos + 1);
2840 if(pos != 1 ||
value[0] !=
'p') {
2861}
else if(
value.length() > 32) {
2870 static const char* s_ForbiddenPhrases[] = {
2882 for(
autoit : s_ForbiddenPhrases) {
2945 if(s_PlasmidNameExceptions.find(
value) != end(s_PlasmidNameExceptions)) {
2962 #include "cell_line.inc" 2966vector<string> tokens;
2968 if(tokens.size() < 4) {
2970<<
"; disregarding");
2987 size_t count=
sizeof(kCellLine) /
sizeof(*kCellLine);
2988 const char*
const* start = kCellLine;
3003 stringcell_line_search = cell_line;
3007rval =
"The International Cell Line Authentication Committee database indicates that "+
3008cell_line +
" from "+ organism +
" is known to be contaminated by "+
3011 ". Please see http://iclac.org/databases/cross-contaminations/ for more information and references.";
3032 "Antigua and Barbuda",
3037 "Ashmore and Cartier Islands",
3057 "Bosnia and Herzegovina",
3061 "British Virgin Islands",
3071 "Central African Republic",
3075 "Christmas Island",
3076 "Clipperton Island",
3081 "Coral Sea Islands",
3089 "Democratic Republic of the Congo",
3093 "Dominican Republic",
3097 "Equatorial Guinea",
3103 "Falkland Islands (Islas Malvinas)",
3109 "French Polynesia",
3110 "French Southern and Antarctic Lands",
3118 "Glorioso Islands",
3130 "Heard Island and McDonald Islands",
3152 "Juan de Nova Island",
3155 "Kerguelen Archipelago",
3178 "Marshall Islands",
3183 "Mediterranean Sea",
3185 "Micronesia, Federated States of",
3210 "Northern Mariana Islands",
3218 "Papua New Guinea",
3223 "Pitcairn Islands",
3228 "Republic of the Congo",
3234 "Saint Barthelemy",
3236 "Saint Kitts and Nevis",
3239 "Saint Pierre and Miquelon",
3240 "Saint Vincent and the Grenadines",
3243 "Sao Tome and Principe",
3256 "South Georgia and the South Sandwich Islands",
3263 "State of Palestine",
3279 "Trinidad and Tobago",
3284 "Turks and Caicos Islands",
3289 "United Arab Emirates",
3298 "Wallis and Futuna",
3318 "Netherlands Antilles",
3319 "Serbia and Montenegro",
3322 "The former Yugoslav Republic of Macedonia",
3332 "missing: control sample",
3333 "missing: data agreement established pre-2023",
3334 "missing: endangered species",
3335 "missing: human-identifiable",
3336 "missing: lab stock",
3337 "missing: sample group",
3338 "missing: synthetic construct",
3339 "missing: third party data",
3349 stringname = country;
3350 size_tpos = country.find(
':');
3352 if( pos !=
NPOS) {
3353 if(pos == country.length() - 1) {
3356name = country.substr(0, pos);
3372 bool CCountries::IsValid(
const string& country,
bool& is_miscapitalized,
bool& is_null_and_virus,
boolis_influenza_or_Sars2)
3374is_miscapitalized =
false;
3376 stringname = country;
3377 size_tpos = country.find(
':');
3379 if( pos !=
NPOS) {
3380name = country.substr(0, pos);
3381 if(pos == country.length() - 1) {
3395 if(is_influenza_or_Sars2) {
3396is_null_and_virus =
true;
3403is_miscapitalized =
true;
3409is_miscapitalized =
true;
3415is_miscapitalized =
true;
3426 boolis_null_and_virus =
false;
3427 return IsValid(country, is_miscapitalized, is_null_and_virus,
false);
3433 stringname = country;
3434 size_tpos = country.find(
':');
3436 if( pos !=
NPOS) {
3437name = country.substr(0, pos);
3447is_miscapitalized =
false;
3449 stringname = country;
3450 size_tpos = country.find(
':');
3452 if( pos !=
NPOS) {
3453name = country.substr(0, pos);
3464is_miscapitalized =
true;
3476{
"england",
"United Kingdom: England"},
3477{
"great britain",
"United Kingdom: Great Britain"},
3478{
"new jersey, usa",
"USA: New Jersey"}
3484{
"ABW",
"Aruba"},
3485{
"AFG",
"Afghanistan"},
3486{
"AGO",
"Angola"},
3487{
"AIA",
"Anguilla"},
3488{
"ALA",
"Aland Islands"},
3489{
"ALB",
"Albania"},
3490{
"AND",
"Andorra"},
3491{
"ARE",
"United Arab Emirates"},
3492{
"ARG",
"Argentina"},
3493{
"ARM",
"Armenia"},
3494{
"ASM",
"American Samoa"},
3495{
"ATA",
"Antarctica"},
3496{
"ATF",
"French Southern Territories"},
3497{
"ATG",
"Antigua and Barbuda"},
3498{
"AUS",
"Australia"},
3499{
"AUT",
"Austria"},
3500{
"AZE",
"Azerbaijan"},
3501{
"Antigua & Barbuda",
"Antigua and Barbuda"},
3502{
"Ashmore & Cartier Islands",
"Ashmore and Cartier Islands"},
3503{
"BDI",
"Burundi"},
3504{
"BEL",
"Belgium"},
3505{
"BEN",
"Benin"},
3506{
"BES",
"Bonaire, Sint Eustatius and Saba"},
3507{
"BFA",
"Burkina Faso"},
3508{
"BGD",
"Bangladesh"},
3509{
"BGR",
"Bulgaria"},
3510{
"BHR",
"Bahrain"},
3511{
"BHS",
"Bahamas"},
3512{
"BIH",
"Bosnia and Herzegovina"},
3513{
"BLM",
"Saint Barthelemy"},
3514{
"BLR",
"Belarus"},
3515{
"BLZ",
"Belize"},
3516{
"BMU",
"Bermuda"},
3517{
"BOL",
"Bolivia"},
3518{
"BRA",
"Brazil"},
3519{
"BRB",
"Barbados"},
3520{
"BRN",
"Brunei"},
3521{
"BTN",
"Bhutan"},
3522{
"BVT",
"Bouvet Island"},
3523{
"BWA",
"Botswana"},
3524{
"Brasil",
"Brazil"},
3525{
"CAF",
"Central African Republic"},
3526{
"CAN",
"Canada"},
3527{
"CCK",
"Cocos Islands"},
3528{
"CHE",
"Switzerland"},
3529{
"CHL",
"Chile"},
3530{
"CHN",
"China"},
3531{
"CIV",
"Cote d'Ivoire"},
3532{
"CMR",
"Cameroon"},
3533{
"COD",
"Democratic Republic of the Congo"},
3534{
"COG",
"Republic of the Congo"},
3535{
"COK",
"Cook Islands"},
3536{
"COL",
"Colombia"},
3537{
"COM",
"Comoros"},
3538{
"CPV",
"Cape Verde"},
3539{
"CRI",
"Costa Rica"},
3541{
"CUW",
"Curacao"},
3542{
"CXR",
"Christmas Island"},
3543{
"CYM",
"Cayman Islands"},
3544{
"CYP",
"Cyprus"},
3545{
"CZE",
"Czechia"},
3546{
"Cape Verde Islands",
"Cape Verde"},
3547{
"DEU",
"Germany"},
3548{
"DJI",
"Djibouti"},
3549{
"DMA",
"Dominica"},
3550{
"DNK",
"Denmark"},
3551{
"DOM",
"Dominican Republic"},
3552{
"DZA",
"Algeria"},
3553{
"Democratic Republic of Congo",
"Democratic Republic of the Congo"},
3554{
"ECU",
"Ecuador"},
3555{
"EGY",
"Egypt"},
3556{
"ERI",
"Eritrea"},
3557{
"ESH",
"Western Sahara"},
3558{
"ESP",
"Spain"},
3559{
"EST",
"Estonia"},
3560{
"ETH",
"Ethiopia"},
3561{
"FIN",
"Finland"},
3563{
"FLK",
"Falkland Islands (Islas Malvinas)"},
3564{
"FRA",
"France"},
3565{
"FRO",
"Faroe Islands"},
3566{
"FSM",
"Micronesia, Federated States of"},
3567{
"Falkland Islands",
"Falkland Islands (Islas Malvinas)"},
3568{
"French Southern & Antarctic Lands",
"French Southern and Antarctic Lands"},
3569{
"GAB",
"Gabon"},
3570{
"GBR",
"United Kingdom"},
3571{
"GEO",
"Georgia"},
3572{
"GGY",
"Guernsey"},
3573{
"GHA",
"Ghana"},
3574{
"GIB",
"Gibraltar"},
3575{
"GIN",
"Guinea"},
3576{
"GLP",
"Guadeloupe"},
3577{
"GMB",
"Gambia"},
3578{
"GNB",
"Guinea-Bissau"},
3579{
"GNQ",
"Equatorial Guinea"},
3580{
"GRC",
"Greece"},
3581{
"GRD",
"Grenada"},
3582{
"GRL",
"Greenland"},
3583{
"GTM",
"Guatemala"},
3584{
"GUF",
"French Guiana"},
3586{
"GUY",
"Guyana"},
3587{
"HKG",
"Hong Kong"},
3588{
"HMD",
"Heard Island and McDonald Islands"},
3589{
"HND",
"Honduras"},
3590{
"HRV",
"Croatia"},
3591{
"HTI",
"Haiti"},
3592{
"HUN",
"Hungary"},
3593{
"Heard Island & McDonald Islands",
"Heard Island and McDonald Islands"},
3594{
"IDN",
"Indonesia"},
3595{
"IMN",
"Isle of Man"},
3596{
"IND",
"India"},
3597{
"IOT",
"British Indian Ocean Territory"},
3598{
"IRL",
"Ireland"},
3601{
"ISL",
"Iceland"},
3602{
"ISR",
"Israel"},
3603{
"ITA",
"Italy"},
3604{
"Ivory Coast",
"Cote d'Ivoire"},
3605{
"JAM",
"Jamaica"},
3606{
"JEY",
"Jersey"},
3607{
"JOR",
"Jordan"},
3608{
"JPN",
"Japan"},
3609{
"KAZ",
"Kazakhstan"},
3610{
"KEN",
"Kenya"},
3611{
"KGZ",
"Kyrgyzstan"},
3612{
"KHM",
"Cambodia"},
3613{
"KIR",
"Kiribati"},
3614{
"KNA",
"Saint Kitts and Nevis"},
3615{
"KOR",
"South Korea"},
3616{
"KWT",
"Kuwait"},
3617{
"LAO",
"Lao People's Democratic Republic"},
3618{
"LBN",
"Lebanon"},
3619{
"LBR",
"Liberia"},
3620{
"LBY",
"Libyan Arab Jamahiriya"},
3621{
"LCA",
"Saint Lucia"},
3622{
"LIE",
"Liechtenstein"},
3623{
"LKA",
"Sri Lanka"},
3624{
"LSO",
"Lesotho"},
3625{
"LTU",
"Lithuania"},
3626{
"LUX",
"Luxembourg"},
3627{
"LVA",
"Latvia"},
3628{
"La Reunion Island",
"Reunion"},
3629{
"Luxemburg",
"Luxembourg"},
3630{
"MAC",
"Macao"},
3631{
"MAF",
"Saint Martin (French part)"},
3632{
"MAR",
"Morocco"},
3633{
"MCO",
"Monaco"},
3634{
"MDA",
"Moldova"},
3635{
"MDG",
"Madagascar"},
3636{
"MDV",
"Maldives"},
3637{
"MEX",
"Mexico"},
3638{
"MHL",
"Marshall Islands"},
3639{
"MKD",
"North Macedonia"},
3641{
"MLT",
"Malta"},
3642{
"MMR",
"Myanmar"},
3643{
"MNE",
"Montenegro"},
3644{
"MNG",
"Mongolia"},
3645{
"MNP",
"Northern Mariana Islands"},
3646{
"MOZ",
"Mozambique"},
3647{
"MRT",
"Mauritania"},
3648{
"MSR",
"Montserrat"},
3649{
"MTQ",
"Martinique"},
3650{
"MUS",
"Mauritius"},
3651{
"MWI",
"Malawi"},
3652{
"MYS",
"Malaysia"},
3653{
"MYT",
"Mayotte"},
3654{
"Macedonia",
"North Macedonia"},
3655{
"NAM",
"Namibia"},
3656{
"NCL",
"New Caledonia"},
3657{
"NER",
"Niger"},
3658{
"NFK",
"Norfolk Island"},
3659{
"NGA",
"Nigeria"},
3660{
"NIC",
"Nicaragua"},
3662{
"NLD",
"Netherlands"},
3663{
"NOR",
"Norway"},
3664{
"NPL",
"Nepal"},
3665{
"NRU",
"Nauru"},
3666{
"NZL",
"New Zealand"},
3667{
"Netherland",
"Netherlands"},
3668{
"New Guinea",
"Papua New Guinea"},
3670{
"P, R, China",
"China"},
3671{
"P.R. China",
"China"},
3672{
"P.R.China",
"China"},
3673{
"PAK",
"Pakistan"},
3674{
"PAN",
"Panama"},
3675{
"PCN",
"Pitcairn"},
3677{
"PHL",
"Philippines"},
3678{
"PLW",
"Palau"},
3679{
"PNG",
"Papua New Guinea"},
3680{
"POL",
"Poland"},
3681{
"PRI",
"Puerto Rico"},
3682{
"PRK",
"North Korea"},
3683{
"PRT",
"Portugal"},
3684{
"PRY",
"Paraguay"},
3685{
"PSE",
"Palestinian Territory"},
3686{
"PYF",
"French Polynesia"},
3687{
"People's Republic of China",
"China"},
3688{
"Pr China",
"China"},
3689{
"Prchina",
"China"},
3690{
"QAT",
"Qatar"},
3691{
"REU",
"Reunion"},
3692{
"ROU",
"Romania"},
3693{
"RUS",
"Russia"},
3694{
"RWA",
"Rwanda"},
3695{
"Republic of Congo",
"Republic of the Congo"},
3696{
"SAU",
"Saudi Arabia"},
3697{
"SDN",
"Sudan"},
3698{
"SEN",
"Senegal"},
3699{
"SGP",
"Singapore"},
3700{
"SGS",
"South Georgia and the South Sandwich Islands"},
3701{
"SHN",
"Saint Helena"},
3702{
"SJM",
"Svalbard and Jan Mayen"},
3703{
"SLB",
"Solomon Islands"},
3704{
"SLE",
"Sierra Leone"},
3705{
"SLV",
"El Salvador"},
3706{
"SMR",
"San Marino"},
3707{
"SOM",
"Somalia"},
3708{
"SPM",
"Saint Pierre and Miquelon"},
3709{
"SRB",
"Serbia"},
3710{
"SSD",
"South Sudan"},
3711{
"STP",
"Sao Tome and Principe"},
3712{
"SUR",
"Suriname"},
3713{
"SVK",
"Slovakia"},
3714{
"SVN",
"Slovenia"},
3715{
"SWE",
"Sweden"},
3716{
"SWZ",
"Eswatini"},
3717{
"SXM",
"Sint Maarten (Dutch part)"},
3718{
"SYC",
"Seychelles"},
3719{
"SYR",
"Syrian Arab Republic"},
3720{
"Saint Kitts & Nevis",
"Saint Kitts and Nevis"},
3721{
"Saint Pierre & Miquelon",
"Saint Pierre and Miquelon"},
3722{
"Saint Vincent & Grenadines",
"Saint Vincent and the Grenadines"},
3723{
"Saint Vincent & the Grenadines",
"Saint Vincent and the Grenadines"},
3724{
"Saint Vincent and Grenadines",
"Saint Vincent and the Grenadines"},
3725{
"San Tome and Principe Island",
"Sao Tome and Principe"},
3726{
"Sao Tome & Principe",
"Sao Tome and Principe"},
3727{
"South Georgia & South Sandwich Islands",
"South Georgia and the South Sandwich Islands"},
3728{
"South Georgia & the South Sandwich Islands",
"South Georgia and the South Sandwich Islands"},
3729{
"St Helena",
"Saint Helena"},
3730{
"St Lucia",
"Saint Lucia"},
3731{
"St Pierre and Miquelon",
"Saint Pierre and Miquelon"},
3732{
"St Vincent and the Grenadines",
"Saint Vincent and the Grenadines"},
3733{
"St. Helena",
"Saint Helena"},
3734{
"St. Lucia",
"Saint Lucia"},
3735{
"St. Pierre and Miquelon",
"Saint Pierre and Miquelon"},
3736{
"St. Vincent and the Grenadines",
"Saint Vincent and the Grenadines"},
3737{
"TCA",
"Turks and Caicos Islands"},
3740{
"THA",
"Thailand"},
3741{
"TJK",
"Tajikistan"},
3742{
"TKL",
"Tokelau"},
3743{
"TKM",
"Turkmenistan"},
3744{
"TLS",
"Timor-Leste"},
3745{
"TON",
"Tonga"},
3746{
"TTO",
"Trinidad and Tobago"},
3747{
"TUN",
"Tunisia"},
3748{
"TUR",
"Turkey"},
3749{
"TUV",
"Tuvalu"},
3750{
"TWN",
"Taiwan"},
3751{
"TZA",
"Tanzania"},
3752{
"The Netherlands",
"Netherlands"},
3753{
"Trinidad & Tobago",
"Trinidad and Tobago"},
3754{
"Turks & Caicos",
"Turks and Caicos Islands"},
3755{
"Turks & Caicos Islands",
"Turks and Caicos Islands"},
3756{
"Turks and Caicos",
"Turks and Caicos Islands"},
3757{
"U.S.A.",
"USA"},
3758{
"UGA",
"Uganda"},
3759{
"UK",
"United Kingdom"},
3760{
"UKR",
"Ukraine"},
3761{
"UMI",
"United States Minor Outlying Islands"},
3762{
"URY",
"Uruguay"},
3763{
"UZB",
"Uzbekistan"},
3764{
"United States",
"USA"},
3765{
"United States of America",
"USA"},
3766{
"VAT",
"Holy See (Vatican City State)"},
3767{
"VCT",
"Saint Vincent and the Grenadines"},
3768{
"VEN",
"Venezuela"},
3769{
"VGB",
"British Virgin Islands"},
3770{
"VIR",
"Virgin Islands"},
3771{
"VNM",
"Viet Nam"},
3772{
"VUT",
"Vanuatu"},
3773{
"Vietnam",
"Viet Nam"},
3774{
"WLF",
"Wallis and Futuna"},
3775{
"WSM",
"Samoa"},
3776{
"YEM",
"Yemen"},
3777{
"ZAF",
"South Africa"},
3778{
"ZMB",
"Zambia"},
3779{
"ZWE",
"Zimbabwe"},
3780{
"the Netherlands",
"Netherlands"}
3787{
"Burma",
"Myanmar"},
3788{
"Siam",
"Thailand"}
3794{
"Antigua",
"Antigua and Barbuda: Antigua"},
3795{
"Ashmore Island",
"Ashmore and Cartier Islands: Ashmore Island"},
3796{
"Autonomous Region of the Azores",
"Portugal: Azores"},
3797{
"Azores",
"Portugal: Azores"},
3798{
"Barbuda",
"Antigua and Barbuda: Barbuda"},
3799{
"Bassas da India",
"French Southern and Antarctic Lands: Bassas da India"},
3800{
"Caicos Islands",
"Turks and Caicos Islands: Caicos Islands"},
3801{
"Canary Islands",
"Spain: Canary Islands"},
3802{
"Cartier Island",
"Ashmore and Cartier Islands: Cartier Island"},
3803{
"East Germany",
"Germany: East Germany"},
3804{
"El Hierro",
"Spain: El Hierro"},
3805{
"Europa Island",
"French Southern and Antarctic Lands: Europa Island"},
3806{
"Fuerteventura",
"Spain: Fuerteventura"},
3807{
"Glorioso Islands",
"French Southern and Antarctic Lands: Glorioso Islands"},
3808{
"Gran Canaria",
"Spain: Gran Canaria"},
3809{
"Grenadines",
"Saint Vincent and the Grenadines: Grenadines"},
3810{
"Heard Island",
"Heard Island and McDonald Islands: Heard Island"},
3811{
"Ile Amsterdam",
"French Southern and Antarctic Lands: Ile Amsterdam"},
3812{
"Ile Saint-Paul",
"French Southern and Antarctic Lands: Ile Saint-Paul"},
3813{
"Iles Crozet",
"French Southern and Antarctic Lands: Iles Crozet"},
3814{
"Iles Kerguelen",
"French Southern and Antarctic Lands: Iles Kerguelen"},
3815{
"Juan de Nova Island",
"French Southern and Antarctic Lands: Juan de Nova Island"},
3816{
"La Gomera",
"Spain: La Gomera"},
3817{
"La Graciosa",
"Spain: La Graciosa"},
3818{
"La Palma",
"Spain: La Palma"},
3819{
"Lanzarote",
"Spain: Lanzarote"},
3820{
"Madeira",
"Portugal: Madeira"},
3821{
"McDonald Island",
"Heard Island and McDonald Islands: McDonald Island"},
3822{
"McDonald Islands",
"Heard Island and McDonald Islands: McDonald Islands"},
3823{
"Miquelon",
"Saint Pierre and Miquelon: Miquelon"},
3824{
"Nevis",
"Saint Kitts and Nevis: Nevis"},
3825{
"Principe",
"Sao Tome and Principe: Principe"},
3826{
"Saint Kitts",
"Saint Kitts and Nevis: Saint Kitts"},
3827{
"Saint Pierre",
"Saint Pierre and Miquelon: Saint Pierre"},
3828{
"Saint Vincent",
"Saint Vincent and the Grenadines: Saint Vincent"},
3829{
"Sao Tome",
"Sao Tome and Principe: Sao Tome"},
3830{
"Scotland",
"United Kingdom: Scotland"},
3831{
"South Sandwich Islands",
"South Georgia and the South Sandwich Islands: South Sandwich Islands"},
3832{
"St Kitts",
"Saint Kitts and Nevis: Saint Kitts"},
3833{
"St Pierre",
"Saint Pierre and Miquelon: Saint Pierre"},
3834{
"St Thomas",
"USA: Saint Thomas"},
3835{
"St Vincent",
"Saint Vincent and the Grenadines: Saint Vincent"},
3836{
"St. Kitts",
"Saint Kitts and Nevis: Saint Kitts"},
3837{
"St. Pierre",
"Saint Pierre and Miquelon: Saint Pierre"},
3838{
"St. Thomas",
"USA: Saint Thomas"},
3839{
"St. Vincent",
"Saint Vincent and the Grenadines: Saint Vincent"},
3840{
"Tenerife",
"Spain: Tenerife"},
3841{
"Tobago",
"Trinidad and Tobago: Tobago"},
3842{
"Trinidad",
"Trinidad and Tobago: Trinidad"},
3843{
"Tromelin Island",
"French Southern and Antarctic Lands: Tromelin Island"},
3844{
"Turks Islands",
"Turks and Caicos Islands: Turks Islands"},
3845{
"Wales",
"United Kingdom: Wales"},
3846{
"West Germany",
"Germany: West Germany"},
3861 "District of Columbia",
3908vector<string> words;
3910 for(vector<string>::iterator word = words.begin(); word != words.end(); ++word)
3911 if(!word->empty() &&
isalpha(word->at(0)))
3912word->at(0) = (
unsignedchar)
toupper(word->at(0));
3920 if(found != k_whole_country_fixes.end()) {
3921new_country = found->second;
3926 for(
size_t i= 0;
i< num_states; ++
i) {
3941 stringcountry2(*c);
3945 while(pos2 !=
NPOS)
3947 if(pos2 <= pos1 && pos2+country2.length() >= pos1+country1.length())
3958 intnum_matches = 0;
3963 while(pos !=
NPOS)
3965 if(!((pos+country.length()<phrase.length() &&
isalpha(phrase[pos+country.length()]))
3966|| (pos > 0 &&
isalpha(phrase[pos-1]))
3973 return(num_matches > 1);
3991 boolany_found =
true;
3992 while(!
val.empty() && any_found) {
4004 val=
val.substr(0,
val.length() - 1);
4008 val=
val.substr(0,
val.length() - 4);
4011 size_t len=
val.length();
4013 val=
val.substr(0,
val.length() - 1);
4016}
else if(
len> 5) {
4018 booldo_remove =
true;
4019 size_tpos =
val.length() - 2;
4021 while(dist < 4 && do_remove) {
4029 val=
val.substr(0,
val.length() - 1);
4040vector<string> tokens;
4043vector<string>::iterator it = tokens.begin();
4044 while(it != tokens.end()) {
4046 if(pos !=
NPOS&& pos > 3 && (*it).length() - pos > 4) {
4047 string first= (*it).substr(0, pos);
4048 stringremainder = (*it).substr(pos + 1);
4050 size_tlen_to_space =
first.length();
4051 while(space_pos !=
NPOS) {
4053len_to_space =
first.length();
4056 if(len_to_space > 4) {
4057(*it) = (*it).substr(0, pos);
4058it = tokens.insert(it, remainder);
4073 size_ttlen =
test.length();
4074 size_twlen = word.
length();
4077 while(pos !=
NPOS) {
4078 size_tp = start + pos;
4079 if( (p == 0 || !
isalpha((
unsigned char)
test[p - 1])) &&
4080(p + wlen >= tlen || !
isalpha((
unsigned char)
test[p + wlen])) ) {
4103 constvector<string>& countries,
4104 string& valid_country,
4105 string& orig_valid_country,
4106 bool& too_many_countries,
4109 for(
autocountry : countries) {
4110 if(!country.empty() && !too_many_countries)
4112 string check= country;
4116 boolcheck_has_bad_cap =
false;
4119 if(valid_country.empty())
4121valid_country =
check;
4122orig_valid_country =
check;
4123bad_cap = check_has_bad_cap;
4127too_many_countries =
true;
4133 if(found != fix_map.
end())
4135 if(valid_country.empty())
4137valid_country = found->second;
4138orig_valid_country =
check;
4142too_many_countries =
true;
4154 if(
val.length() == 0)
return false;
4156 char*
str=
new char[
sizeof(char) * (
val.length() + 1)];
4157strcpy(
str,
val.c_str());
4168 while(ch !=
'\0') {
4175 while(ch ==
' ') {
4201{
"Acadia Parish",
"Acadia Parish"},
4202{
"AcadiaParish",
"Acadia Parish"},
4203{
"Allen Parish",
"Allen Parish"},
4204{
"AllenParish",
"Allen Parish"},
4205{
"Ascension Parish",
"Ascension Parish"},
4206{
"AscensionParish",
"Ascension Parish"},
4207{
"Assumption Parish",
"Assumption Parish"},
4208{
"AssumptionParish",
"Assumption Parish"},
4209{
"Avoyelles Parish",
"Avoyelles Parish"},
4210{
"AvoyellesParish",
"Avoyelles Parish"},
4211{
"Beauregard Parish",
"Beauregard Parish"},
4212{
"BeauregardParish",
"Beauregard Parish"},
4213{
"Bienville Parish",
"Bienville Parish"},
4214{
"BienvilleParish",
"Bienville Parish"},
4215{
"Bossier Parish",
"Bossier Parish"},
4216{
"BossierParish",
"Bossier Parish"},
4217{
"Caddo Parish",
"Caddo Parish"},
4218{
"CaddoParish",
"Caddo Parish"},
4219{
"Calcasieu Parish",
"Calcasieu Parish"},
4220{
"CalcasieuParish",
"Calcasieu Parish"},
4221{
"Caldwell Parish",
"Caldwell Parish"},
4222{
"CaldwellParish",
"Caldwell Parish"},
4223{
"Cameron Parish",
"Cameron Parish"},
4224{
"CameronParish",
"Cameron Parish"},
4225{
"Catahoula Parish",
"Catahoula Parish"},
4226{
"CatahoulaParish",
"Catahoula Parish"},
4227{
"Claiborne Parish",
"Claiborne Parish"},
4228{
"ClaiborneParish",
"Claiborne Parish"},
4229{
"Concordia Parish",
"Concordia Parish"},
4230{
"ConcordiaParish",
"Concordia Parish"},
4231{
"DeSoto Parish",
"DeSoto Parish"},
4232{
"DeSotoParish",
"DeSoto Parish"},
4233{
"East Baton Rouge Parish",
"East Baton Rouge Parish"},
4234{
"East Carroll Parish",
"East Carroll Parish"},
4235{
"East Feliciana Parish",
"East Feliciana Parish"},
4236{
"EastBatonRougeParish",
"East Baton Rouge Parish"},
4237{
"EastCarrollParish",
"East Carroll Parish"},
4238{
"EastFelicianaParish",
"East Feliciana Parish"},
4239{
"Evangeline Parish",
"Evangeline Parish"},
4240{
"EvangelineParish",
"Evangeline Parish"},
4241{
"Franklin Parish",
"Franklin Parish"},
4242{
"FranklinParish",
"Franklin Parish"},
4243{
"Grant Parish",
"Grant Parish"},
4244{
"GrantParish",
"Grant Parish"},
4245{
"Iberia Parish",
"Iberia Parish"},
4246{
"IberiaParish",
"Iberia Parish"},
4247{
"Iberville Parish",
"Iberville Parish"},
4248{
"IbervilleParish",
"Iberville Parish"},
4249{
"Jackson Parish",
"Jackson Parish"},
4250{
"JacksonParish",
"Jackson Parish"},
4251{
"Jefferson Davis Parish",
"Jefferson Davis Parish"},
4252{
"Jefferson Parish",
"Jefferson Parish"},
4253{
"JeffersonDavisParish",
"Jefferson Davis Parish"},
4254{
"JeffersonParish",
"Jefferson Parish"},
4255{
"Lafayette Parish",
"Lafayette Parish"},
4256{
"LafayetteParish",
"Lafayette Parish"},
4257{
"Lafourche Parish",
"Lafourche Parish"},
4258{
"LafourcheParish",
"Lafourche Parish"},
4259{
"LaSalle Parish",
"LaSalle Parish"},
4260{
"LaSalleParish",
"LaSalle Parish"},
4261{
"Lincoln Parish",
"Lincoln Parish"},
4262{
"LincolnParish",
"Lincoln Parish"},
4263{
"Livingston Parish",
"Livingston Parish"},
4264{
"LivingstonParish",
"Livingston Parish"},
4265{
"Madison Parish",
"Madison Parish"},
4266{
"MadisonParish",
"Madison Parish"},
4267{
"Morehouse Parish",
"Morehouse Parish"},
4268{
"MorehouseParish",
"Morehouse Parish"},
4269{
"Natchitoches Parish",
"Natchitoches Parish"},
4270{
"NatchitochesParish",
"Natchitoches Parish"},
4271{
"Orleans Parish",
"Orleans Parish"},
4272{
"OrleansParish",
"Orleans Parish"},
4273{
"Ouachita Parish",
"Ouachita Parish"},
4274{
"OuachitaParish",
"Ouachita Parish"},
4275{
"Plaquemines Parish",
"Plaquemines Parish"},
4276{
"PlaqueminesParish",
"Plaquemines Parish"},
4277{
"Pointe Coupee Parish",
"Pointe Coupee Parish"},
4278{
"PointeCoupeeParish",
"Pointe Coupee Parish"},
4279{
"Rapides Parish",
"Rapides Parish"},
4280{
"RapidesParish",
"Rapides Parish"},
4281{
"Red River Parish",
"Red River Parish"},
4282{
"RedRiverParish",
"Red River Parish"},
4283{
"Richland Parish",
"Richland Parish"},
4284{
"RichlandParish",
"Richland Parish"},
4285{
"Sabine Parish",
"Sabine Parish"},
4286{
"SabineParish",
"Sabine Parish"},
4287{
"St. Bernard Parish",
"St. Bernard Parish"},
4288{
"St. Charles Parish",
"St. Charles Parish"},
4289{
"St. Helena Parish",
"St. Helena Parish"},
4290{
"St. James Parish",
"St. James Parish"},
4291{
"St. John the Baptist Parish",
"St. John the Baptist Parish"},
4292{
"St. Landry Parish",
"St. Landry Parish"},
4293{
"St. Martin Parish",
"St. Martin Parish"},
4294{
"St. Mary Parish",
"St. Mary Parish"},
4295{
"St. Tammany Parish",
"St. Tammany Parish"},
4296{
"St.BernardParish",
"St. Bernard Parish"},
4297{
"St.CharlesParish",
"St. Charles Parish"},
4298{
"St.HelenaParish",
"St. Helena Parish"},
4299{
"St.JamesParish",
"St. James Parish"},
4300{
"St.JohntheBaptistParish",
"St. John the Baptist Parish"},
4301{
"St.LandryParish",
"St. Landry Parish"},
4302{
"St.MartinParish",
"St. Martin Parish"},
4303{
"St.MaryParish",
"St. Mary Parish"},
4304{
"St.TammanyParish",
"St. Tammany Parish"},
4305{
"Tangipahoa Parish",
"Tangipahoa Parish"},
4306{
"TangipahoaParish",
"Tangipahoa Parish"},
4307{
"Tensas Parish",
"Tensas Parish"},
4308{
"TensasParish",
"Tensas Parish"},
4309{
"Terrebonne Parish",
"Terrebonne Parish"},
4310{
"TerrebonneParish",
"Terrebonne Parish"},
4311{
"Union Parish",
"Union Parish"},
4312{
"UnionParish",
"Union Parish"},
4313{
"Vermilion Parish",
"Vermilion Parish"},
4314{
"VermilionParish",
"Vermilion Parish"},
4315{
"Vernon Parish",
"Vernon Parish"},
4316{
"VernonParish",
"Vernon Parish"},
4317{
"Washington Parish",
"Washington Parish"},
4318{
"WashingtonParish",
"Washington Parish"},
4319{
"Webster Parish",
"Webster Parish"},
4320{
"WebsterParish",
"Webster Parish"},
4321{
"West Baton Rouge Parish",
"West Baton Rouge Parish"},
4322{
"West Carroll Parish",
"West Carroll Parish"},
4323{
"West Feliciana Parish",
"West Feliciana Parish"},
4324{
"WestBatonRougeParish",
"West Baton Rouge Parish"},
4325{
"WestCarrollParish",
"West Carroll Parish"},
4326{
"WestFelicianaParish",
"West Feliciana Parish"},
4327{
"Winn Parish",
"Winn Parish"},
4328{
"WinnParish",
"Winn Parish"}
4336 if( parish.empty() ) {
4341 if( parish_find_iter != parishAbbrevMap.end() ) {
4343parish = parish_find_iter->second;
4352{
"AK",
"Alaska"},
4353{
"AL",
"Alabama"},
4354{
"Alabama",
"Alabama"},
4355{
"Alaska",
"Alaska"},
4356{
"American Samoa",
"American Samoa"},
4357{
"AR",
"Arkansas"},
4358{
"Arizona",
"Arizona"},
4359{
"Arkansas",
"Arkansas"},
4360{
"AS",
"American Samoa"},
4361{
"AZ",
"Arizona"},
4362{
"CA",
"California"},
4363{
"California",
"California"},
4364{
"CO",
"Colorado"},
4365{
"Colorado",
"Colorado"},
4366{
"Connecticut",
"Connecticut"},
4367{
"CT",
"Connecticut"},
4368{
"DC",
"District of Columbia"},
4369{
"DE",
"Delaware"},
4370{
"Delaware",
"Delaware"},
4371{
"District of Columbia",
"District of Columbia"},
4372{
"FL",
"Florida"},
4373{
"Florida",
"Florida"},
4374{
"GA",
"Georgia"},
4375{
"Georgia",
"Georgia"},
4377{
"Guam",
"Guam"},
4378{
"Hawaii",
"Hawaii"},
4379{
"HI",
"Hawaii"},
4382{
"Idaho",
"Idaho"},
4383{
"IL",
"Illinois"},
4384{
"Illinois",
"Illinois"},
4385{
"IN",
"Indiana"},
4386{
"Indiana",
"Indiana"},
4387{
"Iowa",
"Iowa"},
4388{
"Kansas",
"Kansas"},
4389{
"Kentucky",
"Kentucky"},
4390{
"KS",
"Kansas"},
4391{
"KY",
"Kentucky"},
4392{
"LA",
"Louisiana"},
4393{
"Louisiana",
"Louisiana"},
4394{
"MA",
"Massachusetts"},
4395{
"Maine",
"Maine"},
4396{
"Maryland",
"Maryland"},
4397{
"Massachusetts",
"Massachusetts"},
4398{
"MD",
"Maryland"},
4400{
"MI",
"Michigan"},
4401{
"Michigan",
"Michigan"},
4402{
"Minnesota",
"Minnesota"},
4403{
"Mississippi",
"Mississippi"},
4404{
"Missouri",
"Missouri"},
4405{
"MN",
"Minnesota"},
4406{
"MO",
"Missouri"},
4407{
"Montana",
"Montana"},
4408{
"MS",
"Mississippi"},
4409{
"MT",
"Montana"},
4410{
"NC",
"North Carolina"},
4411{
"ND",
"North Dakota"},
4412{
"NE",
"Nebraska"},
4413{
"Nebraska",
"Nebraska"},
4414{
"Nevada",
"Nevada"},
4415{
"New Hampshire",
"New Hampshire"},
4416{
"New Jersey",
"New Jersey"},
4417{
"New Mexico",
"New Mexico"},
4418{
"New York",
"New York"},
4419{
"NH",
"New Hampshire"},
4420{
"NJ",
"New Jersey"},
4421{
"NM",
"New Mexico"},
4422{
"North Carolina",
"North Carolina"},
4423{
"North Dakota",
"North Dakota"},
4424{
"NV",
"Nevada"},
4425{
"NY",
"New York"},
4427{
"Ohio",
"Ohio"},
4428{
"OK",
"Oklahoma"},
4429{
"Oklahoma",
"Oklahoma"},
4430{
"OR",
"Oregon"},
4431{
"Oregon",
"Oregon"},
4432{
"PA",
"Pennsylvania"},
4433{
"Pennsylvania",
"Pennsylvania"},
4434{
"PR",
"Puerto Rico"},
4435{
"Puerto Rico",
"Puerto Rico"},
4436{
"Rhode Island",
"Rhode Island"},
4437{
"RI",
"Rhode Island"},
4438{
"SC",
"South Carolina"},
4439{
"SD",
"South Dakota"},
4440{
"South Carolina",
"South Carolina"},
4441{
"South Dakota",
"South Dakota"},
4442{
"Tennessee",
"Tennessee"},
4443{
"Texas",
"Texas"},
4444{
"TN",
"Tennessee"},
4446{
"US Virgin Islands",
"US Virgin Islands"},
4448{
"Utah",
"Utah"},
4449{
"VA",
"Virginia"},
4450{
"Vermont",
"Vermont"},
4451{
"VI",
"US Virgin Islands"},
4452{
"Virgin Islands",
"US Virgin Islands"},
4453{
"Virginia",
"Virginia"},
4454{
"VT",
"Vermont"},
4455{
"WA",
"Washington"},
4456{
"Washington",
"Washington"},
4457{
"West Virginia",
"West Virginia"},
4458{
"WI",
"Wisconsin"},
4459{
"Wisconsin",
"Wisconsin"},
4460{
"WV",
"West Virginia"},
4461{
"WY",
"Wyoming"},
4462{
"Wyoming",
"Wyoming"}
4472 if(
state.empty() ) {
4476 stringoriginal =
state;
4477 stringworking =
state;
4494 if( state_find_iter != stateAbbrevMap.end() ) {
4496 state= state_find_iter->second;
4509 if( country.empty() ) {
4514 stringoriginal = country;
4515 stringworking = country;
4519working = working.substr ( 1, working.length() - 2 );
4545vector<string> components;
4549 if( components.size() < 1 ) {
4554 for(
size_tj = 0; j < components.size(); j++ ) {
4573 for(
size_tj = 0; j < components.size(); j++ ) {
4574 boolmodified =
false;
4575 if(
s_IsState( components[j], modified )) {
4600res.append (
"USA: ");
4605res.append ( components[
match] );
4609 for(
size_tj = 0; j < components.size(); j++ ) {
4610 if( j ==
match)
continue;
4612res.append ( components[j] );
4620}
else if( num_states > 1 ) {
4636 if( ! exception_file.empty()) {
4639 for(
const auto&
row: my_stream ) {
4640 TFieldNonumber_of_fields =
row. GetNumberOfFields();
4641 if( number_of_fields != 2 )
continue;
4642 stringfr =
row[0].Get<
string>();
4643 stringto =
row[1].Get<
string>();
4644exceptions [fr] = to;
4655 for(
const auto& itm : exceptions ) {
4656 stringfr = itm.first;
4657 stringto = itm.second;
4664 if( ! f1.empty() && ! f2.empty()) {
4665fr = f1 +
": "+ f2;
4668(*exception_map)[fr] = to;
4676 if( ! exception_file.empty()) {
4687 stringworking = country;
4692 stringcorrected = (*exception_map) [working];
4693 if( ! corrected.empty()) {
4733 stringmicronesia =
"Micronesia, Federated States of";
4757 if(!usa1.empty() && !usa2.empty()) {
4761 input=
"USA: "+ usa2;
4765 autoold_name_fix = k_old_country_name_fixes.find(
input.c_str());
4766 if(old_name_fix != k_old_country_name_fixes.end()) {
4767 input= old_name_fix->second;
4771 if(us_territories) {
4790 if(!new_country.empty())
4793 booltoo_many_countries =
false;
4794 boolbad_cap =
false;
4796 stringvalid_country;
4797 stringorig_valid_country;
4799 x_FindCountryName(k_country_name_fixes, countries, valid_country, orig_valid_country, too_many_countries, bad_cap);
4800 if(valid_country.empty()) {
4801 x_FindCountryName(k_subregion_fixes, countries, valid_country, orig_valid_country, too_many_countries, bad_cap);
4804 if(!valid_country.empty() && !too_many_countries)
4807 if(!valid_country.empty() && too_many_countries && valid_country ==
input)
4812new_country = str1+
": "+str2;
4816 else if(!valid_country.empty() && !too_many_countries)
4821 stringbefore =
input.substr(0,pos);
4826 stringafter =
input.substr(pos+orig_valid_country.length());
4830 elsenew_country = valid_country;
4831 if(!before.empty() || !after.empty()) {
4833new_country +=
": ";
4835new_country +=
", ";
4838 if(!before.empty())
4839new_country += before;
4840 if(!before.empty() && !after.empty() && !
NStr::Equal(after,
")"))
4841new_country +=
", ";
4843new_country += after;
4856 for(
size_t i= 0;
i< country.length();
i++) {
4857 if(country[
i] ==
':') {
4860country[
i] =
',';
4872 stringnew_country = country;
4874 if(country_end_pos !=
NPOS)
4877 while(country[pos] ==
','|| country[pos] ==
':'||
isspace((
unsigned char)country[pos]))
4881 stringafter = country.substr(pos);
4882 if(after.empty()) {
4883 if(pos > country_end_pos) {
4884new_country = country.substr(0, country_end_pos);
4888 if(capitalize_after_colon)
4890new_country = country.substr(0,country_end_pos);
4891new_country +=
": "+ after;
4903{
"adult",
"adult"},
4905{
"juvenile",
"juvenile"},
4906{
"larva",
"larva"}
4913 stringfix =
value;
4916 if(it != sc_DevStagePairs.end()) {
4924{
"hemocyte",
"hemocyte"},
4925{
"hepatocyte",
"hepatocyte"},
4926{
"lymphocyte",
"lymphocyte"},
4927{
"neuroblast",
"neuroblast"}
4933 stringfix =
value;
4936 if(it != sc_CellTypePairs.end()) {
4951vector<CTempString> tokens;
4953 if(tokens.size() > 1) {
4954qual_map[tokens[0]] = tokens[1];
4960 const char**built_in,
size_tnum_built_in,
4965 if(!
file.empty()) {
4972 if(built_in ==
NULL) {
4975 if(getenv(
"NCBI_DEBUG")) {
4976 ERR_POST(
Note<<
"Falling back on built-in data for "+ data_name);
4978 for(
size_t i= 0;
i< num_built_in;
i++) {
4979 const char*p = built_in[
i];
4984 if(getenv(
"NCBI_DEBUG")) {
4989}
while(!
lr->AtEOF());
4993 #include "isolation_sources.inc" 5003 s_AddOneDataFile(
"isolation_sources.txt",
"isolation sources", (
const char**)k_isolation_sources,
sizeof(k_isolation_sources) /
sizeof(
char*),
s_IsolationSourceMap.Get());
5013 stringfix =
value;
5023 for(
size_t i= 0;
i<
max;
i++) {
5040 stringfix =
value;
5050 for(
size_t i= 0;
i<
max;
i++) {
5073 stringnew_val =
value;
5121 const string& name =
GetName();
5194 returndeprecate_subsources.test((
ESubtype)subtype);
5224 "[BankIt_uncultured16S_wizard]; [universal primers]; [tgge]",
5225 "[BankIt_uncultured16S_wizard]; [universal primers]; [dgge]",
5226 "[BankIt_uncultured16S_wizard]; [universal primers]",
5227 "[BankIt_cultured16S_wizard]",
5228 "[BankIt_organellerRNA_wizard]",
5229 "[BankIt_ITS_wizard]; [rRNAITS_notfound]",
5230 "[BankIt_ITS_wizard]",
5231 "[uncultured (using universal primers)]",
5232 "[uncultured (using universal primers) bacterial source]",
5233 "[cultured bacterial source]",
5234 "[enrichment culture bacterial source]",
5235 "[mixed bacterial source (cultured and uncultured)]",
5236 "[uncultured]; [universal primers]",
5237 "[mixed bacterial source]",
5239 "[cDNA derived from mRNA, purified viral particles]",
5240 "[cDNA derived from mRNA, whole cell/tissue lysate]",
5241 "[cDNA derived from genomic RNA, whole cell/tissue lysate]",
5242 "[cDNA derived from genomic RNA, purified viral particles]",
5243 "[universal primers]",
5244 "[uncultured; wizard]",
5245 "[uncultured; wizard; spans unknown]",
5246 "[cultured; wizard]",
5247 "[cultured; wizard; spans unknown]",
5248 "[intergenic wizard]",
5249 "[intergenic wizard; spans unknown]",
5250 "[Microsatellite wizard]",
5251 "[Microsatellite wizard; multiple repeats]",
5253 "[D-loop wizard; spans unknown]",
5254 "[D-loop wizard; spans known]",
5259 "[BankIt_uncultured16S_wizard]; [species_specific primers]; [tgge]",
5260 "[BankIt_uncultured16S_wizard]; [species_specific primers]; [dgge]",
5261 "[BankIt_uncultured16S_wizard]; [species_specific primers]",
5262 "[uncultured (with species-specific primers)]",
5263 "[uncultured]; [amplified with species-specific primers]",
5264 "[uncultured (using species-specific primers) bacterial source]",
5265 "[amplified with species-specific primers]",
5274 if(pos != string::npos) {
5295 size_tremove_len = to_remove.length();
5297 while(pos !=
NPOS) {
5298 size_textra_len = strspn (
value.c_str() + pos + remove_len,
" ;");
5299 value=
value.substr(0, pos) +
value.substr(pos + remove_len + extra_len);
5311 if(is_species_level) {
5314 value=
"amplified with species-specific primers";
5335(
const string& country_name,
doubley,
doublemin_x,
doublemax_x,
doublescale)
5336: m_CountryName(country_name) ,
5351 #define EPSILON 0.001 5409: m_CountryName(country_name) , m_MinX (min_x), m_MinY (min_y), m_MaxX(max_x), m_MaxY (max_y)
5412 size_tpos =
NStr::Find(country_name,
":");
5417 m_Level0= country_name.substr(0, pos);
5419 m_Level1= country_name.substr(pos + 1);
5493&& m_MaxX <= other_block->
GetMaxX()
5495&& m_MinY <= other_block->
GetMaxY()) {
5551m_WaterDistance(-1),
5552m_ClaimedDistance(-1)
5620 #include "lat_lon_country.inc" 5623 #include "lat_lon_water.inc" 5628 if(getenv(
"NCBI_DEBUG")) {
5629 ERR_POST(
Note<<
"Falling back on built-in data for latlon / water data.");
5634 stringcurrent_country;
5636 for(
int i= 0;
i< num;
i++) {
5638 if(line[0] ==
'-') {
5640}
else if(
isalpha((
unsigned char)line[0])) {
5641current_country = line;
5642}
else if(
isdigit((
unsigned char)line[0])) {
5645vector<string> tokens;
5647 if(tokens.size() > 3) {
5649 for(
size_tj = 2; j < tokens.size() - 1; j+=2) {
5666 if(getenv(
"NCBI_DEBUG")) {
5667 ERR_POST(
Note<<
"Reading from "+ filename +
" for latlon/water data.");
5674 stringcurrent_country;
5679vector<SIZE_TYPE> tab_positions;
5684 if(line[0] ==
'-') {
5686}
else if(
isalpha((
unsigned char)line[0])) {
5687current_country = line;
5688}
else if(
isdigit((
unsigned char)line[0])) {
5701tab_positions.clear();
5703 while( tab_pos !=
NPOS) {
5704tab_positions.push_back(tab_pos);
5705tab_pos = line.
find(
'\t', tab_pos+1);
5708tab_positions.push_back(line.
length());
5710 const char* line_start = line.
data();
5711 if( tab_positions.size() >= 4 ) {
5712 CTempStringy_str( line_start + tab_positions[0]+1, tab_positions[1] - tab_positions[0] - 1 );
5716 for(
size_tj = 1; j < tab_positions.size() - 2; j+=2) {
5717 const SIZE_TYPEpos1 = tab_positions[j];
5718 const SIZE_TYPEpos2 = tab_positions[j+1];
5719 const SIZE_TYPEpos3 = tab_positions[j+2];
5720 CTempStringfirst_num( line_start + pos1 + 1, pos2 - pos1 - 1 );
5721 CTempStringsecond_num( line_start + pos2 + 1, pos3 - pos2 - 1 );
5726}
while( !
lr->AtEOF() );
5739}
else if(
line1->GetY() >
line2->GetY()) {
5742 if(
line1->GetMinX() <
line2->GetMinX()) {
5757}
else if(
cmp< 0) {
5771}
else if(
line1->GetY() >
line2->GetY()) {
5773}
if(
line1->GetMinX() <
line2->GetMinX()) {
5775}
else if(
line1->GetMinX() >
line2->GetMinX()) {
5777}
else if(
line1->GetMaxX() <
line2->GetMaxX()) {
5779}
else if(
line1->GetMaxX() >
line2->GetMaxX()) {
5797 const char* env_val = getenv(
"NCBI_LAT_LON_DATA_PATH");
5800data_path = (
string) env_val;
5802data_path = data_path +
"/";
5808 if(data_path.empty() || !
x_InitFromFile(data_path +
"lat_lon_water.txt")) {
5814 if(data_path.empty() || !
x_InitFromFile(data_path +
"lat_lon_country.txt")) {
5826TCountryToLinesMap countryToLinesMap;
5828countryToLinesMap[(*line_it)->GetCountry()].push_back(*line_it);
5838country_lines_it->second;
5843back_inserter(new_country_line_list));
5907}
else if(
cmp> 0) {
6008 const string& country,
6009 const string& province)
const 6022 if(best ==
NULL) {
6024}
else if(!best->
PreferTo(other, country, province, (
bool)(best->
GetArea() <= other->
GetArea()))) {
6038 #define EARTH_RADIUS 6371.0 6039 #define CONST_PI 3.14159265359 6046 return(degrees * (
CONST_PI/ 180.0));
6057 doublelat1, lon1, lat2, lon2;
6058 doubledLat, dLon,
a, c;
6068 a= sin (dLat / 2) * sin (dLat / 2) +
6069cos (lat1) * cos (lat2) * sin (dLon / 2) * sin (dLon / 2);
6070c = 2 * atan2 (sqrt (
a), sqrt (1 -
a));
6081 doublelat1, lon1, lat2, lon2;
6082 doubledLat, dLon,
a, c;
6092 a= sin (dLat / 2) * sin (dLat / 2) +
6093cos (lat1) * cos (lat2) * sin (dLon / 2) * sin (dLon / 2);
6094c = 2 * atan2 (sqrt (
a), sqrt (1 -
a));
6110 intmin_y = y - maxDelta;
6111 intmax_y = y + maxDelta;
6112 intmin_x = x - maxDelta;
6113 intmax_x = x + maxDelta;
6118 doubleclosest = 0.0;
6134 if(rval ==
NULL|| closest > dist
6152 doublelat,
doublelon,
6153 doublerange,
double&distance)
const 6159 intmin_y = y - maxDelta;
6160 intmax_y = y + maxDelta;
6161 intmin_x = x - maxDelta;
6162 intmax_x = x + maxDelta;
6168 doubleclosest = 0.0;
6169 intsmallest_area = -1;
6187smallest_area = ext->
GetArea();
6189}
else if(closest == dist) {
6196&& (ext->
GetArea() < smallest_area
6199smallest_area = ext->
GetArea();
6214 const string& country,
6215 const string& province)
const 6219 doubleclosest = -1.0;
6221 intmin_y = y - maxDelta;
6222 intmax_y = y + maxDelta;
6223 intmin_x = x - maxDelta;
6224 intmax_x = x + maxDelta;
6247 if(closest < 0.0 || closest > dist) {
6261 const string& country2)
const 6284}
else if(scale > 19.5 && scale < 20.5) {
6286}
else if(scale > 99.5 && scale < 100.5) {
6290 return(
int) (distance + 0.5);
6303 returns_map.
Get();
6309 returns_map.
Get();
static void s_ProcessCellLineLine(const CTempString &line)
SStaticPair< const char *, const char * > TParishMapEntry
static const char *const s_Null_Countries[]
static string s_InsertSpacesBetweenTokens(const string &old_str)
static string RepairSingleDigitMonth(const string &orig_date)
double ErrorDistance(double latA, double lonA, double scale)
static CSafeStatic< TQualFixMap > s_IsolationSourceMap
static TCellLineContaminationMap s_CellLineContaminationMap
void s_AddOneDataFile(const string &file_name, const string &data_name, const char **built_in, size_t num_built_in, TQualFixMap &qual_map)
static string s_NormalizeTokens(vector< string > &tokens, vector< double > &numbers, vector< string > &anum, vector< int > &precision, vector< string > &lat_long, vector< string > &nsew)
map< string, string, PNocase > TQualFixMap
DEFINE_STATIC_ARRAY_MAP(TWaterPairMap, sc_WaterPairMap, k_water_pair_map)
static void s_InitializeCellLineContaminationMap(void)
static const size_t k_NumLatLonCountryText
CStaticArrayMap< const char *, const char *, PNocase_CStr > TWaterPairMap
const char * sm_ValidSexQualifierTokens[]
map< string, TSpeciesContaminant > TCellLineContaminationMap
CCountries::EStateCleanup s_DoUSAStateCleanup(string &country)
SStaticPair< const char *, const char * > TStateMapEntry
static const TCStrSet s_Former_CountriesSet(s_Former_Countries, sizeof(s_Former_Countries), __FILE__, __LINE__)
static double DegreesToRadians(double degrees)
static const char * s_ReplaceableCultureNotes[]
static bool s_CellLineContaminationMapInitialized
static void s_InitializeQualMaps(void)
static const TCStrSet s_Null_CountriesSet(s_Null_Countries, sizeof(s_Null_Countries), __FILE__, __LINE__)
bool s_IsState(string &state, bool &modified)
static bool s_FailsGenusOrSpeciesTest(const string &value, const string &taxname)
string s_ShortenLatLon(string &subname)
bool s_IsParish(string &parish)
static const TWaterPairElem k_water_pair_map[]
static const SStaticPair< const char *, const char * > s_map_subregion_fixes[]
CStaticPairArrayMap< const char *, const char *, PCase_CStr > TCStringPairsMap
static bool s_init_UseGeoLocNameForCountry(void)
static void s_ProcessQualMapLine(const CTempString &line, TQualFixMap &qual_map)
CStaticArraySet< const char *, PCase_CStr > TCStrSet
static const SStaticPair< const char *, const char * > s_map_old_country_name_fixes[]
static const TStaticQualFixPair kCellTypePairs[]
static const TStaticQualFixPair kDevStagePairs[]
static string s_RemoveSpacesWithinNumbers(const string &old_str)
static bool s_QualFixupMapsInitialized
static const char * s_RemovableCultureNotes[]
CStaticPairArrayMap< const char *, const char *, PNocase_CStr > TParishMap
static string x_FindSurroundingOcean(string &water)
SStaticPair< const char *, const char * > TWaterPairElem
DEFINE_STATIC_FAST_MUTEX(s_CellLineContaminationMutex)
static bool exceptions_initialized
CStaticPairArrayMap< const char *, const char *, PNocase_CStr > TStaticQualFixMap
static void s_GetLatLong(const string &new_str, vector< double > &numbers, vector< int > &precision)
bool s_SuppressCountryFix(const string &test)
static bool s_IsNumber(const string &token, double *result=NULL)
static const TParishMapEntry parish_abbrev_array[]
bool s_ChooseMonthAndDay(const string &token1, const string &token2, bool month_first, string &month, int &day, bool &month_ambiguous)
static const TCStrSet s_CountriesSet(s_Countries, sizeof(s_Countries), __FILE__, __LINE__)
const char * sm_ValidSexQualifierPhrases[]
static const char *const s_Countries[]
map< string, TContaminatingCellLine > TSpeciesContaminant
CRowReader< CRowReaderStream_NCBI_TSV > TNCBITSVStream
static CSafeStatic< CCountries::TUsaExceptionMap > exception_map
static const SStaticPair< const char *, const char * > s_map_country_name_fixes[]
bool s_IsValidSexQualifierPhrase(const string &value)
bool s_CompressRunsOfSpaces(string &val)
static const SStaticPair< const char *, const char * > s_map_whole_country_fixes[]
static void s_ReorderNorthSouthEastWest(vector< double > &numbers, vector< int > &precision, const vector< string > &lat_long, vector< string > &nsew)
static const char * s_USAStates[]
SStaticPair< const char *, const char * > TStaticQualFixPair
pair< string, string > TContaminatingCellLine
MAKE_CONST_SET(s_Null_CollectionDatesSet, ct::tagStrCase, { "missing", "missing: control sample", "missing: data agreement established pre-2023", "missing: endangered species", "missing: human-identifiable", "missing: lab stock", "missing: sample group", "missing: synthetic construct", "missing: third party data", "not applicable", "not collected", "not provided", "restricted access", }) string CSubSource
CStaticPairArrayMap< const char *, const char *, PNocase_CStr > TStateMap
void s_CollectNumberAndUnits(const string &value, string &number, string &units)
bool s_ContainsWholeWord(const CTempString test, const CTempString word, NStr::ECase case_sense)
static double DistanceOnGlobe(double latA, double lonA, double latB, double lonB)
static const char *const s_Former_Countries[]
static const TStateMapEntry state_abbrev_array[]
static const size_t k_NumLatLonWaterText
static vector< string > x_Tokenize(const string &val)
static string NewFixCountry(const string &input, bool us_territories=false)
static bool WasValid(const string &country)
static string USAStateCleanup(const string &country)
static string WholeCountryFix(string country)
static void x_RemoveDelimitersFromEnds(string &val, bool except_paren=false)
static bool IsValid(const string &country)
static bool ContainsMultipleCountryNames(const string &phrase)
static bool IsSubstringOfStringInList(const string &phrase, const string &country1, size_t pos1)
static void x_FindCountryName(const TCStringPairsMap &fix_map, const vector< string > &countries, string &valid_country, string &orig_valid_country, bool &too_many_countries, bool &bad_cap)
static void ReadUSAExceptionMap(TUsaExceptionMap &exceptions, const string &filepath)
static bool ChangeExtraColonsToCommas(string &country)
static string CapitalizeFirstLetterOfEveryWord(const string &phrase)
static string CountryFixupItem(const string &input, bool capitalize_after_colon)
static string GetCorrectedCountryCapitalization(const string &country)
static void LoadUSAExceptionMap(const TUsaExceptionMap &exceptions)
void AddLine(const CCountryLine *line)
bool DoesOverlap(const CCountryExtreme *other_block) const
string GetLevel0(void) const
CCountryExtreme(const string &country_name, int min_x, int min_y, int max_x, int max_y)
string GetCountry(void) const
bool PreferTo(const CCountryExtreme *other_block, const string country, const string province, const bool prefer_new) const
string GetLevel1(void) const
static int ConvertLat(double y, double scale)
int x_ConvertLat(double y)
CCountryLine(const string &country_name, double y, double min_x, double max_x, double scale)
int x_ConvertLon(double x)
static int ConvertLon(double x, double scale)
ECompare Compare(const CDate &date) const
@ eCompare_before
*this comes first.
@ eCompare_after
*this comes second.
int GetClaimedDistance(void) const
string GetClosestProvince(void) const
void SetFullGuess(string guess)
string GetClaimedFull(void) const
string GetClosestWater(void) const
void SetGuessProvince(string guess)
CLatLonCountryId(float lat, float lon)
int TClassificationFlags
Bitwise OR of "EClassificationFlags".
string GetGuessCountry(void) const
string GetGuessWater(void) const
CLatLonCountryId::TClassificationFlags Classify(string country, string province)
string GetClosestFull(void) const
int GetLandDistance(void) const
string GetClosestCountry(void) const
string GetGuessProvince(void) const
void SetGuessCountry(string guess)
TCountryExtremeList m_CountryExtremes
bool DoCountryBoxesOverlap(const string &country1, const string &country2) const
const CCountryExtreme * GuessRegionForLatLon(double lat, double lon, const string &country=kEmptyStr, const string &province=kEmptyStr) const
const CCountryExtreme * x_FindCountryExtreme(const string &country) const
int TLatLonAdjustFlags
Bitwise OR of "ELatLonAdjustFlags".
bool IsCountryInLatLon(const string &country, double lat, double lon) const
static bool s_CompareTwoLinesByLatLonOnly(const CCountryLine *line1, const CCountryLine *line2)
static bool s_CompareTwoLinesByCountry(const CCountryLine *line1, const CCountryLine *line2)
CLatLonMap_Base(bool is_water)
bool HaveLatLonForRegion(const string &country) const
TCountryLineList m_CountryLineList
const CCountryExtreme * IsNearLatLon(double lat, double lon, double range, double &distance, const string &country, const string &province=kEmptyStr) const
TCountryLineList m_LatLonSortedList
bool IsClosestToLatLon(const string &country, double lat, double lon, double range, double &distance) const
void x_InitFromDefaultList(const char *const *list, int num)
vector< CCountryLine * > TCountryLineList
static bool s_CompareTwoLinesByLatLonThenCountry(const CCountryLine *line1, const CCountryLine *line2)
size_t x_GetLatStartIndex(int y) const
bool x_InitFromFile(const string &filename)
const CCountryExtreme * FindClosestToLatLon(double lat, double lon, double range, double &distance)
static int AdjustAndRoundDistance(double distance, double scale)
static CNcbiApplication * Instance(void)
Singleton method.
static string FixHostCapitalization(const string &value)
Callback style template to iterate over a row stream.
T & Get(void)
Create the variable if not created yet, return the reference.
Root class for all serialization exceptions.
class CStaticArrayMap<> provides access to a static array in much the same way as CStaticArraySet<>,...
TBase::const_iterator const_iterator
const_iterator find(const key_type &key) const
Return a const_iterator pointing to the specified element, or to the end if the element is not found.
const_iterator end() const
Return the end of the controlled sequence.
class CStaticArrayMap<> is an array adaptor that provides an STLish interface to statically-defined a...
TBase::const_iterator const_iterator
static bool IsISOFormatDate(const string &orig_date)
static bool NCBI_UseGeoLocNameForCountry(void)
static string FixTissueTypeCapitalization(const string &value)
static string FixLatLonPrecision(const string &orig)
static string x_RemoveIsoTime(const string &orig_date)
static string x_ParseDateRangeWithDelimiter(const string &orig_date, CTempString delim)
static string FixSexQualifierValue(const string &value)
static bool IsISOFormatTime(const string &orig_time, int &hour, int &min, int &sec, bool require_time_zone=true)
@ eDateFormatFlag_bad_format
@ eDateFormatFlag_in_future
@ eDateFormatFlag_out_of_order
static TSubtype GetSubtypeValue(const string &str, EVocabulary vocabulary=eVocabulary_raw)
static string GetCollectionDateProblem(const string &date_string)
static bool x_MeetsCommonChromosomeLinkageGroupPlasmidNameRules(const string &value, const string &taxname)
static bool IsValidSubtypeName(const string &str, EVocabulary vocabulary=eVocabulary_raw)
static string FixLatLonFormat(string orig_lat_lon, bool guess=false)
static bool IsPlasmidNameValid(const string &value, const string &taxname)
static bool x_IsFixableIsoDate(const string &orig_date)
static CRef< CDate > GetDateFromISODate(const string &orig_date)
static string FixIsolationSourceCapitalization(const string &value)
static bool HasCultureNotes(const string &value)
static bool IsValidSexQualifierValue(const string &value)
static string FixCellTypeCapitalization(const string &value)
static vector< string > x_GetDateTokens(const string &orig_date)
void GetLabel(string *str) const
static bool IsMultipleValuesAllowed(TSubtype)
bool IsDeprecated() const
@ eLatLonCountryErr_Value
@ eLatLonCountryErr_State
@ eLatLonCountryErr_Water
@ eLatLonCountryErr_Country
static CLatLonCountryId * x_CalculateLatLonId(float lat_value, float lon_value, string country, string province)
static bool IsISOFormatDateOnly(const string &date)
static bool IsDayValueOkForMonth(int day, int month, int year)
Determine whether day number could occur in month.
static bool IsAltitudeValid(const string &value)
static string ValidateLatLonCountry(const string &countryname, string &lat_lon, bool check_state, ELatLonCountryErr &errcode)
static string FixDateFormat(const string &orig_date)
Attempt to fix the format of the date. Returns a blank if the format of the date cannot be determined.
static string CheckCellLine(const string &cell_line, const string &organism)
static string MakeLatLon(double lat_value, double lon_value, int lat_precision=2, int lon_precision=2)
static bool IsCollectionDateAfterTime(const string &collection_date, time_t t, bool &bad_format)
static size_t CheckDateFormat(const string &date_string)
static string x_FormatWithPrecision(double val, int precision)
static string GetSubtypeName(CSubSource::TSubtype stype, EVocabulary vocabulary=eVocabulary_raw)
static int x_GetPrecision(const string &num_str)
static bool NeedsNoText(const TSubtype &subtype)
static bool IsEndogenousVirusNameValid(const string &value)
static bool IsChromosomeNameValid(const string &value, const string &taxname)
static bool x_GenericRepliconNameValid(const string &value)
static CLatLonWaterMap & x_GetLatLonWaterMap(void)
static void IsCorrectLatLonFormat(string lat_lon, bool &format_correct, bool &precision_correct, bool &lat_in_range, bool &lon_in_range, double &lat_value, double &lon_value)
static CRef< CDate > DateFromCollectionDate(const string &str) THROWS((CException))
static bool IsSegmentValid(const string &value)
static string FixDevStageCapitalization(const string &value)
static bool IsLinkageGroupNameValid(const string &value, const string &taxname)
static string FixAltitude(const string &value)
static bool IsDiscouraged(const TSubtype subtype)
static void RemoveCultureNotes(string &value, bool is_species_level=true)
static string FixLabHostCapitalization(const string &value)
static CLatLonCountryMap & x_GetLatLonCountryMap(void)
static void IsCorrectDateFormat(const string &date_string, bool &bad_format, bool &in_future)
static void DetectDateFormat(const string &orig_date, bool &ambiguous, bool &day_first)
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
container_type::iterator iterator
static void check_state(const char name[], prfunc print, int erc)
#define test(a, b, c, d, e)
static char line1[1024 *16]
static char line2[1024 *16]
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static SQLCHAR output[256]
static const char * str(char *buf, int n)
const CNcbiEnvironment & GetEnvironment(void) const
Get the application's cached environment.
const CNcbiRegistry & GetConfig(void) const
Get the application's cached configuration parameters (read-only).
constexpr size_t ArraySize(const Element(&)[Size])
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
#define ERR_POST_X(err_subcode, message)
Error posting with default error code and given error subcode.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
#define NCBI_CATCH(message)
Catch CExceptions as well. This macro is deprecated - use the *_X or *_XX variant instead.
void Warning(CExceptionArgs_Base &args)
#define ENUM_METHOD_NAME(EnumName)
static CRef< ILineReader > New(const string &filename)
Return a new ILineReader object corresponding to the given filename, taking "-" (but not "....
virtual string GetString(const string &section, const string &name, const string &default_value, TFlags flags=0) const
Get the parameter string value.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
NCBI_NS_STD::string::size_type SIZE_TYPE
static string DoubleToString(double value, int precision=-1, TNumToStringFlags flags=0)
Convert double to string.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static double StringToDouble(const CTempStringEx str, TStringToNumFlags flags=0)
Convert string to double.
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static EEncoding GuessEncoding(const CTempString &src)
Guess the encoding of the C/C++ string.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
static string ParseEscapes(const CTempString str, EEscSeqRange mode=eEscSeqRange_Standard, char user_char='?')
Parse C-style escape sequences in the specified string.
static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive equality of a substring with another string.
const char * data(void) const
Return a pointer to the array represented.
char32_t TUnicodeSymbol
Unicode character.
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
static int Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Compare of a substring with another string.
static CStringUTF8 AsUTF8(const CTempString &src, EEncoding encoding, EValidate validate=eNoValidate)
Convert into UTF8 from a C/C++ string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static void TrimSuffixInPlace(string &str, const CTempString suffix, ECase use_case=eCase)
Trim suffix from a string (in-place)
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
size_type length(void) const
Return the length of the represented array.
static string Sanitize(CTempString str, TSS_Flags flags=fSS_print)
Sanitize a string, allowing only specified classes of characters.
static TUnicodeSymbol Decode(const char *&src)
Convert sequence of UTF8 code units into Unicode code point.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static void TrimPrefixInPlace(string &str, const CTempString prefix, ECase use_case=eCase)
Trim prefix from a string (in-place)
ECase
Which type of string comparison.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
size_type find(const CTempString match, size_type pos=0) const
Find the first instance of the entire matching string within the current string, beginning at an opti...
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
static string & ToUpper(string &str)
Convert string to upper case -- string& version.
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
static string & ToLower(string &str)
Convert string to lower case -- string& version.
@ fConvErr_NoThrow
Do not throw an exception on error.
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
@ eReverseSearch
Search in a backward direction.
@ eTrunc_Both
Truncate whitespace at both begin and end of string.
@ eTrunc_Begin
Truncate leading whitespace only.
@ eNocase
Case insensitive compare.
int DaysInMonth(void) const
Get number of days in the month.
time_t GetTimeT(void) const
Get time in time_t format.
static int MonthNameToNum(const string &month)
Get numerical value of the month by name.
static string MonthNumToName(int month, ENameFormat format=eFull)
Get name of the month by numerical value.
@ eAbbr
Use abbreviated name.
const TAttrib & GetAttrib(void) const
Get the Attrib member data.
TSubtype GetSubtype(void) const
Get the Subtype member data.
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
void ResetName(void)
Reset Name data member.
TName & SetName(void)
Assign a value to Name data member.
bool CanGetSubtype(void) const
Check if it is safe to call GetSubtype method.
const TName & GetName(void) const
Get the Name member data.
bool IsSetAttrib(void) const
attribution/source of this name. Check if a value has been assigned to Attrib data member.
bool IsSetName(void) const
Check if a value has been assigned to Name data member.
@ eSubtype_collection_date
DD-MMM-YYYY format.
@ eSubtype_insertion_seq_name
@ eSubtype_transposon_name
@ eSubtype_fwd_primer_seq
sequence (possibly more than one; semicolon-separated)
@ eSubtype_lat_lon
+/- decimal degrees
@ eSubtype_rev_primer_name
@ eSubtype_collected_by
name of person who collected the sample
@ eSubtype_fwd_primer_name
@ eSubtype_rev_primer_seq
sequence (possibly more than one; semicolon-separated)
@ eSubtype_isolation_source
@ eSubtype_environmental_sample
@ eSubtype_identified_by
name of person who identified the sample
@ eSubtype_whole_replicon
void SetYear(TYear value)
Assign a value to Year data member.
void SetMonth(TMonth value)
Assign a value to Month data member.
TStd & SetStd(void)
Select the variant.
void SetDay(TDay value)
Assign a value to Day data member.
unsigned int
A callback function used to compare two keys in a database.
where both of them are integers Note
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is orig
std::true_type tagStrCase
std::false_type tagStrNocase
constexpr auto sort(_Init &&init)
const GenericPointer< typename T::ValueType > T2 value
Static variables safety - create on demand, destroy on application termination.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
Uint4 TFieldNo
Field number (zero based)
Generic utility macros and templates for exploring NCBI objects.
#define BEGIN_COMMA_END(container)
static SLJIT_INLINE sljit_ins lr(sljit_gpr dst, sljit_gpr src)
#define row(bind, expected)
Template structure SStaticPair is a simplified replacement of STL pair<>. Main reason of introducing this...
string g_FindDataFile(const CTempString &name, CDirEntry::EType type=CDirEntry::eFile)
Look for an NCBI application data file or directory of the given name and type; in general,...
static const char * type_name(CS_INT value)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4