protot/src/string_utils.h

190 lines
5.8 KiB
C++

/*
* Puppeteer - A Motion Capture Mapping Tool
* Copyright (c) 2013-2015 Martin Felis <martin.felis@iwr.uni-heidelberg.de>.
* All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef _STRING_UTILS_H
#define _STRING_UTILS_H
#include <cctype>
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>
// Default set of characters treated as whitespace by the helpers in this
// file: space, tab, line feed and carriage return.
const std::string whitespaces_std (" \t\n\r");
// Characters that sanitize_name() refuses to accept inside a name.
const std::string invalid_name_characters = "{}[],;: \r\n\t#";
/** Removes a trailing '#' comment from a line.
 *
 * \param line The text to process.
 * \return Everything in front of the first '#' character, or the whole
 *   line when it does not contain a '#'.
 */
inline std::string strip_comments (const std::string &line) {
	const std::string::size_type comment_start = line.find ('#');
	if (comment_start == std::string::npos) {
		// no comment marker: nothing to cut off
		return line;
	}
	return std::string (line, 0, comment_start);
}
/** Removes leading and trailing whitespace characters from a string.
 *
 * \param line The text to strip.
 * \param whitespaces The set of characters that count as whitespace.
 * \return A copy of \p line without leading and trailing characters
 *   from \p whitespaces. An empty string is returned when \p line is
 *   empty or consists of whitespace characters only.
 */
inline std::string strip_whitespaces (const std::string &line, std::string whitespaces = whitespaces_std) {
	const std::string::size_type first = line.find_first_not_of (whitespaces);
	if (first == std::string::npos) {
		// Empty or whitespace-only input: there is no character to keep.
		// Calling substr() with npos would throw std::out_of_range and
		// indexing the last character of an empty string is out of bounds.
		return std::string();
	}

	// first != npos guarantees that a last non-whitespace character exists
	const std::string::size_type last = line.find_last_not_of (whitespaces);
	return line.substr (first, last - first + 1);
}
/** Converts every character of a string to lower case.
 *
 * The conversion is done character by character using std::tolower()
 * from <cctype>.
 *
 * \param line The text to convert.
 * \return A lower-case copy of \p line.
 */
inline std::string tolower (const std::string &line) {
	std::string result (line);
	for (std::string::size_type i = 0; i < result.size(); ++i) {
		// std::tolower() requires a value representable as unsigned char;
		// passing a negative plain char is undefined behaviour.
		result[i] = static_cast<char> (std::tolower (static_cast<unsigned char> (result[i])));
	}
	return result;
}
inline std::string trim_line (const std::string &line) {
return tolower (strip_whitespaces (strip_comments (line)));
}
/** Splits a line at every occurrence of a delimiter character.
 *
 * Every single delimiter character ends a token, so two adjacent
 * delimiters (or a delimiter at the very start) produce an empty token.
 * Text after the last delimiter is only added when it is non-empty.
 *
 * \param line_in The text to split.
 * \param delimiter The set of characters that separate tokens.
 * \return The tokens in order of appearance.
 */
inline std::vector<std::string> tokenize (const std::string &line_in, std::string delimiter=whitespaces_std) {
	std::vector<std::string> tokens;
	std::string::size_type token_start = 0;

	for (;;) {
		const std::string::size_type delim_pos = line_in.find_first_of (delimiter, token_start);
		if (delim_pos == std::string::npos)
			break;
		tokens.push_back (line_in.substr (token_start, delim_pos - token_start));
		token_start = delim_pos + 1;
	}

	// remainder behind the last delimiter
	if (token_start < line_in.size())
		tokens.push_back (line_in.substr (token_start));

	return tokens;
}
/** Splits a line at delimiter characters and strips each token.
 *
 * The line is cut at every single character contained in \p delimiter;
 * each piece is then passed through strip_whitespaces(). Text after the
 * last delimiter is only added when it is non-empty.
 *
 * \param line_in The text to split.
 * \param delimiter The set of characters that separate tokens.
 * \param whitespaces The characters stripped from both ends of a token.
 * \return The stripped tokens in order of appearance.
 */
inline std::vector<std::string> tokenize_strip_whitespaces (const std::string &line_in, std::string delimiter=",\t\r\n", std::string whitespaces=whitespaces_std) {
	std::vector<std::string> fields;
	std::string::size_type field_begin = 0;
	std::string::size_type field_end = line_in.find_first_of (delimiter, field_begin);

	while (field_end != std::string::npos) {
		const std::string raw_field = line_in.substr (field_begin, field_end - field_begin);
		fields.push_back (strip_whitespaces (raw_field, whitespaces));
		field_begin = field_end + 1;
		field_end = line_in.find_first_of (delimiter, field_begin);
	}

	// remainder behind the last delimiter
	if (field_begin < line_in.size())
		fields.push_back (strip_whitespaces (line_in.substr (field_begin), whitespaces));

	return fields;
}
/** Splits a comma separated line into whitespace-stripped tokens.
 *
 * All characters from \p whitespaces are first replaced by a regular
 * space. The line is then cut at every occurrence of the two character
 * sequence ", " (comma followed by space). Each token is stripped with
 * strip_whitespaces() and a single trailing ',' is removed from it.
 * Fields that are empty or contain only whitespace yield an empty token.
 *
 * \param line_in The text to split.
 * \param whitespaces The characters treated as whitespace.
 * \return The tokens in order of appearance; empty for an empty line.
 */
inline std::vector<std::string> tokenize_csv_strip_whitespaces (const std::string &line_in, const std::string whitespaces=whitespaces_std) {
	std::vector<std::string> result;
	std::string line = line_in;

	// first replace all whitespaces by regular spaces to simplify things
	for (std::string::size_type pos = line.find_first_of (whitespaces);
			pos != std::string::npos;
			pos = line.find_first_of (whitespaces, pos + 1)) {
		line[pos] = ' ';
	}

	while (line.size() > 0) {
		std::string::size_type separator_pos = line.find (", ");
		if (separator_pos == std::string::npos) {
			// no further separator: the rest of the line is the last token
			separator_pos = line.size();
		}

		std::string token = line.substr (0, separator_pos);
		if (token.find_first_not_of (whitespaces) == std::string::npos) {
			// Empty or whitespace-only field: it strips to nothing, so
			// produce the empty token directly.
			token.clear();
		} else {
			token = strip_whitespaces (token, whitespaces);
		}

		// drop a dangling comma; never index into an empty token
		if (!token.empty() && token[token.size() - 1] == ',')
			token.erase (token.size() - 1);

		result.push_back (token);

		// stop when nothing is left behind the two character separator
		if (line.size() <= separator_pos + 2)
			break;
		line = line.substr (separator_pos + 2, line.size());
	}

	return result;
}
/** Counts how often any character of a given set occurs in a string.
 *
 * \param line_in The hay-stack to search in.
 * \param characters The needles; every character is matched on its own.
 * \return The number of positions in the hay-stack that hold one of
 *   the needles.
 */
inline int count_char (const std::string &line_in, const std::string characters) {
	int occurrences = 0;
	for (std::string::size_type pos = line_in.find_first_of (characters);
			pos != std::string::npos;
			pos = line_in.find_first_of (characters, pos + 1)) {
		++occurrences;
	}
	return occurrences;
}
/** Checks whether a string consists of decimal digits only.
 *
 * Signs, decimal points and whitespace are not digits, so strings such
 * as "-1" or "1.5" are rejected.
 *
 * \param line The text to check.
 * \return true when every character is a digit according to
 *   std::isdigit(); this includes the empty string. false otherwise.
 */
inline bool is_numeric (const std::string &line) {
	for (std::string::size_type i = 0; i < line.size(); ++i) {
		// std::isdigit() requires a value representable as unsigned char;
		// passing a negative plain char is undefined behaviour.
		if (!std::isdigit (static_cast<unsigned char> (line[i])))
			return false;
	}
	return true;
}
/** Validates a name and makes purely numeric names non-numeric.
 *
 * A name for which is_numeric() returns true triggers a warning on
 * std::cerr and is returned with a leading underscore. A name that
 * contains a character from invalid_name_characters is reported on
 * std::cerr and the program is terminated with abort().
 *
 * \param name The name to check.
 * \return \p name itself, or "_" followed by \p name when it is
 *   numeric only.
 */
inline std::string sanitize_name (const std::string &name) {
	const bool numeric_only = is_numeric (name);
	if (numeric_only) {
		std::cerr << "Warning invalid name '" << name << "': name should not be numeric only!" << std::endl;
	}

	// check for invalid characters
	const std::string::size_type invalid_pos = name.find_first_of (invalid_name_characters);
	if (invalid_pos != std::string::npos) {
		std::cerr << "Error: Found invalid character '"
			<< name[invalid_pos]
			<< "' in name '" << name << "'!" << std::endl;
		abort();
	}

	if (numeric_only) {
		return std::string("_") + name;
	}
	return name;
}
#endif