hdx.utilities.text
Text processing utilities.
remove_end_characters
def remove_end_characters(string: str,
characters_to_remove: str = punctuation) -> str
Remove any characters at end of string that are in characters_to_remove.
Arguments:
string
str - Input string
characters_to_remove
str - Characters to remove. Defaults to punctuation.
Returns:
str
- String with any characters at end of string that are in characters_to_remove removed
remove_from_end
def remove_from_end(string: str,
things_to_remove: List[str],
logging_text: Optional[str] = None,
whole_words: bool = True) -> str
Remove list of items from end of string, stripping any whitespace.
Arguments:
string
str - Input string
things_to_remove
List[str] - Things to remove from the end of string
logging_text
Optional[str] - Text to log. Defaults to None.
whole_words
bool - Remove parts of or whole words. Defaults to True (whole words only).
Returns:
str
- String with text removed
remove_string
def remove_string(string: str,
toremove: str,
end_characters_to_remove: str = punctuation) -> str
Remove string from another string and delete any preceding end characters (by default punctuation, e.g. a comma) and any whitespace following the punctuation.
Arguments:
string
str - String to process
toremove
str - String to remove
end_characters_to_remove
str - Characters to remove. Defaults to punctuation.
Returns:
str
- String with other string removed
multiple_replace
def multiple_replace(string: str, replacements: Dict[str, str]) -> str
Simultaneously replace multiple strings in a string.
Arguments:
string
str - Input string
replacements
Dict[str,str] - Replacements dictionary
Returns:
str
- String with replacements
get_words_in_sentence
def get_words_in_sentence(sentence: str) -> List[str]
Returns list of words in a sentence.
Arguments:
sentence
str - Sentence
Returns:
List[str]
- List of words in sentence
get_matching_text_in_strs
def get_matching_text_in_strs(a: str,
b: str,
match_min_size: int = 30,
ignore: str = "",
end_characters: str = "") -> List[str]
Returns a list of matching blocks of text in a and b.
Arguments:
a
str - First string to match
b
str - Second string to match
match_min_size
int - Minimum block size to match on. Defaults to 30.
ignore
str - Any characters to ignore in matching. Defaults to ''.
end_characters
str - End characters to look for. Defaults to ''.
Returns:
List[str]
- List of matching blocks of text
get_matching_text
def get_matching_text(string_list: List[str],
match_min_size: int = 30,
ignore: str = "",
end_characters: str = ".!\r\n") -> str
Returns a string containing matching blocks of text in a list of strings followed by non-matching.
Arguments:
string_list
List[str] - List of strings to match
match_min_size
int - Minimum block size to match on. Defaults to 30.
ignore
str - Any characters to ignore in matching. Defaults to ''.
end_characters
str - End characters to look for. Defaults to '.!\r\n'.
Returns:
str
- String containing matching blocks of text followed by non-matching
get_matching_then_nonmatching_text
def get_matching_then_nonmatching_text(string_list: List[str],
separator: str = "",
match_min_size: int = 30,
ignore: str = "",
end_characters: str = ".!\r\n") -> str
Returns a string containing matching blocks of text in a list of strings followed by non-matching.
Arguments:
string_list
List[str] - List of strings to match
separator
str - Separator to add between blocks of text. Defaults to ''.
match_min_size
int - Minimum block size to match on. Defaults to 30.
ignore
str - Any characters to ignore in matching. Defaults to ''.
end_characters
str - End characters to look for. Defaults to '.!\r\n'.
Returns:
str
- String containing matching blocks of text followed by non-matching
number_format
def number_format(val: Any,
format: str = "%.4f",
trailing_zeros: bool = True) -> str
Format float-castable input as string.
Arguments:
val
float - Number to format
format
str - Format to use. Defaults to %.4f.
trailing_zeros
bool - Leave trailing zeros. Defaults to True.
Returns:
str
- Formatted number as string
get_fraction_str
def get_fraction_str(numerator: Any,
denominator: Optional[Any] = None,
format: str = "%.4f",
trailing_zeros: bool = True) -> str
Given float-castable numerator and optional float-castable denominator, format as string, returning '' for invalid numerator or 0 denominator.
Arguments:
numerator
float - Numerator
denominator
Optional[float] - Denominator. Defaults to None.
format
str - Format to use. Defaults to %.4f.
trailing_zeros
bool - Leave trailing zeros. Defaults to True.
Returns:
str
- Formatted number as string
only_allowed_in_str
def only_allowed_in_str(test_str: str, allowed_chars: Set) -> bool
Returns True if test string contains only allowed characters, False if not.
Arguments:
test_str
str - Test string
allowed_chars
Set - Set of allowed characters
Returns:
bool
- True if test string contains only allowed characters, False if not
get_numeric_if_possible
def get_numeric_if_possible(value: Any) -> Any
Return value if it is not a string, otherwise see if it can be cast to float or int, taking into account commas and periods.
Arguments:
value
Any - Value
Returns:
Any
- Value
earliest_index
def earliest_index(string_to_search: str,
strings_to_try: ListTuple[str]) -> Optional[int]
Search a string for each of a list of strings and return the earliest index.
Arguments:
string_to_search
str - String to search
strings_to_try
ListTuple[str] - Strings to try
Returns:
Optional[int]
- Earliest index of the strings to try in string to search or None
match_template_variables
def match_template_variables(
string: str) -> Tuple[Optional[str], Optional[str]]
Try to match {{XXX}} in input string.
Arguments:
string
str - String in which to look for template
Returns:
Tuple[Optional[str], Optional[str]]: (Matched string with brackets, matched string without brackets)