Source code for finntk.omor.tok

"""
Functions for basic processing of OMorFi tokens.
"""


def get_token_positions(tokenised, text):
    """
    Returns the start positions of a series of tokens produced by
    Omorfi.tokenise(...)

    Each token is looked up by its ``"surf"`` entry. The search for a token
    begins where the previous token ended, so repeated surface forms are
    matched left to right and the returned offsets never decrease.

    :param tokenised: iterable of tokens, each indexable by ``"surf"``
    :param text: the string in which the tokens are located
    :return: list of start offsets into ``text``, one per token
    :raises ValueError: if a token's surface form does not occur in ``text``
        at or after the end of the previous token
    """
    starts = []
    cursor = 0
    for idx, token in enumerate(tokenised):
        surf = token["surf"]
        try:
            found = text.index(surf, cursor)
        except ValueError as err:
            # str.index only reports "substring not found"; say which token
            # failed and where the search began so misalignment is debuggable.
            raise ValueError(
                "token {} ({!r}) not found in text at or after offset {}".format(
                    idx, surf, cursor
                )
            ) from err
        starts.append(found)
        # Resume after this token so an identical later token is not
        # matched against the same span again.
        cursor = found + len(surf)
    return starts
def form_of_tok(token):
    """
    Returns the lower-cased surface form of a token.

    A plain string is lower-cased directly; any other token is indexed by
    ``"surf"`` and that value is lower-cased.
    """
    surface = token if isinstance(token, str) else token["surf"]
    return surface.lower()