Шаги преобразования: Document → Sentences → Tokens → POS → Lemmas
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet

# example text
text = 'What can I say about this place. The staff of these restaurants is nice and the eggplant is not bad'


class Splitter(object):
    """Split a document into sentences and tokenize each sentence."""

    def __init__(self):
        # Punkt model does sentence-boundary detection; the Treebank
        # tokenizer splits each sentence into word tokens.
        self.splitter = nltk.data.load('tokenizers/punkt/english.pickle')
        self.tokenizer = nltk.tokenize.TreebankWordTokenizer()

    def split(self, text):
        """Return one token list per sentence.

        out : ['What', 'can', 'I', 'say', 'about', 'this', 'place', '.']
        """
        # split into single sentences
        sentences = self.splitter.tokenize(text)
        # tokenization in each sentence
        return [self.tokenizer.tokenize(sent) for sent in sentences]


class LemmatizationWithPOSTagger(object):
    """POS-tag token lists and lemmatize each token using its POS tag."""

    def __init__(self, lemmatizer=None):
        # Own the lemmatizer instead of relying on a module-level global
        # `lemmatizer` being defined before pos_tag() is called (the global
        # was declared *after* this class — fragile coupling). The optional
        # parameter keeps the original no-argument constructor working.
        self.lemmatizer = lemmatizer if lemmatizer is not None else WordNetLemmatizer()

    def get_wordnet_pos(self, treebank_tag):
        """Return the WordNet POS constant (a, n, r, v) for a Treebank tag."""
        if treebank_tag.startswith('J'):
            return wordnet.ADJ
        elif treebank_tag.startswith('V'):
            return wordnet.VERB
        elif treebank_tag.startswith('N'):
            return wordnet.NOUN
        elif treebank_tag.startswith('R'):
            return wordnet.ADV
        else:
            # WordNet's lemmatizer defaults to noun, so mirror that here.
            return wordnet.NOUN

    def pos_tag(self, tokens):
        """POS-tag and lemmatize a list of token lists.

        Returns, per sentence, a list of
        (original word, lemmatized word, [POS tag]) triples.
        """
        # find the POS tagging for each token list:
        # [('What', 'WP'), ('can', 'MD'), ('I', 'PRP'), ...]
        pos_tokens = [nltk.pos_tag(token) for token in tokens]
        # lemmatization using the POS tag; convert into a feature set of
        # [('What', 'What', ['WP']), ('can', 'can', ['MD']), ...]
        # ie [original WORD, lemmatized word, POS tag]
        pos_tokens = [
            [(word,
              self.lemmatizer.lemmatize(word, self.get_wordnet_pos(tag)),
              [tag])
             for (word, tag) in pos]
            for pos in pos_tokens]
        return pos_tokens


lemmatizer = WordNetLemmatizer()
splitter = Splitter()
lemmatization_using_pos_tagger = LemmatizationWithPOSTagger()

# step 1: split document into sentences, then tokenize each sentence
tokens = splitter.split(text)
# step 2: lemmatization using the POS tagger
lemma_pos_token = lemmatization_using_pos_tagger.pos_tag(tokens)
print(lemma_pos_token)
Вы можете использовать @Query для сложных запросов — эта аннотация довольно проста в использовании.
С @Query ваш код будет выглядеть примерно так:
@Query(SELECT v FROM vwFact v WHERE v.applicationId = :applicationId and v.mdName01 = :mdName01)
Page<vwFact> findAllByApplicationIdAndMdName01StartingWithOrApplicationIdAndMdName02StartingWithOrApplicationIdAndMdName03StartingWithOrApplicationIdAndMdName04StartingWith (
@Param(value = "applicationId") String applicationId,
@Param(value = "mdName01") String mdName01,
@Param(value = "mdName02") String mdName02,
@Param(value = "mdName03") String mdName03,
@Param(value = "mdName04") String mdName04,
Pageable pageable
);