Merge pull request facebookresearch#104 from dfenglei/master
Continue fixing set membership checks.
ajfisch committed Mar 28, 2018
2 parents 8a3da1f + a9f3385 commit 36a6e3f
Showing 1 changed file with 4 additions and 4 deletions.
drqa/tokenizers/spacy_tokenizer.py (4 additions, 4 deletions)
@@ -25,19 +25,19 @@ def __init__(self, **kwargs):
         model = kwargs.get('model', 'en')
         self.annotators = copy.deepcopy(kwargs.get('annotators', set()))
         nlp_kwargs = {'parser': False}
-        if not {'lemma', 'pos', 'ner'} in self.annotators:
+        if not any([p in self.annotators for p in ['lemma', 'pos', 'ner']]):
             nlp_kwargs['tagger'] = False
-        if not {'ner'} in self.annotators:
+        if 'ner' not in self.annotators:
             nlp_kwargs['entity'] = False
         self.nlp = spacy.load(model, **nlp_kwargs)
 
     def tokenize(self, text):
         # We don't treat new lines as tokens.
         clean_text = text.replace('\n', ' ')
         tokens = self.nlp.tokenizer(clean_text)
-        if {'lemma', 'pos', 'ner'} in self.annotators:
+        if any([p in self.annotators for p in ['lemma', 'pos', 'ner']]):
             self.nlp.tagger(tokens)
-        if {'ner'} in self.annotators:
+        if 'ner' in self.annotators:
             self.nlp.entity(tokens)
 
         data = []
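For context on the bug being fixed, here is a minimal sketch with hypothetical values (not code from the repository). A check like {'ner'} in self.annotators asks whether the whole set {'ner'} is an element of annotators. CPython coerces the left-hand set to a frozenset for the lookup, and since annotators holds plain strings, the test is always False.

    # Sketch of the pitfall; `annotators` stands in for the set built in
    # SpacyTokenizer.__init__ from kwargs.get('annotators', set()).
    annotators = {'lemma', 'ner'}

    # Old check: membership of the set {'ner'}, not the string 'ner'.
    # CPython coerces {'ner'} to a frozenset for the lookup; annotators
    # contains strings, so this is always False.
    print({'ner'} in annotators)    # False, even though 'ner' is present

    # Patched checks: test string membership directly.
    print('ner' in annotators)                                      # True
    print(any([p in annotators for p in ['lemma', 'pos', 'ner']]))  # True

Because the old conditions were always False, __init__ unconditionally disabled the tagger and entity pipelines and tokenize() never ran them, silently dropping any requested 'lemma', 'pos', or 'ner' annotations; the patched conditions restore the intended behavior.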
