Coverage for gramex\apps\nlg\grammar.py : 80%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from inflect import engine
2from tornado.template import Template
4from gramex.apps.nlg.nlgutils import load_spacy_model, set_nlg_gramopt, get_lemmatizer
# Module-level inflect engine, shared by the pluralize/singularize helpers.
infl = engine()
def is_plural_noun(text):
    """Whether the given text ends in a plural noun.

    The text is parsed with spaCy and the last non-punctuation token is
    checked for a plural-noun POS tag (NNS / NNPS).

    Parameters
    ----------
    text : str
        Text to check.

    Returns
    -------
    bool
        True if the last non-punctuation token is a plural noun.
    """
    tokens = load_spacy_model()(text)
    for token in reversed(list(tokens)):
        if token.is_punct:
            continue
        return token.tag_ in ('NNS', 'NNPS')
    return False
def is_singular_noun(text):
    """Whether the given text is NOT a plural noun (see `is_plural_noun`).

    Parameters
    ----------
    text : str
        Text to check.

    Returns
    -------
    bool
        True if `text` does not end in a plural noun.
    """
    # A `def` instead of an assigned lambda (PEP 8 / E731); same callable
    # name and signature, so callers are unaffected.
    return not is_plural_noun(text)
@set_nlg_gramopt(source='G', fe_name="Concate Items")
def concatenate_items(items, sep=", "):
    """Concatenate a sequence of tokens into an English string.

    With the default separator the last two items are joined with " and ",
    producing natural phrasing such as "a, b and c".

    Parameters
    ----------
    items : list-like
        List / sequence of items to be printed.
    sep : str, optional
        Separator to use when generating the string.

    Returns
    -------
    str
    """
    # Stringify up front so every return path yields a str — previously a
    # single non-str item was returned unconverted.
    items = [str(item) for item in items]
    if not items:
        return ""
    if len(items) == 1:
        return items[0]
    if sep == ", ":
        # Join the last item with "and" instead of the separator.
        return sep.join(items[:-1]) + " and " + items[-1]
    return sep.join(items)
@set_nlg_gramopt(source='G', fe_name="Pluralize")
def plural(word):
    """Pluralize a word.

    Parameters
    ----------
    word : str
        Word to pluralize.

    Returns
    -------
    str
        Plural of `word`; `word` unchanged if it is already a plural noun.
    """
    return word if is_plural_noun(word) else infl.plural(word)
@set_nlg_gramopt(source='G', fe_name="Singularize")
def singular(word):
    """Singularize a word.

    Parameters
    ----------
    word : str
        Word to singularize.

    Returns
    -------
    str
        Singular of `word`; `word` unchanged if it is not a plural noun.
    """
    if not is_plural_noun(word):
        return word
    return infl.singular_noun(word)
# @set_nlg_gramopt(source='G', fe_name="Pluralize by")
def pluralize_by(word, by):
    """Pluralize a word depending on another argument.

    Parameters
    ----------
    word : str
        Word to pluralize.
    by : any
        Any object checked for a pluralish value. If a sequence, it must
        have length greater than 1 to qualify as plural; otherwise it is
        compared against 1 directly.

    Returns
    -------
    str
        Plural or singular of `word`.
    """
    # Sequences are "plural" when they hold more than one element;
    # scalars when they exceed 1.
    if hasattr(by, '__iter__'):
        pluralish = len(by) > 1
    else:
        pluralish = by > 1
    return plural(word) if pluralish else singular(word)
# @set_nlg_gramopt(source='G', fe_name="Pluralize like")
def pluralize_like(x, y):
    """Pluralize a word if another is a plural.

    Parameters
    ----------
    x : str
        The word to pluralize.
    y : str
        The word to check.

    Returns
    -------
    str
        Plural of `x` if `y` is plural, else singular.
    """
    return plural(x) if is_plural_noun(y) else singular(x)
@set_nlg_gramopt(source='str', fe_name="Capitalize")
def capitalize(word):
    """Capitalize `word` (first character upper, rest lower)."""
    return word.capitalize()
@set_nlg_gramopt(source='str', fe_name="Lowercase")
def lower(word):
    """Convert `word` to lowercase."""
    return word.lower()
@set_nlg_gramopt(source='str', fe_name="Swapcase")
def swapcase(word):
    """Swap the case of every character in `word`."""
    return word.swapcase()
@set_nlg_gramopt(source='str', fe_name="Title")
def title(word):
    """Convert `word` to title case."""
    return word.title()
@set_nlg_gramopt(source='str', fe_name="Uppercase")
def upper(word):
    """Convert `word` to uppercase."""
    return word.upper()
171# @set_nlg_gramopt(source="G", fe_name="Lemmatize")
172def lemmatize(word, target_pos):
173 return get_lemmatizer()(word, target_pos)
def _token_inflections(x, y):
    """
    If two words share the same root, find the lexical change required for
    turning one into the other.

    Parameters
    ----------
    x : spacy.tokens.Token
    y : spacy.tokens.Token

    Returns
    -------
    callable or bool
        One of this module's transformation functions (`capitalize`,
        `lower`, `swapcase`, `title`, `upper`, `singular`, `plural`) that
        maps `x` to `y`, or False if the tokens do not share a lemma or no
        transformation is found.

    Examples
    --------
    A token "language" maps to "Language" via `capitalize`, and to
    "languages" via `plural`.
    """
    # Tokens with different roots have no inflection relating them.
    if x.lemma_ != y.lemma_:
        return False
    # Equal lengths: try each pure string-casing transformation and
    # dispatch to the corresponding module-level function.
    if len(x.text) == len(y.text):
        for methname in ('capitalize', 'lower', 'swapcase', 'title', 'upper'):
            if getattr(x.text, methname)() == y.text:
                return globals()[methname]
    # Otherwise check if x and y are singulars or plurals of each other.
    if is_singular_noun(y.text):
        if singular(x.text).lower() == y.text.lower():
            return singular
    elif is_plural_noun(y.text):
        if plural(x.text).lower() == y.text.lower():
            return plural
    # Disable detecting inflections until they can be
    # processed without intervention.
    # if x.pos_ != y.pos_:
    #     return lemmatize
    return False
def find_inflections(text, search, fh_args, df):
    """
    Find lexical inflections between words in input text and the search
    results obtained from FormHandler arguments and dataframes.

    Parameters
    ----------
    text : str
        Input text.
    search : gramex.apps.nlg.search.DFSearchResults
        The DFSearchResults object corresponding to `text` and `df`.
    fh_args : dict
        FormHandler arguments.
    df : pandas.DataFrame
        The source dataframe.

    Returns
    -------
    dict
        With keys as tokens found in the dataframe or FH args, and values as
        list of inflections applied on them to make them closer match tokens
        in `text`.
    """
    nlp = load_spacy_model()
    doc = nlp(text)
    inflections = {}
    for token, tklist in search.items():
        # Pick the first enabled template registered for this token.
        tmpl = [t['tmpl'] for t in tklist if t.get('enabled', False)][0]
        rendered = Template('{{{{ {} }}}}'.format(tmpl)).generate(
            df=df, fh_args=fh_args).decode('utf8')
        # A template that already renders to the token needs no inflection.
        if rendered == token:
            continue
        source_tk = nlp(rendered)[0]
        target_tk = doc[[c.text for c in doc].index(token)]
        transform = _token_inflections(source_tk, target_tk)
        if transform:
            inflections[token] = [transform]
    return inflections