Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1from inflect import engine 

2from tornado.template import Template 

3 

4from gramex.apps.nlg.nlgutils import load_spacy_model, set_nlg_gramopt, get_lemmatizer 

5 

6infl = engine() 

7 

8 

def is_plural_noun(text):
    """Whether given text is a plural noun.

    The text is parsed with the model returned by `load_spacy_model`. The
    last token that is not punctuation decides the answer: its tag must be
    'NNS' or 'NNPS'. If every token is punctuation (or there are no tokens),
    the result is False.
    """
    tokens = list(load_spacy_model()(text))
    # Walk backwards so that trailing punctuation is skipped.
    last_word = next((tok for tok in reversed(tokens) if not tok.is_punct), None)
    if last_word is None:
        return False
    return last_word.tag_ in ('NNS', 'NNPS')

16 

17 

def is_singular_noun(x):
    """Whether given text is *not* a plural noun.

    This is the plain negation of `is_plural_noun`, so any text whose last
    non-punctuation token is not tagged as a plural noun counts as singular.

    Parameters
    ----------
    x : str
        Text to check.

    Returns
    -------
    bool
    """
    return not is_plural_noun(x)

19 

20 

@set_nlg_gramopt(source='G', fe_name="Concate Items")
def concatenate_items(items, sep=", "):
    """Concatenate a sequence of tokens into an English string.

    Parameters
    ----------

    items : list-like
        List / sequence of items to be printed. Each item is converted with
        `str`. Any iterable is accepted, including ones without a length.
    sep : str, optional
        Separator to use when generating the string. With the default ", "
        the last item is attached with " and " instead of the separator.

    Returns
    -------
    str
        Empty string for no items, the item itself (as a string) for a single
        item, otherwise the joined items.
    """
    # Stringify up front: this keeps the return type a str even for a single
    # non-string item, and avoids calling len() / [0] on the raw input.
    items = [str(item) for item in items]
    if not items:
        return ""
    if len(items) == 1:
        return items[0]
    if sep == ", ":
        return sep.join(items[:-1]) + " and " + items[-1]
    return sep.join(items)

48 

49 

@set_nlg_gramopt(source='G', fe_name="Pluralize")
def plural(word):
    """Pluralize a word.

    Parameters
    ----------

    word : str
        word to pluralize

    Returns
    -------
    str
        Plural of `word`. A word that `is_plural_noun` already reports as
        plural is returned untouched.
    """
    if is_plural_noun(word):
        return word
    return infl.plural(word)

68 

69 

@set_nlg_gramopt(source='G', fe_name="Singularize")
def singular(word):
    """
    Singularize a word.

    Parameters
    ----------
    word : str
        Word to singularize.

    Returns
    -------
    str
        Singular of `word`. A word that `is_plural_noun` does not report as
        plural is returned untouched.
    """
    if not is_plural_noun(word):
        return word
    # inflect's `singular_noun` returns False when it considers the word
    # already singular. The plural check above relies on spaCy tags and may
    # disagree with inflect, so fall back to the word itself rather than
    # leaking a bool to callers.
    singularized = infl.singular_noun(word)
    return singularized if singularized else word

89 

90 

91# @set_nlg_gramopt(source='G', fe_name="Pluralize by") 

def pluralize_by(word, by):
    """
    Pluralize a word depending on another argument.

    Parameters
    ----------
    word : str
        Word to pluralize
    by : any
        Any object checked for a pluralish value. If it has an `__iter__`
        attribute, its length is used and must be greater than 1 to qualify
        as plural; otherwise the object itself is compared against 1.

    Returns
    -------
    str
        Plural or singular of `word`.
    """
    # Reduce both kinds of input to a single quantity to compare against 1.
    quantity = len(by) if hasattr(by, '__iter__') else by
    inflect_word = plural if quantity > 1 else singular
    return inflect_word(word)

121 

122 

123# @set_nlg_gramopt(source='G', fe_name="Pluralize like") 

def pluralize_like(x, y):
    """
    Pluralize a word if another is a plural.

    Parameters
    ----------
    x : str
        The word to pluralize.
    y : str
        The word to check.

    Returns
    -------
    str
        Plural of `x` if `y` is plural, else singular.
    """
    return plural(x) if is_plural_noun(y) else singular(x)

144 

145 

@set_nlg_gramopt(source='str', fe_name="Capitalize")
def capitalize(word):
    """Return the result of calling `capitalize()` on `word`."""
    capitalized = word.capitalize()
    return capitalized

149 

150 

@set_nlg_gramopt(source='str', fe_name="Lowercase")
def lower(word):
    """Return the result of calling `lower()` on `word`."""
    lowered = word.lower()
    return lowered

154 

155 

@set_nlg_gramopt(source='str', fe_name="Swapcase")
def swapcase(word):
    """Return the result of calling `swapcase()` on `word`."""
    swapped = word.swapcase()
    return swapped

159 

160 

@set_nlg_gramopt(source='str', fe_name="Title")
def title(word):
    """Return the result of calling `title()` on `word`."""
    titled = word.title()
    return titled

164 

165 

@set_nlg_gramopt(source='str', fe_name="Uppercase")
def upper(word):
    """Return the result of calling `upper()` on `word`."""
    uppered = word.upper()
    return uppered

169 

170 

171# @set_nlg_gramopt(source="G", fe_name="Lemmatize") 

def lemmatize(word, target_pos):
    """Apply the callable returned by `get_lemmatizer` to `word` and `target_pos`."""
    lemmatizer = get_lemmatizer()
    return lemmatizer(word, target_pos)

174 

175 

176def _token_inflections(x, y): 

177 """ 

178 If two words share the same root, find lexical changes required for turning 

179 one into another. 

180 

181 Parameters 

182 ---------- 

183 x : spacy.token.Tokens 

184 y : spacy.token.Tokens 

185 

186 Examples 

187 -------- 

188 >>> _token_inflections('language', 'Language') 

189 'upper' 

190 >>> _token_inflections('language', 'languages') 

191 'plural' 

192 """ 

193 if x.lemma_ != y.lemma_: 193 ↛ 194line 193 didn't jump to line 194, because the condition on line 193 was never true

194 return False 

195 if len(x.text) == len(y.text): 

196 for methname in ['capitalize', 'lower', 'swapcase', 'title', 'upper']: 

197 func = lambda x: getattr(x, methname)() # NOQA: E731 

198 if func(x.text) == y.text: 198 ↛ 199line 198 didn't jump to line 199, because the condition on line 198 was never true

199 return globals()[methname] 

200 # check if x and y are singulars or plurals of each other. 

201 if is_singular_noun(y.text): 201 ↛ 204line 201 didn't jump to line 204, because the condition on line 201 was never false

202 if singular(x.text).lower() == y.text.lower(): 

203 return singular 

204 elif is_plural_noun(y.text): 

205 if plural(x.text).lower() == y.text.lower(): 

206 return plural 

207 # Disable detecting inflections until they can be 

208 # processed without intervention. 

209 # if x.pos_ != y.pos_: 

210 # return lemmatize 

211 return False 

212 

213 

def find_inflections(text, search, fh_args, df):
    """
    Find lexical inflections between words in input text and the search results
    obtained from FormHandler arguments and dataframes.

    Parameters
    ----------
    text : str
        Input text
    search : gramex.apps.nlg.search.DFSearchResults
        The DFSearchResults object corresponding to `text` and `df`
    fh_args : dict
        FormHandler arguments.
    df : pandas.DataFrame
        The source dataframe.

    Returns
    -------
    dict
        With keys as tokens found in the dataframe or FH args, and values as
        list of inflections applied on them to make them closer match tokens in `text`.
        Search tokens are skipped when they have no enabled template, when
        their template renders to no tokens, or when they do not appear as a
        single token of `text`.
    """
    nlp = load_spacy_model()
    doc = nlp(text)
    # Index the first occurrence of each token text once, instead of
    # rebuilding and scanning a list for every search result.
    first_token = {}
    for tok in doc:
        first_token.setdefault(tok.text, tok)

    inflections = {}
    for token, tklist in search.items():
        enabled = [t['tmpl'] for t in tklist if t.get('enabled', False)]
        if not enabled:
            # Nothing to render for this token.
            continue
        tmpl = enabled[0]
        rendered = Template('{{{{ {} }}}}'.format(tmpl)).generate(
            df=df, fh_args=fh_args).decode('utf8')
        if rendered == token:
            continue
        rendered_doc = nlp(rendered)
        y = first_token.get(token)
        if len(rendered_doc) == 0 or y is None:
            # No token to compare on one of the two sides.
            continue
        # Named `inflection` so the module-level `infl` engine is not shadowed.
        inflection = _token_inflections(rendered_doc[0], y)
        if inflection:
            inflections[token] = [inflection]
    return inflections