#!/usr/bin/env python

# -*- coding: utf-8 -*-

# Copyright 2015 moco_beta
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from janome.tokenizer import Tokenizer
from argparse import ArgumentParser
import sys

# True when the interpreter's major version is 3; main() consults this flag
# to decide how lines are read from standard input.
PY3 = sys.version_info[0] == 3

import sys, signal
def signal_handler(signal, frame):
    """Terminate the process with exit status 0.

    Intended to be installed as a signal handler; both arguments are
    accepted to satisfy the handler calling convention and are ignored.
    Note that the ``signal`` parameter shadows the ``signal`` module inside
    this function body.
    """
    raise SystemExit(0)
# Runs at import time: route SIGINT (Ctrl-C) to signal_handler so that an
# interrupt ends the program with exit status 0.
signal.signal(signal.SIGINT, signal_handler)


def main():
    """Tokenize standard input line by line and print one token per line.

    Command-line options:
      -e / --enc    encoding used to decode standard input (default utf8)
      --udic        path to a user dictionary file (default: empty string)
      --udic-enc    encoding of the user dictionary (default utf8)
      --udic-type   user dictionary type, passed through to Tokenizer

    Reading stops at end of input. The input encoding is honoured on both
    Python 2 and Python 3; on Python 3 the raw bytes of stdin are decoded
    explicitly, because input() would otherwise ignore ``--enc``.
    """
    parser = ArgumentParser()
    parser.add_argument("-e", "--enc", dest="enc", default="utf8", help="input encoding. default is utf8")
    parser.add_argument("--udic", dest="udic", default="", help="path to user dictionary file")
    parser.add_argument("--udic-enc", dest="udic_enc", default="utf8", help="user dictionary encoding. default is utf8")
    parser.add_argument("--udic-type", dest="udic_type", default="ipadic", help="user dictionary type, 'ipadic' or 'simpledic.' default is 'ipadic'")
    args = parser.parse_args()

    t = Tokenizer(udic=args.udic,
                  udic_enc=args.udic_enc,
                  udic_type=args.udic_type)

    # On Python 3, sys.stdin normally exposes the underlying byte stream as
    # ``buffer``; it may be missing when stdin was replaced by a text-only
    # object, in which case we fall back to plain input().
    stdin_buffer = getattr(sys.stdin, 'buffer', None) if PY3 else None
    if stdin_buffer is not None:
        import io
        decoded_stdin = io.TextIOWrapper(stdin_buffer, encoding=args.enc)

        def read_line():
            line = decoded_stdin.readline()
            if not line:
                # readline() returns '' at end of input; mirror input().
                raise EOFError
            # Drop the single line terminator, as input() does.
            return line[:-1] if line.endswith('\n') else line
    elif PY3:
        read_line = input
    else:
        def read_line():
            return raw_input().decode(args.enc)

    try:
        while True:
            # Only the read can signal end of input; keep the try narrow so
            # an EOFError raised while tokenizing is not silently swallowed.
            try:
                line = read_line()
            except EOFError:
                break
            for token in t.tokenize(line):
                print(token)
    finally:
        if stdin_buffer is not None:
            # Detach so that discarding the wrapper does not close the
            # process-wide stdin byte stream.
            decoded_stdin.detach()
        
# Run the command-line interface only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()
    
