cabo_cha v1.0.0 CaboCha View Source

Elixir bindings for CaboCha, a Japanese dependency structure analyzer.

The parse function returns a list of maps. The meaning of each map’s keys is as follows.

  • chunk: 文節(Chunk) — A map that contains the following keys.

    • id: 文節id(Chunk id)
    • link: 係り先の文節id(Linked chunk id)
    • rel: 不明(Unknown)
    • head: 主辞の形態素id(Head morpheme id)
    • func: 機能語の形態素id(Function word morpheme id)
    • score: 係り関係のスコア(Relational score)
  • morphs: 文節の中の単語の形態素のリスト(List of morphemes in the chunk) — A list of maps, each of which contains the following keys.

    • id: 形態素id(Morpheme id)
    • surface: 表層形(Surface form)
    • pos: 品詞(Part of speech)
    • pos1: 品詞細分類1(Part of speech subcategory1)
    • pos2: 品詞細分類2(Part of speech subcategory2)
    • pos3: 品詞細分類3(Part of speech subcategory3)
    • conjugation_form: 活用型(Conjugation type, e.g. 五段・マ行)
    • conjugation: 活用形(Conjugation form, e.g. 連用形)
    • base: 基本形・原形(Lexical form)
    • yomi: 読み(Reading)
    • pronunciation: 発音(Pronunciation)

Link to this section Summary

Functions

Parses the given string and returns CaboCha’s result as a list

Link to this section Functions

Link to this function parse(str, read_from_file \\ false) View Source
parse(String.t(), boolean()) :: [Map.t(), ...]

Parses the given string and returns CaboCha’s result as a list.

If read_from_file is true, str is treated as a filename and the contents of that file are parsed.

Examples

iex> CaboCha.parse("太郎は花子が読んでいる本を次郎に渡した")
[
  %{
    "chunk" => %{
      "func" => 1,
      "head" => 0,
      "id" => 0,
      "link" => 5,
      "rel" => "D",
      "score" => -0.742128
    },
    "morphs" => [
      %{
        "base" => "太郎",
        "conjugation" => "",
        "conjugation_form" => "",
        "id" => 0,
        "pos" => "名詞",
        "pos1" => "固有名詞",
        "pos2" => "人名",
        "pos3" => "名",
        "pronunciation" => "タロー",
        "surface" => "太郎",
        "yomi" => "タロウ"
      },
      %{
        "base" => "は",
        "conjugation" => "",
        "conjugation_form" => "",
        "id" => 1,
        "pos" => "助詞",
        "pos1" => "係助詞",
        "pos2" => "",
        "pos3" => "",
        "pronunciation" => "ワ",
        "surface" => "は",
        "yomi" => "ハ"
      }
    ]
  },
  %{
    "chunk" => %{
      "func" => 3,
      "head" => 2,
      "id" => 1,
      "link" => 2,
      "rel" => "D",
      "score" => 1.700175
    },
    "morphs" => [
      %{
        "base" => "花子",
        "conjugation" => "",
        "conjugation_form" => "",
        "id" => 2,
        "pos" => "名詞",
        "pos1" => "固有名詞",
        "pos2" => "人名",
        "pos3" => "名",
        "pronunciation" => "ハナコ",
        "surface" => "花子",
        "yomi" => "ハナコ"
      },
      %{
        "base" => "が",
        "conjugation" => "",
        "conjugation_form" => "",
        "id" => 3,
        "pos" => "助詞",
        "pos1" => "格助詞",
        "pos2" => "一般",
        "pos3" => "",
        "pronunciation" => "ガ",
        "surface" => "が",
        "yomi" => "ガ"
      }
    ]
  },
  %{
    "chunk" => %{
      "func" => 6,
      "head" => 4,
      "id" => 2,
      "link" => 3,
      "rel" => "D",
      "score" => 1.825021
    },
    "morphs" => [
      %{
        "base" => "読む",
        "conjugation" => "連用タ接続",
        "conjugation_form" => "五段・マ行",
        "id" => 4,
        "pos" => "動詞",
        "pos1" => "自立",
        "pos2" => "",
        "pos3" => "",
        "pronunciation" => "ヨン",
        "surface" => "読ん",
        "yomi" => "ヨン"
      },
      %{
        "base" => "で",
        "conjugation" => "",
        "conjugation_form" => "",
        "id" => 5,
        "pos" => "助詞",
        "pos1" => "接続助詞",
        "pos2" => "",
        "pos3" => "",
        "pronunciation" => "デ",
        "surface" => "で",
        "yomi" => "デ"
      },
      %{
        "base" => "いる",
        "conjugation" => "基本形",
        "conjugation_form" => "一段",
        "id" => 6,
        "pos" => "動詞",
        "pos1" => "非自立",
        "pos2" => "",
        "pos3" => "",
        "pronunciation" => "イル",
        "surface" => "いる",
        "yomi" => "イル"
      }
    ]
  },
  %{
    "chunk" => %{
      "func" => 8,
      "head" => 7,
      "id" => 3,
      "link" => 5,
      "rel" => "D",
      "score" => -0.742128
    },
    "morphs" => [
      %{
        "base" => "本",
        "conjugation" => "",
        "conjugation_form" => "",
        "id" => 7,
        "pos" => "名詞",
        "pos1" => "一般",
        "pos2" => "",
        "pos3" => "",
        "pronunciation" => "ホン",
        "surface" => "本",
        "yomi" => "ホン"
      },
      %{
        "base" => "を",
        "conjugation" => "",
        "conjugation_form" => "",
        "id" => 8,
        "pos" => "助詞",
        "pos1" => "格助詞",
        "pos2" => "一般",
        "pos3" => "",
        "pronunciation" => "ヲ",
        "surface" => "を",
        "yomi" => "ヲ"
      }
    ]
  },
  %{
    "chunk" => %{
      "func" => 11,
      "head" => 10,
      "id" => 4,
      "link" => 5,
      "rel" => "D",
      "score" => -0.742128
    },
    "morphs" => [
      %{
        "base" => "次",
        "conjugation" => "",
        "conjugation_form" => "",
        "id" => 9,
        "pos" => "名詞",
        "pos1" => "一般",
        "pos2" => "",
        "pos3" => "",
        "pronunciation" => "ツギ",
        "surface" => "次",
        "yomi" => "ツギ"
      },
      %{
        "base" => "郎",
        "conjugation" => "",
        "conjugation_form" => "",
        "id" => 10,
        "pos" => "名詞",
        "pos1" => "一般",
        "pos2" => "",
        "pos3" => "",
        "pronunciation" => "ロー",
        "surface" => "郎",
        "yomi" => "ロウ"
      },
      %{
        "base" => "に",
        "conjugation" => "",
        "conjugation_form" => "",
        "id" => 11,
        "pos" => "助詞",
        "pos1" => "格助詞",
        "pos2" => "一般",
        "pos3" => "",
        "pronunciation" => "ニ",
        "surface" => "に",
        "yomi" => "ニ"
      }
    ]
  },
  %{
    "chunk" => %{
      "func" => 13,
      "head" => 12,
      "id" => 5,
      "link" => -1,
      "rel" => "D",
      "score" => 0.0
    },
    "morphs" => [
      %{
        "base" => "渡す",
        "conjugation" => "連用形",
        "conjugation_form" => "五段・サ行",
        "id" => 12,
        "pos" => "動詞",
        "pos1" => "自立",
        "pos2" => "",
        "pos3" => "",
        "pronunciation" => "ワタシ",
        "surface" => "渡し",
        "yomi" => "ワタシ"
      },
      %{
        "base" => "た",
        "conjugation" => "基本形",
        "conjugation_form" => "特殊・タ",
        "id" => 13,
        "pos" => "助動詞",
        "pos1" => "",
        "pos2" => "",
        "pos3" => "",
        "pronunciation" => "タ",
        "surface" => "た",
        "yomi" => "タ"
      }
    ]
  }
]