/* cabocha.c 2013/10/17
   Copyright (c) SOFNEC Co., Ltd.
 */
#include <stdio.h>
#include <string.h>
#include <malloc.h>
#include <cabocha.h>
#include "aze_api.h"

/* Version of this extension; formatted into the atom that cabocha_version/1
   returns (see initiate_cabocha). */
#define AZCABOCHA_VERSION   "0.6"

/* Capacity of the argument-string array handed to cabocha_new(). */
#define AZCABOCHA_NEW_MAX_ARG_NUM          30
/* NOTE(review): not referenced in the visible part of this file. */
#define AZCABOCHA_MAX_PARSE_STRING_LEN   4096

/* Upper bounds on how many feature strings of a token / chunk are inspected
   when building the Prolog feature list. */
#define MAX_TOKEN_FEATURE_NUM    250
#define MAX_CHUNK_FEATURE_NUM    250


/* True when s is a null pointer or points at an empty C string. */
#define IS_EMPTY(s)     (!(s) || *(s) == '\0')

/* Appends the C string (s), converted to an atom, to the list under
   construction.  Relies on `Env`, `head`, `car`, `a`, `r` and an `err:` label
   in the expanding function; jumps to `err` when the new cell cannot be
   obtained or the unification fails.
   Wrapped in do { } while (0) so that it expands to exactly one statement and
   stays correct under an unbraced if/else. */
#define ADD_STRING_ATOM_TO_LIST(s) \
  do { \
    head = aze_make_list_add_item(Env, head, &car); \
    if (head == NULL) goto err; \
    a = aze_cstring_to_atom(Env, (s)); \
    r = aze_unify_atom(Env, a, car); \
    if (r != 0) goto err; \
  } while (0)

/* Appends (i), converted to int, to the list under construction.  Relies on
   `Env`, `head`, `car`, `r` and an `err:` label in the expanding function;
   jumps to `err` when the new cell cannot be obtained or the unification
   fails.
   Wrapped in do { } while (0) so that it expands to exactly one statement. */
#define ADD_INT_TO_LIST(i) \
  do { \
    head = aze_make_list_add_item(Env, head, &car); \
    if (head == NULL) goto err; \
    r = aze_unify_int(Env, (int )(i), car); \
    if (r != 0) goto err; \
  } while (0)

/* Appends the floating-point value (d) to the list under construction.
   Relies on `Env`, `head`, `car`, `r` and an `err:` label in the expanding
   function; jumps to `err` when the new cell cannot be obtained or the
   unification fails.
   Wrapped in do { } while (0) so that it expands to exactly one statement. */
#define ADD_DOUBLE_TO_LIST(d) \
  do { \
    head = aze_make_list_add_item(Env, head, &car); \
    if (head == NULL) goto err; \
    r = aze_unify_double(Env, (d), car); \
    if (r != 0) goto err; \
  } while (0)



/* Atom for the extension's version string.  Assigned once in
   initiate_cabocha() and unified with the argument of cabocha_version/1. */
static BASEINT CABOCHA_VERSION_ATOM;

/* cabocha_new(-CABOCHA, +ARGS)

   Converts the atom list ARGS into an array of C strings, creates a CaboCha
   parser from it with cabocha_new(), and unifies CABOCHA with the parser's
   address as an integer handle.

   Fails when the parser cannot be created or when the handle cannot be
   unified with CABOCHA; in the latter case the freshly created parser is
   destroyed so that it does not leak.
 */
static AZE_C_PRED P2_cabocha_new(AZE_ENV Env)
{
  char *buf[AZCABOCHA_NEW_MAX_ARG_NUM];
  int i, n;
  AZE_TERM arg_cabocha, arg_args;
  cabocha_t *cabo;

  AZE_PRED_BEGIN(Env);

  arg_cabocha = AZE_ARG(2, 0);
  arg_args    = AZE_ARG(2, 1);

  n = aze_atom_list_to_cstring_array(Env, arg_args
                                     , buf, AZCABOCHA_NEW_MAX_ARG_NUM);

  cabo = cabocha_new(n, buf);

  /* The argument strings are released right after the parser is built;
     free(NULL) is a no-op, so no per-entry guard is needed. */
  for (i = 0; i < n; i++) {
    free(buf[i]);
  }

  if (cabo == (cabocha_t* )NULL) AZE_PRED_FAIL;

  if (aze_unify_int(Env, (AZE_INT )cabo, arg_cabocha) != 0) {
    /* The handle never reached the caller, so nobody else could destroy it. */
    cabocha_destroy(cabo);
    AZE_PRED_FAIL;
  }

  AZE_PRED_DET_SUCC;
}

/* cabocha_destroy(+CABOCHA)
   Destroys the parser whose integer handle is CABOCHA.  Fails without doing
   anything when the handle decodes to a null pointer. */
static AZE_C_PRED P1_cabocha_destroy(AZE_ENV Env)
{
  cabocha_t *parser;

  AZE_PRED_BEGIN(Env);

  parser = (cabocha_t* )aze_term_value_int(Env, AZE_ARG(1, 0));
  if (!parser) AZE_PRED_FAIL;

  cabocha_destroy(parser);
  AZE_PRED_DET_SUCC;
}

#if 0
/* cabocha_tree_destroy(+TREE) -- compiled out and not registered.
   NOTE(review): presumably disabled because the trees handed out by
   cabocha_sparse_totree are not owned by the caller -- confirm before
   re-enabling. */
static AZE_C_PRED P1_cabocha_tree_destroy(AZE_ENV Env)
{
  cabocha_tree_t *parsed;

  AZE_PRED_BEGIN(Env);

  parsed = (cabocha_tree_t* )aze_term_value_int(Env, AZE_ARG(1, 0));
  if (!parsed) AZE_PRED_FAIL;

  cabocha_tree_destroy(parsed);
  AZE_PRED_DET_SUCC;
}
#endif

/* cabocha_strerror(+CABOCHA, -ERROR)
   Unifies ERROR with an atom made from the message cabocha_strerror() reports
   for the parser handle CABOCHA.  Fails when no message is available or the
   unification fails.  The handle is passed through without a null check. */
static AZE_C_PRED P2_cabocha_strerror(AZE_ENV Env)
{
  AZE_TERM handle_term, message_term;
  cabocha_t *parser;
  const char *message;

  AZE_PRED_BEGIN(Env);

  handle_term  = AZE_ARG(2, 0);
  message_term = AZE_ARG(2, 1);

  parser  = (cabocha_t* )aze_term_value_int(Env, handle_term);
  message = cabocha_strerror(parser);
  if (!message) AZE_PRED_FAIL;

  if (aze_unify_atom(Env, aze_cstring_to_atom(Env, message), message_term) != 0)
    AZE_PRED_FAIL;

  AZE_PRED_DET_SUCC;
}

/* cabocha_sparse_tostr(+CABOCHA, +SENTENCE, -RESULT)
   Copies SENTENCE into a temporary C buffer, parses it with
   cabocha_sparse_tostr2(), and unifies RESULT with the list built from the
   returned text.  Fails when SENTENCE has non-positive length, when the
   buffer cannot be allocated, when the parser returns no text, or when the
   result cannot be built or unified. */
static AZE_C_PRED P3_cabocha_sparse_tostr(AZE_ENV Env)
{
  AZE_TERM parser_term, sentence_term, out_term, text_list;
  cabocha_t *parser;
  const char *parsed;
  char *sentence;
  int nbytes;

  AZE_PRED_BEGIN(Env);

  parser_term   = AZE_ARG(3, 0);
  sentence_term = AZE_ARG(3, 1);
  out_term      = AZE_ARG(3, 2);

  parser = (cabocha_t* )aze_term_value_int(Env, parser_term);

  nbytes = az_term_to_cstring_length(Env, sentence_term);
  if (nbytes < 1) AZE_PRED_FAIL;

  /* One extra byte for the terminating NUL. */
  sentence = (char* )malloc((size_t )(nbytes + 1));
  if (!sentence) AZE_PRED_FAIL;
  az_term_to_cstring(Env, sentence_term, sentence, nbytes + 1);

  parsed = cabocha_sparse_tostr2(parser, sentence, (unsigned int )nbytes);
  free(sentence);
  if (!parsed) AZE_PRED_FAIL;

  text_list = aze_cstring_to_list(Env, parsed);
  if (text_list == AZE_NULL_TERM) AZE_PRED_FAIL;
  if (aze_unify_term(Env, text_list, out_term) != 0) AZE_PRED_FAIL;

  AZE_PRED_DET_SUCC;
}

/* cabocha_sparse_totree(+CABOCHA, +SENTENCE, -TREE)
   Copies SENTENCE into a temporary C buffer, parses it with
   cabocha_sparse_totree2(), and unifies TREE with the returned tree's address
   as an integer handle.  Fails when SENTENCE has non-positive length, when
   the buffer cannot be allocated, when no tree is returned, or when the
   unification fails. */
static AZE_C_PRED P3_cabocha_sparse_totree(AZE_ENV Env)
{
  AZE_TERM parser_term, sentence_term, tree_term;
  cabocha_t *parser;
  const cabocha_tree_t *parsed;
  char *sentence;
  int nbytes;

  AZE_PRED_BEGIN(Env);

  parser_term   = AZE_ARG(3, 0);
  sentence_term = AZE_ARG(3, 1);
  tree_term     = AZE_ARG(3, 2);

  parser = (cabocha_t* )aze_term_value_int(Env, parser_term);

  nbytes = az_term_to_cstring_length(Env, sentence_term);
  if (nbytes < 1) AZE_PRED_FAIL;

  /* One extra byte for the terminating NUL. */
  sentence = (char* )malloc((size_t )(nbytes + 1));
  if (!sentence) AZE_PRED_FAIL;
  az_term_to_cstring(Env, sentence_term, sentence, nbytes + 1);

  parsed = cabocha_sparse_totree2(parser, sentence, (size_t )nbytes);
  free(sentence);
  if (!parsed) AZE_PRED_FAIL;

  if (aze_unify_int(Env, (AZE_INT )parsed, tree_term) != 0)
    AZE_PRED_FAIL;

  AZE_PRED_DET_SUCC;
}

/* cabocha_tree_size(+TREE, -SIZE)
   Unifies SIZE with the value of cabocha_tree_size() for the tree handle
   TREE, converted to int.  Fails when the unification fails. */
static AZE_C_PRED P2_cabocha_tree_size(AZE_ENV Env)
{
  AZE_TERM tree_term, count_term;
  cabocha_tree_t *parsed;
  int count;

  AZE_PRED_BEGIN(Env);

  tree_term  = AZE_ARG(2, 0);
  count_term = AZE_ARG(2, 1);

  parsed = (cabocha_tree_t* )aze_term_value_int(Env, tree_term);
  count  = (int )cabocha_tree_size(parsed);

  if (aze_unify_int(Env, count, count_term) != 0)
    AZE_PRED_FAIL;

  AZE_PRED_DET_SUCC;
}

/* Unifies unify_term with a Prolog list of atoms made from the token's
   non-empty feature strings.  At most MAX_TOKEN_FEATURE_NUM entries of
   tok->feature_list are examined; NULL or empty entries are skipped.
   Returns the status of aze_make_prolog_list_from_atom_array()
   (0 means success). */
static int make_feature_list_from_token(AZE_ENV Env, const cabocha_token_t *tok
                                        , AZE_TERM unify_term)
{
  AZE_ATOM atoms[MAX_TOKEN_FEATURE_NUM];
  int count = 0;

  if (tok->feature_list != NULL) {
    const int limit = (int )tok->feature_list_size;
    int idx;

    for (idx = 0; idx < MAX_TOKEN_FEATURE_NUM && idx < limit; idx++) {
      const char *feat = tok->feature_list[idx];

      if (!IS_EMPTY(feat)) {
        atoms[count] = aze_cstring_to_atom(Env, feat);
        count++;
      }
    }
  }

  return aze_make_prolog_list_from_atom_array(Env, unify_term, atoms, count);
}

/* Unifies unify_term with the Prolog list describing one token:
     [Surface, NormalizedSurface, FeatureList, NE, AdditionalInfo]
   NE and AdditionalInfo are replaced by the atom for "*" when the
   corresponding string is NULL or empty.
   Returns 0 on success and -1 when any list cell cannot be built. */
static int make_list_from_token(AZE_ENV Env, const cabocha_token_t *tok
                                , AZE_TERM unify_term)
{
  /* head, car, a and r are used by the ADD_*_TO_LIST macros. */
  AZE_TERM head, car;
  AZE_ATOM a;
  int r;

  head = aze_make_list_init(Env, unify_term);

  ADD_STRING_ATOM_TO_LIST(tok->surface);
  ADD_STRING_ATOM_TO_LIST(tok->normalized_surface);

  /* tok->feature is deliberately not emitted: its contents are the same as
     feature_list, which is added as a nested list just below. */
  head = aze_make_list_add_item(Env, head, &car);
  if (head == NULL) goto err;
  if (make_feature_list_from_token(Env, tok, car) != 0) goto err;

  ADD_STRING_ATOM_TO_LIST(IS_EMPTY(tok->ne) ? "*" : tok->ne);
  ADD_STRING_ATOM_TO_LIST(IS_EMPTY(tok->additional_info)
                          ? "*" : tok->additional_info);

  aze_make_list_end(Env, head);
  return 0;

 err:
  aze_make_list_error_end(Env);
  return -1;
}

/* cabocha_tree_token(+TREE, +I, -TOKEN)

   Unifies TOKEN with the list representation (see make_list_from_token) of
   the I-th token of the tree handle TREE.

   Fails when TREE decodes to a null pointer, when I lies outside
   0 .. cabocha_tree_size(TREE) - 1, when no token is returned, or when the
   list cannot be built.
 */
static AZE_C_PRED P3_cabocha_tree_token(AZE_ENV Env)
{
  AZE_TERM arg_tree, arg_id, arg_token;
  cabocha_tree_t *tree;
  const cabocha_token_t *token;
  int r, id;

  AZE_PRED_BEGIN(Env);

  arg_tree  = AZE_ARG(3, 0);
  arg_id    = AZE_ARG(3, 1);
  arg_token = AZE_ARG(3, 2);

  tree = (cabocha_tree_t* )aze_term_value_int(Env, arg_tree);
  if (tree == (cabocha_tree_t* )NULL)
    AZE_PRED_FAIL;

  /* The index comes straight from Prolog, and cabocha_tree_token() is not
     relied upon to reject a bad index, so range-check it here. */
  id = (int )aze_term_value_int(Env, arg_id);
  if (id < 0 || id >= (int )cabocha_tree_size(tree))
    AZE_PRED_FAIL;

  token = cabocha_tree_token(tree, id);
  if (token == (cabocha_token_t* )NULL)
    AZE_PRED_FAIL;

  r = make_list_from_token(Env, token, arg_token);
  if (r != 0)  AZE_PRED_FAIL;

  AZE_PRED_DET_SUCC;
}

/* Unifies unify_term with a Prolog list of atoms made from the chunk's
   non-empty feature strings.  At most MAX_CHUNK_FEATURE_NUM entries of
   chunk->feature_list are examined; NULL or empty entries are skipped.
   Returns the status of aze_make_prolog_list_from_atom_array()
   (0 means success). */
static int make_chunk_feature_list(AZE_ENV Env, cabocha_chunk_t *chunk
                                   , AZE_TERM unify_term)
{
  /* Sized with the same constant that bounds the loop below, so the two
     cannot drift apart if the token and chunk limits ever differ. */
  AZE_ATOM fa[MAX_CHUNK_FEATURE_NUM];
  int i, fn;

  fn = 0;
  if (chunk->feature_list != (const char** )NULL) {
    for (i = 0; i < MAX_CHUNK_FEATURE_NUM && i < (int )chunk->feature_list_size; i++) {
      const char* s = chunk->feature_list[i];
      if (IS_EMPTY(s)) continue;

      fa[fn] = aze_cstring_to_atom(Env, s);
      fn++;
    }
  }

  return aze_make_prolog_list_from_atom_array(Env, unify_term, fa, fn);
}

/* Unifies unify_term with the Prolog list describing one chunk of tree.
   The chunk data is read from the token at index start; tokens start..end
   (inclusive) are appended as its members:

     [Link, HeadPos, FuncPos, TokenSize, TokenPos, Score, FeatureList,
      AdditionalInfo, Token_start, ..., Token_end]

   AdditionalInfo is the atom for "*" when the chunk has none; each Token_i
   has the form produced by make_list_from_token().

   Returns  0 on success,
           -1 when a list cell cannot be built or a member token is missing,
           -2 when there is no token at index start,
           -3 when the token at index start carries no chunk. */
static int make_chunk_from_tree(AZE_ENV Env, cabocha_tree_t *tree
                                , int start, int end, AZE_TERM unify_term)
{
  /* head, car, a and r are used by the ADD_*_TO_LIST macros. */
  AZE_TERM head, car;
  AZE_ATOM a;
  const cabocha_token_t *token;
  cabocha_chunk_t *chunk;
  int i, r;
  const char* s;

  token = cabocha_tree_token(tree, start);
  if (token == (cabocha_token_t* )NULL) return -2;

  chunk = token->chunk;
  if (chunk == (cabocha_chunk_t* )NULL) return -3;

  head = aze_make_list_init(Env, unify_term);

  /* 1: chunk property part */
  ADD_INT_TO_LIST(chunk->link);
  ADD_INT_TO_LIST(chunk->head_pos);
  ADD_INT_TO_LIST(chunk->func_pos);
  ADD_INT_TO_LIST(chunk->token_size);
  ADD_INT_TO_LIST(chunk->token_pos);
  ADD_DOUBLE_TO_LIST(chunk->score);

  head = aze_make_list_add_item(Env, head, &car);
  if (head == NULL) goto err;
  r = make_chunk_feature_list(Env, chunk, car);
  if (r != 0)  goto err;

  s = IS_EMPTY(chunk->additional_info) ? "*" : chunk->additional_info;
  ADD_STRING_ATOM_TO_LIST(s);


  /* 2: morph list part */
  for (i = start; i <= end; i++) {
    token = cabocha_tree_token(tree, i);
    /* make_list_from_token() dereferences its token unconditionally. */
    if (token == (cabocha_token_t* )NULL) goto err;

    head = aze_make_list_add_item(Env, head, &car);
    if (head == NULL) goto err;
    r = make_list_from_token(Env, token, car);
    if (r != 0)  goto err;
  }

  aze_make_list_end(Env, head);
  return 0;

 err:
  aze_make_list_error_end(Env);
  return -1;
}

/* cabocha_tree_get_chunk_list(+TREE, -LIST)

   Unifies LIST with a list holding one entry per chunk of the tree handle
   TREE; each entry has the form produced by make_chunk_from_tree().

   The tokens are scanned in order.  A token whose chunk member is set starts
   a new chunk, which extends up to the token before the next such token (or
   to the last token of the tree).

   Fails when TREE decodes to a null pointer, when the tree has no tokens,
   when no token carries a chunk, or when the list cannot be built.
 */
static AZE_C_PRED P2_cabocha_tree_get_chunk_list(AZE_ENV Env)
{
  AZE_TERM arg_tree, arg_list;
  AZE_TERM head, car;
  cabocha_tree_t *tree;
  const cabocha_token_t *token;
  int i, r, morph_num, start;

  AZE_PRED_BEGIN(Env);

  arg_tree = AZE_ARG(2, 0);
  arg_list = AZE_ARG(2, 1);

  tree = (cabocha_tree_t* )aze_term_value_int(Env, arg_tree);
  if (tree == (cabocha_tree_t* )NULL) AZE_PRED_FAIL;

  morph_num = (int )cabocha_tree_size(tree);
  if (morph_num <= 0) AZE_PRED_FAIL;

  head = aze_make_list_init(Env, arg_list);

  start = -1;   /* index of the head token of the chunk being collected */
  for (i = 0; i < morph_num; i++) {
    token = cabocha_tree_token(tree, i);
    if (token == (cabocha_token_t* )NULL) goto err;

    /* Only chunk's head token have chunk member value.
       (See CaboCha source cabocha-X.XX/example/example.c file.)
     */
    if (token->chunk != (cabocha_chunk_t* )NULL) {
      if (start >= 0) {
        /* A new chunk begins here, so emit the one that just ended. */
        head = aze_make_list_add_item(Env, head, &car);
        if (head == NULL) goto err;
        r = make_chunk_from_tree(Env, tree, start, i - 1, car);
        if (r != 0) goto err;
      }

      start = i;
    }
  }

  /* No token carried a chunk: there is nothing to emit, and -1 must not be
     handed on as a token index. */
  if (start < 0) goto err;

  /* Emit the last chunk, which runs to the end of the tree. */
  head = aze_make_list_add_item(Env, head, &car);
  if (head == NULL) goto err;
  r = make_chunk_from_tree(Env, tree, start, morph_num - 1, car);
  if (r != 0) goto err;

  aze_make_list_end(Env, head);
  AZE_PRED_DET_SUCC;

 err:
  aze_make_list_error_end(Env);
  AZE_PRED_FAIL;
}

/* ?-cabocha_version(-VERSION).
   Unifies VERSION with CABOCHA_VERSION_ATOM; fails when the unification
   fails. */
extern pred
P1_cabocha_version(Frame *Env)
{
  int status;

  AZE_PRED_BEGIN(Env);

  status = aze_unify_atom(Env, CABOCHA_VERSION_ATOM, AZE_ARG(1, 0));
  if (status != 0)
    AZE_PRED_FAIL;

  AZE_PRED_DET_SUCC;
}

/* Module initialiser: registers every cabocha_* predicate of this file with
   the Prolog system, then creates the version atom used by cabocha_version/1.
   Always returns 1. */
#ifdef WIN32
__declspec(dllexport) int initiate_cabocha(Frame *Env)
#else
extern int initiate_cabocha(Frame *Env)
#endif
{
#define DEFP(name,arity,f)   aze_define_pred(Env, name, arity, f)

  /* Same text the format "cabocha-ext: %s" would produce, assembled at
     compile time by string-literal concatenation. */
  char version_text[] = "cabocha-ext: " AZCABOCHA_VERSION;

  DEFP("cabocha_strerror",            2, P2_cabocha_strerror);
  DEFP("cabocha_new",                 2, P2_cabocha_new);
  DEFP("cabocha_destroy",             1, P1_cabocha_destroy);
  DEFP("cabocha_sparse_tostr",        3, P3_cabocha_sparse_tostr);
  DEFP("cabocha_sparse_totree",       3, P3_cabocha_sparse_totree);
  DEFP("cabocha_tree_size",           2, P2_cabocha_tree_size);
  DEFP("cabocha_tree_token",          3, P3_cabocha_tree_token);
#if 0
  DEFP("cabocha_tree_destroy",        1, P1_cabocha_tree_destroy);
#endif
  DEFP("cabocha_tree_get_chunk_list", 2, P2_cabocha_tree_get_chunk_list);
  DEFP("cabocha_version",             1, P1_cabocha_version);

  CABOCHA_VERSION_ATOM = PutSystemAtom(Env, version_text);

  return 1;
}
