/* oniguruma.c  2013/11/15 SOFNEC */
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <stdio.h>
#include <ctype.h>
#ifdef __APPLE__
#include <malloc/malloc.h>
#else
#include <malloc.h>
#endif

#include <azprolog.h>
#include <oniguruma.h>

#define AZONIGURUMA_VERSION   "0.6"

static OnigEncoding AZOnigEncoding;

#define ATOM_MAX_SIZE        AZ_MAX_ATOM_LENGTH

#define ALL_BEGIN(fp) \
  if (HP_GE(next_var_cell, var_bottom))	\
    LocalOverflow();			\
  fp->Local = next_var_cell;	\
  fp->Global = next_gvar_cell;	\
  fp->Trail = trail_register;	\
  fp->nVars = 0


#define PARG(n,i)   (next_var_cell - n + i)
#define PREALVALUE(item,n,i)		\
	item = next_var_cell-n+i;	\
	while (TAG(item) == link_tag)	\
		(item) = BODY(item)

#define GET_NEW_CELL(Env, t)    MakeUndef(Env); (t) = next_var_cell - 1

#define GET_ENCODING(term)  ((OnigEncoding )GetInt(term))

#define PROLOG_LINESEP        31

#define CHECK_NULL_RETURN(p) if (p == NULL) return NULL;
#define CHECK_NULL_FAIL(p)   if (p == NULL) YIELD(FAIL)

static BASEINT ATOM_END;
static BASEINT ATOM_NON;

static BASEINT ONIGURUMA_VERSION_ATOM;

static UChar* EMPTY_STRING = (UChar* )"";


static void* Malloc(int size)
{
  return malloc(size);
}

static void Free(void* p)
{
  if (p != 0) free(p);
}

static int prolog_make_sublist_from_string(Frame *Env, TERM* t,
                   unsigned char* str, int from, int end)
{
  int i, len;
  unsigned char* s;
  TERM *tcar, *tcdr, *head;
  TERM *tsave;

  tsave = next_var_cell;

  if (from == ONIG_REGION_NOTPOS) { /* not match region */
    if (! UnifyAtom(t, ATOM_NON)) return -1;
    return 0;
  }

  len = end - from;
  s = str + from;
  head = t;
  i = 0;
  while (i < len) {
    MakeUndef(Env); tcar = next_var_cell - 1;
    if (! UnifyInt(tcar, (BASEINT )(*s)))
      goto fail;

    MakeUndef(Env); tcdr = next_var_cell - 1;
    if (! UnifyCons(Env, head, tcar, tcdr))
      goto fail;

    head = tcdr;
    i++;
    s++;
  }
  if (UnifyAtom(head, ATOM_NIL) == 0) {
  fail:
    next_var_cell = tsave;
    return -1;
  }

  next_var_cell = tsave;
  return 0;
}

static int conv_linesep(UChar* s, int len)
{
  int clen;
  UChar prev = '\0';
  UChar *to = s;
  UChar *p = s;
  UChar *end = s + len;

  (void)end;

  while (len > 0) {
    clen = onig_enc_len(AZOnigEncoding, p, end);
    if (clen > 1) {
      int i;
      UChar *q = p;
      for (i = 0; i < clen; i++)
	*to++ = *q++;
    }
    else if (*p == '\n' && prev == '\r') {
      *(to - 1) = '\n';
    }
    else if (*p == PROLOG_LINESEP) {
      *to++ = '\n';
    }
    else {
      *to++ = *p;
    }

    prev = *p;
    p   += clen;
    len -= clen;
  }

  *to = '\0';
  return to - s;
}

static int term2str(Frame* Env, TERM* arg, int conv_crnl, int* rlen, UChar** rs)
{
  UChar *s;
  int len;

  len = az_term_to_cstring_length(Env, arg);
  if (len <= 0) {
    s = (UChar* )NULL;
    len = 0;
  }
  else {
    s = (UChar* )malloc(len + 1);
    if (s == 0) return -1;
    az_term_to_cstring(Env, arg, (char* )s, len + 1);

    if (conv_crnl != 0 && len > 0)
      len = conv_linesep(s, len);
  }

  *rlen = len;
  *rs   = s;
  return 0;
}

typedef struct {
  unsigned char* s;
  int len;
} OSTR;

pred P2_onig_make_target_string(Frame *Env)
{
  int r, slen;
  unsigned char* s;
  TERM *ts, *tr;
  OSTR* ostr;

  ts = PARG(2, 0);
  tr = PARG(2, 1);

  r = term2str(Env, ts, 1, &slen, &s);
  if (r != 0) YIELD(FAIL);
  ostr = (OSTR* )Malloc(sizeof(OSTR));
  CHECK_NULL_FAIL(ostr);

  ostr->s   = s;
  ostr->len = slen;
  if (! UnifyInt(tr, (BASEINT )ostr))
    YIELD(FAIL);

  YIELD(DET_SUCC);
}

pred P1_onig_free_target_string(Frame *Env)
{
  TERM *t;
  OSTR* ostr;

  t = PARG(1, 0);

  ostr = (OSTR* )GetInt(t);
  if (ostr->s != EMPTY_STRING)
    Free(ostr->s);
  Free(ostr);
  YIELD(DET_SUCC);
}

pred P4_onig_target_string_part(Frame *Env)
{
  int start, end;
  unsigned char *s, *es;
  TERM *tstart, *tend, *tlist, *tostr;
  TERM *tcar, *tcdr, *head, *tsave;
  OSTR* ostr;

  tostr  = PARG(4, 0);
  tstart = PARG(4, 1);
  tend   = PARG(4, 2);
  tlist  = PARG(4, 3);

  ostr = (OSTR* )GetInt(tostr);
  start = (int )GetInt(tstart);
  if (IsUndef(tend))
    end = ostr->len;
  else {
    end = (int )GetInt(tend);
    if (end > ostr->len) end = ostr->len;
  }

  tsave = next_var_cell;
  es = ostr->s + end;
  s  = ostr->s + start;
  head = tlist;
  while (s < es) {
    MakeUndef(Env); tcar = next_var_cell - 1;
    if (! UnifyInt(tcar, (BASEINT )(*s)))
      goto fail;

    MakeUndef(Env); tcdr = next_var_cell - 1;
    if (! UnifyCons(Env, head, tcar, tcdr))
      goto fail;

    *head = *tcdr;
    next_var_cell = tsave;
    s++;
  }
  if (UnifyAtom(head, ATOM_NIL) == 0) {
  fail:
    next_var_cell = tsave;
    YIELD(FAIL);
  }

  next_var_cell = tsave;
  YIELD(DET_SUCC);
}

pred P3_onig_target_string_next_pos(Frame *Env)
{
  int len, nowp;
  OnigEncoding enc;
  TERM *tnow, *tnext, *tostr;
  OSTR* ostr;

  ALL_BEGIN(Env);

  tostr = PARG(3, 0);
  tnow  = PARG(3, 1);
  tnext = PARG(3, 2);
  enc = AZOnigEncoding;

  ostr = (OSTR* )GetInt(tostr);
  nowp  = (int )GetInt(tnow);
  if (nowp >= ostr->len) YIELD(FAIL);
  len = onig_enc_len(enc, ostr->s + nowp, ostr->s + ostr->len);
  if (! UnifyInt(tnext, nowp + len)) YIELD(FAIL);

  YIELD(DET_SUCC);
}

pred P3_onig_target_string_length(Frame *Env)
{
  TERM *tostr, *tfrom, *tlen;
  int from, len;
  OnigEncoding enc;
  OSTR* ostr;

  ALL_BEGIN(Env);

  tostr = PARG(3, 0);
  tfrom = PARG(3, 1);
  tlen  = PARG(3, 2);
  enc = AZOnigEncoding;

  ostr = (OSTR* )GetInt(tostr);
  from = (IsUndef(tfrom) ? 0 : (int )GetInt(tfrom));

  if (from >= ostr->len) {
    len = 0;
  }
  else {
    len = onigenc_strlen(enc, ostr->s + from, ostr->s + ostr->len);
  }

  if (! UnifyInt(tlen, (BASEINT )len)) YIELD(FAIL);
  YIELD(DET_SUCC);
}


pred P2_onig_pattern_error_info(Frame *Env)
{
  int r, patlen;
  UChar *pat;
  regex_t* reg;
  OnigErrorInfo einfo;
  TERM *tpat, *terr;

  ALL_BEGIN(Env);

  tpat = PARG(2, 0);
  terr = PARG(2, 1);

  r = term2str(Env, tpat, 0, &patlen, &pat);
  if (r != 0) YIELD(FAIL);

  r = onig_new(&reg, pat, pat + patlen, ONIG_OPTION_NONE, AZOnigEncoding,
               ONIG_SYNTAX_RUBY, &einfo);
  if (r != ONIG_NORMAL) {
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str(s, r, &einfo);
    UnifyAtom(terr, Asciz2Atom(Env, (char* )s)); /* ignore unify fail */
    Free(pat);
    YIELD(DET_SUCC);
  }
  else {
    Free(pat);
    YIELD(FAIL);
  }
}

static int reg_new(Frame *Env, TERM *tpat, TERM *treg, OnigOptionType option)
{
  int r, patlen;
  UChar *pat;
  regex_t* reg;
  OnigErrorInfo einfo;

  r = term2str(Env, tpat, 0, &patlen, &pat);
  if (r != 0) return 1;

  r = onig_new(&reg, pat, pat + patlen, option, AZOnigEncoding,
               ONIG_SYNTAX_RUBY, &einfo);
  if (r != ONIG_NORMAL) {
    Free(pat);
    return 2;
  }

  Free(pat);
  if (! UnifyInt(treg, (BASEINT )((void* )reg))) return 3;

  return 0;
}

pred P2_onig_pattern_compile(Frame *Env)
{
  int r;
  TERM *tpat, *treg;

  ALL_BEGIN(Env);

  tpat = PARG(2, 0);
  treg = PARG(2, 1);

  r = reg_new(Env, tpat, treg, ONIG_OPTION_NONE);
  if (r != 0) YIELD(FAIL);

  YIELD(DET_SUCC);
}

pred P3_onig_pattern_compile(Frame *Env)
{
  int r;
  TERM *tpat, *treg, *toption;
  OnigOptionType option;

  ALL_BEGIN(Env);

  tpat    = PARG(3, 0);
  treg    = PARG(3, 1);
  toption = PARG(3, 2);

  option = (OnigOptionType )GetInt(toption);

  r = reg_new(Env, tpat, treg, option);
  if (r != 0) YIELD(FAIL);

  YIELD(DET_SUCC);
}

pred P1_onig_pattern_free(Frame *Env)
{
  regex_t* reg;
  TERM *treg;

  ALL_BEGIN(Env);

  treg = PARG(1, 0);
  reg = (regex_t* )GetInt(treg);

  onig_free(reg);
  YIELD(DET_SUCC);
}

static int search(Frame *Env, TERM *treg, TERM *tstr,
                  TERM *tstart, TERM *trange,
                  TERM *tmstart, TERM *tmend, TERM *tregion)
{
  int i, r;
  UChar *str, *end, *start, *range;
  regex_t* reg;
  OnigRegion* region;
  OSTR* ostr;

  reg = (regex_t* )GetInt(treg);
  ostr = (OSTR* )GetInt(tstr);
  str   = ostr->s;
  end   = str + ostr->len;

  start = str + (IsUndef(tstart) ? 0 : (int )GetInt(tstart));
  if (start > end) return ONIG_MISMATCH;

  if (IsUndef(trange))
    range = end;
  else
    range = str + (int )GetInt(trange);

  region = onig_region_new();
  r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
  if (r >= 0) {
    if (IsUndef(tregion) || IsCons(tregion)) {
      TERM *tcar, *tcdr, *head;
      TERM *tsave;

      tsave = next_var_cell;
      head = tregion;
      for (i = 1; i < region->num_regs; i++) {
        MakeUndef(Env); tcar = next_var_cell - 1;
        r = prolog_make_sublist_from_string(Env, tcar, str,
                                            region->beg[i], region->end[i]);
        if (r != 0) {
          next_var_cell = tsave;
          goto fail;
        }

        MakeUndef(Env); tcdr = next_var_cell - 1;
        if (! UnifyCons(Env, head, tcar, tcdr)) {
          next_var_cell = tsave;
          goto fail;
        }
    
        *head = *tcdr;
        next_var_cell = tsave;
      }
      if (UnifyAtom(head, ATOM_NIL) == 0) {
        next_var_cell = tsave;
        goto fail;
      }
      next_var_cell = tsave;
    }

    if (! UnifyInt(tmstart, (BASEINT )region->beg[0])) goto fail;
    if (! UnifyInt(tmend,   (BASEINT )region->end[0])) goto fail;
    onig_region_free(region, 1);
    return 0;
  }
  else if (r == ONIG_MISMATCH) {
  fail:
    onig_region_free(region, 1);
    return r;
  }
  else { /* error */
#if 0
    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str(s, r);
    fprintf(stderr, "ERROR: %s\n", s);
#endif
    goto fail;
  }
}


pred P7_onig_search(Frame *Env)
{
  int r;
  TERM *treg, *tstr, *tstart, *trange, *tregion;
  TERM *tmstart, *tmend;

  ALL_BEGIN(Env);

  treg    = PARG(7, 0);
  tstr    = PARG(7, 1);
  tstart  = PARG(7, 2);
  trange  = PARG(7, 3);
  tmstart = PARG(7, 4);
  tmend   = PARG(7, 5);
  tregion = PARG(7, 6);

  r = search(Env, treg, tstr, tstart, trange, tmstart, tmend, tregion);
  if (r == 0) {
    YIELD(DET_SUCC);
  }
  else {
    YIELD(FAIL);
  }
}

pred P8_onig_search_success(Frame *Env)
{
  int r;
  TERM *treg, *tstr, *tstart, *trange, *tregion;
  TERM *tmstart, *tmend, *tnext;
  OSTR* ostr;

  ALL_BEGIN(Env);

  treg    = PARG(8, 0);
  tstr    = PARG(8, 1);
  tstart  = PARG(8, 2);
  trange  = PARG(8, 3);
  tmstart = PARG(8, 4);
  tmend   = PARG(8, 5);
  tregion = PARG(8, 6);
  tnext   = PARG(8, 7);

  ostr = (OSTR* )GetInt(tstr);

  r = search(Env, treg, tstr, tstart, trange, tmstart, tmend, tregion);

  if (r == 0) {
    OnigEncoding enc = AZOnigEncoding;
    int last = (int )GetInt(tmend);

    if (last >= ostr->len) {
      UnifyAtom(tnext, ATOM_END);
    }
    else {
      int start = (int )GetInt(tmstart);

      if (last == start) {
        int len = onig_enc_len(enc, ostr->s + last, ostr->s + ostr->len);
        UnifyInt(tnext, (BASEINT )(last + len));
      }
      else {
        UnifyInt(tnext, (BASEINT )last);
      }
    }
  }
  else {
    UnifyAtom(tregion, ATOM_NIL);
    UnifyAtom(tnext,    ATOM_END);

    if (IsUndef(tmstart))
      UnifyAtom(tmstart,  ATOM_NON);
    if (IsUndef(tmend))
      UnifyAtom(tmend,    ATOM_NON);
  }

  YIELD(DET_SUCC);
}

/* ?-onig_version(-VERSION). */
extern pred
P1_onig_version(Frame *Env)
{
  ALL_BEGIN(Env);

  if (UnifyAtom(PARG(1,0), ONIGURUMA_VERSION_ATOM) == 0)
    YIELD(FAIL);

	YIELD(DET_SUCC);
}
#ifdef WIN32
__declspec(dllexport) int initiate_oniguruma(Frame *Env)
#else
extern int initiate_oniguruma(Frame *Env)
#endif
{
  int  enc;
  char buf[256];

  ATOM_END = PutSystemAtom(Env, "end");
  ATOM_NON = PutSystemAtom(Env, "non");

#ifdef AZONIGURUMA_ENC_EUC_JP
  AZOnigEncoding = ONIG_ENCODING_EUC_JP;
#else
#ifdef AZONIGURUMA_ENC_SJIS
  AZOnigEncoding = ONIG_ENCODING_SJIS;
#else
  AZOnigEncoding = ONIG_ENCODING_UTF8;
#endif
#endif

  enc = az_charsetmode();
  switch (enc) {
  case AZ_ENC_SJIS:
    AZOnigEncoding = ONIG_ENCODING_SJIS;
    break;
  case AZ_ENC_EUCJP:
    AZOnigEncoding = ONIG_ENCODING_EUC_JP;
    break;
  case AZ_ENC_UTF8:
    AZOnigEncoding = ONIG_ENCODING_UTF8;
    break;  
  default:
    break;
  }

  /* put_bltn("onig_set_encoding",         1, P1_onig_set_encoding); */
  put_bltn("onig_pattern_error_info",      2, P2_onig_pattern_error_info);
  put_bltn("onig_pattern_compile",         2, P2_onig_pattern_compile);
  put_bltn("onig_pattern_compile",         3, P3_onig_pattern_compile);
  put_bltn("onig_pattern_free",            1, P1_onig_pattern_free);

  put_bltn("onig_make_target_string",      2, P2_onig_make_target_string);
  put_bltn("onig_free_target_string",      1, P1_onig_free_target_string);
  put_bltn("onig_target_string_part",      4, P4_onig_target_string_part);
  put_bltn("onig_target_string_next_pos",  3, P3_onig_target_string_next_pos);
  put_bltn("onig_target_string_length",    3, P3_onig_target_string_length);

  put_bltn("onig_search",                  7, P7_onig_search);
  put_bltn("onig_search_success",          8, P8_onig_search_success);
  put_bltn("onig_version",                 1, P1_onig_version);

  sprintf(buf, "oniguruma-ext: %s, oniguruma: %d.%d.%d", AZONIGURUMA_VERSION,
          ONIGURUMA_VERSION_MAJOR, ONIGURUMA_VERSION_MINOR, ONIGURUMA_VERSION_TEENY);
  ONIGURUMA_VERSION_ATOM = PutSystemAtom(Env, buf);

  return 1;
}
