/*-
 * Copyright (c) 2015 Taylor R. Campbell
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

%{
#include <assert.h>
#include <err.h>
#include <inttypes.h>
#include <math.h>
#include <stdlib.h>

#include "syntax.h"

/* Must come after syntax.h, thanks to yecc/lax bletcherosity.  */
#include "parse.h"

/* Name of the file being scanned; copied into each source location.  */
const char *yyfilename;
/* Current line; set to 1 by picopbc_scan_init, bumped on each newline.  */
int yylineno;
/* Column just past the text matched so far on the current line.  */
int yycolumn;

/*
 * Scanner hook run for every match: advance the column past the
 * matched text.  Newline rules reset the column to 0 afterwards.
 */
#define	YY_USER_ACTION	yycolumn += yyleng;

/* XXX Does not belong here.  */
/* Comment style the input is allowed to use; set by picopbc_scan_init.  */
static enum comment_style comment_style = CMTSTY_CPP;

/* Printable names of the comment styles, used in diagnostics.  */
static const char *const comment_styles[] = {
	[CMTSTY_CPP] = "c++",
	[CMTSTY_SH] = "sh",
};

/*
 * Check a comment written in the given style against the configured
 * comment style, and report an error if the two differ.
 */
static void
scan_cmtsty(enum comment_style style)
{

	if (style == comment_style)
		return;

	yyerrorf("comment style is set to %s not %s",
	    comment_styles[comment_style], comment_styles[style]);
}

static struct ast_srcloc
yysrcloc(void)
{

	return (struct ast_srcloc){
		.filename = yyfilename,
		.lineno = yylineno,
		.column = yycolumn - yyleng,
	};
}

static int
lex_token(int t)
{

	yylval.srcloc = yysrcloc();
        return t;
}

static int
lex_keyword(int k)
{

	yylval.srcloc = yysrcloc();
        return k;
}

/*
 * Produce an identifier token: store a copy of the matched text and
 * its source location in yylval.id, and return L_ID.
 */
static int
lex_id(void)
{

	/* Copy the matched text; the copy must be NUL-terminated.  */
	yylval.id.id = string_edupz(yytext, yyleng);
	assert(string_ptr(yylval.id.id)[yyleng] == '\0');

	yylval.id.loc = yysrcloc();

	return L_ID;
}

/*
 * Produce an integer token: convert the n digits at s with the given
 * radix-specific scanner, store the value in yylval.integer, and
 * return L_INTEGER.
 */
static int
lex_integer(uintmax_t (*scan)(const char *, size_t), const char *s, size_t n)
{
	const uintmax_t value = scan(s, n);

	yylval.integer = value;
	return L_INTEGER;
}

/*
 * Convert the n octal digits at s (with s[n] == '\0') to an integer.
 * An error is reported whenever shifting in another digit would
 * discard high-order bits; conversion continues regardless.
 */
static uintmax_t
scan_oct(const char *s, size_t n)
{
	const uintmax_t limit = UINTMAX_MAX >> 3;
	const char *const end = s + n;
	const char *p;
	uintmax_t value = 0;

	assert(*end == '\0');
	for (p = s; p < end; p++) {
		assert('0' <= *p);
		assert(*p <= '7');
		if (value > limit)
			yyerrorf("truncating integer constant: %s", s);
		value = (value << 3) | (uintmax_t)(*p - '0');
	}

	return value;
}

/*
 * Convert the n decimal digits at s (with s[n] == '\0') to an integer.
 * An error is reported whenever appending another digit would exceed
 * UINTMAX_MAX; conversion continues regardless, with the value
 * wrapping modulo UINTMAX_MAX + 1.
 */
static uintmax_t
scan_dec(const char *s, size_t n)
{
	uintmax_t x = 0;
	size_t i;

	assert(s[n] == '\0');
	for (i = 0; i < n; i++) {
		uintmax_t digit;

		assert('0' <= s[i]);
		assert(s[i] <= '9');
		digit = (uintmax_t)(s[i] - '0');
		/*
		 * 10*x + digit fits iff x <= (UINTMAX_MAX - digit)/10.
		 * Testing only x > UINTMAX_MAX/10 would miss the case
		 * where 10*x fits but adding the digit wraps around.
		 */
		if (x > ((UINTMAX_MAX - digit) / 10))
			yyerrorf("truncating integer constant: %s", s);
		x *= 10;
		x += digit;
	}

	return x;
}

/*
 * Convert the n hexadecimal digits at s (with s[n] == '\0', either
 * letter case) to an integer.  An error is reported whenever shifting
 * in another digit would discard high-order bits; conversion
 * continues regardless.
 */
static uintmax_t
scan_hex(const char *s, size_t n)
{
	uintmax_t x = 0;
	size_t i;

	assert(s[n] == '\0');
	for (i = 0; i < n; i++) {
		const char c = s[i];
		uintmax_t digit = 0;

		assert(('0' <= c && c <= '9') ||
		    ('a' <= c && c <= 'f') ||
		    ('A' <= c && c <= 'F'));

		/* Decode the digit; anything else contributes nothing.  */
		if ('0' <= c && c <= '9')
			digit = (uintmax_t)(c - '0');
		else if ('a' <= c && c <= 'f')
			digit = (uintmax_t)(0xa + (c - 'a'));
		else if ('A' <= c && c <= 'F')
			digit = (uintmax_t)(0xa + (c - 'A'));

		if (x > (UINTMAX_MAX >> 4))
			yyerrorf("truncating integer constant: %s (%"PRIxMAX")",
			    s, x);
		x = (x << 4) | digit;
	}

	return x;
}

/* XXX Remember string bounds.  */
/*
 * State of the string literal being scanned.  current_string_delimiter
 * is the opening quote character, or '\0' while no string is in
 * progress; current_string accumulates the literal's contents.
 */
static char current_string_delimiter = '\0';
static struct string current_string = STRING_NULL;

/*
 * Start scanning a string literal opened by the given quote
 * character.  No other string may be in progress.
 */
static void
scan_string_begin(char delimiter)
{

	assert(current_string_delimiter == '\0');
	assert(delimiter != '\0');

	/* Start from an empty string and remember how it must end.  */
	current_string = string_emalloc(0);
	current_string_delimiter = delimiter;
}

/*
 * Append n bytes of text to the string being scanned, keeping it
 * NUL-terminated.  If the combined length plus terminator would not
 * fit in a size_t, report an error and leave the string unchanged.
 */
static void
scan_string_text(const char *text, size_t n)
{
	const size_t oldlen = string_len(current_string);
	char *dst, *end;

	assert(current_string_delimiter != '\0');

	if (n > (SIZE_MAX - oldlen - 1)) {
		yyerrorf("string too large: %zu + %zu bytes", oldlen, n);
		return;
	}

	current_string = string_erealloc(current_string, (oldlen + n));

	/* The new text overwrites the old terminator.  */
	dst = &string_ptr(current_string)[oldlen];
	assert(dst[0] == '\0');
	(void)memcpy(dst, text, n);

	/* Re-terminate; end points one past the new terminator.  */
	dst[n] = '\0';
	end = &dst[n + 1];
	assert(string_ptr(current_string) < end);
	assert((oldlen + n + 1) == (size_t)(end - string_ptr(current_string)));
}

/*
 * Append a single character to the string being scanned.
 */
static void
scan_string_char(char ch)
{
	const char one[1] = { ch };

	scan_string_text(one, sizeof(one));
}

/*
 * Append the character denoted by the 1 to 3 octal digits at s to the
 * string being scanned.
 */
static void
scan_string_oct(const char *s, size_t n)
{
	uintmax_t value;

	/*
	 * With at most three digits scan_oct cannot lose high bits.
	 * NOTE(review): three digits can still encode values up to
	 * 0777, which are narrowed to char implicitly below.
	 */
	assert(1 <= n);
	assert(n <= 3);

	value = scan_oct(s, n);
	scan_string_char(value);
}

/*
 * Append the character denoted by the 1 or 2 hexadecimal digits at s
 * to the string being scanned.
 */
static void
scan_string_hex(const char *s, size_t n)
{
	uintmax_t value;

	/* With at most two digits the value is at most 0xff.  */
	assert(1 <= n);
	assert(n <= 2);

	value = scan_hex(s, n);
	scan_string_char(value);
}

/*
 * Handle a Unicode escape given its hex digits.  Not implemented yet:
 * both arguments are ignored, an error is reported, and nothing is
 * appended to the string being scanned.
 */
static void
scan_unicode_hex(const char *s attr_unused, size_t n attr_unused)
{

	yyerror("XXX: Unicode code points not yet implemented");
}

/*
 * Handle a quote character seen inside a string.  If it is the quote
 * that opened the string, hand the finished string to *ret, reset the
 * string state, and return true.  Otherwise it is an ordinary
 * character: append it and return false, leaving *ret untouched.
 */
static bool
scan_string_delimiter(char ch, struct string *ret)
{

	assert(current_string_delimiter != '\0');

	if (ch == current_string_delimiter) {
		/* Closing quote: transfer the string to the caller.  */
		*ret = current_string;
		current_string = string_null;
		current_string_delimiter = '\0';
		return true;
	}

	/* The other kind of quote is just text.  */
	scan_string_char(ch);
	return false;
}

%}

/*
 * noyywrap: scanning stops at the first end of file, no yywrap() needed.
 * nounput, noinput: do not generate the unput() and input() routines.
 */
%option noyywrap nounput noinput

/* Identifier: a letter or underscore, then letters, digits, underscores.  */
ID		[a-zA-Z_][a-zA-Z0-9_]*

/* A single digit in each supported radix.  */
OCT		[0-7]
DEC		[0-9]
HEX		[0-9a-fA-F]

/* Tail of a real literal: a decimal exponent, or a bare f/F suffix.  */
REAL_EXP	[eE][-+]?{DEC}+|[fF]

/*
 * Exclusive start conditions: inside a block comment, inside a string
 * literal, and just after a backslash inside a string literal.
 */
%x BLKCMT
%x STRING
%x STRESC

%%

\n			yylineno++; yycolumn = 0;
[ \t]			/* eat whitespace */

	/*
	 * Line comments run to the end of the line.  `.' never matches
	 * a newline, so no `$' trailing context is needed; with `$' a
	 * comment on the last line of a file lacking a final newline
	 * would not be recognized.  Each comment is checked against
	 * the configured comment style.
	 */
#.*			scan_cmtsty(CMTSTY_SH);
"//".*			scan_cmtsty(CMTSTY_CPP);
"/*"			scan_cmtsty(CMTSTY_CPP); BEGIN BLKCMT;

	/*
	 * Inside a block comment: discard text while tracking line
	 * numbers, until the closing delimiter.
	 */
<BLKCMT>"/*"		yyerror("nested block comments are not allowed");
<BLKCMT>"*/"		BEGIN INITIAL;
<BLKCMT>\n		yylineno++; yycolumn = 0;
<BLKCMT>.		/* eat text */

<BLKCMT><<EOF>>		yyerror("end of file in block comment"); yyterminate();

	/*
	 * Keywords.  These rules precede the identifier rule, so on a
	 * match of equal length the keyword wins.
	 */
"default"		return lex_keyword(K_DEFAULT);
"enum"			return lex_keyword(K_ENUM);
"extend"		return lex_keyword(K_EXTEND);
"extensions"		return lex_keyword(K_EXTENSIONS);
"false"			return lex_keyword(K_FALSE);
"group"			return lex_keyword(K_GROUP);
"import"		return lex_keyword(K_IMPORT);
"max"			return lex_keyword(K_MAX);
"message"		return lex_keyword(K_MESSAGE);
"option"		return lex_keyword(K_OPTION);
"optional"		return lex_keyword(K_OPTIONAL);
"package"		return lex_keyword(K_PACKAGE);
"public"		return lex_keyword(K_PUBLIC);
"repeated"		return lex_keyword(K_REPEATED);
"required"		return lex_keyword(K_REQUIRED);
"returns"		return lex_keyword(K_RETURNS);
"rpc"			return lex_keyword(K_RPC);
"service"		return lex_keyword(K_SERVICE);
"syntax"		return lex_keyword(K_SYNTAX);
"to"			return lex_keyword(K_TO);
"true"			return lex_keyword(K_TRUE);
"weak"			return lex_keyword(K_WEAK);

{DEC}+(\.{DEC}*)?{REAL_EXP}|{DEC}*\.{DEC}+{REAL_EXP}?|"inf"|"nan" {
				/*
				 * Real literal, converted with strtod.
				 * Overflow depends on the input, so it
				 * is diagnosed rather than asserted:
				 * strtod signals it by returning
				 * HUGE_VAL with errno set to ERANGE.
				 */
				assert(yytext[yyleng] == '\0');
				errno = 0;
				yylval.real = strtod(yytext, NULL);
				if ((yylval.real == HUGE_VAL) &&
				    (errno == ERANGE))
					yyerrorf("real constant out of range"
					    ": %s", yytext);
				return L_REAL;
			}

{ID}"."[0-9]		yyerror("id and number must be separated by space");
	/*
	 * The rule above rejects an identifier immediately followed by
	 * a dot and a digit; presumably because the dot and digits
	 * would otherwise be scanned as a real literal.
	 */
{ID}			return lex_id();

	/*
	 * Integer literals.  The radix prefix (a leading 0, or 0x/0X)
	 * is skipped, and only the remaining digits are converted.
	 */
0{OCT}*			return lex_integer(&scan_oct, yytext+1, yyleng-1);
0[xX]{HEX}+		return lex_integer(&scan_hex, yytext+2, yyleng-2);
[1-9]{DEC}*		return lex_integer(&scan_dec, yytext, yyleng);

	/* Punctuation; lex_token records each token's source location.  */
"("			return lex_token(T_LROUND);
")"			return lex_token(T_RROUND);
"["			return lex_token(T_LSQUARE);
"]"			return lex_token(T_RSQUARE);
"{"			return lex_token(T_LCURLY);
"}"			return lex_token(T_RCURLY);
","			return lex_token(T_COMMA);
";"			return lex_token(T_SEMI);
"."			return lex_token(T_DOT);
"-"			return lex_token(T_HYPHEN);
"="			return lex_token(T_EQUAL);

[\"\']			scan_string_begin(yytext[0]); BEGIN STRING;
	/*
	 * String literals, opened by either kind of quote.  Inside the
	 * string, a quote ends it only if it matches the opening one;
	 * scan_string_delimiter appends the other kind as text.
	 */
<STRING>[\"\']		{
				if (scan_string_delimiter(yytext[0],
					&yylval.string)) {
					BEGIN INITIAL;
					return L_STRING;
				}
			}
<STRING>\\		BEGIN STRESC;
<STRING>\n		{
				yyerror("newline is not allowed in string");
				/* Keep later source locations accurate.  */
				yylineno++;
				yycolumn = 0;
			}
	/*
	 * Ordinary text.  Newline is excluded from the class so that
	 * the newline rule above is reached; otherwise this rule would
	 * swallow newlines as part of a longer match.
	 */
<STRING>[^\"\'\\\n]+	scan_string_text(yytext, yyleng);

<STRING><<EOF>>		yyerror("end of file in string"); yyterminate();

<STRESC>[0-7]{1,3}	scan_string_oct(yytext, yyleng); BEGIN STRING;
	/*
	 * Escape sequences, scanned just after a backslash inside a
	 * string.  Every rule returns to the STRING state, including
	 * the error cases, so that one bad escape does not cause the
	 * rest of the string to be misread as further escapes.
	 */
<STRESC>[xX]{HEX}{1,2}	scan_string_hex(yytext+1, yyleng-1); BEGIN STRING;
<STRESC>u{HEX}{4}	scan_unicode_hex(yytext+1, 4); BEGIN STRING;
<STRESC>U{HEX}{8}	scan_unicode_hex(yytext+1, 8); BEGIN STRING;
<STRESC>\"		scan_string_char('\"'); BEGIN STRING;
<STRESC>\'		scan_string_char('\''); BEGIN STRING;
<STRESC>\?		scan_string_char('\?'); BEGIN STRING;
<STRESC>\\		scan_string_char('\\'); BEGIN STRING;
<STRESC>a		scan_string_char('\a'); BEGIN STRING;
<STRESC>b		scan_string_char('\b'); BEGIN STRING;
<STRESC>f		scan_string_char('\f'); BEGIN STRING;
<STRESC>n		scan_string_char('\n'); BEGIN STRING;
<STRESC>r		scan_string_char('\r'); BEGIN STRING;
<STRESC>t		scan_string_char('\t'); BEGIN STRING;
<STRESC>v		scan_string_char('\v'); BEGIN STRING;
<STRESC>\n		{
				/*
				 * `.' does not match a newline; without
				 * this rule a backslash-newline would
				 * fall through to the scanner's default
				 * rule and be echoed to the output.
				 */
				yyerror("newline is not allowed in string");
				yylineno++;
				yycolumn = 0;
				BEGIN STRING;
			}
<STRESC>.		{
				yyerrorf("invalid string escape: `\\%c'",
				    yytext[0]);
				BEGIN STRING;
			}

<STRESC><<EOF>>		yyerror("end of file in string escape"); yyterminate();

.			yyerrorf("invalid character: `%c'", yytext[0]); /* catch-all: no earlier rule matched */

%%

/*
 * Prepare the scanner to read from file.  filename is the name
 * reported in source locations, and opts supplies the comment style
 * the input is allowed to use.
 */
void
picopbc_scan_init(FILE *file, const char *filename,
    const struct syntaxopts *opts)
{

	/* Input stream and the name it is reported under.  */
	yyfilename = filename;
	yyin = file;

	/* Positions start at line 1, column 0.  */
	yycolumn = 0;
	yylineno = 1;

	comment_style = opts->so_cmtsty;
}

/*
 * Scan all of file, printing one line per token to stderr: file
 * name, line, start and end column, token code, and matched text.
 * Stops when yylex returns 0.
 */
void
picopbc_scan(FILE *file, const char *filename,
    const struct syntaxopts *opts)
{
	int token;

	picopbc_scan_init(file, filename, opts);

	for (;;) {
		token = yylex();
		if (token == 0)
			break;
		(void)fprintf(stderr, "%s:%d:%d-%d: %d %s\n", yyfilename,
		    yylineno, yycolumn - yyleng, yycolumn, token, yytext);
	}
}
