import Formatter from 'src/formatter/Formatter';
import Tokenizer from 'src/lexer/Tokenizer';
import { EOF_TOKEN, isReserved, isToken, type Token, TokenType } from 'src/lexer/token';
import { dedupe } from 'src/utils';

// https://docs.oracle.com/cd/B19306_01/server.102/b14200/functions001.htm
/**
 * Built-in function names, grouped by category.
 *
 * The grouping is informational only: the tokenizer config flattens every
 * category into the reserved-keyword list and de-duplicates the result, so a
 * name may legitimately appear under more than one category (e.g. 'ROUND' and
 * 'TRUNC' are both numeric and datetime functions). Within a single category
 * each name is listed once.
 *
 * Every entry must be the bare function name: no punctuation, no whitespace.
 */
const reservedFunctions = {
  numeric: [
    'ABS',
    'ACOS',
    'ASIN',
    'ATAN',
    'ATAN2',
    'BITAND',
    'CEIL',
    'COS',
    'COSH',
    'EXP',
    'FLOOR',
    'LN',
    'LOG',
    'MOD',
    'NANVL',
    'POWER',
    'REMAINDER',
    'ROUND',
    'SIGN',
    'SIN',
    'SINH',
    'SQRT',
    'TAN',
    'TANH',
    'TRUNC',
    'WIDTH_BUCKET',
  ],
  character: [
    'CHR',
    'CONCAT',
    'INITCAP',
    'LOWER',
    'LPAD',
    'LTRIM',
    'NLS_INITCAP',
    'NLS_LOWER',
    'NLSSORT',
    'NLS_UPPER',
    'REGEXP_REPLACE',
    'REGEXP_SUBSTR',
    'REPLACE',
    'RPAD',
    'RTRIM',
    'SOUNDEX',
    'SUBSTR',
    'TRANSLATE',
    'TREAT',
    'TRIM',
    'UPPER',

    'NLS_CHARSET_DECL_LEN',
    'NLS_CHARSET_ID',
    'NLS_CHARSET_NAME',

    'ASCII',
    'INSTR',
    'LENGTH',
    'REGEXP_INSTR',
  ],
  datetime: [
    'ADD_MONTHS',
    'CURRENT_DATE',
    'CURRENT_TIMESTAMP',
    'DBTIMEZONE',
    'EXTRACT',
    'FROM_TZ',
    'LAST_DAY',
    'LOCALTIMESTAMP',
    'MONTHS_BETWEEN',
    'NEW_TIME',
    'NEXT_DAY',
    'NUMTODSINTERVAL',
    'NUMTOYMINTERVAL',
    'ROUND',
    'SESSIONTIMEZONE',
    'SYS_EXTRACT_UTC',
    'SYSDATE',
    'SYSTIMESTAMP',
    'TO_CHAR',
    'TO_TIMESTAMP',
    'TO_TIMESTAMP_TZ',
    'TO_DSINTERVAL',
    'TO_YMINTERVAL',
    'TRUNC',
    'TZ_OFFSET',
  ],
  comparison: ['GREATEST', 'LEAST'],
  conversion: [
    'ASCIISTR',
    'BIN_TO_NUM',
    'CAST',
    'CHARTOROWID',
    'COMPOSE',
    'CONVERT',
    'DECOMPOSE',
    'HEXTORAW',
    'NUMTODSINTERVAL',
    'NUMTOYMINTERVAL',
    'RAWTOHEX',
    'RAWTONHEX',
    'ROWIDTOCHAR',
    'ROWIDTONCHAR',
    'SCN_TO_TIMESTAMP',
    'TIMESTAMP_TO_SCN',
    'TO_BINARY_DOUBLE',
    'TO_BINARY_FLOAT',
    'TO_CHAR',
    'TO_CLOB',
    'TO_DATE',
    'TO_DSINTERVAL',
    'TO_LOB',
    'TO_MULTI_BYTE',
    'TO_NCHAR',
    'TO_NCLOB',
    'TO_NUMBER',
    'TO_SINGLE_BYTE',
    'TO_TIMESTAMP',
    'TO_TIMESTAMP_TZ',
    'TO_YMINTERVAL',
    'TRANSLATE',
    'UNISTR',
  ],
  // Fixed: 'EMPTY_BLOB' previously carried a stray trailing comma inside the string.
  largeObject: ['BFILENAME', 'EMPTY_BLOB', 'EMPTY_CLOB'],
  collection: ['CARDINALITY', 'COLLECT', 'POWERMULTISET', 'POWERMULTISET_BY_CARDINALITY', 'SET'],
  hierarchical: ['SYS_CONNECT_BY_PATH'],
  dataMining: [
    'CLUSTER_ID',
    'CLUSTER_PROBABILITY',
    'CLUSTER_SET',
    'FEATURE_ID',
    'FEATURE_SET',
    'FEATURE_VALUE',
    'PREDICTION',
    'PREDICTION_COST',
    'PREDICTION_DETAILS',
    'PREDICTION_PROBABILITY',
    'PREDICTION_SET',
  ],
  xml: [
    'APPENDCHILDXML',
    'DELETEXML',
    'DEPTH',
    'EXTRACT',
    'EXISTSNODE',
    'EXTRACTVALUE',
    'INSERTCHILDXML',
    'INSERTXMLBEFORE',
    'PATH',
    'SYS_DBURIGEN',
    'SYS_XMLAGG',
    'SYS_XMLGEN',
    'UPDATEXML',
    'XMLAGG',
    'XMLCDATA',
    'XMLCOLATTVAL',
    'XMLCOMMENT',
    'XMLCONCAT',
    'XMLFOREST',
    'XMLPARSE',
    'XMLPI',
    'XMLQUERY',
    'XMLROOT',
    'XMLSEQUENCE',
    'XMLSERIALIZE',
    'XMLTABLE',
    'XMLTRANSFORM',
  ],
  encoding: ['DECODE', 'DUMP', 'ORA_HASH', 'VSIZE'],
  nullRelated: ['COALESCE', 'LNNVL', 'NULLIF', 'NVL', 'NVL2'],
  env: ['SYS_CONTEXT', 'SYS_GUID', 'SYS_TYPEID', 'UID', 'USER', 'USERENV'],
  aggregate: [
    'AVG',
    'COLLECT',
    'CORR',
    'CORR_S',
    'CORR_K',
    'COUNT',
    'COVAR_POP',
    'COVAR_SAMP',
    'CUME_DIST',
    'DENSE_RANK',
    'FIRST',
    'GROUP_ID',
    'GROUPING',
    'GROUPING_ID',
    'LAST',
    'MAX',
    'MEDIAN',
    'MIN',
    'PERCENTILE_CONT',
    'PERCENTILE_DISC',
    'PERCENT_RANK',
    'RANK',
    'REGR_SLOPE',
    'REGR_INTERCEPT',
    'REGR_COUNT',
    'REGR_R2',
    'REGR_AVGX',
    'REGR_AVGY',
    'REGR_SXX',
    'REGR_SYY',
    'REGR_SXY',
    'STATS_BINOMIAL_TEST',
    'STATS_CROSSTAB',
    'STATS_F_TEST',
    'STATS_KS_TEST',
    'STATS_MODE',
    'STATS_MW_TEST',
    'STATS_ONE_WAY_ANOVA',
    'STATS_T_TEST_ONE',
    'STATS_T_TEST_PAIRED',
    'STATS_T_TEST_INDEP',
    'STATS_T_TEST_INDEPU',
    'STATS_WSR_TEST',
    'STDDEV',
    'STDDEV_POP',
    'STDDEV_SAMP',
    'SUM',
    'VAR_POP',
    'VAR_SAMP',
    'VARIANCE',
  ],
  // Windowing functions (minus the ones already listed in aggregates)
  window: ['FIRST_VALUE', 'LAG', 'LAST_VALUE', 'LEAD', 'NTILE', 'RATIO_TO_REPORT', 'ROW_NUMBER'],
  objectReference: ['DEREF', 'MAKE_REF', 'REF', 'REFTOHEX', 'VALUE'],
  model: ['CV', 'ITERATION_NUMBER', 'PRESENTNNV', 'PRESENTV', 'PREVIOUS'],
};

/**
 * Priority 5 (last)
 * Full list of reserved words.
 * Any words that are in a higher-priority list are removed from this one.
 *
 * The tokenizer config merges this list with every name in `reservedFunctions`
 * and de-duplicates the result, so a word that appears both here and there
 * (e.g. 'AVG', 'CAST') ends up in the final keyword list only once.
 *
 * NOTE(review): the single-letter entries 'A' and 'C' are commented out below;
 * the reason is not recorded here — presumably they clash with common
 * identifiers/aliases. Confirm before re-enabling.
 */
const reservedKeywords = [
  // 'A',
  'ACCESSIBLE',
  'AGENT',
  'AGGREGATE',
  'ALL',
  'ALTER',
  'ANY',
  'ARRAY',
  'AS',
  'ASC',
  'AT',
  'ATTRIBUTE',
  'AUTHID',
  'AVG',
  'BETWEEN',
  'BFILE_BASE',
  'BINARY',
  'BINARY_INTEGER',
  'BLOB_BASE',
  'BLOCK',
  'BODY',
  'BOOLEAN',
  'BOTH',
  'BOUND',
  'BREADTH',
  'BULK',
  'BY',
  'BYTE',
  // 'C',
  'CALL',
  'CALLING',
  'CASCADE',
  'CAST',
  'CHAR',
  'CHARACTER',
  'CHARSET',
  'CHARSETFORM',
  'CHARSETID',
  'CHAR_BASE',
  'CHECK',
  'CLOB_BASE',
  'CLONE',
  'CLOSE',
  'CLUSTER',
  'CLUSTERS',
  'COALESCE',
  'COLAUTH',
  'COLLECT',
  'COLUMNS',
  'COMMENT',
  'COMMIT',
  'COMMITTED',
  'COMPILED',
  'COMPRESS',
  'CONNECT',
  'CONSTANT',
  'CONSTRUCTOR',
  'CONTEXT',
  'CONTINUE',
  'CONVERT',
  'COUNT',
  'CRASH',
  'CREATE',
  'CREDENTIAL',
  'CURRENT',
  'CURRVAL',
  'CURSOR',
  'CUSTOMDATUM',
  'DANGLING',
  'DATA',
  'DATE',
  'DATE_BASE',
  'DAY',
  'DECIMAL',
  'DEFAULT',
  'DEFINE',
  'DEPTH',
  'DESC',
  'DETERMINISTIC',
  'DIRECTORY',
  'DISTINCT',
  'DO',
  'DOUBLE',
  'DROP',
  'DURATION',
  'ELEMENT',
  'ELSIF',
  'EMPTY',
  'ESCAPE',
  'EXCEPTIONS',
  'EXCLUSIVE',
  'EXECUTE',
  'EXISTS',
  'EXIT',
  'EXTENDS',
  'EXTERNAL',
  'EXTRACT',
  'FALSE',
  'FETCH',
  'FINAL',
  'FIRST',
  'FIXED',
  'FLOAT',
  'FOR',
  'FORALL',
  'FORCE',
  'FUNCTION',
  'GENERAL',
  'GOTO',
  'GRANT',
  'GROUP',
  'HASH',
  'HEAP',
  'HIDDEN',
  'HOUR',
  'IDENTIFIED',
  'IF',
  'IMMEDIATE',
  'IN',
  'INCLUDING',
  'INDEX',
  'INDEXES',
  'INDICATOR',
  'INDICES',
  'INFINITE',
  'INSTANTIABLE',
  'INT',
  'INTEGER',
  'INTERFACE',
  'INTERVAL',
  'INTO',
  'INVALIDATE',
  'IS',
  'ISOLATION',
  'JAVA',
  'LANGUAGE',
  'LARGE',
  'LEADING',
  'LENGTH',
  'LEVEL',
  'LIBRARY',
  'LIKE',
  'LIKE2',
  'LIKE4',
  'LIKEC',
  'LIMITED',
  'LOCAL',
  'LOCK',
  'LONG',
  'MAP',
  'MAX',
  'MAXLEN',
  'MEMBER',
  'MERGE',
  'MIN',
  'MINUTE',
  'MLSLABEL',
  'MOD',
  'MODE',
  'MONTH',
  'MULTISET',
  'NAME',
  'NAN',
  'NATIONAL',
  'NATIVE',
  'NATURAL',
  'NATURALN',
  'NCHAR',
  'NEW',
  'NEXTVAL',
  'NOCOMPRESS',
  'NOCOPY',
  'NOT',
  'NOWAIT',
  'NULL',
  'NULLIF',
  'NUMBER',
  'NUMBER_BASE',
  'OBJECT',
  'OCICOLL',
  'OCIDATE',
  'OCIDATETIME',
  'OCIDURATION',
  'OCIINTERVAL',
  'OCILOBLOCATOR',
  'OCINUMBER',
  'OCIRAW',
  'OCIREF',
  'OCIREFCURSOR',
  'OCIROWID',
  'OCISTRING',
  'OCITYPE',
  'OF',
  'OLD',
  // Multi-word entries: the whole phrase is listed as a single keyword.
  'ON DELETE',
  'ON UPDATE',
  'ONLY',
  'OPAQUE',
  'OPEN',
  'OPERATOR',
  'OPTION',
  'ORACLE',
  'ORADATA',
  'ORDER',
  'ORGANIZATION',
  'ORLANY',
  'ORLVARY',
  'OTHERS',
  'OUT',
  'OVERLAPS',
  'OVERRIDING',
  'PACKAGE',
  'PARALLEL_ENABLE',
  'PARAMETER',
  'PARAMETERS',
  'PARENT',
  'PARTITION',
  'PASCAL',
  'PCTFREE',
  'PIPE',
  'PIPELINED',
  'PLS_INTEGER',
  'PLUGGABLE',
  'POSITIVE',
  'POSITIVEN',
  'PRAGMA',
  'PRECISION',
  'PRIOR',
  'PRIVATE',
  'PROCEDURE',
  'PUBLIC',
  'RAISE',
  'RANGE',
  'RAW',
  'READ',
  'REAL',
  'RECORD',
  'REF',
  'REFERENCE',
  'RELEASE',
  'RELIES_ON',
  'REM',
  'REMAINDER',
  'RENAME',
  'RESOURCE',
  'RESULT',
  'RESULT_CACHE',
  'RETURN',
  'REVERSE',
  'REVOKE',
  'ROLLBACK',
  'ROW',
  'ROWID',
  'ROWNUM',
  'ROWTYPE',
  'SAMPLE',
  'SAVE',
  'SAVEPOINT',
  'SB1',
  'SB2',
  'SB4',
  'SEARCH',
  'SECOND',
  'SEGMENT',
  'SELF',
  'SEPARATE',
  'SEQUENCE',
  'SERIALIZABLE',
  'SHARE',
  'SHORT',
  'SIZE',
  'SIZE_T',
  'SMALLINT',
  'SOME',
  'SPACE',
  'SPARSE',
  'SQL',
  'SQLCODE',
  'SQLDATA',
  'SQLERRM',
  'SQLNAME',
  'SQLSTATE',
  'STANDARD',
  'START',
  'STATIC',
  'STDDEV',
  'STORED',
  'STRING',
  'STRUCT',
  'STYLE',
  'SUBMULTISET',
  'SUBPARTITION',
  'SUBSTITUTABLE',
  'SUBTYPE',
  'SUCCESSFUL',
  'SUM',
  'SYNONYM',
  'SYSDATE',
  'TABAUTH',
  'TABLE',
  'TDO',
  'THE',
  'THEN',
  'TIME',
  'TIMESTAMP',
  'TIMEZONE_ABBR',
  'TIMEZONE_HOUR',
  'TIMEZONE_MINUTE',
  'TIMEZONE_REGION',
  'TO',
  'TRAILING',
  'TRANSACTION',
  'TRANSACTIONAL',
  'TRIGGER',
  'TRUE',
  'TRUSTED',
  'TYPE',
  'UB1',
  'UB2',
  'UB4',
  'UID',
  'UNDER',
  'UNIQUE',
  'UNPLUG',
  'UNSIGNED',
  'UNTRUSTED',
  'USE',
  'USER',
  'VALIDATE',
  'VALIST',
  'VALUE',
  'VARCHAR',
  'VARCHAR2',
  'VARIABLE',
  'VARIANCE',
  'VARRAY',
  'VARYING',
  'VIEW',
  'VIEWS',
  'VOID',
  'WHENEVER',
  'WHILE',
  'WORK',
  'WRAPPED',
  'WRITE',
  'YEAR',
  'ZONE',
];

/**
 * Priority 1 (first)
 * Keywords that begin a new statement.
 * Each one will begin a new indented block.
 *
 * Passed to the tokenizer as `reservedCommands`. Multi-word entries
 * (e.g. 'ALTER TABLE', 'SET CURRENT SCHEMA') are listed as one phrase.
 *
 * NOTE(review): 'EXCEPT' is listed here and also in `reservedBinaryCommands`.
 * Which classification wins depends on the tokenizer's matching order, which
 * is not visible from this file — confirm that the duplication is intended.
 */
const reservedCommands = [
  'ADD',
  'ALTER COLUMN',
  'ALTER TABLE',
  'BEGIN',
  'CONNECT BY',
  'CREATE TABLE', // verify
  'DROP TABLE', // verify
  'DECLARE',
  'DELETE',
  'DELETE FROM',
  'EXCEPT',
  'EXCEPTION',
  'FETCH FIRST',
  'FROM',
  'GROUP BY',
  'HAVING',
  'INSERT INTO',
  'INSERT',
  'LIMIT',
  'OFFSET',
  'LOOP',
  'MODIFY',
  'ORDER BY',
  'RETURNING',
  'SELECT',
  'SET CURRENT SCHEMA',
  'SET SCHEMA',
  'SET',
  'START WITH',
  'UPDATE',
  'VALUES',
  'WHERE',
  'WITH',
];

/**
 * Keywords passed to the tokenizer as `reservedBinaryCommands`.
 *
 * Built from two groups, in this order:
 *  - set booleans: each operator in its bare, `ALL` and `DISTINCT` form
 *  - apply: the `CROSS` / `OUTER` variants of `APPLY`
 */
const reservedBinaryCommands = [
  // set booleans
  ...['INTERSECT', 'UNION', 'EXCEPT', 'MINUS'].flatMap(setOperator => [
    setOperator,
    `${setOperator} ALL`,
    `${setOperator} DISTINCT`,
  ]),
  // apply
  ...['CROSS', 'OUTER'].map(applyKind => `${applyKind} APPLY`),
];

/**
 * Join keywords passed to the tokenizer as `reservedJoins`.
 *
 * LEFT / RIGHT / FULL each come in a plain and an `OUTER` form; the remaining
 * join kinds are listed literally.
 */
const reservedJoins = [
  'JOIN',
  'INNER JOIN',
  ...['LEFT', 'RIGHT', 'FULL'].flatMap(side => [`${side} JOIN`, `${side} OUTER JOIN`]),
  'CROSS JOIN',
  'NATURAL JOIN',
];

/**
 * Priority 3
 * Keywords that follow a previous statement and must stay attached to the
 * data that comes after them.
 * They can be rendered fully inline, or on a new line with optional indent.
 *
 * Passed to the tokenizer as `reservedDependentClauses`.
 */
const reservedDependentClauses = ['WHEN', 'ELSE'];

/**
 * Formatter for the PL/SQL dialect.
 *
 * Supplies the dialect-specific operator list and builds the tokenizer from
 * the keyword tables declared in this module.
 */
export default class PlSqlFormatter extends Formatter {
  /**
   * Multi-character operators specific to this dialect.
   * '..' is deliberately not listed: it breaks the operator test and is
   * handled by '.' instead.
   */
  static operators = [
    '||',
    '**',
    ':=',
    '~=',
    '^=',
    '>>',
    '<<',
    '=>',
  ];

  /**
   * Creates the tokenizer configured for PL/SQL.
   *
   * @returns a new `Tokenizer` built from this module's keyword tables,
   *   quoting rules, parameter syntax and the `postProcess` hook
   */
  tokenizer(): Tokenizer {
    // Plain keywords plus every function name from every category, duplicates removed.
    const functionNames = Object.values(reservedFunctions).flat();
    const allKeywords = dedupe([...reservedKeywords, ...functionNames]);

    return new Tokenizer({
      reservedCommands,
      reservedBinaryCommands,
      reservedJoins,
      reservedDependentClauses,
      reservedLogicalOperators: ['AND', 'OR', 'XOR'],
      reservedKeywords: allKeywords,
      // TODO: support custom-delimited strings: Q'{..}' q'<..>' etc
      stringTypes: [{ quote: "''", prefixes: ['N'] }],
      identTypes: [`""`],
      identChars: { rest: '$#' },
      variableTypes: [{ regex: '&{1,2}[A-Za-z][A-Za-z0-9_$#]*' }],
      numberedParamTypes: [':'],
      namedParamTypes: [':'],
      // Empty object used on purpose to not allow $ and # chars as specified in identChars
      paramChars: {},
      operators: PlSqlFormatter.operators,
      postProcess,
    });
  }
}

/**
 * Token-stream fix-up handed to the tokenizer as its `postProcess` hook.
 *
 * Re-types a SET token as a plain reserved keyword when the most recently
 * seen reserved token is BY (the `BY [SET]` case). All other tokens pass
 * through untouched.
 *
 * @param tokens - tokens in source order
 * @returns a new array of the same length; unchanged tokens are the original
 *   objects, re-typed tokens are shallow copies
 */
function postProcess(tokens: Token[]): Token[] {
  const processed: Token[] = [];
  let lastReserved: Token = EOF_TOKEN;

  for (const current of tokens) {
    // BY [SET]
    const isSetAfterBy = isToken.SET(current) && isToken.BY(lastReserved);
    if (isSetAfterBy) {
      // The re-typed token is emitted without becoming the "last reserved" token.
      processed.push({ ...current, type: TokenType.RESERVED_KEYWORD });
      continue;
    }

    if (isReserved(current)) {
      lastReserved = current;
    }
    processed.push(current);
  }

  return processed;
}
