import * as visTypes from './vistypes';
import { QuickDashModalsProvider } from './quickdash_modals';
import { QuickDashMakeVisProvider } from './make_visualizations';

import { ProgressMapProvider } from 'ui/kibi/modals/progress_map';
import { sortContext } from 'ui/kibi/directives/sort_icon';
import { allSelected } from 'ui/kibi/directives/tristate_checkbox';
import { fieldSpec, queryIsAnalyzed } from 'ui/kibi/utils/field';
import { promiseMapSeries } from 'ui/kibi/utils/promise';
import { EntityType } from 'ui/kibi/components/ontology/entity_type';

import errors from 'request-promise/errors';
import Bluebird from 'bluebird';
import _ from 'lodash';


/**
 * Combines individual ranking scores into a single power-weighted product.
 *
 * A product (rather than a sum) is used so that a score can be compensated by
 * its inverse; with sums, offsetting a high score would take a negative one,
 * which is somewhat weird.
 *
 * The result is deliberately not corrected for the number of tests (that is,
 * no averaging): there are few tests, and the overall score should remain
 * correctable at a later stage (akin to adding further tests inline).
 *
 * Note that a 0 score in some test results in the field being filtered out
 * of the results.
 *
 * @param {Array<{score: number, weight: number}>} tests - individual scores
 * @returns {number} product of `score ^ weight` over all tests, 1 if empty
 */
function composeScores(tests) {
  let product = 1;

  tests.forEach(({ score, weight }) => {
    product = product * Math.pow(score, weight);
  });

  return product;
}


export function GuessFieldsProvider(
  Private, createNotifier, savedSearches, ontologyModel, mappings, es, sessionId) {

  // Services resolved through the injected `Private` loader
  const visMaker = Private(QuickDashMakeVisProvider);
  const progressMap = Private(ProgressMapProvider);
  const quickDashModals = Private(QuickDashModalsProvider);

  // Errors escaping the guess procedure are reported through this notifier
  const notify = createNotifier({ location: 'Guess Fields' });

  // Number of hits requested per field as samples (see samplesRequest), and
  // number of buckets requested in the terms aggregation (see termsRequest)
  const samplesCount  = 50;
  const termsCount    = 50;

  // Thresholds used by scoreTerms, all expressed as a share of documents:
  //  - termsHighRatioCut: at or above this share for the biggest term, the
  //    field scores 0
  //  - termsLowRatioScore: when all returned terms together cover at most
  //    this share, the score is scaled down linearly...
  //  - termsLowRatioCut: ...reaching 0 at this share
  const termsHighRatioCut   = 0.95;
  const termsLowRatioScore  = 0.2;
  const termsLowRatioCut    = 0.05;

  // Values of `fieldStats.related`, set by queryRelated and read by scoreRelation
  const RelatedEnum = {
    notRelated: 0,
    directRelated: 1,
    eidRelated: 2
  };


  // Preprocessing

  /**
   * Looks up the root saved search - a saved search entity of the ontology
   * model having no parent - whose index pattern is the input one, and stores
   * it in `args.rootSavedSearch`. The property is left undefined when no
   * saved search matches.
   *
   * @param {Object} args - procedure state; reads `index`
   * @returns {Promise<Object>} resolves to the same `args`
   */
  function getRootSavedSearch(args) {
    const pattern = args.index;

    const isRootSearchEntity = entity =>
      entity.type === EntityType.SAVED_SEARCH && !entity.parentId;

    // Inaccessible searches resolve to undefined, and are dropped right after
    const loadSearch = entity => savedSearches.get(entity.id).catch(_.noop);

    const usesPattern = ssearch =>
      ssearch.searchSource.get('index').id === pattern.id;

    return Bluebird.resolve(ontologyModel.getEntityList())
      .filter(isRootSearchEntity)
      .map(loadSearch)
      .filter(_.identity)
      .then(ssearches => {
        args.rootSavedSearch = _.find(ssearches, usesPattern);
        return args;
      });
  }

  /**
   * Non-aggregatable fields cannot be visualized effectively in most cases.
   *
   * However, when a non-aggregatable field has an aggregatable one among its
   * multifields, the latter is used in its place: `fieldStats.field` is
   * replaced, while `fieldStats.origField` is left untouched.
   *
   * @param {Object} args - procedure state; reads `index` and `workStats`
   * @returns {Object} the same `args`
   */
  function useMultifieldsForNonAggregatables(args) {
    const { index, workStats } = args;

    // Multifields are resolved straight from the index pattern, since the
    // stand-ins referenced by the parent field don't look complete.
    const toIndexField = standIn => index.fields.byName[standIn.name];
    const isAggregatable = candidate => candidate && candidate.aggregatable;

    workStats
      .filter(fieldStats => !fieldStats.field.aggregatable)
      .forEach(fieldStats => {
        const standIns = fieldStats.field.multifields || [];
        const replacement = _.find(_.map(standIns, toIndexField), isAggregatable);

        if (replacement) { fieldStats.field = replacement; }
      });

    return args;
  }


  // Filtering

  /**
   * Builds a predicate telling whether a field is a multifield - that is, an
   * alternate representation - of some parent field that is already in the
   * supplied fields list.
   *
   * Names are kept in a Set rather than a plain object, so that fields named
   * after Object.prototype members ('constructor', 'toString', ...) are not
   * mistaken for multifields.
   *
   * @param {Object} args - procedure state; reads `workStats`
   * @returns {function(Object): boolean} predicate over field stats, checking
   *    `fieldStats.origField.name`
   */
  function makeIsMultifield(args) {
    const multifieldNames = new Set();

    args.workStats.forEach(fieldStats => {
      // Not every field declares multifields
      (fieldStats.origField.multifields || []).forEach(multifield => {
        if (multifield && multifield.name) {
          multifieldNames.add(multifield.name);
        }
      });
    });

    return fieldStats => multifieldNames.has(fieldStats.origField.name);
  }

  /**
   * Builds a predicate telling whether a field is one of the meta fields of
   * the index pattern (`index.metaFields`).
   *
   * Names are kept in a Set rather than a plain object, so that fields named
   * after Object.prototype members ('constructor', 'toString', ...) are not
   * mistaken for meta fields.
   *
   * @param {Object} args - procedure state; reads `index.metaFields`
   * @returns {function(Object): boolean} predicate over field stats, checking
   *    `fieldStats.origField.name`
   */
  function makeIsMetaField(args) {
    const metaFieldNames = new Set(args.index.metaFields || []);

    return fieldStats => metaFieldNames.has(fieldStats.origField.name);
  }

  /**
   * Sets `fieldStats.acceptable` on every working field. A field is
   * acceptable when none of the rejection reasons below applies; each
   * applicable reason is appended to the field's notes.
   *
   * @param {Object} args - procedure state; reads `index` and `workStats`
   */
  function markAcceptableFields(args) {
    const isMultifield = makeIsMultifield(args);
    const isMetaField = makeIsMetaField(args);

    args.workStats.forEach(fieldStats => {
      const { field, notes } = fieldStats;

      const rejections = [
        [ !field.searchable,          'Not searchable' ],
        [ !field.aggregatable,        'Not aggregatable' ],
        [ field.scripted,             'Scripted' ],
        [ isMultifield(fieldStats),   'Multifield' ],
        [ isMetaField(fieldStats),    'Meta Field' ]
      ]
        .filter(([ applies ]) => applies)
        .map(([ , reason ]) => reason);

      notes.push(...rejections);
      fieldStats.acceptable = (rejections.length === 0);
    });
  }


  // Queries

  /**
   * Builds the header/body pair making up one search of an msearch request.
   *
   * @param {Object} indexPattern - index pattern, its `title` is the target
   * @param {Object} body - search body
   * @returns {Array} two-element array, `[ header, body ]`
   */
  function msearchRequest(indexPattern, body) {
    const header = {
      index: indexPattern.title,
      ignore_unavailable: true,
      preference: sessionId
    };

    return [ header, body ];
  }

  /**
   * Builds the msearch entry retrieving sample documents where the input
   * field exists (and, for string fields, is not the empty string).
   *
   * The input query is expected to have `bool.must` and `bool.must_not`
   * arrays; it is cloned, so the caller's query is left untouched.
   *
   * @param {Object} index - index pattern
   * @param {Object} field - field to sample
   * @param {Object} query - base bool query
   * @returns {Array} msearch `[ header, body ]` pair
   */
  function samplesRequest(index, field, query) {
    const sampleQuery = _.cloneDeep(query);
    const { bool } = sampleQuery;

    bool.must.push({ exists: fieldSpec(field) });

    if (field.type === 'string') {
      // No empty strings
      bool.must_not.push({ term: { [field.name]: '' } });
    }

    return msearchRequest(index, {
      size: samplesCount,
      query: sampleQuery,
      // Include stored fields not in the original document,
      // plus the original document itself
      _source: true,
      stored_fields: '*'
    });
  }

  /**
   * Casts a sampled value to the type declared by its field: 'string' fields
   * get a string, 'number' fields get a number, any other type gets the
   * value unchanged.
   *
   * @param {Object} field - field, its `type` drives the conversion
   * @param {*} value - sampled value
   * @returns {*} converted value
   */
  function toFieldType(field, value) {
    const { type } = field;

    if (type === 'string') { return '' + value; }
    if (type === 'number') { return +value; }

    return value;
  }

  /**
   * Extracts samples statistics from the response to a samples request.
   *
   * @param {Object} index - index pattern, used to flatten hits
   * @param {Object} fieldStats - stats of the sampled field
   * @param {Object} resp - search response
   * @returns {Object} `nonEmptyDocsCount` (total hits), `docSamples`
   *    (flattened hits) and `fieldSamples` (values of the field in samples)
   */
  function samplesResolve(index, fieldStats, resp) {
    const sampledField = fieldStats.origField;

    const docSamples = _.map(resp.hits.hits, hit => index.flattenHit(hit));
    const fieldSamples = [];

    docSamples.forEach(doc => {
      const rawValue = doc[sampledField.name];

      // Fields that were not in the original document and were NOT stored
      // appear as searchable, but their values are never accessible as
      // samples. They are left out early on.
      if (rawValue === undefined) { return; }

      // Values are cast to the expected field type, which could differ from
      // what's in the original document
      fieldSamples.push(toFieldType(sampledField, rawValue));
    });

    return {
      nonEmptyDocsCount: resp.hits.total,
      docSamples,
      fieldSamples
    };
  }

  /**
   * Builds the msearch entry for a terms aggregation over the input field,
   * returning no hits and the biggest `termsCount` buckets.
   *
   * @param {Object} index - index pattern
   * @param {Object} field - field to aggregate
   * @param {Object} query - query restricting the aggregated documents
   * @returns {Array} msearch `[ header, body ]` pair
   */
  function termsRequest(index, field, query) {
    const terms = _.assign({
      size: termsCount,
      order: { _count: 'desc' }
    }, fieldSpec(field));

    const body = {
      size: 0,
      query,
      aggs: {
        result: { terms }
      }
    };

    return msearchRequest(index, body);
  }

  /**
   * Extracts terms statistics from the response to a terms request.
   *
   * @param {Object} index - index pattern (unused)
   * @param {Object} fieldStats - stats of the aggregated field (unused)
   * @param {Object} resp - search response
   * @returns {Object} `docsCount` (total hits) and `terms` (the buckets of
   *    the 'result' aggregation)
   */
  function termsResolve(index, fieldStats, resp) {
    const docsCount = resp.hits.total;
    const terms = resp.aggregations.result.buckets;

    return { docsCount, terms };
  }

  /**
   * Sends the input requests as a single msearch, and hands each response
   * to `evalResp` along with its position.
   *
   * Responses are evaluated in order up to the first one reporting an
   * error; at that point evaluation stops, and the returned promise rejects
   * with a StatusCodeError wrapping the failed response.
   *
   * @param {Array<{body: Array}>} requests - each `body` is a
   *    `[ header, body ]` pair, as built by msearchRequest
   * @param {function(Object, number)} evalResp - response evaluator
   * @returns {Promise} resolves to undefined once all responses are evaluated
   */
  function evalMSearch(requests, evalResp) {
    // Each request holds a [ header, body ] pair, msearch wants them in a row
    const body = _.flatten(_.map(requests, 'body'));

    return es.msearch({ body })
      .then(({ responses }) => {
        let failure;

        _.forEach(responses, (resp, position) => {
          if (!resp.error) {
            evalResp(resp, position);
            return undefined;
          }

          failure = Promise.reject(new errors.StatusCodeError(resp.status, resp));
          return false;       // Stops the iteration
        });

        return failure;
      });
  }


  /**
   * Finds out, for every working field, whether the field is the endpoint
   * of some relation having the root saved search as domain, and stores the
   * outcome in `fieldStats.related` as a RelatedEnum value:
   *
   *  - notRelated: no relation starts from the field
   *  - eidRelated: at least one relation from the field reaches a virtual
   *    entity
   *  - directRelated: relations start from the field, none reaching a
   *    virtual entity
   *
   * NOTE(review): `args.rootSavedSearch` is dereferenced unconditionally,
   * while getRootSavedSearch leaves it undefined when nothing matches the
   * index pattern - confirm callers always have a root saved search.
   *
   * @param {Object} args - procedure state; reads `rootSavedSearch` and
   *    `workStats`
   * @returns {Promise} resolves once all fields have been marked
   */
  function queryRelated(args) {
    const { rootSavedSearch } = args;

    const reachesVirtualEntity = rel =>
      rel.range.type === EntityType.VIRTUAL_ENTITY;

    return ontologyModel.getRelationsByDomain(rootSavedSearch.id)
      .then(relations => {
        const relsByField = _.groupBy(relations, rel => rel.domain.field);

        args.workStats.forEach(fieldStats => {
          const fieldName = fieldStats.origField.name;

          // Own-property check, so that fields named after Object.prototype
          // members ('constructor', 'toString', ...) don't pass as related
          const fieldRels =
            Object.prototype.hasOwnProperty.call(relsByField, fieldName)
              ? relsByField[fieldName]
              : [];

          if (!fieldRels.length) {
            fieldStats.related = RelatedEnum.notRelated;
          } else if (_.some(fieldRels, reachesVirtualEntity)) {
            fieldStats.related = RelatedEnum.eidRelated;
          } else {
            fieldStats.related = RelatedEnum.directRelated;
          }
        });
      });
  }

  // Data types for which queryFieldStats adds a terms aggregation request,
  // indexed by name for direct lookup
  const termTypes = _.indexBy(['boolean', 'keyword', 'ip']);

  /**
   * Determines the data type of a field and stores it in
   * `fieldStats.dataType`. This is the field type itself, except for
   * 'string' fields that are told apart into 'text' (analyzed) and
   * 'keyword' (not analyzed).
   *
   * @param {Object} args - procedure state (unused)
   * @param {Object} fieldStats - stats of the field; reads `field`
   * @returns {string|Promise<string>} the data type - straight away for
   *    non-string fields, as a promise for string ones
   */
  function queryDataType(args, fieldStats) {
    const { field } = fieldStats;
    const { type } = field;

    if (type === 'string') {
      return queryIsAnalyzed(mappings, field)
        .then(analyzed => {
          const stringType = analyzed ? 'text' : 'keyword';

          fieldStats.dataType = stringType;
          return stringType;
        });
    }

    fieldStats.dataType = type;
    return type;
  }

  /**
   * Queries the index for the statistics of a field, and merges them into
   * its stats object. Samples are always requested; terms are requested
   * too when the field's data type is one of `termTypes`.
   *
   * @param {Object} args - procedure state; reads `index` and `query`
   * @param {Object} fieldStats - stats of the field; reads `field` and
   *    `dataType`
   * @returns {Promise} settles with the underlying msearch evaluation
   */
  function queryFieldStats(args, fieldStats) {
    const { index, query } = args;
    const { field } = fieldStats;

    const samples = {
      body: samplesRequest(index, field, query),
      resolve: samplesResolve
    };

    const requests = termTypes[fieldStats.dataType]
      ? [ samples, { body: termsRequest(index, field, query), resolve: termsResolve } ]
      : [ samples ];

    return evalMSearch(requests, (resp, position) => {
      const { resolve } = requests[position];
      return Object.assign(fieldStats, resolve(index, fieldStats, resp));
    });
  }

  /**
   * Generates the saved visualization for a field, and stores it in
   * `fieldStats.sVis`. The visualization is made on top of the input saved
   * search, falling back to the root one.
   *
   * @param {Object} args - procedure state; reads `query`, `savedSearch`
   *    and `rootSavedSearch`
   * @param {Object} fieldStats - stats of the field; reads `field`
   * @returns {Promise} resolves to the first generated visualization
   */
  function querySavedVis(args, fieldStats) {
    const ssearch = args.savedSearch || args.rootSavedSearch;

    const visOptions = {
      query: args.query,
      addSirenDataTable: false,
      addSirenMultiChart: false
    };

    return visMaker
      .makeSavedVisualizations(ssearch, [ fieldStats.field ], visOptions)
      .then(savedVises => {
        const sVis = savedVises[0];

        fieldStats.sVis = sVis;
        return sVis;
      });
  }


  // Ranking Helpers

  /**
   * Sets `fieldStats.hasDuplicate` on 'text' and 'keyword' fields.
   *
   * String fields can sometimes be imported from a datasource both as keyword
   * and text types, but *not* as multi-field. This tries to identify these
   * duplicate string field cases: a field has a duplicate when some field of
   * the other string type holds the very same value in every document
   * sampled for it.
   *
   * @param {Object} args - procedure state; reads `workStats`
   */
  function makeHasDuplicate(args) {
    function searchDupsFor(fieldType, searchType) {
      const searchFields = _.map(
        _.filter(args.workStats, fieldStats => fieldStats.dataType === searchType),
        'origField');

      args.workStats
        .filter(fieldStats => fieldStats.dataType === fieldType)
        .forEach(fieldStats => {
          const ownName = fieldStats.origField.name;

          // All fields of the other type start as candidates, and get ruled
          // out as soon as one sampled document shows a different value
          const candidateDups = _.indexBy(searchFields, 'name');

          // NOTE: Not using lodash chaining with forEach due to subtle
          // chainable change between lodash v3 and v4

          fieldStats.docSamples.forEach(doc => {
            searchFields.forEach(sField => {
              if (doc[sField.name] !== doc[ownName]) {
                delete candidateDups[sField.name];
              }
            });
          });

          fieldStats.hasDuplicate = !_.isEmpty(candidateDups);
        });
    }

    searchDupsFor('text', 'keyword');
    searchDupsFor('keyword', 'text');
  }


  // Ranking Functions

  /**
   * Discriminates suspect field names, such as those starting with
   * underscores.
   *
   * @param {Object} fieldStats - reads `origField.name`
   * @returns {number} 0.2 for names starting with '_', 1 otherwise
   */
  function scoreName(fieldStats) {
    const { name } = fieldStats.origField;
    const isSuspect = name.startsWith('_');

    return isSuspect ? 0.2 : 1;
  }

  /**
   * Fields with more non-empty documents should score higher.
   *
   * @param {Object} fieldStats - reads `nonEmptyDocsCount`
   * @returns {number} 0 for counts up to 1, `1 + ln(count)` otherwise
   */
  function scoreDocsCount(fieldStats) {
    const { nonEmptyDocsCount: count } = fieldStats;

    if (count <= 1) { return 0; }

    return 1 + Math.log(count);
  }

  /**
   * Scores a field on the distribution of its terms, as returned by the
   * terms aggregation (buckets are requested by decreasing document count,
   * so the first bucket is the biggest term). Fields without terms data
   * score 1.
   *
   * A field scores 0, with an explanatory note, when:
   *  - it has at most one term
   *  - even its biggest term appears in a single document
   *  - its biggest term covers at least `termsHighRatioCut` of documents
   *  - its biggest term covers at least `termsHighRatioCut` of the
   *    documents covered by all returned terms
   *
   * When the returned terms together cover at most `termsLowRatioScore` of
   * documents, the score decreases linearly with the coverage, down to 0
   * at `termsLowRatioCut`.
   *
   * @param {Object} fieldStats - reads `terms` and `docsCount`, appends to
   *    `notes`
   * @returns {number} score in the [0, 1] range
   */
  function scoreTerms(fieldStats) {
    const { terms, docsCount, notes } = fieldStats;

    if (!terms) { return 1; }

    if (terms.length <= 1) {
      notes.push('Field has only one term');
      return 0;
    }

    const biggestTermDocs = terms[0].doc_count;

    if (biggestTermDocs === 1) {
      notes.push('All values are unique');
      return 0;
    }

    const biggestTermRelSize = biggestTermDocs / docsCount;

    if (biggestTermRelSize >= termsHighRatioCut) {
      const perc = Math.floor(100 * biggestTermRelSize);
      notes.push(`Same term in ${perc}% of documents`);
      return 0;
    }

    const termsDocs = terms.reduce((total, term) => total + term.doc_count, 0);
    const termsRelSize = termsDocs / docsCount;

    if (termsRelSize <= termsLowRatioScore) {
      const perc = Math.max(1, Math.ceil(100 * termsRelSize));

      // The number of terms comes from the aggregation size, rather than
      // being spelled out, so that the note follows the actual request
      notes.push(`Less than ${perc}% of documents in ${termsCount} terms`);

      return Math.max(0,
        (termsRelSize - termsLowRatioCut) /
        (termsLowRatioScore - termsLowRatioCut));
    }

    if (biggestTermRelSize / termsRelSize >= termsHighRatioCut) {
      notes.push('Flat distribution after one big term');
      return 0;
    }

    return 1;
  }

  /**
   * Scores a field on the type of the visualization generated for it.
   * Fields without a visualization, or with one whose type has no score
   * assigned, score 0 and get an explanatory note.
   *
   * @param {Object} fieldStats - reads `sVis`, appends to `notes`
   * @returns {number} score of the visualization type
   */
  function scoreGeneratedVis(fieldStats) {
    const { sVis } = fieldStats;

    if (!sVis) {
      fieldStats.notes.push('No suitable visualization');
      return 0;
    }

    // TODO: Add a 'scores' parameter to the visualizations generation function,
    // that it can use to report the vis's desirability, on its own merits.
    //
    // Use linear interpolation between vis score tiers to incorporate that number.

    const scoreTiers = [
      { score: 4, types: [ visTypes.LINE, visTypes.TILE_MAP ] },
      { score: 3, types: [ visTypes.PIE, visTypes.HISTOGRAM, visTypes.TAGCLOUD ] },
      { score: 1, types: [ visTypes.TABLE ] }
    ];

    const visName = sVis.vis.type.name;
    const tier = scoreTiers.find(({ types }) => types.indexOf(visName) !== -1);

    if (tier) { return tier.score; }

    fieldStats.notes.push('No score for associated visualization');
    return 0;
  }

  /**
   * Fields with unusual values should score lower.
   *
   * This currently applies to 'text' fields only: full-text fields with
   * many digits tend to be harder to read, so their score goes linearly
   * from 1 (no digits in samples) down to 0.5 (samples made of digits
   * only). Any other data type scores 1.
   *
   * @param {Object} fieldStats - reads `dataType` and `fieldSamples`
   * @returns {number} score in the [0.5, 1] range, or 0 for text fields
   *    whose samples are all empty
   */
  function scoreUnusualSamples(fieldStats) {
    if (fieldStats.dataType !== 'text') { return 1; }

    const notDigitsRe = /\D/g;
    const { fieldSamples: samples } = fieldStats;

    const sumLengths = samples.reduce(
      (total, sample) => total + sample.length, 0);

    // Should never happen: non-stored fields should have already been
    // removed, and samples are non-empty by explicit request. Kept just to
    // be extra sure, it also prevents a division by zero below.
    if (!sumLengths) { return 0; }

    const sumDigitCounts = samples.reduce(
      (total, sample) => total + sample.replace(notDigitsRe, '').length, 0);

    const digitsRatio = sumDigitCounts / sumLengths;

    return (1 - digitsRatio) * 1 + digitsRatio * 0.5;
  }

  /**
   * In case of a string field with duplicate, we have to choose which field
   * we prefer: for fields whose values are generally multi-word we prefer
   * the text one, for few-words values we'll take the keyword one.
   *
   * The discarded duplicate scores 0 (hence gets filtered out) and is given
   * an explanatory note; the preferred one, as well as any field without
   * duplicate, scores 1.
   *
   * @param {Object} fieldStats - reads `hasDuplicate`, `dataType` and
   *    `fieldSamples`, appends to `notes`
   * @returns {number} 0 for the discarded duplicate, 1 otherwise
   */
  function scoreStringTermsAdequacy(fieldStats) {
    // TODO: Use terms aggregation rather than samples

    if (!fieldStats.hasDuplicate) { return 1; }

    const duplicateNote = 'Duplicate string field';
    const nonWordRe = /\W+/;
    const wordsBoundary = 5;

    const { dataType, fieldSamples: samples } = fieldStats;

    const sumWordCounts = samples.reduce((total, sample) =>
      total + sample.split(nonWordRe).filter(Boolean).length, 0);

    // NaN without samples - no comparison below holds then, and the field
    // is kept
    const avgWordCounts = sumWordCounts / samples.length;

    // Each data type is checked on its own: a text field passing its check
    // must NOT go through the keyword one, or it would always be discarded
    const discarded =
      (dataType === 'text' && avgWordCounts < wordsBoundary) ||
      (dataType === 'keyword' && avgWordCounts >= wordsBoundary);

    if (discarded) {
      fieldStats.notes.push(duplicateNote);
      return 0;
    }

    return 1;
  }

  /**
   * Scores a field on its relations: endpoints of direct relations are
   * penalized, fields related to an entity identifier are promoted. Both
   * cases get an explanatory note.
   *
   * @param {Object} fieldStats - reads `related`, appends to `notes`
   * @returns {number} 0.5 for direct relation endpoints, 2 for fields
   *    related to an entity identifier, 1 otherwise
   */
  function scoreRelation(fieldStats) {
    const { related } = fieldStats;

    if (related === RelatedEnum.directRelated) {
      fieldStats.notes.push('Direct relation endpoint');
      return 0.5;
    }

    if (related === RelatedEnum.eidRelated) {
      fieldStats.notes.push('Related to Entity Identifier');
      return 2;
    }

    return 1;
  }

  // Scoring functions run by makeScores on every working field. Each one is
  // called as fn(fieldStats, args); the resulting scores are combined by
  // composeScores, using the associated weight as exponent.
  const scoreFunctions = [
    { weight: 1, fn: scoreName },
    { weight: 1, fn: scoreDocsCount },
    { weight: 1, fn: scoreTerms },
    { weight: 1, fn: scoreGeneratedVis },
    { weight: 1, fn: scoreUnusualSamples },
    { weight: 1, fn: scoreStringTermsAdequacy },
    { weight: 1, fn: scoreRelation },
  ];


  // Reporting

  /**
   * Creates the sort context of the report, and stores it in
   * `args.report.sort`. Each column of the report is associated to the
   * function extracting its sorting value out of a field stats object.
   *
   * @param {Object} args - procedure state; reads `report`
   * @returns {Object} the same `args`
   */
  function makeReportSortContext(args) {
    const { report } = args;

    report.sort = sortContext({
      acceptable:   ({ acceptable }) => acceptable,
      type:         ({ dataType, field }) => dataType || field.type,
      name:         ({ origField }) => origField.displayName.toLowerCase(),
      score:        ({ score }) => score,
      chart:        ({ sVis }) => sVis && sVis.vis.type.title,
      notes:        ({ notes }) => notes.join()
    });

    return args;
  }

  /**
   * Adds report-only data to all field stats (`scoreStr`, `typeField`, and
   * the 'Analyzed' note on text fields), then replaces `args.stats` with a
   * copy sorted by: acceptable fields first, ascending type, descending
   * score, ascending name.
   *
   * @param {Object} args - procedure state; reads `report.sort` and `stats`
   * @returns {Object} the same `args`
   */
  function prepareStatsForReporting(args) {
    const { sort } = args.report;

    args.stats.forEach(fieldStats => {
      fieldStats.scoreStr = `${_.round(fieldStats.score, 3)}`;

      // Nameless copy of the original field
      fieldStats.typeField = Object.assign(_.clone(fieldStats.origField), { name: '' });

      if (fieldStats.dataType === 'text') { fieldStats.notes.push('Analyzed'); }
    });

    const ordering = [
      { column: sort.acceptable,  order: 'desc' },
      { column: sort.type,        order: 'asc' },
      { column: sort.score,       order: 'desc' },
      { column: sort.name,        order: 'asc' }
    ];

    args.stats = _.sortByOrder(
      args.stats,
      _.map(_.map(ordering, 'column'), 'sorter'),
      _.map(ordering, 'order'));

    return args;
  }


  // Main Algorithm

  /**
   * Preliminary filtering for processable fields: marks all working fields
   * as acceptable or not, then keeps the acceptable ones only in
   * `args.workStats`.
   *
   * @param {Object} args - procedure state
   * @returns {Object} the same `args`
   */
  function filterAcceptableFields(args) {
    markAcceptableFields(args);

    const acceptableStats = args.workStats.filter(fieldStats => fieldStats.acceptable);
    args.workStats = acceptableStats;

    return args;
  }

  /**
   * Loads the current query associated to the saved search, and stores it
   * in `args.query` so that it applies to subsequent queries made by the
   * autoselect procedure.
   *
   * The stored query always is a bool query with `must` and `must_not`
   * clauses: a non-bool query gets wrapped as the only `must` clause, and
   * an empty bool query is used when there is no saved search.
   *
   * @param {Object} args - procedure state; reads `savedSearch`
   * @returns {Promise<Object>} resolves to the same `args`
   */
  function makeFilteringQuery(args) {
    const emptyQuery = { bool: { must: [], must_not: [] } };
    const { savedSearch } = args;

    let pendingQuery;

    if (savedSearch) {
      pendingQuery = savedSearch.searchSource._flatten()
        .then(req => req.body.query);
    } else {
      pendingQuery = Promise.resolve(emptyQuery);
    }

    return pendingQuery.then(query => {
      const boolQuery = query.bool ? query : { bool: { must: [ query ] } };

      // Makes sure that clauses missing in the query are there, as empty
      args.query = _.merge(boolQuery, emptyQuery);
      return args;
    });
  }

  /**
   * Makes all necessary queries to evaluate fields to select, showing
   * progress along the way.
   *
   * Relations are queried first, once for the whole index pattern; then,
   * for each working field in turn, its data type, statistics and
   * visualization are queried in series.
   *
   * @param {Object} args - procedure state; reads `workStats`
   * @returns {Promise<Object>} resolves to the same `args`
   */
  function makeQueries(args) {
    const perFieldQueries = [ queryDataType, queryFieldStats, querySavedVis ];

    const describeField = fieldStats =>
      `Testing "${fieldStats.origField.displayName}"`;

    const queryField = (fieldStats, progress) =>
      promiseMapSeries(perFieldQueries, fn => fn(args, fieldStats, progress));

    const relationsOperation = {
      val: args,
      textMap: _.constant('Querying index pattern'),
      valueMap: queryRelated
    };

    const fieldOperations = args.workStats.map(fieldStats => ({
      val: fieldStats,
      textMap: describeField,
      valueMap: queryField
    }));

    const operations = [ relationsOperation ].concat(fieldOperations);

    return progressMap(operations, {
      title: 'Autoselect Top 10 (beta)',
      valueMap: (op, o, progress) => op.valueMap(op.val, progress),
      stepMap: (op, o, progress) => op.textMap(op.val),
    })
      .then(() => args);
  }

  /**
   * We'll filter non-stored fields early on. Non-stored fields can be used
   * in searches and aggregations; however, they aren't available in hits
   * (they don't show in doc tables).
   *
   * A field is taken as non-stored when none of its samples has a value;
   * such fields are dropped from `args.workStats` and get an explanatory
   * note.
   *
   * @param {Object} args - procedure state; reads `workStats`
   * @returns {Object} the same `args`
   */
  function filterNonStoredFields(args) {
    const hasValue = sample => sample !== undefined;

    const [ storedStats, nonStoredStats ] = _.partition(args.workStats,
      fieldStats => _.some(fieldStats.fieldSamples, hasValue));

    nonStoredStats.forEach(fieldStats => {
      fieldStats.notes.push('Field not stored in documents');
    });

    args.workStats = storedStats;
    return args;
  }

  /**
   * Calculates additional data to be used when calculating scores.
   *
   * Currently this only flags duplicate string fields, through
   * makeHasDuplicate (which sets `fieldStats.hasDuplicate`).
   *
   * @param {Object} args - procedure state
   * @returns {Object} the same `args`
   */
  function makeScoreHelpers(args) {
    makeHasDuplicate(args);
    return args;
  }

  /**
   * Calculates a score for each processable field. Scores are calculated
   * as a composition of several scoring functions: the individual outcomes
   * are kept in `fieldStats.individualScores`, the composed one goes to
   * `fieldStats.score`.
   *
   * @param {Object} args - procedure state; reads `workStats`
   * @returns {Object} the same `args`
   */
  function makeScores(args) {
    for (const fieldStats of args.workStats) {
      const individualScores = scoreFunctions.map(scoring => ({
        weight: scoring.weight,
        score: scoring.fn(fieldStats, args)
      }));

      fieldStats.individualScores = individualScores;
      fieldStats.score = composeScores(individualScores);
    }

    return args;
  }

  /**
   * Sorts and filters fields after scores have been calculated. Fields
   * with score 0 are essentially explicitly to be NOT selected, so they
   * are dropped; the remaining ones are sorted by decreasing score.
   *
   * @param {Object} args - procedure state; reads `workStats`
   * @returns {Object} the same `args`
   */
  function sortAndPruneByScore(args) {
    const scoredStats = _.filter(args.workStats, 'score');
    const byIncreasingScore = _.sortBy(scoredStats, 'score');

    args.workStats = byIncreasingScore.reverse();
    return args;
  }

  /**
   * Interleaves the array of ranked fields by type (`field.type`), under
   * the constraint that candidate fields for interleaving must have
   * a score at least half that of current field.
   *
   * This tends to result in a more complete picture of the index pattern,
   * making sure that the resulting selection will not have fields of a single
   * type.
   *
   * Does nothing unless the `interleaveByDatatype` option is set. The groups
   * of fields by type are also stored in `args.groupsHash`; note that their
   * arrays are consumed (emptied) while interleaving.
   *
   * In the procedure, this runs right after sortAndPruneByScore, so fields
   * come in by decreasing score; groups keep that order, hence the head of
   * each group is its best scoring field.
   *
   * @param {Object} args - procedure state; reads `interleaveByDatatype` and
   *    `workStats`
   * @returns {Object} the same `args`, with `workStats` reordered
   */
  function interleaveByDatatype(args) {
    if (!args.interleaveByDatatype) { return args; }

    const { workStats } = args;
    if (!workStats.length) { return args; }

    const groupsHash = _.groupBy(workStats, fieldStats => fieldStats.field.type);
    const groups = _.values(groupsHash);

    args.groupsHash = groupsHash;

    const result = [];

    // Iteration starts one position before the group of the first field,
    // with a negative score that no candidate can fail to match: the first
    // cycle step below will then land on that group, and pick its head
    let currG = groups.indexOf(groupsHash[workStats[0].field.type]) - 1;
    let currScore = -1;

    let nextG = currG;
    let nextGroup = null;
    let nextScore = currScore;

    while (result.length !== workStats.length) {              // Must process all fields
      // Candidates are rejected while scoring less than half the current
      // score; once the cycle is back to the current group, its head is
      // taken regardless
      do {                                                    // Cycle groups from current
        nextG = (nextG + 1) % groups.length;
        nextGroup = groups[nextG];
        nextScore = nextGroup[0].score;
      } while (                                               // Pick first ok candidate
        nextG !== currG &&
        2 * nextScore < currScore
      );

      result.push(nextGroup.shift());                         // Pull & output group head

      if (!nextGroup.length) {                                // Drop group if empty
        _.pullAt(groups, nextG);
        // Step back to the previous group, so that the next cycle step
        // lands on the group following the dropped one
        nextG = (nextG + groups.length - 1) % groups.length;
      }

      currG = nextG;                                          // Update iterators
      currScore = nextScore;
    }

    args.workStats = result;
    return args;
  }

  /**
   * Selects only the first handful of fields sorted by relevance: the first
   * `args.takeCount` working fields get their `selected` flag set.
   *
   * @param {Object} args - procedure state; reads `workStats` and `takeCount`
   * @returns {Object} the same `args`
   */
  function makeSelection(args) {
    const topStats = _.take(args.workStats, args.takeCount);

    topStats.forEach(fieldStats => {
      fieldStats.selected = true;
    });

    return args;
  }

  /**
   * Displays the final report of the procedure, listing all input fields
   * (not just the working ones) so that the selection can be reviewed.
   *
   * Does nothing unless the `showReport` option is set. Once the report is
   * confirmed, the listed stats become the working stats; when it is not,
   * the returned promise rejects with no reason.
   *
   * @param {Object} args - procedure state; reads `showReport` and `stats`
   * @returns {Object|Promise<Object>} the same `args` - straight away when
   *    no report is to be shown, as a promise otherwise
   */
  function showReport(args) {
    if (!args.showReport) { return args; }

    args.report = {};

    makeReportSortContext(args);
    prepareStatsForReporting(args);

    // The stats list must be read only now: prepareStatsForReporting
    // replaces `args.stats` with a sorted copy, that a reference taken
    // earlier would miss
    const { stats } = args;

    return quickDashModals.guessReport({
      stats,
      sort: args.report.sort,
      allSelected: allSelected(stats)
    })
      .show()
      .then(ok => ok || Promise.reject())
      .then(() => {
        args.workStats = stats;
        return args;
      });
  }

  /**
   * Finalizes the fields selection and outputs it.
   *
   * @param {Object} args - procedure state; reads `workStats`
   * @returns {Array<Object>} original fields (`origField`) of the working
   *    stats flagged as selected
   */
  function toResult(args) {
    const selectedStats = _.filter(args.workStats, 'selected');

    return _.map(selectedStats, 'origField');
  }


  return function guessFields(index, fields, options = {}) {
    _.defaults(options, {
      savedSearch: null,
      takeCount: 10,
      interleaveByDatatype: true,
      showReport: true
    });
    const args = _.assign({
      index,
      stats: fields.map(field => ({
        origField: field,
        field,
        score: 0,
        notes: []
      }))
    }, options);

    // A working stats list will be filtered as needed to prevent running scores over
    // inappropriate fields. Field stats objects will remain the same, so changes to
    // work stats will be reported to the output stats list, too.
    args.workStats = args.stats;

    return Promise.resolve(args)
      .then(getRootSavedSearch)
      .then(useMultifieldsForNonAggregatables)
      .then(filterAcceptableFields)
      .then(makeFilteringQuery)
      .then(makeQueries)
      .then(filterNonStoredFields)
      .then(makeScoreHelpers)
      .then(makeScores)
      .then(sortAndPruneByScore)
      .then(interleaveByDatatype)
      .then(makeSelection)
      .then(showReport)
      .then(toResult)
      .catch(err => { err && notify.error(err); });
  };
}
