import { RelationsHelperProvider } from './relations_helper';

import selectIndexPatternsTemplate from './fingerprints_select.html';
import { allSelected } from 'ui/kibi/directives/tristate_checkbox';

import './quickrelations_modal.less';
import 'ui/kibi/styles/table_sticky.less';

import { fieldSpec, queryIsAnalyzed } from 'ui/kibi/utils/field';
import { promiseMapSeries } from 'ui/kibi/utils/promise';

import { BaseModalProvider } from 'ui/kibi/modals/base_modal';
import { ProgressMapProvider } from 'ui/kibi/modals/progress_map';
import { SavedObjectsClientProvider } from 'ui/saved_objects';
import { IndexPatternsFieldProvider } from 'ui/index_patterns/_field';

import Bluebird from 'bluebird';
import _ from 'lodash';


export function FingerprintsProvider(
  Private, indexPatterns, mappings, jdbcDatasources, dataModel,
  createNotifier) {

  // Collaborators instantiated from their providers through Private()
  const relationsHelper = Private(RelationsHelperProvider);
  const baseModal = Private(BaseModalProvider);
  const progressMap = Private(ProgressMapProvider);
  const savedObjectsClient = Private(SavedObjectsClientProvider);
  const Field = Private(IndexPatternsFieldProvider);

  // Notifier used for the warnings/errors raised by this provider
  const notify = createNotifier({
    location: 'Fingerprints'
  });

  // Constants

  /**
   * Current version of the generated fingerprints. If this number differs from
   * the one retrieved as saved object, clients should ignore/regenerate the
   * saved fingerprints data.
   */
  const currentVersion = 5;

  // Concurrency limit handed to Bluebird's map() by filterFields() and
  // saveFingerprints()
  const concurrency = 50;

  // 'size' of each per-field search request, i.e. how many hits are sampled
  const hitsCount = 100;
  // 'size' and 'shard_size' of the per-field terms aggregation
  const termsCount = 100;

  // Progress steps assigned to the buildFingerprints stage; they are split
  // evenly between the processable fields
  const fpSteps = 100;


  // Local utility functions

  /**
   * Resolves the field name to look up in a document: the parent's name when
   * the field data has a parent, the field's own name otherwise.
   */
  function sourceDocName(fieldData) {
    if (fieldData.parent) {
      return fieldData.parent.fName;
    }

    return fieldData.fName;
  }


  /**
   * Prepares the entries shown by the input modal, one per index pattern.
   *
   * Each entry is preselected and flagged as 'multiple' when its pattern
   * resolves to more than one index. Entries are stored in args.indexEntries
   * and the (mutated) args object is returned.
   */
  function buildEntries(args) {
    const entries = [];

    for (const indexPattern of args.indexPatterns) {
      const indicesCount = args.indicesByPattern[indexPattern.title].length;

      entries.push({
        indexPattern,
        multiple: indicesCount > 1,
        selected: true
      });
    }

    args.indexEntries = entries;
    return args;
  }

  /**
   * Asks for the maximum number of indices to use per pattern, but only when
   * at least one selected entry spans multiple indices.
   *
   * Returns args right away when no such entry exists; otherwise returns a
   * promise resolving to args once args.maxIndicesPerPattern has been set.
   */
  function getMaxIndicesPerPattern(args) {
    const { indexEntries, indicesByPattern } = args;

    const hasSelectedMultiple = _(indexEntries).filter('selected').some('multiple');
    if (!hasSelectedMultiple) { return args; }

    const patterns = _.map(indexEntries, 'indexPattern');

    return relationsHelper
      .chooseMaxIndicesPerPattern(patterns, indicesByPattern)
      .then(function storeMaximum(maxIndicesPerPattern) {
        args.maxIndicesPerPattern = maxIndicesPerPattern;
        return args;
      });
  }

  /**
   * Shapes the modal state into the input expected by calculate(): the index
   * patterns of the selected entries plus the chosen indices limit.
   */
  function formatInputState(args) {
    const selectedEntries = _.filter(args.indexEntries, 'selected');

    return {
      indexPatterns: _.map(selectedEntries, 'indexPattern'),
      maxIndicesPerPattern: args.maxIndicesPerPattern
    };
  }

  /**
   * Resolves, through the relations helper, the indices associated to each
   * input index pattern and stores the result in args.indicesByPattern.
   *
   * Returns a promise resolving to the (mutated) args object.
   */
  function getIndicesByPattern(args) {
    return relationsHelper
      .getIndicesByPattern(args.indexPatterns)
      .then(indicesByPattern => _.assign(args, { indicesByPattern }));
  }

  /**
   * Retrieves the list of virtual indices. It's important because some
   * categorizations are forced for virtual indices (e.g. single_value always true).
   *
   * The ids of the retrieved virtual indices are stored in args.virtualIndices.
   * A failed retrieval is not fatal: a warning is notified and an empty list is
   * stored instead.
   *
   * @param {Object} args  Shared procedure state, mutated in place
   * @returns {Promise}    Resolves to args
   */
  function getVirtualIndices(args) {
    return jdbcDatasources.listVirtualIndices()
      .then(virtualIndices => {
        args.virtualIndices = _.map(virtualIndices, '_id');
        return args;
      }, () => {
        // Couldn't retrieve the list of virtual indices. This could be due to
        // ACL, in which case it's possible ACL was misconfigured because the
        // dataset is completely ES-based.
        //
        // We're going to assume that no virtual indices exist and warn about
        // it. If some virtual indices *do* exist, we should encounter a
        // separate error later on when the single_value aggregation will be
        // used.

        notify.warning(`Couldn't retrieve the list of configured virtual indices - assuming no virtual index`);

        args.virtualIndices = [];
        return args;
      });
  }

  /**
   * Builds the mediator used for msearch bulk requests and stores it in
   * args.mediator. Returns a promise resolving to args.
   */
  function makeRequestsMediator(args) {
    args.mediator = relationsHelper.makeRequestsMediator(
      args.indicesByPattern, args.maxIndicesPerPattern);

    return Promise.resolve(args);
  }

  /**
   * Builds args.dataset, the structure collecting per-index and per-field
   * metadata used throughout the procedure:
   *
   *  - indices:      index data keyed by index pattern title
   *  - fieldsById:   field data keyed by '<pattern title>//<field name>'
   *  - fieldsByType: field data grouped by field type
   *
   * Unnamed, meta, non-searchable, non-aggregatable and scripted fields are
   * left out. Returns the (mutated) args object.
   */
  function buildDataset(args) {
    const { indexPatterns, indicesByPattern, virtualIndices } = args;

    const indices = {};
    const fieldsById = {};
    args.dataset = { indices, fieldsById };

    for (const pattern of indexPatterns) {
      const iName = pattern.title;

      const indexData = {
        orig: pattern,
        fields: {},
        save: true,
        virtual: _.intersection(indicesByPattern[iName], virtualIndices).length > 0
      };
      indices[iName] = indexData;

      // Lookup hash of the pattern's meta field names
      const metaFieldLookup = _(pattern.metaFields)
        .indexBy()
        .mapValues(_.constant(true))
        .value();

      for (const field of pattern.fields) {
        const fName = field.name;

        if (!fName || metaFieldLookup.hasOwnProperty(fName)) { continue; }
        if (!field.searchable || !field.aggregatable || field.scripted) { continue; }

        const fieldData = {
          iName,
          fName,
          type: field.type,
          orig: field,
          id: iName + '//' + fName,
          processable: true,
          processed: false
        };

        indexData.fields[fName] = fieldData;
        fieldsById[fieldData.id] = fieldData;
      }

      relationsHelper.addMultifieldsHierarchy(indexData);
    }

    args.dataset.fieldsByType = _.groupBy(fieldsById, 'type');

    return args;
  }

  /**
   * Flags as not 'processable' every string field whose query is analyzed;
   * unprocessable fields are skipped by the following steps.
   *
   * Returns a promise resolving to args once all string fields were checked.
   */
  function filterFields(args) {
    const stringFields = args.dataset.fieldsByType.string || [];

    return Bluebird
      .map(stringFields, field =>
        queryIsAnalyzed(mappings, field.orig).then(isAnalyzed => {
          if (isAnalyzed) { field.processable = false; }
        }),
      { concurrency })
      .then(() => args);
  }

  /**
   * Stores the documents count of every dataset index in its docCount
   * property.
   *
   * Failed requests are handed to args.onError; when the handler doesn't
   * throw, the related index is marked as not to be saved. Returns a promise
   * resolving to args.
   */
  function countDocuments(args, progress) {
    const { dataset, mediator, onError } = args;

    const requestChunks = _(dataset.indices)
      .keys()
      .map(iName => relationsHelper.msearchRequest(iName, { size: 0 }, {
        context: 'fingerprints-count-docs', iName
      }))
      .thru(mediator.array)
      .value();

    return promiseMapSeries(requestChunks, reqChunk =>
      relationsHelper.cancelableMSearch(reqChunk, progress.canceledPromise)
        .each((resp, position) => {
          const request = reqChunk[position];
          const indexData = dataset.indices[request.iName];

          if (resp.error) {
            onError(request, resp);
            indexData.save = false;
          } else {
            indexData.docCount = resp.hits.total;
          }
        }))
      .then(() => args);
  }

  /**
   * Values categorization: pairs a tag with a predicate telling whether a
   * string is matched by the supplied regular expression.
   */
  function regexMatcher(tag, re) {
    const match = str => re.test(str);

    return { tag, match };
  }

  // One matcher per entry of relationsHelper.defaultRegExpStrings: the entry
  // key becomes the matcher's tag, the entry value is compiled to a RegExp.
  const stringCategories = _.map(relationsHelper.defaultRegExpStrings,
    (reStr, tag) => regexMatcher(tag, new RegExp(reStr)));

  /**
   * Returns a hash holding a true value for the tag of each string category
   * matched by *all* the bucket keys. No tags are returned for empty buckets.
   */
  function categorizeStringTerms(buckets) {
    const tags = {};
    if (_.isEmpty(buckets)) { return tags; }

    const keys = _.map(buckets, 'key');

    _.forEach(stringCategories, ({ tag, match }) => {
      if (_.every(keys, match)) { tags[tag] = true; }
    });

    return tags;
  }

  // Type-specific terms categorization functions, keyed by field type. Types
  // with no entry get no additional tags.
  const categorizeTermsByType = {
    string: categorizeStringTerms
  };

  /**
   * Computes the fingerprints of the processable fields of a single type.
   *
   * One search request is built per field, requests are grouped in chunks by
   * the requests mediator, and chunks are evaluated in series. Results are
   * written on the field data objects (processed, tags, terms, cardinality,
   * range, frequentTermsPerDoc).
   *
   * @param {Object} args
   *    Shared procedure state; dataset, mediator and onError are read
   * @param {Object} progress
   *    Progress handle; notifyStart() and canceledPromise are used
   * @param {Object} fieldsByType
   *    Fields to process, grouped by type
   * @param {String} type
   *    Type of the fields to process
   * @param {Number} stepsPerField
   *    Progress steps accounted for each processed field
   * @returns {Promise}
   *    Resolves to args; rejects with no reason as soon as
   *    progress.notifyStart() returns a falsy value
   */
  function fingerprintsBuilder(args, progress, fieldsByType, type, stepsPerField) {
    const { dataset, mediator, onError } = args;

    // Lookup hash of the ES types holding integer numbers
    const isIntegerEsType = _(['long', 'integer', 'short', 'byte'])
      .indexBy().mapValues(_.constant(true)).value();

    // Builds the search request sampling hits and aggregating a single field
    function makeQuery(fieldData) {
      const indexData = dataset.indices[fieldData.iName];
      const sourceFieldName = sourceDocName(fieldData);

      const query = {
        bool: { must: { exists: { field: fieldData.fName } } }
      };

      const aggs = {
        terms: { terms: {
          field: fieldData.fName,
          size: termsCount,
          shard_size: termsCount,
          order: { _count: 'desc' }
        } },
        cardinality: { cardinality: {
          field: fieldData.fName,
          precision_threshold: 100
        } }
      };

      if (fieldData.type === 'string') {
        // No empty strings
        query.bool.must_not = { term: { [fieldData.fName]: '' } };
      }

      if (fieldData.type === 'number' || fieldData.type === 'date') {
        aggs.min = { min: { field: fieldData.fName } };
        aggs.max = { max: { field: fieldData.fName } };
      }

      // The value_count aggregation is requested for non-virtual indices only
      if (!indexData.virtual) {
        aggs.value_count = { value_count: {
          field: fieldData.fName
        } };
      }

      return relationsHelper.msearchRequest(
        fieldData.iName, {
          size: hitsCount,
          _source: sourceFieldName,
          stored_fields: sourceFieldName,
          query,
          aggs
        }, {
          context: 'fingerprints-field-data-get', fieldData
        });
    }

    // Derives tags and attributes of a field from its search response
    function processField(fieldData, result) {
      const indexData = dataset.indices[fieldData.iName];

      fieldData.processed = true;

      // Tags are initialized before the early return below, so that every
      // processed field carries a tags hash - possibly an empty one
      const tags = fieldData.tags = {};

      const { hits, aggregations: aggs } = result;
      if (!aggs) { return; }

      const { buckets } = aggs.terms;

      // Terms contains the first 10 not-empty values in string form
      fieldData.terms = _(buckets).take(10).map('key').value();

      // Cardinality
      fieldData.cardinality = aggs.cardinality.value;

      // Range (numbers and dates)
      if (buckets.length > 0 && aggs.min && aggs.max) {
        fieldData.range = [ aggs.min.value, aggs.max.value ];
      }

      // Relative estimate of frequent terms in the field
      fieldData.frequentTermsPerDoc = hits.total
        ? _.sum(buckets, 'doc_count') / hits.total
        : undefined;

      const notFloatingPoint = fieldData.type !== 'number' || (
        isIntegerEsType[fieldData.orig.esType] &&
        _(buckets).map('key').all(Number.isInteger));
      // Checking terms due to JDBC indices sometimes representing floats as long.
      // See https://github.com/sirensolutions/siren-platform/issues/1774

      // 'unique' when all terms appear in exactly one document. We're
      // excluding floating point numbers because though they are often
      // technically unique, they shouldn't be used for relational purposes
      if (notFloatingPoint && buckets.length > 0 && buckets[0].doc_count === 1) {
        tags.unique = true;

        // NOTE: Since term buckets are first calculated in-shard and then
        // merged, it's possible to have the same term in 2 different shards
        // but not picked out of a shard for merging.
        //
        // So this value actually identifies when values are unique *in-shard*,
        // though in practice the case where this value doesn't extend to the
        // whole index pattern should be quite improbable.
      }

      // 'primary_key' when the field is 'unique' *and* has no null/empty values
      if (tags.unique && indexData.docCount === hits.total) {
        tags.primary_key = true;
      }

      // 'enum' is for when terms are few enough
      const indexDocCount = indexData.docCount;

      if (1 < buckets.length && buckets.length < 30
        && buckets[0].doc_count * buckets.length >= indexDocCount
        && 3 * buckets.length < indexDocCount) {
        tags.enum = true;
      }

      // 'hash_num' is for full-range 64 bit long integers
      if (fieldData.range &&
        (fieldData.range[0] < -Number.MAX_SAFE_INTEGER ||
        Number.MAX_SAFE_INTEGER < fieldData.range[1])) {
        tags.hash_num = true;
      }

      // 'single_value' is when all values contain a single value. Fields where
      // we find some array values are also excluded (we'll take that as an
      // indication that the field could take multiple values, say on future
      // ingested documents).
      const valueCountMatchesDocsCount =
        !aggs.value_count || aggs.value_count.value === hits.total;

      const sourceFieldName = sourceDocName(fieldData);
      const noSampledArrays = !_(hits.hits)
        .map(hit => indexData.orig.flattenHit(hit)[sourceFieldName])
        .some(_.isArray);

      if (valueCountMatchesDocsCount && noSampledArrays) {
        tags.single_value = true;
      }

      // Further tags categorization specific to the field type
      const categorizeTerms = categorizeTermsByType[type];
      if (categorizeTerms) {
        _.assign(tags, categorizeTerms(buckets));
      }
    }

    // Runs a chunk of requests and processes each response. Failed requests
    // go to onError; if the handler doesn't throw, the index is not saved.
    function evalRequest(reqChunk) {
      return relationsHelper.cancelableMSearch(reqChunk, progress.canceledPromise)
        .each((resp, r) => {
          const request = reqChunk[r];
          const { fieldData } = request;

          if (resp.error) {
            onError(request, resp);
            dataset.indices[fieldData.iName].save = false;
            return;
          }

          processField(fieldData, resp);
        });
    }


    const typeFields = fieldsByType[type];
    const chunks = _(typeFields).map(makeQuery).thru(mediator.array).value();

    return promiseMapSeries(chunks, (reqChunk, c) => {
      if (!progress.notifyStart(
        `process ${type} fields (${c + 1}/${chunks.length})`,
        chunks[c].length * stepsPerField)) {
        return Promise.reject();
      }

      return evalRequest(reqChunk);
    })
      .then(() => args);
  }

  /**
   * Builds fingerprints for the processable fields of every supported type,
   * one type after the other, splitting the progress steps of this stage
   * evenly between fields.
   *
   * With no fields to process, the whole stage is notified at once; args is
   * returned if the notification succeeds, a promise rejected with no reason
   * otherwise. In the other cases, returns a promise resolving to args.
   */
  function buildFingerprints(args, progress) {
    const types = ['string', 'number', 'ip', 'date', 'boolean'];
    const emptyByType = _(types).indexBy().mapValues(() => []).value();

    const processableByType = _(args.dataset.fieldsByType)
      .pick(types)
      .defaults(emptyByType)
      .mapValues(flds => _.filter(flds, 'processable'))
      .value();

    const fieldsCount = _.sum(processableByType, 'length');

    if (fieldsCount) {
      const stepsPerField = fpSteps / fieldsCount;

      return promiseMapSeries(types, type =>
        fingerprintsBuilder(args, progress, processableByType, type, stepsPerField)
      ).then(() => args);
    }

    if (!progress.notifyStart('', fpSteps)) { return Promise.reject(); }
    return args;
  }

  /**
   * Create the fingerprint data
   *
   * Builds a nested hash keyed by index name and then by field name, holding
   * tags, attributes and terms of every processed field, and stores it in
   * args.fingerprints. Unprocessed fields are left out.
   *
   * @param {Object} args  Shared procedure state; args.dataset is read
   * @returns {Object}     The (mutated) args object
   */
  function formatFingerPrints(args) {
    const { dataset } = args;
    const fingerprints = {};

    _.forEach(dataset.indices, function (index, iName) {
      const iFps = fingerprints[iName] = {};

      _.forEach(index.fields, function (field) {
        if (!field.processed) { return; }

        iFps[field.fName] = {
          // A processed field may carry no tags; an empty hash is emitted in
          // that case, since an undefined value would be dropped by
          // JSON.stringify() and fail consumers reading tag properties
          tags: field.tags || {},
          attributes: {
            cardinality: field.cardinality,
            range: field.range,
            frequentTermsPerDoc: field.frequentTermsPerDoc,
          },
          terms: field.terms ? field.terms : null
        };
      });
    });

    args.fingerprints = fingerprints;
    return args;
  }

  /**
   * Persists the fingerprints of each index as a 'fingerprint' saved object,
   * overwriting existing ones. Indices flagged as not to be saved are
   * skipped; save failures are handed to args.onError.
   *
   * Returns a promise resolving to args.
   */
  function saveFingerprints(args) {
    const { dataset, fingerprints, onError } = args;

    function saveOne([ iName, fp ]) {
      if (!dataset.indices[iName].save) { return Promise.resolve(); }

      const attributes = {
        version: currentVersion,
        json: JSON.stringify(fp)
      };
      const options = {
        id: iName,
        overwrite: true
      };

      return savedObjectsClient
        .create('fingerprint', attributes, options)
        .catch(error => onError('save fingerprints', { error }));
    }

    return Bluebird
      .map(_.pairs(fingerprints), saveOne, { concurrency })
      .then(() => args);
  }


  // Field Metadata functions

  /**
   * Looks up the key configured on the virtual index backing an index
   * pattern.
   *
   * Returns a promise resolving to the '_source.key' value of the virtual
   * index document, or to undefined when the pattern doesn't resolve to
   * exactly one index or that index is not virtual.
   */
  function getDefaultPrimaryKey(indexPattern) {
    function pickKey({ indicesByPattern, virtualIndices }) {
      const indices = indicesByPattern[indexPattern.title];

      // Even if we had the same primary key field on all involved indices,
      // we couldn't know that values are unique across all indices - quit
      if (indices.length !== 1) { return; }

      // We have some non-virtual index - quit
      const virtualMatches = _.intersection(indices, virtualIndices);
      if (virtualMatches.length !== indices.length) { return; }

      return jdbcDatasources.getVirtualIndex(indices[0])
        .then(doc => _.get(doc, '_source.key'));
    }

    return Promise.resolve({ indexPatterns: [ indexPattern ] })
      .then(getIndicesByPattern)
      .then(getVirtualIndices)
      .then(pickKey);
  }

  /**
   * Builds default metadata for the raw fields of an index pattern, keyed by
   * field name: every field is single-valued and not a primary key, except
   * for the field matching the default primary key - if any.
   *
   * Errors raised while looking for the default primary key are notified and
   * then ignored. Returns a promise resolving to the metadata hash.
   */
  function makeDefaultFieldsMetadataByName(indexPattern) {
    function buildDefaults(primaryKey) {
      const metadataByName = _.mapValues(
        _.indexBy(indexPattern.fields.raw, 'name'),
        () => ({ primaryKey: false, singleValue: true }));

      if (primaryKey && metadataByName[primaryKey]) {
        metadataByName[primaryKey].primaryKey = true;
      }

      return metadataByName;
    }

    return getDefaultPrimaryKey(indexPattern)
      .catch(err => { notify.error(err); })
      .then(buildDefaults);
  }

  /**
   * Converts the fingerprints of an index pattern into field metadata.
   *
   * Missing data is tolerated: fields with no fingerprints, fingerprints with
   * no tags, and hashes with no entry for the index pattern all yield false
   * metadata values rather than an error.
   *
   * @param {IndexPattern} indexPattern
   *    Index pattern whose fields are to be described
   * @param {Object} fingerprintsHash
   *    Fingerprints in a nested hash (by index pattern string and field name)
   * @returns {Object}
   *    Hash of { primaryKey, singleValue } booleans, keyed by field name
   */
  function fingerprintsToFieldsMetadataByName(indexPattern, fingerprintsHash) {
    const patternFps = (fingerprintsHash && fingerprintsHash[indexPattern.title]) || {};

    return _(indexPattern.fields)
      .indexBy('name')
      .mapValues(field => {
        const fieldFps = patternFps[field.name];
        const tags = (fieldFps && fieldFps.tags) || {};

        return {
          primaryKey: !!tags.primary_key,
          singleValue: !!tags.single_value
        };
      })
      .value();
  }

  /**
   * Applies metadata to the fields of the index pattern of a saved search,
   * then saves the index pattern.
   *
   * @param {Entity} entity
   *    Ontology entity the metadata refers to (currently unused, see TODO)
   * @param {SavedSearch} savedSearch
   *    Saved search whose index pattern fields are to be updated
   * @param {Object} fieldsMetadataByName
   *    Metadata to apply, keyed by field name. Each metadata object is
   *    completed in place with the specs of the field it replaces.
   * @returns {*}
   *    Result of the index pattern's save() call
   */
  function applyFieldsMetadata(entity, savedSearch, fieldsMetadataByName) {
    const indexPattern = relationsHelper.ssearchIndexPattern(savedSearch);
    const { fields } = indexPattern;

    // Index pattern fields aren't mutated directly, they need to be removed
    // and recreated using the previous specs as base. See the field editor page
    // for reference.

    _.forEach(fieldsMetadataByName, (fieldMeta, fieldName) => {
      const fieldIdx = fields.findIndex(field => field.name === fieldName);

      // Metadata may reference a field the index pattern doesn't have; without
      // this guard the lookup below would fail on the undefined fields[-1]
      if (fieldIdx < 0) { return; }

      const field = fields[fieldIdx];

      fields.splice(fieldIdx, 1,
        new Field(indexPattern, _.defaults(fieldMeta, field.$$spec)));
    });

    return indexPattern.save();

    // TODO: Once cache is implemented in the ontology, invalidate cached
    // values for search entity
  }


  //=============================================================================

  const provider = {
    currentVersion,

    /**
     * Extracts the index patterns of the input saved searches and prepares
     * them for the calculate() method - see selectIndexPatterns().
     *
     * @param {SavedSearch[]} savedSearches   List of saved searches to prepare
     * @returns {Promise}  Resolves to the formatted input for the calculate() method
     */
    selectSavedSearches(savedSearches) {
      return provider.selectIndexPatterns(
        _.map(savedSearches, relationsHelper.ssearchIndexPattern));
    },

    /**
     * Performs preliminary checks on the input index patterns list, before
     * formatting them for consumption by the calculate() method.
     *
     * @param {IndexPattern[]} indexPatterns  List of index patterns to prepare
     * @returns {Promise}  Resolves to the formatted input for the calculate() method
     */
    selectIndexPatterns(indexPatterns) {
      const steps = [
        getIndicesByPattern,
        buildEntries,
        getMaxIndicesPerPattern,
        formatInputState
      ];

      return steps.reduce(
        (pending, step) => pending.then(step),
        Promise.resolve({ indexPatterns }));
    },

    /**
     * @typedef {Object} FieldFingerprints
     * @property {Object} tags
     *    Notable features of the field, keyed in a hash with true values
     * @property {Object} attributes
     *    Calculated metrics of the field
     * @property {Number} attributes.cardinality
     *    Count of unique values of the field
     * @property {Number[2]} attributes.range
     *    Minimum and maximum values of a numeric field
     * @property {Number} attributes.frequentTermsPerDoc
     *    Relative estimate of frequent terms in the field
     * @property {Array} terms
     *    Sample of terms (the most frequent) extracted from the field
     */

    /**
     * Calculates fingerprints metadata for an input list of index patterns.
     *
     * The input object is completed with defaults and used as shared state
     * of the procedure, so it gets mutated.
     *
     * @param {Object}  args                Input parameters as named object properties
     * @param {Array} args.indexPatterns
     *    Index patterns to calculate fingerprints for
     *
     * @param {Number} [args.maxIndicesPerPattern]
     *    Specifies a maximum number of indices to analyze for multi-index patterns.
     *
     * @param {Function} [args.onError]
     *    Handler for eventual network errors, receiving the failed request and
     *    associated server response. Default is throwing the response's error
     *    property, aborting the procedure.
     *
     * @param {*} [args.nestedIn]
     *    Forwarded as is to the progress map's 'nestedIn' option
     *
     * @returns {Promise}
     *    Resolves to the generated fingerprints document for the specified
     *    index patterns, a nested hash keyed first by index pattern string and
     *    then by field name. Fingerprints are also stored as saved objects,
     *    one per index pattern.
     *
     * @see FieldFingerprints
     */
    calculate(args = {}) {
      _.defaults(args, {
        indexPatterns: [],
        onError: (req, resp) => { throw resp.error; }
      });

      const runSteps = () => {
        const steps = [
          { fn: getIndicesByPattern,                step: 'get indices' },
          { fn: getVirtualIndices,                  step: 'get virtual indices' },
          { fn: makeRequestsMediator,               step: 'init requests mediator' },
          { fn: buildDataset,                       step: 'build dataset' },
          { fn: filterFields,                       step: 'filter fields' },
          { fn: countDocuments,                     step: 'count documents' },
          { fn: buildFingerprints,                  step: fpSteps }
        ];

        return progressMap(steps, {
          title: 'Fingerprints Generation - In Progress...',
          valueMap: (op, o, progress) => op.fn(args, progress),
          stepMap: 'step',
          nestedIn: args.nestedIn
        });
      };

      return Promise.resolve()
        .then(runSteps)
        .then(() => args)
        .then(formatFingerPrints)
        .then(saveFingerprints)
        .then(() => args.fingerprints);
    },

    /**
     * Applies default field metadata values to the index pattern of the
     * specified saved search, without analyzing fields.
     *
     * @param {Entity} entity
     *    Ontology entity whose field metadata have to be defaulted
     * @param {SavedSearch} savedSearch
     *    Saved search whose field metadata have to be defaulted
     * @returns {Promise}
     *    Promise to completion, resolves to the outcome of the index pattern save
     */
    applyDefaultFieldMetadata(entity, savedSearch) {
      const indexPattern = relationsHelper.ssearchIndexPattern(savedSearch);

      return makeDefaultFieldsMetadataByName(indexPattern)
        .then(function applyDefaults(fieldsMetadataByName) {
          return applyFieldsMetadata(entity, savedSearch, fieldsMetadataByName);
        });
    },

    /**
     * Reads calculated fingerprints and applies them to field metadata (in
     * practice copying just the 'primary key' and 'single value' properties).
     *
     * @param {Entity} entity
     *    Ontology entity fields metadata will be applied to
     * @param {SavedSearch} savedSearch
     *    Saved Search fields metadata will be applied to
     * @param {Object} fingerprintsHash
     *    Fingerprints in a nested hash (by index pattern string and field name)
     * @returns {*}
     *    Outcome of the index pattern save
     */
    applyToFieldMetadata(entity, savedSearch, fingerprintsHash) {
      const indexPattern = relationsHelper.ssearchIndexPattern(savedSearch);

      return applyFieldsMetadata(
        entity,
        savedSearch,
        fingerprintsToFieldsMetadataByName(indexPattern, fingerprintsHash));
    },

    /**
     * Deletes fingerprints associated to the specified index pattern string
     *
     * @param {String} indexPatternString
     *    Index pattern string whose associated fingerprints are to be removed
     *
     * @returns {*}
     *    Outcome of the saved objects client's delete() call
     */
    delete(indexPatternString) {
      // NOTE(review): the id is prefixed with 'fingerprint:' here, while
      // saveFingerprints() creates objects using the bare index pattern
      // string as id - confirm that both address the same saved object
      const objectId = 'fingerprint:' + indexPatternString;

      return savedObjectsClient.delete('fingerprint', objectId);
    },

    _notify: notify,        // For stubbing purposes
  };

  return provider;
}

