import { OntologyWrapperProvider } from './ontology_wrapper';
import { FingerprintsProvider } from './fingerprints';
import { RelationsHelperProvider } from './relations_helper';
import { ReportProvider } from './findrelations_report.js';

import welcomeTemplate from './findrelations_welcome.html';
import fingerprintsTemplate from './findrelations_fingerprints.html';
import selectIndexPatternsTemplate from './findrelations_select.html';
import findRelationsReportTemplate from './findrelations_report.html';
import findRelationsProgressTemplate from './findrelations_progress.html';

import './quickrelations_modal.less';
import './layout.less';
import 'ui/kibi/styles/table_sticky.less';

import { allSelected } from 'ui/kibi/directives/tristate_checkbox';
import { sortContext, sortSequence } from 'ui/kibi/directives/sort_icon';

import { queryIsAnalyzed, queryEsType } from 'ui/kibi/utils/field';
import { promiseMapSeries } from 'ui/kibi/utils/promise';
import { EntityType } from 'ui/kibi/components/ontology/entity_type';

import { ProgressMapProvider } from 'ui/kibi/modals/progress_map';
import { BaseModalProvider } from 'ui/kibi/modals/base_modal';
import { MessageBoxProvider } from 'ui/kibi/modals/message_box';
import { SavedObjectsClientProvider } from 'ui/saved_objects';

import { CrudType } from 'plugins/investigate_core/management/sections/data_model/services/crud_type';
import { DataModelPermissionsProvider } from 'plugins/investigate_core/management/sections/data_model/services/data_model_permissions';

import Bluebird from 'bluebird';
import _ from 'lodash';


export function FindRelationsProvider(
  Private, ontologyModel, dataModel, savedSearches, mappings, savedEids) {

  // Data model wrapped through the ontology wrapper service.
  // NOTE(review): not referenced in the visible part of this file.
  const wrappedDataModel = Private(OntologyWrapperProvider)
    .forDataModel(dataModel);

  // Collaborating services, each instantiated through `Private`
  const fingerprints = Private(FingerprintsProvider);
  const relationsHelper = Private(RelationsHelperProvider);
  const progressMap = Private(ProgressMapProvider);
  const baseModal = Private(BaseModalProvider);
  const messageBox = Private(MessageBoxProvider);
  const report = Private(ReportProvider);
  const savedObjectsClient = Private(SavedObjectsClientProvider);


  // Constants

  // Saved-object type and id used for the persisted state of the input modal
  // (read by loadInputState, written by saveInputState)
  const inputDocType = 'mixed-state-docs';
  const inputDocId = 'findrelations-input-state';

  // Index-pattern field types and ES types accepted by calculateProcessable;
  // fields of any other type are logged as unsuitable and ignored
  const suitableTypes = ['string', 'number', 'ip'];
  const suitableEsTypes = [
    'string', 'keyword', 'long', 'integer', 'short', 'ip'
  ];

  // NOTE(review): the four thresholds below are not referenced in the visible
  // part of this file - presumably minimum term/document counts required by
  // the matching phases; confirm against their users
  const minKeyTermsCount = 10;
  const minSrcTermsCount = 10;
  const eidMinTermsCount = 5;
  const eidMinTermDocsCount = 5;

  // Progress steps assigned to each phase. maxFingerprintsSteps is used by
  // fingerprintsSteps/loadFingerPrints, eidRegexRelationSteps by
  // buildEidRegexRelations; the other two are used outside the visible code
  const maxFingerprintsSteps = 100;
  const eidRegexRelationSteps = 10;
  const directRelationSteps = 100;
  const eidRelationSteps = 50;

  // Concurrency limit passed to Bluebird.map() calls
  const concurrency = 50;

  // NOTE(review): not referenced in the visible part of this file
  const maxShownErrors = 10;

  // Named presets for the advanced matching parameters; every preset must
  // define the same set of keys, since hydrateMatchParameters compares the
  // current parameters against them to find the active preset
  const parametersPresets = {
    // See the input selection html for explanation of each parameter
    strict: {
      eidRegexTermsCount: 200,
      accurateMatchTermsCount: 300,
      accurateMatchRequiredSuccessPercent: 95,
      accurateMatchPreference: 10,
      samplesMatchTermsCount: 10000,
      samplesMatchRequiredSuccessPercent: 75
    },
    balanced: {
      eidRegexTermsCount: 100,
      accurateMatchTermsCount: 200,
      accurateMatchRequiredSuccessPercent: 80,
      accurateMatchPreference: 20,
      samplesMatchTermsCount: 10000,
      samplesMatchRequiredSuccessPercent: 60
    },
    loose: {
      eidRegexTermsCount: 100,
      accurateMatchTermsCount: 100,
      accurateMatchRequiredSuccessPercent: 60,
      accurateMatchPreference: 40,
      samplesMatchTermsCount: 10000,
      samplesMatchRequiredSuccessPercent: 50
    },
    regexesOnly: {
      eidRegexTermsCount: 100,
      accurateMatchTermsCount: 0,
      accurateMatchRequiredSuccessPercent: 80,
      accurateMatchPreference: 20,
      samplesMatchTermsCount: 0,
      samplesMatchRequiredSuccessPercent: 60
    }
  };

  // Defaults are the 'balanced' preset. This is the same object, not a copy,
  // so it must only ever be used as an assignment source
  const advancedParametersDefaults = parametersPresets.balanced;

  // Default EID regex entries offered in the input modal. Each entry is
  // completed with the 'terms' context and starts selected; users of this
  // list clone it before editing (see hydrateEidRegexState)
  const entityRegexesInputDefault = _.map([{
    entity: 'URI',
    regexStr: relationsHelper.defaultRegExpStrings.uri,
  }, {
    entity: 'IP_STRING',
    regexStr: relationsHelper.defaultRegExpStrings.ip,
  }, {
    entity: 'EMAIL',
    regexStr: relationsHelper.defaultRegExpStrings.email
  }], eReg => _.assign(eReg, { context: 'terms', selected: true }));


  // Local utility functions

  // Helpers taken from relationsHelper so they can be called unqualified in
  // the functions below
  const {
    pairHash, pairText, fieldHash, fieldFingerprint,
    ssearchIndexPattern
  } = relationsHelper;

  /**
   * Computes the hash identifying a link. The target contributes its EID
   * when it has one and its id otherwise; the source always contributes its id.
   *
   * @param {Object} link - object with `source` and `target` endpoints
   * @returns {*} whatever pairHash() yields for the two endpoint keys
   */
  function linkHash(link) {
    const { source, target } = link;
    const targetKey = target.eid || target.id;

    return pairHash(targetKey, source.id);
  }

  /**
   * Compiles a pattern string into a case-insensitive regular expression.
   *
   * @param {string} string - pattern source
   * @returns {RegExp|null} the compiled expression, or null when the pattern
   *   is not a valid regular expression
   */
  function stringToRegex(string) {
    let compiled = null;

    try {
      compiled = new RegExp(string, 'i');
    } catch (err) {
      // Invalid pattern: callers are told through the null result
    }

    return compiled;
  }


  /**
   * Lays out text fragments in columns on a single line.
   *
   * @param {Array} stringsWithPadding - list of `[column, text]` pairs; the
   *   line built so far is right-padded with spaces up to `column` before
   *   `text` is appended
   * @param {number} [spacing=3] - number of spaces always inserted between
   *   two consecutive fragments, even when the column was already passed
   * @returns {string} the assembled line
   */
  function paddedString(stringsWithPadding, spacing = 3) {
    const gap = _.repeat(' ', spacing);
    let line = '';

    _.forEach(stringsWithPadding, entry => {
      // No gap in front of the very first fragment
      const head = line ? line + gap : line;
      line = _.padRight(head, entry[0]) + entry[1];
    });

    return line;
  }

  /**
   * Formats links as report lines of the form `target ==> source`.
   *
   * @param {Array} links - links to format; the target is shown by its EID
   *   when it has one, by its text otherwise
   * @param {Object} args - unused, kept for call-site compatibility
   * @param {Object} [opts]
   * @param {Function} [opts.scoreText] - when given, maps a link to a score
   *   text shown as a leading column
   * @param {number} [opts.scorePadding=0] - width reserved for the score
   *   column; every other column is shifted right by this amount
   * @param {string} [opts.arrow='==>'] - separator between target and source
   * @returns {string[]} one line per link, or `['No relations']` when empty
   */
  function linksToLog(links, args, opts = {}) {
    // Defaults are merged into a fresh object so that the caller's options
    // are never modified
    const { scoreText, scorePadding, arrow } = _.defaults({}, opts, {
      scoreText: null, scorePadding: 0, arrow: '==>'
    });

    // The columns were previously hard-coded in the row layout, which made
    // scorePadding ineffective
    const [ targetCol, arrowCol, sourceCol ] =
      [0, 40, 46].map(column => column + scorePadding);

    function linkToLog(link) {
      const columns = [
        [targetCol, link.target.eid || link.target.text],
        [arrowCol, arrow],
        [sourceCol, link.source.text]
      ];

      return paddedString(scoreText
        ? [ [0, scoreText(link)] ].concat(columns)
        : columns);
    }

    return links.length ? _.map(links, linkToLog) : ['No relations'];
  }

  /**
   * Annotates the endpoints of each link with a note about their role.
   *
   * Sources always get a '<type> relation target' note, where the type is
   * 'EID' when the target carries an EID and 'Direct' otherwise. Targets of
   * direct links additionally get a 'Direct relation source' note.
   *
   * @param {Array} links - links whose endpoints expose a `notes` array
   */
  function noteLinks(links) {
    links.forEach(({ source, target }) => {
      const isEid = Boolean(target.eid);

      // NOTE: source/target are switched in the UI
      if (!isEid) { target.notes.push('Direct relation source'); }

      const relType = isEid ? 'EID' : 'Direct';
      source.notes.push(`${relType} relation target`);
    });
  }

  /**
   * Creates an error handler for failed requests of one log section.
   *
   * The handler records `{ request, response }` (the request reduced to its
   * `body` and `context`) in `errorsList`, refreshes the global error count
   * on both `progress` and `args`, then notifies `args.onError`.
   *
   * @param {Object} args - procedure state; `log` sections are scanned for
   *   their `errors` lists to compute the total count
   * @param {Array} errorsList - list receiving the recorded errors
   * @param {Object} progress - progress object receiving `errorsCount`
   * @returns {Function} handler taking `(request, response)`
   */
  function storeError(args, errorsList, progress) {
    const sectionErrors = section => (section.errors ? section.errors.length : 0);

    return (request, response) => {
      const error = {
        request: _.pick(request, 'body', 'context'),
        response
      };
      errorsList.push(error);

      const total = _.sum(args.log, sectionErrors);
      progress.errorsCount = total;
      args.errorsCount = total;

      args.onError(error);
    };
  }


  // Select Input functions

  /**
   * Retrieves every available root saved search, i.e. each ontology entity of
   * type SAVED_SEARCH that has no parent.
   *
   * @param {Object} args - procedure state; receives `allSavedSearches`
   * @returns {Promise<Object>} resolves to `args`
   */
  function getAllSavedSearches(args) {
    const isRootSavedSearch = entity =>
      !entity.parentId && entity.type === EntityType.SAVED_SEARCH;

    // Inaccessible searches resolve to undefined and are dropped afterwards
    const fetchSavedSearch = entity =>
      savedSearches.get(entity.id).catch(_.noop);

    return Bluebird.resolve(ontologyModel.getEntityList())
      .filter(isRootSavedSearch)
      .map(fetchSavedSearch)
      .then(fetched => {
        args.allSavedSearches = _.filter(fetched);
        return args;
      });
  }

  /**
   * Retrieves the saved searches we can actually work on: one saved search
   * per distinct index pattern title, along with the matching index patterns.
   *
   * @param {Object} args - procedure state; receives `allSavedSearches`,
   *   `savedSearches` and `indexPatterns`
   * @returns {Promise<Object>} resolves to `args`
   */
  function getCandidateSavedSearches(args) {
    const patternTitle = ssearch => ssearchIndexPattern(ssearch).title;

    return getAllSavedSearches(args)
      .then(() => {
        // Keying the searches by their *actual* pattern string leaves a
        // single search per pattern (the last one wins).
        //
        // Probably unnecessary at this point, since duplicate index pattern
        // strings are rejected with an error message in the GUI - kept to be
        // on the safe side.
        const searchByPattern = _.indexBy(args.allSavedSearches, patternTitle);

        args.savedSearches = _.values(searchByPattern);
        args.indexPatterns = _.map(args.savedSearches, ssearchIndexPattern);

        return args;
      });
  }

  /**
   * Loads the state of the input modal as permanently saved in ES.
   *
   * Failures (missing document, malformed JSON) are deliberately ignored,
   * since they are non-recoverable: the procedure then starts from a clean
   * state.
   *
   * NOTE(review): the document is read with id `<type>:<id>`, whereas
   * saveInputState() creates it with the bare id - confirm that the saved
   * objects client reconciles the two.
   *
   * @param {Object} args - procedure state; receives `inputState`
   * @returns {Promise<Object>} resolves to `args`
   */
  function loadInputState(args) {
    args.inputState = {};

    const cleanState = () => ({
      searchEntries: [],
      advancedParameters: {},
      eidRegexState: {}
    });

    return savedObjectsClient
      .get(inputDocType, `${inputDocType}:${inputDocId}`)   // Can fail with 404
      .then(doc => JSON.parse(doc.attributes.json))          // Can throw if bad doc
      .catch(_.noop)                                         // Fallback to empty doc
      .then(storedState => {
        args.inputState = _.assign(cleanState(), storedState);
        return args;
      });
  }

  /**
   * Prepares saved searches for display in the input modal.
   *
   * One entry is built per candidate saved search, sorted by search title.
   * Values stored for the same search id override the computed defaults
   * (selected unless the search title is '*').
   *
   * @param {Object} args - procedure state; reads `savedSearches` and
   *   `indicesByPattern`, rewrites `inputState.searchEntries`
   */
  function hydrateSearchEntries(args) {
    const { savedSearches, inputState, indicesByPattern } = args;
    const storedById = _.indexBy(inputState.searchEntries, 'searchId');

    function toEntry(savedSearch) {
      const patternTitle = ssearchIndexPattern(savedSearch).title;

      const computed = {
        searchId: savedSearch.id,
        savedSearch,
        multiple: indicesByPattern[patternTitle].length > 1,
        selected: savedSearch.title !== '*'
      };

      return _.assign(computed, storedById[savedSearch.id]);
    }

    const entries = _.map(savedSearches, savedSearch => toEntry(savedSearch));
    inputState.searchEntries = _.sortBy(entries, 'savedSearch.title');
  }

  /**
   * Prepares advanced parameters for display in the input modal.
   *
   * Stored parameters are reduced to the known keys and completed with the
   * defaults. `inputState.advancedParamsState` is then set up with:
   *  - `data`: the current parameters object
   *  - `jsonPreset`: name of the preset equal to the current parameters, or
   *    'custom'; assigning a preset name copies that preset into `data`
   *  - `resetDefaults()`: asks for confirmation, then restores the defaults
   *
   * @param {Object} args - procedure state; `inputState` is updated in place
   */
  function hydrateMatchParameters(args) {
    const { inputState } = args;
    const stored = inputState.advancedParameters;

    // Always a fresh object (never a preset itself), with keys in the order
    // of the defaults whatever subset of parameters was stored
    inputState.advancedParameters = _.mapValues(advancedParametersDefaults,
      (fallback, key) => {
        const value = (stored === null || stored === undefined)
          ? undefined : stored[key];

        return (value === undefined) ? fallback : value;
      });

    function resetDefaults() {
      return messageBox({
        title: 'Reset Defaults',
        content: 'This will reset all parameters to their default state. Are you sure?',
        buttons: ['Cancel', 'Reset']
      })
        .show()
        .then(confirmed => {
          if (!confirmed) { return; }
          _.assign(this.data, advancedParametersDefaults);
        });
    }

    inputState.advancedParamsState = {
      get data() { return inputState.advancedParameters; },

      get jsonPreset() {
        // Value comparison, so the result doesn't depend on key order
        const presetName = _.findKey(parametersPresets,
          preset => _.isEqual(preset, this.data));

        return presetName || 'custom';
      },
      set jsonPreset(value) {
        // 'custom' (as returned by the getter) and unknown names have no
        // associated preset: parameters are left untouched rather than
        // failing on a missing preset
        if (!_.has(parametersPresets, value)) { return; }
        _.assign(this.data, parametersPresets[value]);
      },

      resetDefaults
    };
  }

  /**
   * Prepares EID regexes for display in the input modal.
   *
   * The stored state (pattern entries and test string) is laid over the
   * defaults and over the editing operations used by the modal. Every pattern
   * is compiled and tested against the test string before returning.
   *
   * Each entry carries `regex` (compiled pattern, null when invalid) and
   * `testOk` ('match', 'nomatch' or 'error').
   *
   * @param {Object} args - procedure state; `inputState.eidRegexState` is
   *   replaced by the hydrated state
   */
  function hydrateEidRegexState(args) {
    const { inputState } = args;

    const defaults = {
      entityRegexes: _.cloneDeep(entityRegexesInputDefault),
      testString: ''
    };

    const operations = {
      // Appends a new, selected entry matching the empty string only
      newRegex() {
        const created = {
          entity: 'New Entity',
          context: 'terms',
          regexStr: '^$',
          selected: true
        };

        this.updateRegex(created);
        this.entityRegexes.push(created);
      },

      // Removes the entry, if it is part of the list
      deleteRegex(entityRegex) {
        const position = this.entityRegexes.indexOf(entityRegex);
        if (position >= 0) { this.entityRegexes.splice(position, 1); }
      },

      // Refreshes the test outcome of one entry
      testRegex(entityRegex) {
        const compiled = entityRegex.regex;

        if (compiled) {
          entityRegex.testOk = compiled.test(this.testString) ? 'match' : 'nomatch';
        } else {
          entityRegex.testOk = 'error';
        }
      },

      // Refreshes the test outcome of every entry
      updateTests() {
        this.entityRegexes.forEach(entry => this.testRegex(entry));
      },

      // Recompiles one entry from its pattern string, then tests it
      updateRegex(entityRegex) {
        entityRegex.regex = stringToRegex(entityRegex.regexStr);
        this.testRegex(entityRegex);
      },

      // Recompiles and tests every entry
      updateRegexes() {
        this.entityRegexes.forEach(entry => this.updateRegex(entry));
      },

      // Restores the default pattern list, upon user confirmation
      resetDefaults() {
        const confirmation = messageBox({
          title: 'Reset Defaults',
          content: 'This will replace the patterns with the default list. Are you sure?',
          buttons: ['Cancel', 'Reset']
        });

        return confirmation.show().then(confirmed => {
          if (confirmed) {
            this.entityRegexes = _.cloneDeep(entityRegexesInputDefault);
            this.updateRegexes();
          }
        });
      }
    };

    const hydrated = _.assign(defaults, operations, inputState.eidRegexState);

    inputState.eidRegexState = hydrated;
    hydrated.updateRegexes();
  }

  /**
   * Prepares the whole input state for display in the input modal, running
   * the search entries, match parameters and EID regexes steps in that order.
   *
   * @param {Object} args - procedure state
   * @returns {Object} `args`
   */
  function hydrateInputState(args) {
    const steps = [
      hydrateSearchEntries,
      hydrateMatchParameters,
      hydrateEidRegexState
    ];

    steps.forEach(hydrate => hydrate(args));

    return args;
  }

  /**
   * Deselects every saved search entry of the current input state, ahead of
   * the state being saved.
   *
   * @param {Object} args - procedure state
   * @returns {Object} `args`
   */
  function updateInputState(args) {
    const deselect = searchEntry => { searchEntry.selected = false; };

    args.inputState.searchEntries.forEach(deselect);

    return args;
  }

  /**
   * Saves the current input state in the ES dot-index.
   *
   * Only the serializable part of the state is stored: id and selection of
   * each search entry, the editable attributes of each EID regex entry, the
   * test string and the advanced parameters. Saving is best-effort: failures
   * are ignored.
   *
   * @param {Object} args - procedure state
   * @returns {Promise<Object>} resolves to `args`
   */
  function saveInputState(args) {
    const { searchEntries, eidRegexState, advancedParameters } = args.inputState;

    const storedEntry = searchEntry =>
      _.pick(searchEntry, ['searchId', 'selected']);

    const storedRegex = entRegex =>
      _.pick(entRegex, ['entity', 'context', 'regexStr', 'selected']);

    const snapshot = {
      searchEntries: searchEntries.map(storedEntry),

      eidRegexState: {
        entityRegexes: eidRegexState.entityRegexes.map(storedRegex),
        testString: eidRegexState.testString
      },

      advancedParameters
    };

    const inputDoc = { json: JSON.stringify(snapshot) };
    const createOptions = { id: inputDocId, overwrite: true };

    return savedObjectsClient.create(inputDocType, inputDoc, createOptions)
      .catch(_.noop)
      .then(() => args);
  }

  /**
   * Shows the initial welcome message of the AutoRelations procedure.
   *
   * @param {Object} args - procedure state, passed through untouched
   * @returns {Promise<Object>} resolves to `args` once the message box
   *   promise resolves, whatever its outcome
   */
  function showWelcome(args) {
    const welcomeBox = messageBox({
      class: 'kibi-findrel-welcome',
      title: '[BETA] Relations Auto-Discovery Wizard',
      content: welcomeTemplate,
      buttons: ['Got It']
    });

    return welcomeBox.show().then(() => args);
  }

  /**
   * Opens the main input selection modal, at first in a 'loading' state
   * (`ready: false`, with the indices tab active).
   *
   * @param {Object} args - procedure state; receives `selectModal` (the
   *   modal) and `selectModalShow` (the result of its show() call)
   * @returns {Object} `args`
   */
  function showSelectInputModal(args) {
    const loadingScope = {
      tabs: { indices: true, eidRegexes: false, advancedParameters: false },
      ready: false
    };

    const selectModal = baseModal(selectIndexPatternsTemplate, loadingScope);
    const selectModalShow = selectModal.show();

    return _.assign(args, { selectModal, selectModalShow });
  }

  /**
   * Moves the main input modal from the 'loading' to the 'ready' state, by
   * publishing the hydrated input state on the modal scope, then waits for
   * the modal outcome.
   *
   * @param {Object} args - procedure state; receives `modalOk`, the value
   *   the modal's show() promise resolved to
   * @returns {Promise<Object>} resolves to `args`
   */
  function readySelectInputModal(args) {
    const { selectModal, selectModalShow, inputState } = args;
    const { searchEntries, eidRegexState } = inputState;

    const readyScope = {
      ready: true,
      overlayClass: 'kibi-findrel-overlay',
      hasMultiples: _.some(searchEntries, 'multiple'),
      allSearchesSelected: allSelected(searchEntries),
      // Getter function, since the regex list can be replaced while editing
      allEntityRegexesSelected: allSelected(() => eidRegexState.entityRegexes)
    };

    _.assign(selectModal.scope, readyScope, inputState);

    return selectModalShow.then(modalOk => _.assign(args, { modalOk }));
  }

  /**
   * If some selected saved search spans multiple indexes (e.g. logstash-*),
   * lets relationsHelper.chooseMaxIndicesPerPattern() decide the maximum
   * number of indices to use per pattern. That helper is expected to warn
   * that the procedure is potentially slow and offer a 'limited' mode,
   * restricted to a few indices, next to the 'full' mode using all of them.
   *
   * Nothing happens when the input modal was dismissed, or when no selected
   * entry is flagged as `multiple`; `args` is then returned synchronously.
   *
   * @param {Object} args - procedure state; may receive `maxIndicesPerPattern`
   * @returns {Object|Promise<Object>} `args`, or a promise resolving to it
   */
  function getMaxIndicesPerPattern(args) {
    if (!args.modalOk) { return args; }

    const { searchEntries } = args.inputState;
    const selectedEntries = _.filter(searchEntries, 'selected');

    if (!_.some(selectedEntries, 'multiple')) { return args; }

    // NOTE(review): the patterns of *all* entries are passed on, including
    // unselected ones - confirm whether only selected entries should count
    const indexPatterns = _.map(searchEntries,
      searchEntry => ssearchIndexPattern(searchEntry.savedSearch));

    return relationsHelper
      .chooseMaxIndicesPerPattern(indexPatterns, args.indicesByPattern)
      .then(maxIndicesPerPattern => {
        args.maxIndicesPerPattern = maxIndicesPerPattern;
        return args;
      });
  }

  /**
   * Transforms the input state into an object that can be supplied to the
   * main calculate() method.
   *
   * Only selected saved searches are kept. Only selected EID regex entries
   * with a non-empty pattern that compiled successfully are kept: entries
   * whose pattern is invalid have a null `regex`, and would make the
   * matching phase fail when the regex is applied.
   *
   * @param {Object} args - procedure state (`inputState`, `modalOk`,
   *   `maxIndicesPerPattern`)
   * @returns {Object|Promise} `{ savedSearches, entityRegexes,
   *   advancedParameters, maxIndicesPerPattern }`; or a promise rejected
   *   without a reason when the modal was dismissed or no saved search is
   *   selected
   */
  function formatInputState(args) {
    const { inputState, modalOk, maxIndicesPerPattern } = args;
    const { searchEntries, eidRegexState, advancedParameters } = inputState;

    if (!modalOk) { return Promise.reject(); }

    const savedSearches = _.map(_.filter(searchEntries, 'selected'), 'savedSearch');

    const usableRegexes = _.filter(eidRegexState.entityRegexes,
      entityRegex => entityRegex.selected && entityRegex.regexStr && entityRegex.regex);

    const entityRegexes = _.map(usableRegexes,
      entityRegex => _.pick(entityRegex, ['entity', 'context', 'regex']));

    return savedSearches.length
      ? { savedSearches, entityRegexes, advancedParameters, maxIndicesPerPattern }
      : Promise.reject();
  }


  // Main AutoRelations calculate() functions

  /**
   * Initializes the list of index patterns associated to the specified saved
   * searches, plus auxiliary lookups about them, and opens the 'Input Data'
   * section of the log.
   *
   * Adds to `args`:
   *  - `indexPatterns`: index pattern of each saved search
   *  - `savedSearchesByIndexTitle`: saved searches keyed by pattern title
   *  - `fieldText(field)`: display text pairing the saved search title with
   *    the field display name
   *  - `indexPatternToSavedSearchTitle(indexPattern)`: title of the saved
   *    search using the pattern
   *
   * @param {Object} args - procedure state (`savedSearches`, `log`)
   * @returns {Object} `args`
   */
  function getIndexPatterns(args) {
    const { savedSearches } = args;

    const patternTitle = ssearch => ssearchIndexPattern(ssearch).title;

    const indexPatterns = _.map(savedSearches, ssearchIndexPattern);
    const savedSearchesByIndexTitle = _.indexBy(savedSearches, patternTitle);

    const indexPatternToSavedSearchTitle = indexPattern =>
      savedSearchesByIndexTitle[indexPattern.title].title;

    const fieldText = field => pairText(
      indexPatternToSavedSearchTitle(field.indexPattern), field.displayName);

    _.assign(args, {
      indexPatterns,
      savedSearchesByIndexTitle,
      fieldText,
      indexPatternToSavedSearchTitle
    });

    const searchLines = args.indexPatterns.map(indexPattern => paddedString([
      [0, indexPatternToSavedSearchTitle(indexPattern)],
      [30, `(index pattern: ${indexPattern.title})`]
    ]));

    args.log.push({
      name: 'Input Data',
      content: ['Saved Searches:', searchLines]
    });

    return args;
  }

  /**
   * Retrieves the indices of each index pattern through relationsHelper and
   * stores the resulting map.
   *
   * @param {Object} args - procedure state (`indexPatterns`); receives
   *   `indicesByPattern`
   * @returns {Promise<Object>} resolves to `args`
   */
  function getIndicesByPattern(args) {
    return relationsHelper.getIndicesByPattern(args.indexPatterns)
      .then(indicesByPattern => _.assign(args, { indicesByPattern }));
  }

  /**
   * Creates the requests mediator for msearch bulk requests. The mediator is
   * responsible for ensuring that shard requests never exceed the specified
   * msearch limit number, in particular when an index pattern has multiple
   * indexes inside.
   *
   * The selected multi-index mode is appended to the first log section,
   * which must already exist: 'limited' when `maxIndicesPerPattern` is
   * defined, 'full' otherwise.
   *
   * @param {Object} args - procedure state (`indicesByPattern`,
   *   `maxIndicesPerPattern`, `log`); receives `mediator`
   * @returns {Promise<Object>} resolves to `args`
   */
  function makeRequestsMediator(args) {
    const { indicesByPattern, maxIndicesPerPattern, log } = args;

    args.mediator = relationsHelper.makeRequestsMediator(
      indicesByPattern, maxIndicesPerPattern);

    // In full mode there's no limit to report: the message used to read
    // "max indices per pattern is undefined"
    const modeText = (maxIndicesPerPattern !== undefined)
      ? `limited (max indices per pattern is ${maxIndicesPerPattern})`
      : 'full (no limit on indices per pattern)';

    log[0].content = log[0].content.concat([
      'Multi-Index mode:',
      [ modeText ]
    ]);

    return Promise.resolve(args);
  }

  /**
   * Compiles index patterns and fields into a hydrated structure, carrying
   * the metadata used throughout the procedure (e.g. notes on processed
   * fields).
   *
   * `args.dataset` exposes:
   *  - `indices`: per pattern title, `{ orig, fields, multiIndex }` with
   *    fields keyed by name
   *  - `fieldsById`: every field data, keyed by field hash
   *  - `fieldsByType`: field data grouped by type
   *  - `fieldToData(field)`: field data of an original field
   *
   * Each index is also handed to relationsHelper.addMultifieldsHierarchy()
   * once its fields are in place.
   *
   * @param {Object} args - procedure state (`indexPatterns`,
   *   `indicesByPattern`, `fieldText`); receives `dataset`
   * @returns {Object} `args`
   */
  function buildDataset(args) {
    const { indexPatterns, indicesByPattern, fieldText } = args;

    const dataset = {
      indices: {},
      fieldsById: {},

      fieldToData(field) {
        return dataset.fieldsById[fieldHash(field)];
      }
    };
    args.dataset = dataset;

    // Initial metadata of a field: processable and not yet retired
    const describeField = (origField, iName) => ({
      iName,
      fName: origField.name,
      type: origField.type,
      orig: origField,
      id: fieldHash(origField),
      text: fieldText(origField),
      processable: true,
      retired: false,
      notes: [],
      maxEidMatchQuotient: -1
    });

    indexPatterns.forEach(indexPattern => {
      const iName = indexPattern.title;

      const index = {
        orig: indexPattern,
        fields: {},
        multiIndex: indicesByPattern[iName].length > 1
      };
      dataset.indices[iName] = index;

      for (let f = 0; f < indexPattern.fields.length; f++) {
        const field = describeField(indexPattern.fields[f], iName);

        index.fields[field.fName] = field;
        dataset.fieldsById[field.id] = field;
      }

      relationsHelper.addMultifieldsHierarchy(index);
    });

    dataset.fieldsByType = _.groupBy(dataset.fieldsById, 'type');

    return args;
  }

  /**
   * Number of progress steps taken by the fingerprints phase.
   *
   * Fingerprints are metadata about each field; they are useful because they
   * cache a snapshot of fields at a given time, that we can avoid
   * calculating again. When all of them could be loaded the phase is flagged
   * as skipped and takes no step.
   *
   * @param {Object} args - procedure state (`fingerprintsSkipped`)
   * @returns {number} 0 when skipped, maxFingerprintsSteps otherwise
   */
  function fingerprintsSteps(args) {
    if (args.fingerprintsSkipped) { return 0; }

    return maxFingerprintsSteps;
  }

  /**
   * Loads the stored fingerprints of each index pattern into `args.fps`,
   * recalculating those that are missing, unreadable or saved with a
   * different fingerprints version.
   *
   * A 'Fingerprints' section is added to the log. When every fingerprint
   * could be loaded, the phase is flagged as skipped and the progress maximum
   * is updated; otherwise an explanation is shown first (interactive mode
   * only) and the missing fingerprints are calculated as a nested progress.
   *
   * @param {Object} args - procedure state (`indexPatterns`, `interactive`,
   *   `maxIndicesPerPattern`, `log`); receives `fps` and possibly
   *   `fingerprintsSkipped`
   * @param {Object} progress - progress tracker of the procedure
   * @returns {Promise<Object>} resolves to `args`
   */
  function loadFingerPrints(args, progress) {
    const { indexPatterns, interactive, maxIndicesPerPattern, log } = args;

    const fps = args.fps = {};

    // Resolves to undefined once the stored fingerprint was loaded, or to the
    // index pattern itself when it has to be rebuilt
    function loadStored(indexPattern) {
      return savedObjectsClient
        .get('fingerprint', 'fingerprint:' + indexPattern.title)
        .then(doc => {
          if (doc.attributes.version !== fingerprints.currentVersion) {
            return Promise.reject();
          }

          fps[indexPattern.title] = JSON.parse(doc.attributes.json);
        })
        .catch(() => indexPattern);
    }

    // Logs the outcome of the loading, and calculates what couldn't be loaded
    function rebuildMissing(failedIndexPatterns) {
      const logContent = [];
      const errors = [];

      log.push({
        name: 'Fingerprints',
        content: logContent,
        errors
      });

      if (failedIndexPatterns.length === 0) {
        args.fingerprintsSkipped = true;
        progress.updateMax();

        logContent.push('All fingerprints were found and loaded');
        return {};
      }

      const explanation = interactive && messageBox({
        class: 'kibi-findrel-fingerprints-explanation',
        title: '[BETA] Relations Auto-Discovery Wizard - Fingerprints',
        content: fingerprintsTemplate
      }).show();

      logContent.push('Rebuilt fingerprints:');
      logContent.push(failedIndexPatterns.map(args.indexPatternToSavedSearchTitle));

      return Promise.resolve(explanation)
        .then(() => fingerprints.calculate({
          indexPatterns: failedIndexPatterns,
          maxIndicesPerPattern,
          onError: storeError(args, errors, progress),
          nestedIn: {
            progress,
            prefix: 'fingerprints - ',
            steps: maxFingerprintsSteps
          }
        }));
    }

    return Bluebird.resolve(indexPatterns)
      .map(loadStored, { concurrency })
      .then(outcomes => rebuildMissing(_.compact(outcomes)))
      .then(rebuiltFps => {
        _.assign(fps, rebuiltFps);
        return args;
      });
  }

  //=============================================================================

  /**
   * Completes each field data of the dataset with its ES type (`esType`) and
   * with whether it is analyzed (`analyzed`). The latter is only queried for
   * fields of type 'string', and is false for every other field.
   *
   * @param {Object} args - procedure state (`dataset`)
   * @returns {Promise<Object>} resolves to `args`
   */
  function queryMappings(args) {
    function annotate(fieldData) {
      const { orig } = fieldData;

      const esTypeQuery = queryEsType(mappings, orig);
      const analyzedQuery = (orig.type === 'string')
        ? queryIsAnalyzed(mappings, orig)
        : Promise.resolve(false);

      return Bluebird.all([ esTypeQuery, analyzedQuery ])
        .then(([ esType, analyzed ]) => {
          fieldData.esType = esType;
          fieldData.analyzed = analyzed;
        });
    }

    const fieldsData = _.values(args.dataset.fieldsById);

    return Bluebird.map(fieldsData, fieldData => annotate(fieldData), { concurrency })
      .then(() => args);
  }

  /**
   * Records why a field is ignored, both in the notes of the field and as a
   * `<field name>   <reason>` line of the given log list.
   *
   * @param {Object} fieldData - field data (`fName`, `notes`)
   * @param {string} unprocessableReason - reason the field is ignored
   * @param {Array} loggedFields - log lines of the index being processed
   */
  function logUnprocessable(fieldData, unprocessableReason, loggedFields) {
    fieldData.notes.push(unprocessableReason);

    const logLine = paddedString([
      [0, fieldData.fName],
      [30, unprocessableReason]
    ]);

    loggedFields.push(logLine);
  }

  /**
   * Keeps a single processable field in each multi-field group.
   *
   * A group is made of a parent field followed by its multi-fields. The
   * first processable field of the group stands for the whole group (so an
   * unprocessable parent may be substituted by its first processable
   * multi-field); every processable field after it is marked unprocessable
   * as a 'multi-field duplicate'.
   *
   * Fields that were already unprocessable keep their original reason only,
   * and groups without any processable field are left untouched: nothing
   * there is a duplicate of a processed field.
   *
   * @param {Object} indexData - index data exposing the parent fields as
   *   `fieldsByMultifieldStatus.parent`, each with a `multifields` list
   * @param {Array} loggedFields - log lines of the index being processed
   */
  function resolveProcessableMultifields(indexData, loggedFields) {
    const unprocessableReason = 'multi-field duplicate';

    _.forEach(indexData.fieldsByMultifieldStatus.parent, parentData => {
      const group = [ parentData ].concat(parentData.multifields);

      const firstProcessable = _.findIndex(group, 'processable');
      if (firstProcessable < 0) { return; }

      for (let f = firstProcessable + 1; f < group.length; ++f) {
        const fieldData = group[f];
        if (!fieldData.processable) { continue; }

        fieldData.processable = false;
        logUnprocessable(fieldData, unprocessableReason, loggedFields);
      }
    });
  }

  /**
   * Decides which fields of the dataset can be processed, flagging the
   * others as unprocessable and listing them, per saved search, in an
   * 'Ignored Fields' section of the log.
   *
   * A field is ignored when, in this order of precedence, it is: not
   * searchable, not aggregatable, scripted, an analyzed string, a meta-field
   * of its index pattern, of an unsuitable type or of an unsuitable ES type.
   * Multi-field groups are then reduced to a single processable field.
   *
   * @param {Object} args - procedure state (`dataset`, `log`,
   *   `indexPatternToSavedSearchTitle`); expects `esType` and `analyzed` to
   *   be available on each field data
   * @returns {Object} `args`
   */
  function calculateProcessable(args) {
    const { dataset, log } = args;

    const logContent = [];
    log.push({
      name: 'Ignored Fields',
      content: logContent
    });

    // Ordered [applies, reason] rules: the first that applies gives the reason
    const rules = [
      [ fieldData => !fieldData.orig.searchable, () => 'not searchable' ],
      [ fieldData => !fieldData.orig.aggregatable, () => 'not aggregatable' ],
      [ fieldData => fieldData.orig.scripted, () => 'scripted' ],
      [ fieldData => fieldData.analyzed, () => 'analyzed strings' ],
      [ (fieldData, metaFields) => metaFields[fieldData.fName] === true,
        () => 'meta-field' ],
      [ fieldData => suitableTypes.indexOf(fieldData.orig.type) < 0,
        fieldData => `${fieldData.orig.type} - unsuitable type` ],
      [ fieldData => suitableEsTypes.indexOf(fieldData.esType) < 0,
        fieldData => `${fieldData.esType} - unsuitable ES type` ]
    ];

    function checkUnprocessable(fieldData, metaFields) {
      const rule = _.find(rules, candidate => candidate[0](fieldData, metaFields));
      return rule && rule[1](fieldData);
    }

    _.forEach(dataset.indices, indexData => {
      const indexPattern = indexData.orig;
      const loggedFields = [];

      logContent.push(`${args.indexPatternToSavedSearchTitle(indexPattern)}:`);
      logContent.push(loggedFields);

      // Lookup of the meta-field names of the pattern
      const metaFields = _.mapValues(
        _.indexBy(indexPattern.metaFields), _.constant(true));

      _.forEach(indexData.fields, fieldData => {
        const unprocessableReason = checkUnprocessable(fieldData, metaFields);
        fieldData.processable = fieldData.processable && !unprocessableReason;

        if (unprocessableReason) {
          logUnprocessable(fieldData, unprocessableReason, loggedFields);
        }
      });

      resolveProcessableMultifields(indexData, loggedFields);
    });

    return args;
  }

  /**
   * Analyzes fields to decide whether it's possible to operate on them:
   * mappings are queried first, then processable fields are calculated.
   *
   * @param {Object} args - procedure state
   * @returns {Promise} resolves to the outcome of calculateProcessable()
   */
  function filterFields(args) {
    const steps = [ queryMappings, calculateProcessable ];

    return steps.reduce(
      (pending, step) => pending.then(step),
      Promise.resolve(args));
  }

  //=============================================================================

  /**
   * Tells whether a field can still take part in a relation: it must be
   * processable and not already retired by a previous match.
   *
   * @param {Object} field - field data (`processable`, `retired`)
   * @returns {*} truthy for candidate fields
   */
  function fieldCandidate(field) {
    const { processable, retired } = field;

    return processable && !retired;
  }

  /**
   * Retires the fields taking part in the given links, so that they are no
   * longer candidates for further relations. The source is always retired;
   * the target only when it is a field rather than an EID.
   *
   * @param {Array} links - links with `source` and `target` endpoints
   */
  function retireLinkEndpoints(links) {
    _.forEach(links, ({ source, target }) => {
      source.retired = true;

      const targetIsField = !target.eid;
      if (targetIsField) { target.retired = true; }
    });
  }

  /**
   * Matches fields to EIDs using regular expressions.
   *
   * Regular expressions are specified in the input configuration, and are
   * associated to an EID name. The regex can be used to match a field's
   * content (its document values) or the field name.
   *
   * Candidate fields are first matched by name against the patterns with
   * 'fieldName' context. The remaining string fields are pre-filtered using
   * their fingerprint terms, then verified against the patterns with 'terms'
   * context by requesting `eidRegexTermsCount` terms for each of them.
   *
   * A 'Customizable Pattern Matching' section is added to the log; the links
   * found are appended to `args.links`, and their source fields are retired.
   *
   * @param {Object} args - procedure state (`advancedParameters`, `fps`,
   *   `dataset`, `entityRegexes`, `mediator`, `log`, `links`)
   * @param {Object} progress - progress tracker; `notifyStart()` and
   *   `canceledPromise` are used here
   * @returns {Promise} resolves (to undefined) once every request chunk was
   *   processed; rejects without a reason as soon as `notifyStart()` returns
   *   a falsy value
   */
  function buildEidRegexRelations(args, progress) {
    const { advancedParameters, fps, dataset, entityRegexes, mediator } = args;
    const { eidRegexTermsCount } = advancedParameters;
    const { fieldsById } = dataset;

    // Links found by field name and by terms content, respectively
    let fieldNameLinks = [];
    const termsLinks = [];

    // Patterns split by context; both contexts are always present
    const entityRegexesByType = _(entityRegexes)
      .groupBy('context')
      .defaults({ fieldName: [], terms: [] })
      .value();

    const errors = [];
    const onError = storeError(args, errors, progress);


    // Returns the first pattern matched by *all* the test strings. Blank
    // strings are not tested, so they never prevent a match - which also
    // means that an empty list of strings matches the first pattern.
    function getMatchingEntityRegex(testStrings, eRegexes) {
      return _.find(eRegexes, entityRegex => _.all(testStrings,
        testString => _.trim(testString) === '' || entityRegex.regex.test(testString)));
    }

    // Builds the link from a field (source) to the EID of a pattern (target),
    // leaving a note about the match on the field
    function makeLink(field, entityRegex) {
      const eid = entityRegex.entity;
      field.notes.push(`Matched '${eid}' EID regex`);

      return { target: { eid }, source: field };
    }

    // Links the fields whose name matches a 'fieldName' pattern, and returns
    // the fields left unmatched
    function extractLinksWithMatchingFieldName(fields) {
      fields = _.filter(fields, field => {
        const entityRegex = getMatchingEntityRegex(
          [ field.fName ], entityRegexesByType.fieldName);

        if (!entityRegex) { return true; }

        fieldNameLinks.push(makeLink(field, entityRegex));
        return false;
      });

      // NOTE(review): presumably assigns distinct EIDs to linked fields of
      // different types - confirm against relationsHelper
      fieldNameLinks = relationsHelper.splitLinkEidsByType(fieldNameLinks);
      return fields;
    }

    // Pre-filter based on the terms stored in the field's fingerprint, used
    // to save requests. A field without fingerprint terms is tested against
    // an empty list, so it passes whenever a 'terms' pattern exists.
    function hasFingerprintsMatch(field) {
      const fingerprint = fieldFingerprint(fps, field.orig);
      const terms = _.get(fingerprint, 'terms', []);

      return getMatchingEntityRegex(terms, entityRegexesByType.terms);
    }

    // Terms request for a field; the field is attached to the request so that
    // it can be retrieved when the response is evaluated
    function toTermsRequests(field) {
      return relationsHelper.termsRequest(field.orig, eidRegexTermsCount, {
        context: 'eid-regex-terms-get', field
      });
    }

    // Runs a chunk of terms requests, linking each field whose returned terms
    // all match a 'terms' pattern. Failed requests are recorded as errors,
    // fields without terms are skipped.
    function evalRequest(reqChunk) {
      return relationsHelper.cancelableMSearch(reqChunk, progress.canceledPromise)
        .each((resp, r) => {
          const request = reqChunk[r];

          if (resp.error) {
            onError(request, resp);
            return;
          }

          const { field } = request;

          const buckets = _.get(resp, 'aggregations.terms.buckets', []);
          const terms = _.map(buckets, 'key');

          if (!terms.length) { return; }

          const entityRegex = getMatchingEntityRegex(terms, entityRegexesByType.terms);
          if (!entityRegex) { return; }

          termsLinks.push(makeLink(field, entityRegex));
        });
    }

    // Log line of a pattern; escaped slashes of the regex source are shown
    // unescaped
    function toEntityRegexLog(entityRegex) {
      return paddedString([
        [ 0, entityRegex.entity ],
        [ 30, entityRegex.regex.source.replace(/\\\//g, '\/') ]
      ]);
    }

    // Adds the log section of this phase. The lists of found relations are
    // part of the content but still empty at this point: they're filled once
    // all the requests have completed.
    function initLog(args) {
      const { entityRegexes, log } = args;

      const logFieldNameLinks = [];
      const logTermLinks = [];

      let logContent = entityRegexes.length ? [
        'Fields are first tested against explicit patterns set in the "EID Regular' +
        '\nExpressions" tab.'
      ] : [
        'No regular expressions were specified, this section was skipped.'
      ];

      if (entityRegexesByType.fieldName.length) {
        logContent = logContent.concat([
          '\nPattern entries set to match a field name context can be tested directly.',

          '\nPatterns with "Field Name" context:',
          entityRegexesByType.fieldName.map(toEntityRegexLog),

          'The following relations were found:',
          logFieldNameLinks
        ]);
      }

      if (entityRegexesByType.terms.length) {
        logContent = logContent.concat([
          '\nFields are tested against patterns with the "Terms" context by extracting' +
          `\n${eidRegexTermsCount}` +
          ' terms and verifying that *all* of them match any of the specified' +
          '\nregular expressions.',

          '\nPatterns with "Terms" context:',
          entityRegexesByType.terms.map(toEntityRegexLog),

          'The following relations were found:',
          logTermLinks
        ]);
      }

      const logSection = {
        name: 'Customizable Pattern Matching',
        content: logContent,
        logFieldNameLinks,
        logTermLinks,
        errors
      };

      log.push(logSection);
      return logSection;
    }


    // Fields to verify through terms requests: candidate fields not already
    // linked by name, of string type, passing the fingerprints pre-filter.
    // Field name links are collected as a side effect of this chain.
    const fields = _(fieldsById)
      .filter(fieldCandidate)
      .thru(extractLinksWithMatchingFieldName)
      .filter(field => field.orig.type === 'string')
      .filter(hasFingerprintsMatch)
      .value();

    // Requests are grouped in chunks by the mediator
    const termsRequestChunks = _(fields)
      .map(toTermsRequests)
      .thru(mediator.array)
      .value();

    const logSection = initLog(args);

    // Progress steps accounted for each field of a chunk. The division is by
    // zero when no field is left, but the value is only used while iterating
    // over request chunks.
    const stepsPerField = eidRegexRelationSteps / fields.length;


    // Chunks are evaluated one after the other
    return promiseMapSeries(termsRequestChunks, (reqChunk, c) => {
      const msg = `matching EID regexes (${c + 1}/${termsRequestChunks.length})`;

      // NOTE(review): a falsy notifyStart() presumably means the procedure
      // was canceled - confirm against the progress implementation
      return progress.notifyStart(msg, reqChunk.length * stepsPerField)
        ? evalRequest(reqChunk) : Promise.reject();
    })
      .then(() => {
        const { logFieldNameLinks, logTermLinks } = logSection;

        // Fills, in place, the lists already referenced by the log content
        Array.prototype.push.apply(logFieldNameLinks, linksToLog(fieldNameLinks, args));
        Array.prototype.push.apply(logTermLinks, linksToLog(termsLinks, args));

        const links = fieldNameLinks.concat(termsLinks);
        retireLinkEndpoints(links);

        args.links = args.links.concat(links);
      });
  }

  //=============================================================================

  /**
   * Registers the 'Fixed Pattern Matching' log section and builds the matcher
   * that associates single fields to EIDs using fixed, predefined rules.
   *
   * @param {Object} args - procedure state; `args.log` receives the new section
   *   and `args.fieldText` is used to print matched fields.
   * @returns {Function} `eidFromField(field, fingerprint, notes)`: returns the
   *   matched EID name (or undefined when no rule applies) and records the match
   *   reason both in the log section and in the supplied `notes` array.
   */
  function eidFromFieldFactory(args) {
    const { log } = args;

    // The section is pushed before any field gets tested, so the logged list must
    // be the very same array that is filled later on. It starts with a placeholder
    // entry, which is dropped as soon as the first match is recorded.
    const logFields = ['No relations'];
    let hasMatches = false;

    log.push({
      name: 'Fixed Pattern Matching',
      content: [
        'The following fields were associated to an EID because of predefined, fixed' +
        '\npattern matching:',
        logFields
      ]
    });

    const reasons = {
      IP: "Field type is 'ip'"
    };

    function check(field, fingerprint) {
      // Eventual pattern matching on individual fields should go here
      if (field.type === 'ip') { return 'IP'; }
    }

    return function eidFromField(field, fingerprint, notes) {
      const eid = check(field, fingerprint);
      if (!eid) { return; }

      const reason = reasons[eid];

      if (!hasMatches) {
        // First match: remove the 'No relations' placeholder
        logFields.length = 0;
        hasMatches = true;
      }

      logFields.push(paddedString([
        [0, args.fieldText(field)],
        [30, eid],
        [40, reason]
      ]));

      notes.push(reason);
      return eid;
    };
  }

  /**
   * Links fields to EIDs according to fixed, predetermined rules.
   *
   * The current rules only recognize fields with the 'ip' ES type, which get
   * related to the 'IP' Entity Identifier. Found links are appended to
   * `args.links` and their endpoints are passed to `retireLinkEndpoints`.
   *
   * @param {Object} args - procedure state; reads `fps` and `dataset.fieldsById`.
   */
  function buildFixedPatternRelations(args) {
    const { fps, dataset } = args;
    const eidFromField = eidFromFieldFactory(args);

    const fixedLinks = [];

    for (const candidate of _.filter(dataset.fieldsById, fieldCandidate)) {
      const rawField = candidate.orig;
      const eid = eidFromField(
        rawField, fieldFingerprint(fps, rawField), candidate.notes);

      if (eid) {
        fixedLinks.push({ source: candidate, target: { eid } });
      }
    }

    retireLinkEndpoints(fixedLinks);
    args.links = args.links.concat(fixedLinks);
  }

  //=============================================================================

  /**
   * Progress steps taken by the direct relations phase.
   *
   * @param {Object} args - procedure state
   * @returns {number} 0 when `args.skippedDirectRelations` is set, otherwise
   *   `directRelationSteps`.
   */
  function directRelationStepsFn(args) {
    if (args.skippedDirectRelations) { return 0; }

    return directRelationSteps;
  }

  /**
   * Selects the fields whose fingerprint reports a cardinality strictly greater
   * than `minSrcTermsCount`.
   *
   * @param {Array} fields - fields to test
   * @param {Object} args - procedure state; reads `fps`
   * @returns {Array} the subset of `fields` that passed the check
   */
  function accurateMatchSourceCandidates(fields, args) {
    const { fps } = args;

    function hasEnoughTerms(field) {
      const cardinality =
        _.get(fieldFingerprint(fps, field), 'attributes.cardinality');

      return cardinality > minSrcTermsCount;
    }

    return _.filter(fields, field => hasEnoughTerms(field));
  }

  /**
   * Selects the fields that pass all of the following checks:
   *  - fingerprint cardinality strictly greater than `minKeyTermsCount`;
   *  - fingerprint tagged 'unique', or `frequentTermsPerDoc` not above the
   *    "accurateMatchPreference" advanced parameter (taken as a percentage);
   *  - fingerprint tagged 'single_value';
   *  - the field's index pattern is not flagged as multi-index in the dataset.
   *
   * @param {Array} fields - fields to test
   * @param {Object} args - procedure state; reads `fps`, `dataset.indices` and
   *   `advancedParameters.accurateMatchPreference`
   * @returns {Array} the subset of `fields` that passed every check
   */
  function accurateMatchTargetCandidates(fields, args) {
    const { fps, dataset, advancedParameters } = args;
    const { indices } = dataset;
    const maxFrequentTermsPerDoc = advancedParameters.accurateMatchPreference / 100;

    function isTargetCandidate(field) {
      const fingerprint = fieldFingerprint(fps, field);

      const cardinality = _.get(fingerprint, 'attributes.cardinality');
      const isUnique = !!_.get(fingerprint, 'tags.unique');
      const frequentTermsPerDoc = _.get(fingerprint, 'attributes.frequentTermsPerDoc');

      // Array-valued fields don't support *fast* multi-term matching using a
      // single request (the cardinality trick doesn't work), so they are left
      // out instead of falling back to the slow multi-request approach
      const isSingleValue = _.get(fingerprint, 'tags.single_value');

      // Multi-index patterns are too load-intensive for accurate matching
      const { multiIndex } = indices[field.indexPattern.title];

      const checks = [
        cardinality > minKeyTermsCount,
        isUnique || frequentTermsPerDoc <= maxFrequentTermsPerDoc,
        isSingleValue,
        !multiIndex
      ];

      return checks.every(Boolean);
    }

    return _.filter(fields, field => isTargetCandidate(field));
  }

  /**
   * Resolves a link endpoint: endpoints carrying an `eid` are returned as they
   * are, any other endpoint is converted through `fieldToData`.
   *
   * @param {Function} fieldToData - converter applied to non-EID endpoints
   * @param {Object} endpoint - link endpoint
   * @returns {*} the endpoint itself, or the result of `fieldToData(endpoint)`
   */
  function hydrateRawField(fieldToData, endpoint) {
    if (endpoint.eid) { return endpoint; }

    return fieldToData(endpoint);
  }

  /**
   * Hydrates every link through `relationsHelper.hydrateLink`, resolving the
   * endpoints with `hydrateRawField`, and returns the links sorted by `linkHash`.
   *
   * @param {Array} links - links to hydrate
   * @param {Function} fieldToData - converter for non-EID endpoints
   * @returns {Array} a new array with the hydrated links, sorted by hash
   */
  function hydrateLinks(links, fieldToData) {
    const hydrateEndpoint = endpoint => hydrateRawField(fieldToData, endpoint);

    const hydrated = _.map(
      links, link => relationsHelper.hydrateLink(link, hydrateEndpoint));

    return _.sortBy(hydrated, linkHash);
  }

  /**
   * Creates the 'Accurate Terms-Matching' log section, pushes it to `args.log`
   * and returns it. The section gets an extra explanation listing the
   * multi-index patterns of the dataset, when there are any.
   *
   * @param {Object} args - procedure state; reads `dataset.indices`,
   *   `advancedParameters.accurateMatchPreference`, `log` and
   *   `indexPatternToSavedSearchTitle`
   * @returns {Object} the new section: `{ name, content, errors }`
   */
  function initDirectRelationsLog(args) {
    const { dataset, advancedParameters, log, indexPatternToSavedSearchTitle } = args;

    const content = [
      'Fields without enough high-frequency terms are unsuitable for approximate terms' +
      '\nmatching, and will therefore be analyzed using accurate terms matching.',

      '\nThe "Match method preference" advanced parameter acts as threshold against' +
      '\na relative estimate of the number of high-frequency terms of a field to' +
      '\nidentify which fields will be treated with accurate terms matching:\n',

      [ `Match method preference = ${advancedParameters.accurateMatchPreference}` ]
    ];

    const multiIndexPatterns = _.filter(dataset.indices, 'multiIndex');

    if (multiIndexPatterns.length > 0) {
      const multiIndexTitles = _.map(
        multiIndexPatterns,
        indexData => indexPatternToSavedSearchTitle(indexData.orig));

      content.push(
        'Furthermore, fields in multi-index patterns at the moment will not be analyzed' +
        '\nwith accurate terms matching, because they tend to be much slower than fields' +
        '\nin single-index patterns.',

        '\nFields in the following multi-index patterns will not be considered as' +
        '\nrelational sources in accurate terms matching:',

        multiIndexTitles);
    }

    const section = { name: 'Accurate Terms-Matching', content, errors: [] };
    log.push(section);

    return section;
  }

  /**
   * Appends to the log section the list of fields selected for accurate
   * terms-matching, and annotates each of those fields with a note.
   *
   * @param {Object} args - procedure state; reads `advancedParameters`,
   *   `fieldText` and `dataset.fieldToData`
   * @param {Object} logSection - section whose `content` is appended to
   * @param {Array} sourceFields - currently unused
   * @param {Array} targetFields - fields that get listed and annotated
   * @returns {Object} the same `logSection`
   */
  function logDirectRelationsFields(args, logSection, sourceFields, targetFields) {
    const { advancedParameters, fieldText, dataset } = args;
    const { fieldToData } = dataset;
    const { content } = logSection;

    const termsCount = advancedParameters.accurateMatchTermsCount;
    const requiredPercent = advancedParameters.accurateMatchRequiredSuccessPercent;

    content.push(
      'The following fields were found to be suitable for accurate terms-matching' +
      '\nas relational source endpoints:');

    if (targetFields.length) {
      content.push(
        targetFields.map(fieldText),
        'All processable fields were tested against them, by extracting' +
        ` ${termsCount} terms and` +
        '\nchecking that' +
        ` ${requiredPercent}% of them exist in the source field.`);
    } else {
      content.push([ "No field was found for accurate terms-matching" ]);
    }

    for (const targetField of targetFields) {
      fieldToData(targetField).notes.push('Processed as accurate matching source');
    }

    return logSection;
  }

  /**
   * Appends the outcome of accurate terms-matching to the log section: the
   * valid links first, then one titled list for each non-empty group of split
   * or removed links.
   *
   * @param {Object} args - procedure state, forwarded to `linksToLog`
   * @param {Object} logSection - section whose `content` is appended to
   * @param {Object} linkGroups - `{ valid, splitWithEid, removedDueLoop,
   *   removedDueSerialMatch }` arrays of links
   */
  function logDirectRelationsResults(args, logSection, linkGroups) {
    const { content } = logSection;

    const scoredLogOpts = {
      scoreText: link => _.padLeft(_.round(link.score, 0) + '%', 4),
      scorePadding: 6
    };

    const logLinks = (title, links) => {
      content.push(title);
      content.push(linksToLog(links, args, scoredLogOpts));
    };

    logLinks('\nThe following relations were found:', linkGroups.valid);

    // Groups that are only reported when they hold at least one link
    const optionalGroups = [
      [
        'splitWithEid',
        '\nThe following relations were found, and an EID has been added for' +
        '\nconvenience to avoid loops on the same index unless the target field is' +
        '\na primary key:'
      ],
      [
        'removedDueSerialMatch',
        '\nThe following links were found, but were removed to isolate serial number' +
        '\nfields with matching names:'
      ],
      [
        'removedDueLoop',
        '\nThe following links were found, but duplicate a better relations chain:'
      ]
    ];

    optionalGroups.forEach(([ groupName, title ]) => {
      const links = linkGroups[groupName];
      if (links.length) { logLinks(title, links); }
    });
  }

  /**
   * Entry point for relations discovery through 'accurate' terms-matching. The
   * method is called 'accurate' because terms retrieved from one field are
   * looked up in another field with actual 'server-side' match requests.
   *
   * Found links are post-processed through the relations helper (serial fields
   * isolation, loops breaking, EID splitting), logged, and the valid and
   * EID-split ones are appended to `args.links`. Every field used as matching
   * target gets retired.
   *
   * @param {Object} args - procedure state
   * @param {Object} progress - progress tracker
   * @returns {Object|Promise<Object>} `args` itself when the phase is skipped
   *   (no target fields, or a zero terms count), a promise of `args` otherwise
   */
  function findDirectRelations(args, progress) {
    const { advancedParameters, dataset, fps, mediator } = args;
    const { fieldsById, fieldToData } = dataset;

    const {
      accurateMatchTermsCount: termsCount,
      accurateMatchRequiredSuccessPercent: requiredPercent
    } = advancedParameters;

    const requiredMatchesRatio = requiredPercent / 100;

    const logSection = initDirectRelationsLog(args);

    const candidateFields = _.map(_.filter(fieldsById, fieldCandidate), 'orig');
    const sourceFields = accurateMatchSourceCandidates(candidateFields, args);
    const targetFields = accurateMatchTargetCandidates(candidateFields, args);

    logDirectRelationsFields(args, logSection, sourceFields, targetFields);

    const canRun = targetFields.length && termsCount;
    if (!canRun) {
      args.skippedDirectRelations = true;
      progress.updateMax();

      return args;
    }

    function runMatching() {
      return relationsHelper.accurateTermsMatching(
        targetFields, sourceFields, mediator, {
          termsCount,
          requiredMatchesRatio,
          progress,
          progressSize: directRelationSteps,
          onError: storeError(args, logSection.errors, progress)
        });
    }

    function processLinks(allLinks) {
      // Each helper returns the pair [ links to keep, links taken out ]
      const [ withoutSerials, removedDueSerialMatch ] =
        relationsHelper.isolateSerialEndpointsWithMatchingNames(allLinks, fps);
      const [ withoutLoops, removedDueLoop ] =
        relationsHelper.breakDirectLinkLoops(withoutSerials, fps);
      const [ valid, splitWithEid ] =
        relationsHelper.splitDirectLinksWithAddedEid(withoutLoops, fps);

      const linkGroups = _.mapValues(
        { valid, removedDueSerialMatch, removedDueLoop, splitWithEid },
        links => _.sortBy(hydrateLinks(links, fieldToData), linkHash));

      // Retire only target fields, since they cannot be processed with the
      // approximate method (by definition); the other fields are still on.
      _.forEach(targetFields, field => {
        fieldsById[fieldHash(field)].retired = true;
      });

      logDirectRelationsResults(args, logSection, linkGroups);
      noteLinks(linkGroups.valid);

      args.links = args.links.concat(linkGroups.valid, linkGroups.splitWithEid);

      return args;
    }

    return Promise.resolve()
      .then(runMatching)
      .then(processLinks);
  }

  //=============================================================================

  /**
   * Internal class for relating fields by matching their most-counted terms
   * on the client, without making matching requests to the backend.
   *
   * Usage: call addValue() for every sampled term of every field, then call
   * calculateLinks() once to get the resulting field -> EID links.
   */
  class ClientSideTermsMatcher {
    /**
     * @param {Object} args - procedure state; the matcher reads
     *   `advancedParameters.samplesMatchRequiredSuccessPercent` and
     *   `dataset.fieldsById` from it when links are calculated
     */
    constructor(args) {
      this.args = args;
      this.values = new Map();          // term value -> ids of the fields having it
      this.overlaps = {};               // fieldId -> { otherFieldId: shared values }
      this.links = {};                  // fieldId -> ids of the linked fields
      this.valueCountByFieldId = {};    // fieldId -> number of added values
      this.regions = [];                // groups of fields connected through links
    }

    // field1 and field2 share a value; the count is stored in both directions
    _addOverlap(field1Id, field2Id) {
      if (field1Id === field2Id) { return; }

      const { overlaps } = this;

      [ [field1Id, field2Id], [field2Id, field1Id] ].forEach(([ fromId, toId ]) => {
        const counts = overlaps[fromId] || (overlaps[fromId] = {});
        counts[toId] = (counts[toId] || 0) + 1;
      });
    }

    // add a value field to the dictionary; empty strings are ignored
    addValue(fieldId, value) {
      if (value === '') { return; }

      const valueFieldIds = this.values.get(value);
      if (valueFieldIds) {
        valueFieldIds.forEach(otherId => { this._addOverlap(fieldId, otherId); });
        valueFieldIds.push(fieldId);
      } else {
        this.values.set(value, [fieldId]);
      }

      this.valueCountByFieldId[fieldId] =
        (this.valueCountByFieldId[fieldId] || 0) + 1;
    }

    // field1 and field2 are linked (they share many values)
    _addLink(field1Id, field2Id) {
      const { links } = this;

      if (links[field1Id]) { links[field1Id].push(field2Id); }
      else { links[field1Id] = [field2Id]; }

      if (links[field2Id]) { links[field2Id].push(field1Id); }
      else { links[field2Id] = [field1Id]; }
    }

    // build links (i.e. field<->field relationships) from overlaps; logLink is
    // called for every tested pair as (field1Id, field2Id, matchQuotient, matched)
    _addLinks(logLink) {
      const { samplesMatchRequiredSuccessPercent } = this.args.advancedParameters;
      const samplesMatchRequiredSuccessRatio = samplesMatchRequiredSuccessPercent / 100;
      const { overlaps, valueCountByFieldId } = this;

      Object.keys(overlaps).forEach(field1Id => {
        const overlap = overlaps[field1Id];
        const n1 = valueCountByFieldId[field1Id];

        Object.keys(overlap).forEach(field2Id => {
          // Overlaps are stored in both directions, test each pair only once
          if (field2Id < field1Id) { return; }

          const n2 = valueCountByFieldId[field2Id];
          const nMin = Math.min(n1, n2);

          // Shared values, relative to the field having less values. The ratio
          // has to be strictly greater than the required one to be a match.
          const matchQuotient = overlap[field2Id] / nMin;
          const matched = matchQuotient > samplesMatchRequiredSuccessRatio;

          logLink(field1Id, field2Id, matchQuotient, matched);
          if (matched) { this._addLink(field1Id, field2Id); }
        });
      });
    }

    // split the graph in connected parts
    _addRegions() {
      const { links, regions } = this;
      const touched = new Set();

      Object.keys(links).forEach(startId => {
        const region = { fields: [] };

        // Depth-first visit with an explicit stack, so that long chains of
        // linked fields cannot overflow the call stack. Neighbors are pushed
        // in reverse, which keeps the visiting order of a recursive visit.
        const pending = [startId];

        while (pending.length) {
          const fieldId = pending.pop();
          if (touched.has(fieldId)) { continue; }

          touched.add(fieldId);
          region.fields.push(fieldId);

          const neighbors = links[fieldId];
          if (!neighbors) { continue; }

          for (let i = neighbors.length - 1; i >= 0; i--) {
            if (!touched.has(neighbors[i])) { pending.push(neighbors[i]); }
          }
        }

        if (region.fields.length > 1) { regions.push(region); }
      });
    }

    // relate every field of a region to an EID named after the region's most
    // common field name
    _linksFromRegions() {
      const { fieldsById } = this.args.dataset;
      const result = [];

      this.regions.forEach(region => {
        // Field ids are split on '//', the second part is taken as field name
        const nameHisto = new Map();

        region.fields.forEach(fieldId => {
          const fName = fieldId.split('//')[1];
          nameHisto.set(fName, (nameHisto.get(fName) || 0) + 1);
        });

        let bestName = null;
        let bestNameCount = -1;

        // Most counted name wins; ties go to the alphabetically lowest name
        nameHisto.forEach((count, name) => {
          if (count > bestNameCount || count === bestNameCount && name < bestName) {
            bestName = name;
            bestNameCount = count;
          }
        });

        // All links of a region share the same target object
        const target = { eid: bestName };

        region.fields.forEach(fieldId => {
          result.push({ source: fieldsById[fieldId], target });
        });
      });

      return result;
    }

    /**
     * Calculates the links between the fields known so far.
     *
     * @param {Function} logLink - called for every tested pair of fields as
     *   (field1Id, field2Id, matchQuotient, matched)
     * @returns {Array} links in the form `{ source, target: { eid } }`
     */
    calculateLinks(logLink) {
      this._addLinks(logLink);
      this._addRegions();
      return this._linksFromRegions();
    }
  }

  //=============================================================================

  /**
   * Progress steps taken by the EID relations phase.
   *
   * @param {Object} args - procedure state
   * @returns {number} 0 when `args.skippedEidRelations` is set, otherwise
   *   `eidRelationSteps`.
   */
  function eidRelationStepsFn(args) {
    if (args.skippedEidRelations) { return 0; }

    return eidRelationSteps;
  }

  /**
   * Builds the msearch request retrieving up to `samplesMatchTermsCount` terms
   * of a field through a terms aggregation.
   *
   * @param {number} samplesMatchTermsCount - used as both `size` and
   *   `shard_size` of the aggregation
   * @param {Object} field - provides `iName` (request index) and `fName`
   *   (aggregated field); it is also attached to the request metadata
   * @returns {*} whatever `relationsHelper.msearchRequest` returns
   */
  function eidBuilderQuery(samplesMatchTermsCount, field) {
    const termsAggregation = {
      field: field.fName,
      size: samplesMatchTermsCount,
      shard_size: samplesMatchTermsCount,
      min_doc_count: eidMinTermDocsCount + 1,
      shard_min_doc_count: 2
    };

    const body = {
      size: 0,
      aggs: { terms: { terms: termsAggregation } }
    };

    const meta = { context: 'approximate-match-terms-get', field };

    return relationsHelper.msearchRequest(field.iName, body, meta);
  }

  /**
   * Runs approximate terms-matching on the fields of a single type: retrieves
   * the terms of each field with msearch requests, feeds them to a
   * ClientSideTermsMatcher and returns the links it calculates.
   *
   * @param {Object} args - procedure state
   * @param {Object} progress - progress tracker
   * @param {Object} opts - `{ fieldsByType, type, stepsPerField, logContent,
   *   errors }`; `logContent` receives the list of matched field pairs, `errors`
   *   is handed to `storeError`
   * @returns {Array|Promise<Array>} an empty array when there are no fields of
   *   the requested type, otherwise a promise of the calculated links
   */
  function eidRelationsBuilder(args, progress, opts) {
    const { fieldsByType, type, stepsPerField, logContent, errors } = opts;
    const { advancedParameters, mediator, dataset } = args;
    const { fieldsById } = dataset;

    const sp = new ClientSideTermsMatcher(args);
    const onError = storeError(args, errors, progress);

    // run multiple queries on the fields chunk; process the results
    function evalRequest(reqChunk) {
      return relationsHelper.cancelableMSearch(reqChunk, progress.canceledPromise)
        .each((resp, r) => {
          // Responses are paired to their requests by position
          const request = reqChunk[r];

          if (resp.error) {
            onError(request, resp);
            return;
          }

          const { field } = request;

          const aggs = resp.aggregations;
          if (!aggs) { return; }

          // Fields with too few buckets are annotated and left out of matching
          if (aggs.terms.buckets.length < eidMinTermsCount) {
            field.notes.push(`Not enough high-frequency terms for approximate matching`);
            return;
          }

          field.notes.push('Processed as approximate matching endpoint');
          field.maxEidMatchQuotient = 0;

          aggs.terms.buckets
            .forEach(b => { sp.addValue(field.id, b.key); });
        });
    }

    // Callback for the matcher, invoked for every tested pair of fields: keeps
    // track of the best match percentage of each field, and logs matched pairs
    function logLink(logLinks, field1Id, field2Id, matchQuotient, matched) {
      // From ratio to integer percentage
      matchQuotient = _.round(100 * matchQuotient, 0);

      const field1 = fieldsById[field1Id];
      const field2 = fieldsById[field2Id];

      field1.maxEidMatchQuotient = Math.max(field1.maxEidMatchQuotient, matchQuotient);
      field2.maxEidMatchQuotient = Math.max(field2.maxEidMatchQuotient, matchQuotient);

      if (!matched) { return; }

      logLinks.push(paddedString([
        [0, _.padLeft(matchQuotient + '%', 4)],
        [6, fieldsById[field1Id].text],
        [40, '<==>'],
        [46, fieldsById[field2Id].text],
      ]));
    }


    const typeFields = fieldsByType[type];
    if (!typeFields.length) { return []; }

    // The list is pushed to the log right away and filled later by logLink
    const logLinks = [];
    logContent.push(logLinks);

    // One request per field, grouped in chunks by the mediator
    const chunks = _(typeFields)
      .map(eidBuilderQuery.bind(null, advancedParameters.samplesMatchTermsCount))
      .thru(mediator.array)
      .value();

    const evalQueries = promiseMapSeries(chunks, (reqChunk, c) => {
      const msg = `process ${type} fields (${c + 1}/${chunks.length})`;

      // A falsy notifyStart() result aborts the series with a reason-less
      // rejection - presumably the procedure was canceled, confirm against the
      // progress implementation
      return progress.notifyStart(msg, reqChunk.length * stepsPerField)
        ? evalRequest(reqChunk) : Promise.reject();
    });

    return Promise.resolve(evalQueries)
      .then(() => {
        const logLinkFn = logLink.bind(null, logLinks);
        const links = sp.calculateLinks(logLinkFn);

        _.forEach(typeFields, field => {
          // NOTE(review): only negative values are skipped here; presumably
          // fields get a negative initial quotient elsewhere - confirm, since an
          // undefined value would pass this check and be printed in the note
          if (field.maxEidMatchQuotient < 0) { return; }

          field.notes.push(
            `Max terms compatibility with other fields: ${field.maxEidMatchQuotient}%`);
        });

        return links;
      });
  }

  /**
   * Separates the EID links whose fields are all already related, as sources of
   * direct relations, to one same target. This can happen because source fields
   * of direct relations with the same target often share high-frequency terms.
   *
   * @param {Object} args - procedure state; reads the links found so far
   * @param {Array} newLinks - EID links to split
   * @returns {Array[]} the pair [ links to keep, duplicated links ]
   */
  function extractAlreadyDirectRelated(args, newLinks) {
    // Note, target fields of this map are *not* found related to EIDs in newLinks.
    const directTargetIdsBySourceId =
      _.mapValues(_.indexBy(args.links, 'source.id'), 'target.id');

    const linksByEid = _.groupBy(newLinks, 'target.eid');

    const [ keptGroups, duplicateGroups ] = _.partition(linksByEid, eidLinks => {
      const directTargets = _.uniq(
        _.map(eidLinks, link => directTargetIdsBySourceId[link.source.id]));

      // A group is kept when its fields point to different direct targets, or
      // when its first field has no direct target at all
      return directTargets.length > 1 || directTargets[0] === undefined;
    });

    return [ _.flatten(keptGroups), _.flatten(duplicateGroups) ];
  }

  /**
   * Calculates relations using the 'approximate' matching method, which works
   * by matching the most-counted terms from two or more fields.
   *
   * The match is 'approximate' because it compares incomplete representations
   * of the actual data of each field, so it can give false negatives - but it
   * works in practice because important terms tend to appear high-counted in
   * multiple fields (e.g. USA on 'country' fields).
   *
   * @param {Object} args - procedure state; valid links are appended to
   *   `args.links`
   * @param {Object} progress - progress tracker
   * @returns {Object|Promise<Object>} `args`, or a promise of it
   */
  function buildEidRelations(args, progress) {
    const { advancedParameters, dataset, log } = args;

    const {
      samplesMatchTermsCount,
      samplesMatchRequiredSuccessPercent
    } = advancedParameters;

    // A zero terms count disables the whole phase
    if (!samplesMatchTermsCount) {
      args.skippedEidRelations = true;
      progress.updateMax();

      return args;
    }

    const logIgnoredNumberFields = [];
    const errors = [];

    const logContent = [
      `For all the remaining fields we try to extract the ${samplesMatchTermsCount}` +
      ' most significant' +
      '\nterms and see if those match' +
      ` ${samplesMatchRequiredSuccessPercent}% of the most significant terms of other` +
      '\nfields.',

      '\nFound matches are related to an EID built from one of the involved fields.',

      '\nApproximate terms-matching for numbers is only enabled for 64-bit integral' +
      '\nhashes. Common numbers are ignored because of the high chance of false' +
      '\npositives in known test datasets.',

      '\nThe following integer fields are ignored:',
      logIgnoredNumberFields
    ];

    log.push({ name: 'Approximate Terms-Matching', content: logContent, errors });

    const types = ['string', 'number'];

    // Candidate fields of each processed type; missing types get an empty list
    const emptyListsByType = _.mapValues(_.indexBy(types), () => []);
    const fieldsByType = _.mapValues(
      _.defaults(_.pick(dataset.fieldsByType, types), emptyListsByType),
      flds => _.filter(flds, fieldCandidate));

    // Number fields are only processed when tagged as 'hash_num'
    fieldsByType.number = fieldsByType.number.filter(field => {
      const isHashNum = _.get(args.fps[field.iName][field.fName], 'tags.hash_num');
      if (isHashNum) { return true; }

      field.notes.push("Number field without 'hash_num' tag");
      logIgnoredNumberFields.push(field.text);

      return false;
    });

    logContent.push(
      'This is the list of detected matches between processed fields:');

    const fieldsCount = _.sum(fieldsByType, 'length');
    if (!fieldsCount) {
      return progress.notifyStart('', eidRelationSteps)
        ? args : Promise.reject();
    }

    const stepsPerField = eidRelationSteps / fieldsCount;

    function processType(type) {
      return eidRelationsBuilder(args, progress, {
        fieldsByType, type, stepsPerField, logContent, errors
      });
    }

    function storeLinks(linksList) {
      const [ validLinks, dupLinks ] =
        extractAlreadyDirectRelated(args, _.flatten(linksList))
          .map(links => _.sortBy(links, linkHash));

      logContent.push('The following relations were found:');
      logContent.push(linksToLog(validLinks, args));

      logContent.push(
        'These relations duplicate previously found direct relations and were ignored:');
      logContent.push(linksToLog(dupLinks, args));

      noteLinks(validLinks);

      args.links = args.links.concat(validLinks);
      return args;
    }

    return promiseMapSeries(types, type => processType(type))
      .then(storeLinks);
  }

  /**
   * Retrieves the relations of the existing ontology, which the generated
   * relations will be tested against, and stores them in `args` together with
   * the `relEndpointHash` function that calculates the hash of their endpoints.
   *
   * @param {Object} args - procedure state; reads `savedSearches`
   * @returns {Promise<Object>} promise of `args`
   */
  function getOntologyRelations(args) {
    const { savedSearches } = args;

    function attachOntology(ontologyRelations) {
      const indexesBySearchId =
        _.mapValues(_.indexBy(savedSearches, 'id'), ssearchIndexPattern);

      // Virtual entities are identified by label, any other endpoint by the
      // index pattern title of its saved search paired to its field
      function relEndpointHash(endpoint) {
        if (endpoint.type === EntityType.VIRTUAL_ENTITY) { return endpoint.label; }

        const indexPattern = indexesBySearchId[endpoint.id];
        if (!indexPattern) { return indexPattern; }

        return pairHash(indexPattern.title, endpoint.field);
      }

      return _.assign(args, { ontologyRelations, relEndpointHash });
    }

    return ontologyModel.getRelationList().then(attachOntology);
  }

  /**
   * Takes the generated relations that are already in the current ontology out
   * of the suggested ones. They are logged and kept apart in
   * `args.existingLinks`, while `args.links` is left with the new ones.
   *
   * @param {Object} args - procedure state
   * @returns {Object} `args`
   */
  function separateExistingLinks(args) {
    const { links, ontologyRelations, relEndpointHash, log } = args;

    const ontologyRelationsByHash = _.indexBy(ontologyRelations, rel =>
      pairHash(relEndpointHash(rel.domain), relEndpointHash(rel.range)));

    const isNewLink = link => !ontologyRelationsByHash[linkHash(link)];

    const separated = _.partition(links, isNewLink);
    const validLinks = separated[0];
    const existingLinks = separated[1];

    log.push({
      name: 'Existing relations',
      content: [
        'The following connections already exist in the current ontology, and are' +
        '\ntherefore not included as suggested relations:',
        linksToLog(existingLinks, args)
      ]
    });

    args.links = validLinks;
    args.existingLinks = existingLinks;

    return args;
  }

  /**
   * Calculates which of the remaining generated relations should be selected
   * by default.
   *
   * We'll select relations by default *unless* the relation happens to
   * *functionally* duplicate an existing relation, like a direct relation
   * between two fields that are already linked in current ontology through an
   * EID.
   *
   * The outcome is stored in the `selected` flag of each link, and the links
   * left unselected are reported in a new log section.
   *
   * @param {Object} args - procedure state; reads `links`, `ontologyRelations`,
   *   `relEndpointHash` and `log`
   * @returns {Object} `args`
   */
  function calculateSelectedLinks(args) {
    const { links, ontologyRelations, relEndpointHash, log } = args;

    // Reports the links that ended up with a falsy `selected` flag
    function logUnselected() {
      log.push({
        name: 'Default Relations Selection',
        content: [
          'The following relations were deselected because they duplicate some part' +
          '\nof the existing relational configuration (though they are not duplicates' +
          '\nthemselves)',
          linksToLog(_.filter(links, link => !link.selected), args)
        ],
      });
    }

    // Builds a Map by calling fn(memo, a, b) for each item of arr in both
    // directions - (item[first], item[second]) and then (item[second],
    // item[first]) - and finally removes duplicates from each stored array.
    function twoWayReduce(arr, fn, first, second) {
      const reduced = _.reduce(arr, function (memo, val) {
        fn(memo, val[first], val[second]);
        fn(memo, val[second], val[first]);

        return memo;
      }, new Map);

      for (const [key, valueArr] of reduced.entries()) {
        reduced.set(key, _.uniq(valueArr));
      }

      return reduced;
    }

    // Id of each virtual entity of the ontology -> hashes of the endpoints
    // related to it
    const existingNeighborsByEid = twoWayReduce(ontologyRelations,
      function storeExEidRelatedField(memo, eidEp, fieldEp) {
        if (eidEp.type !== EntityType.VIRTUAL_ENTITY) { return; }

        let eidFields = memo.get(eidEp.id);
        if (!eidFields) { memo.set(eidEp.id, eidFields = []); }

        eidFields.push(relEndpointHash(fieldEp));
      }, 'domain', 'range');

    // Hash of each non-virtual endpoint of the ontology -> hashes of the
    // endpoints it already relates to, either directly or through a virtual
    // entity
    const existingNeighborsByFieldId = twoWayReduce(ontologyRelations,
      function storeExNeighbors(memo, firstEp, secondEp) {
        if (firstEp.type === EntityType.VIRTUAL_ENTITY) { return; }

        const firstHash = relEndpointHash(firstEp);

        let neighbors = memo.get(firstHash);
        if (!neighbors) { memo.set(firstHash, neighbors = []); }

        const newNeighbors = (secondEp.type === EntityType.VIRTUAL_ENTITY)
          ? existingNeighborsByEid.get(secondEp.id)
          : [ firstHash, relEndpointHash(secondEp) ];

        // Note, firstHash being added to newNeighbors is intended, because
        // eid neighborhoods also include the own field itself

        Array.prototype.push.apply(neighbors, newNeighbors);
      }, 'domain', 'range');

    // EID of the generated links -> ids of the endpoints linked to that EID
    const eidRelatedFields = twoWayReduce(links,
      function storeEidRelatedFields(memo, eidEp, fieldEp) {
        if (!eidEp.eid) { return; }

        let eidFields = memo.get(eidEp.eid);
        if (!eidFields) { memo.set(eidEp.eid, eidFields = []); }

        eidFields.push(fieldEp.id);
      }, 'source', 'target');


    _.forEach(links, link => {
      const { source, target } = link;

      // Fields that would get related to the source through this link
      const relatedFieldsByLink = target.eid
        ? eidRelatedFields.get(target.eid) : [ target.id ];

      // NOTE(review): the lookup by source.id assumes that field ids have the
      // same format of the hashes returned by relEndpointHash - confirm
      const exNeighbors = existingNeighborsByFieldId.get(source.id) || [];

      // Selected only if the link relates the source to at least one field
      // that is not among its existing neighbors
      link.selected = !!_.difference(relatedFieldsByLink, exNeighbors).length;
    });

    logUnselected(links);

    return args;
  }

  /**
   * Converts the known links to the format displayed by the interactive report:
   * suggested links become `args.relationGroups`, links already in the ontology
   * become `args.existingConnections`.
   *
   * @param {Object} args - procedure state
   * @returns {Object} `args`
   */
  function toReportFormat(args) {
    const suggestedLinks = args.links;
    const knownLinks = args.existingLinks;

    args.relationGroups = report.linksToRelationGroups(suggestedLinks);
    args.existingConnections = report.linksToConnections(knownLinks);

    return args;
  }

  /**
   * Converts the relation groups, as left by the user in the report, back to
   * the connections format used by the procedure.
   *
   * @param {Object} args - procedure state; `args.connections` gets written
   * @returns {Object} `args`
   */
  function toConnections(args) {
    const { relationGroups } = args;
    args.connections = report.relationGroupsToConnections(relationGroups);

    return args;
  }


  /**
   * Coalesces gathered logs into a single text string, which documents all phases
   * of the relational generation. Errors are included in the log, too.
   */
  function formattedLog(log) {
    function formatSectionContent(content, indent) {
      const lastSub = content.length - 1;

      return _(content)
        .map((sub, s) => _.isString(sub)
          ? _.repeat(' ', indent) + sub
          : formatSectionContent(sub, indent + 4) + ((s < lastSub) ? '\n' : ''))
        .join('\n') + '\n';
    }

    function formatErrors(section) {
      let { errors } = section;
      if (!errors) { return ''; }

      const errorsCount = errors.length;
      if (!errorsCount) { return ''; }

      let suffix = '\n';
      if (errorsCount > maxShownErrors) {
        errors = errors.slice(0, maxShownErrors);
        suffix = `\n>>> ... ${errorsCount - maxShownErrors} more ...\n`;
      }

      return `>>> ${errorsCount} network errors found at this point\n>>>\n` +
        _(errors).map(error => '>>> ' + JSON.stringify(error)).join('\n') +
        suffix;
    }

    function formatSection(section) {
      const sectionHeader = section.name ?  '==== ' + section.name + ' ====\n\n' : '';
      return sectionHeader +
        formatSectionContent(section.content, 0) +
        formatErrors(section);
    }

    return _(log).map(formatSection).join('\n\n');
  }

  //-----------------------------------------------------------------------------
  // Removes notes duplicates
  //-----------------------------------------------------------------------------
  /**
   * Removes duplicated notes from every field of the dataset. Nothing is done
   * when the dataset is missing, which can happen if an error prevented its
   * setup.
   *
   * @param {Object} args - procedure state; reads `dataset.fieldsById`
   */
  function pruneNotes(args) {
    if (!args.dataset) { return; }

    _.forEach(args.dataset.fieldsById, field => {
      const uniqueNotes = _.uniq(field.notes);
      field.notes = uniqueNotes;
    });
  }

  //-----------------------------------------------------------------------------
  // Log sections
  //-----------------------------------------------------------------------------
  /**
   * Builds the 'Times' log section, reporting start time, stop time (the moment
   * of the call) and the time elapsed between the two.
   *
   * @param {Date} startDate - when the procedure started
   * @returns {Array} list holding the single 'Times' section
   */
  function timesLog(startDate) {
    const stopDate = new Date();

    // Moment.js lacks accurate duration formatting, so the elapsed time is
    // formatted by hand. See https://github.com/moment/moment/issues/463.

    const totalSeconds = Math.floor(1e-3 * (stopDate - startDate));
    const seconds = totalSeconds % 60;

    const totalMinutes = Math.round((totalSeconds - seconds) / 60);
    const minutes = totalMinutes % 60;

    const hours = Math.round((totalMinutes - minutes) / 60);

    // Hours and minutes are only shown when not zero
    const elapsedParts = [];
    if (hours) { elapsedParts.push(`${hours} hours`); }
    if (minutes) { elapsedParts.push(`${minutes} minutes`); }
    elapsedParts.push(`${seconds} seconds`);

    const row = (label, value) => paddedString([ [ 0, label ], [ 30, value ] ]);

    return [{
      name: 'Times',
      content: [
        [
          row('Start Time:', startDate.toString()),
          row('Stop Time:', stopDate.toString()),
          row('Elapsed Time:', elapsedParts.join(' '))
        ]
      ]
    }];
  }

  /**
   * Builds the log section describing the error that stopped the procedure.
   *
   * @param {*} err - the thrown error, if any
   * @returns {Array} empty list when there is no error, otherwise a list with a
   *   single untitled section holding the stack (when available) and the JSON
   *   dump of the error
   */
  function errorLog(err) {
    if (!err) { return []; }

    let errorDump;
    try {
      errorDump = JSON.stringify(err, null, 2);
    } catch (jsonError) {
      // JSON.stringify throws on circular references and BigInt values - that
      // must not break the reporting of the original error
      errorDump = `<unserializable error object: ${jsonError.message}>`;
    }

    // Entries without content (e.g. a missing stack) are left out
    const content = [
      '>>> Error thrown at this point <<<', err.stack,
      '\n>>> Full error object <<<', errorDump
    ].filter(Boolean);

    return [{ content }];
  }

  /**
   * Builds the 'Notes By Field' log section, with one line per dataset field
   * (sorted by field text) showing the field text and its comma-separated notes.
   *
   * @param {Object} [dataset]  Dataset holding the fields in 'fieldsById'
   * @returns {Array} Empty list without a dataset, otherwise a single-element
   *                  list holding the section
   */
  function notesLog(dataset) {
    if (!dataset) { return []; }

    const sortedFields = _.sortBy(dataset.fieldsById, 'text');

    const fieldLines = _.map(sortedFields, field => {
      const notes = field.notes.join(', ');
      return paddedString([ [0, field.text], [40, notes] ]);
    });

    const section = {
      name: 'Notes By Field',
      content: [
        'This is the list of annotations gathered by the procedure for each field:',
        fieldLines
      ]
    };

    return [ section ];
  }

  /**
   * Prunes duplicated notes, then assembles the final log text and stores it
   * in args.logString. Sections appear in this order: times, the sections
   * accumulated in args.log, the error section (if any), notes by field.
   *
   * @param {Object} args   Procedure state
   * @param {*}      [err]  Error to include in the log, if any
   * @returns {Object} The same args object
   */
  function finalizeLogging(args, err) {
    pruneNotes(args);

    const sections = [].concat(
      timesLog(args.startDate),
      args.log,
      errorLog(err),
      notesLog(args.dataset)
    );

    args.logString = formattedLog(sections);
    return args;
  }

  /**
   * Spawns the procedure's report.
   *
   * In non-interactive mode no report is shown and args is returned as is;
   * otherwise the outcome of report.show(args) is returned.
   */
  function showReport(args) {
    if (!args.interactive) { return args; }

    return report.show(args);
  }

  /**
   * Spawns the error modal for unrecoverable errors after the procedure has
   * aborted - unlike network errors which are stored in the log without stopping.
   *
   * In non-interactive mode nothing is shown and the log is left untouched.
   *
   * @param {Object} args  Procedure state
   * @param {*}      err   The error that aborted the procedure
   * @returns {Promise} Resolves to args
   */
  function showErrorReport(args, err) {
    if (args.interactive) {
      // The log must be finalized first, so that it includes the error section
      finalizeLogging(args, err);

      return report.showError(err, args.logString)
        .then(function backToArgs() { return args; });
    }

    return Promise.resolve(args);
  }

  /**
   * Suggested relations found in the current ontology are marked to be
   * excluded later on.
   *
   * This is possible because users have a limited ability to alter the
   * suggested relational configuration - by changing the designated target
   * (left) field.
   *
   * Each connection gets an 'alreadyExists' property, which is truthy only when
   * an ontology relation goes from the connection's source hash (as domain) to
   * its target hash (as range).
   *
   * @param {Object} args  Procedure state; reads connections, ontologyRelations
   *                       and the relEndpointHash function
   * @returns {Object} The same args object
   */
  function markAlreadyExisting(args) {
    const { connections, ontologyRelations, relEndpointHash } = args;

    // Reduces a relation to the hashes of its two endpoints
    function toEndpointHashes(rel) {
      return _(rel).pick('domain', 'range').mapValues(relEndpointHash).value();
    }

    // Two-level lookup: domain hash -> range hash -> matching relations.
    // Relations lacking either hash cannot match any connection and are dropped.
    const srcMap = _(ontologyRelations)
      .map(toEndpointHashes)
      .filter('domain')
      .groupBy('domain')
      .mapValues(group => _(group).filter('range').groupBy('range').value())
      .value();

    connections.forEach(conn => {
      const dstMap = srcMap[conn.source.hash];
      conn.alreadyExists = dstMap && dstMap[conn.target.hash];
    });

    return args;
  }

  /**
   * Transforms entities from internal format to the one actually used by the ontology.
   *
   * Virtual entities already present in the ontology are indexed by label into
   * args.knownEntsByLabel. A new entity is then prepared for every EID name
   * that is targeted by a selected connection and is not among the known
   * labels; the new entities are stored in args.entities.
   *
   * @param {Object} args  Procedure state; reads connections
   * @returns {Promise} Resolves to the same args object
   */
  function formatEntities(args) {
    const { connections } = args;

    // Prepares the entity for the connection's EID along with its saved object
    function toVirtualEntity(conn) {
      return savedEids.get().then(savedEid => {
        const label = conn.target.eid.name;
        const id = 'eid:' + label;

        savedEid.title = label;
        savedEid.id = id;

        return {
          id,
          parentId: null,
          label,
          type: EntityType.VIRTUAL_ENTITY,
          instanceLabel: {},
          autoRelation: true,
          _objects: { savedEid }
        };
      });
    }

    function isVirtualEntity(entity) {
      return entity.type === EntityType.VIRTUAL_ENTITY;
    }

    // Build entities
    return Bluebird.resolve(ontologyModel.getEntityList())
      .filter(isVirtualEntity)
      .then(knownEntities => {
        const knownEntsByLabel = _.indexBy(knownEntities, 'label');
        args.knownEntsByLabel = knownEntsByLabel;

        // Indexing by EID name leaves a single connection per EID
        const pendingEntities = _(connections)
          .filter(conn => conn.target.eid && conn.source.selected)
          .indexBy('target.eid.name')
          .filter(conn => !knownEntsByLabel.hasOwnProperty(conn.target.eid.name))
          .map(conn => toVirtualEntity(conn))
          .value();

        return Promise.all(pendingEntities);
      })
      .then(entities => {
        args.entities = entities;
        return args;
      });
  }

  /**
   * Transforms relations from internal format to the one actually used by the ontology.
   *
   * Every selected connection that is not marked as already existing yields a
   * direct relation plus its inverse. All direct relations come first in the
   * output, followed by all the inverses. The result is stored in args.relations.
   *
   * @param {Object} args  Procedure state; reads savedSearchesByIndexTitle,
   *                       connections and knownEntsByLabel
   * @returns {Object} The same args object
   */
  function formatRelations(args) {
    const { savedSearchesByIndexTitle, connections, knownEntsByLabel } = args;

    function fieldToFieldLabel(srcField, dstField) {
      return (srcField.name === dstField.name)
        ? srcField.displayName
        : `${srcField.displayName} - ${dstField.displayName}`;
    }

    // Relation endpoint for the saved search associated to the field's index
    // pattern. Building both ends through this helper keeps id and title
    // consistent - the domain title used to be read from the source's saved
    // search, while the domain id came from the target's.
    function savedSearchEndpoint(field) {
      const savedSearch = savedSearchesByIndexTitle[field.indexPattern.title];
      return {
        id: savedSearch.id,
        title: savedSearch.title,
        field: field.name,
        type: EntityType.SAVED_SEARCH
      };
    }

    // Relation endpoint for an EID; the id of an already known entity is reused
    function eidEndpoint(eid) {
      const isKnown = Object.prototype.hasOwnProperty.call(knownEntsByLabel, eid.name);
      return {
        id: isKnown ? knownEntsByLabel[eid.name].id : 'eid:' + eid.name,
        type: EntityType.VIRTUAL_ENTITY,
        title: eid.name
      };
    }

    // NOTE: Endpoints have been swapped in relations output after the previous
    // algorithm stabilized on explicit request, to make it easier for users to
    // understand. We're swapping them at this point to keep changes contained.

    function connToRelation(conn, c) {
      const { source, target } = conn;

      const relation = {
        id: `~autoRelation #${c + 1}`,                  // Tilde will sort them last
        autoRelation: true,
        range: savedSearchEndpoint(source.field),
        rangeField: source.field.name
      };

      if (target.eid) {
        _.assign(relation, {
          directLabel: source.field.displayName,
          inverseLabel: source.field.displayName,

          domain: eidEndpoint(target.eid),
          domainField: undefined
        });
      } else {
        _.assign(relation, {
          directLabel: fieldToFieldLabel(target.field, source.field),
          inverseLabel: fieldToFieldLabel(source.field, target.field),

          domain: savedSearchEndpoint(target.field),
          domainField: target.field.name
        });
      }

      relation.title = `${relation.domain.title} -> ${relation.directLabel} -> ${relation.range.title}`;
      return relation;
    }

    // Builds the inverse of a direct relation, and links the two to each other
    function relToInverse(rel, r) {
      const id = `~autorelation inverse #${r + 1}`;
      rel.inverseOf = id;

      return {
        id,
        inverseOf: rel.id,
        title: `${rel.range.title} -> ${rel.inverseLabel} -> ${rel.domain.title}`,

        directLabel: rel.inverseLabel,
        inverseLabel: rel.directLabel,

        domain: rel.range,
        range: rel.domain,
        domainField: rel.rangeField,
        rangeField: rel.domainField,

        autoRelation: true,
        inverse: true
      };
    }

    const eligible = _.filter(connections, conn => conn.source.selected && !conn.alreadyExists);
    const directRelations = _.map(eligible, connToRelation);
    const inverseRelations = _.map(directRelations, relToInverse);

    args.relations = directRelations.concat(inverseRelations);
    return args;
  }

  //=============================================================================

  // Default entity regexes: same entries as entityRegexesInputDefault, with each
  // 'regexStr' string replaced by the 'regex' that stringToRegex makes out of it.
  const entityRegexesDefault = _.map(entityRegexesInputDefault, function toEntityRegex(input) {
    return {
      entity: input.entity,
      context: input.context,
      regex: stringToRegex(input.regexStr)
    };
  });

  const phases = {
    /**
     * Entry point of the interactive procedure: shows a welcome message, then
     * moves on to the input selection phase.
     */
    start() {
      const welcomed = showWelcome();
      return welcomed.then(phases.selectInput);
    },

    /**
     * Shows a popup for selecting index patterns and entity regexes, then
     * hands the resulting selections over to the calculation phase.
     */
    selectInput() {
      // Each stage receives the value resolved by the previous one
      const stages = [
        showSelectInputModal,
        getCandidateSavedSearches,
        getIndicesByPattern,
        loadInputState,
        hydrateInputState,
        readySelectInputModal,
        saveInputState,
        getMaxIndicesPerPattern,
        formatInputState,
        phases.calculate
      ];

      return stages.reduce((chain, stage) => chain.then(stage), Promise.resolve({}));
    },

    /**
     * Builds a list of relations between input saved searches.
     *
     * @param {Object}  args      Arguments in object format
     *
     * @param {Array} args.savedSearches
     *    Saved searches we want to calculate relations on
     *
     * @param {Array} [args.entityRegexes=entityRegexesDefault]
     *    List of { entity, context, regex } objects used to calculate EID relations
     *    using regular expressions.
     *
     *    The 'entity' field is the name of the EID to associate, 'context' can
     *    be either 'fieldName' or 'terms', and 'regex' is the regular
     *    expression to use for matches.
     *
     * @param {Object} [args.advancedParameters=advancedParametersDefaults]
     *    Parameters used to tune the procedure in various ways. See the wizard's UI
     *    for explanations of each option.
     *
     * @param {Number} [args.maxIndicesPerPattern]
     *    Specifies a maximum number of indices to analyze for multi-index patterns.
     *
     * @param {Boolean} [args.interactive=true]
     *    Whether a report modal should be shown
     *
     * @returns {Promise}
     *    Resolves to an { entities, relations } object. A rejection without a
     *    reason restarts from input selection; a 'quit' rejection is propagated
     *    without a reason; any other error is reported and then propagated.
     */
    calculate(args = {}) {
      _.defaults(args, {
        savedSearches: [],
        entityRegexes: entityRegexesDefault,
        advancedParameters: advancedParametersDefaults,
        interactive: true,
        onError: _.noop
      });

      // Per-run state, reset at every calculation
      _.assign(args, { links: [], log: [], startDate: new Date() });

      // Runs the tracked steps under the progress modal
      function runTrackedSteps() {
        const steps = [
          { fn: getIndexPatterns,                   step: 'get index patterns' },
          { fn: getIndicesByPattern,                step: 'get indices' },
          { fn: makeRequestsMediator,               step: 'init requests mediator' },
          { fn: buildDataset,                       step: 'build dataset' },
          { fn: loadFingerPrints,                   step: fingerprintsSteps },
          { fn: filterFields,                       step: 'filter fields' },
          { fn: buildEidRegexRelations,             step: eidRegexRelationSteps },
          { fn: buildFixedPatternRelations,         step: 'relate fixed patterns' },
          { fn: findDirectRelations,                step: directRelationStepsFn },
          { fn: buildEidRelations,                  step: eidRelationStepsFn }
        ];

        const options = {
          className: 'kibi-findrel-progress',
          title: 'Relations Wizard - In Progress...',
          valueMap: (op, o, progress) => op.fn(args, progress),
          stepMap: (op, o) => _.isFunction(op.step) ? op.step(args) : op.step,
          textTemplate: findRelationsProgressTemplate
        };

        return progressMap(steps, options);
      }

      function onFailure(err) {
        if (!err) { return phases.selectInput(); }             // canceled, go back
        if (err === 'quit') { return Promise.reject(); }

        return showErrorReport(args, err)
          .then(() => Promise.reject(err));
      }

      const stages = [
        runTrackedSteps,
        () => args,
        getAllSavedSearches,
        getOntologyRelations,
        separateExistingLinks,
        calculateSelectedLinks,
        toReportFormat,
        finalizeLogging,
        showReport,
        toConnections,
        markAlreadyExisting,
        formatEntities,
        formatRelations,
        ({ entities, relations }) => ({ entities, relations })
      ];

      return stages
        .reduce((chain, stage) => chain.then(stage), Promise.resolve())
        .catch(onFailure);
    },
  };

  //=============================================================================

  // Public interface of the provider
  return {
    // Default { entity, context, regex } list used by calculate() when no
    // entityRegexes are supplied
    entityRegexesDefault,

    // Interactive entry point (welcome message first) and direct calculation
    start: phases.start,
    calculate: phases.calculate,

    /**
     * Loads the stored input state, updates it, and saves it back.
     *
     * NOTE(review): 'savedSearchesPair' is never read here - the chain starts
     * from an empty object. Confirm whether updateInputState expects to
     * receive it.
     */
    updateInputAfterCompletion(savedSearchesPair) {
      return Promise.resolve({})
        .then(loadInputState)
        .then(updateInputState)
        .then(saveInputState);
    },

    /**
     * Forwards the auto relations data to the wrapped data model.
     */
    setAutoRelationsData(autoRelationsData) {
      return wrappedDataModel.setAutoRelationsData(autoRelationsData);
    },

    /**
     * Forwards the request to clear auto relations to the wrapped data model.
     */
    clearAutoRelations() {
      return wrappedDataModel.clearAutoRelations();
    },

    /**
     * Applies the relations through the wrapped data model, but only when the
     * CREATE permission check on entity ids passes; otherwise the returned
     * promise is rejected without a reason.
     */
    applyAutoRelations(relations, opts) {
      return Private(DataModelPermissionsProvider)
        .checkEntityIdPermissions(CrudType.CREATE)
        .then(allowed => allowed
          ? wrappedDataModel.applyAutoRelations(relations, opts)
          : Promise.reject());
    }
  };
}

