Source code for pylag.processing.ensemble

"""
Tools to assist with processing the results of ensemble simulations
"""

from __future__ import division, print_function

import numpy as np
from scipy import stats

from pylag.processing.ncview import Viewer

have_pyqt_fit = True
try:
    from pyqt_fit import kde, kde_methods, kernels
except ImportError:
    have_pyqt_fit = False


[docs]def get_probability_density_1D(file_names, dates, depths, depth_bnds, pylag_time_rounding):
    """Compute the ensemble mean concentration in 1D
    
    Particle concentrations are computed on the dates and at the depth levels
    given in the arrays `dates` and `depth`. Each member of the ensemble is
    a separate realisation, with particles starting at the sames locations
    and at the same time in each run. A different method should be used to
    compute probability densities for ensembles in which particles are released
    at different times.
    
    To compute particle concentrations a gaussian kernel density estimator
    is used. Boundaries are treated as being reflecting, thus there is no loss
    of density.
    
    Parameters
    ---------
    file_names : list[str]
        List of sorted PyLag output files. Each output file corresponds to one member
        of the ensemble.
    
    dates : 1D NumPy array ([t], datetime)
        Dates on which to compute the ensemble mean concentration.
    
    depths : 2D Numpy array ([t, z], float)
        Depths at which to compute the ensemble mean concentration. The array is
        2D, since it may be desirable to have the depths at which concentrations
        are calculated vary in time (e.g. if the model has a moving free
        surface). NB dates.shape[0] must equal depths.shape[0].
    
    depths_bnds : 2D Numpy array ([t, 2], float)
        These are the lower and upper depth bands which are required by the
        kernel method.

    pylag_time_rounding : int
        The number of seconds PyLag outputs should be rounded to.

    Returns
    -------
    conc : 2D Numpy array (float)
        The concentration at the specified times and depths
    """
    # Function requires pyqt_fit. First check that it is installed
    if not have_pyqt_fit:
        raise RuntimeError("PyQt-fit was not found within this python distribution. Please see PyLag's documentation "
                           "for more information.")

    if dates.shape[0] != depths.shape[0]:
        raise ValueError('Array lengths do not match')
    
    # Array sizes
    n_trials = len(file_names)
    n_times = dates.shape[0]
    n_zlevs = depths.shape[1]
    
    # Use kernel method to estimate density
    dens = np.empty((n_trials, n_times, n_zlevs), dtype=float)
    for i, file_name in enumerate(file_names):
        viewer = Viewer(file_name, time_rounding=pylag_time_rounding)

        # Establish the indices of the time points we want to work with
        time_indices = [viewer.date.tolist().index(date) for date in dates]      
        
        for j, t_idx in enumerate(time_indices):
            zmin = depth_bnds[j, 0]
            zmax = depth_bnds[j, 1]
            est = kde.KDE1D(viewer('z')[t_idx, :].squeeze(), lower=zmin, upper=zmax,
                    method=kde_methods.reflection, kernel=kernels.normal_kernel1d())
            dens[i, j, :] = est(depths[j, :])
    
    return np.mean(dens, axis=0)


[docs]def get_probability_density_2D(file_names, time_deltas, x_points, y_points, pylag_time_rounding, group_id=None):
    """Compute the probability density in 2D
    
    Probability densities are computed at the x and y points provided, and at time
    time points after the start of the simulation, as listed in the `time_deltas`
    array. Each member of the ensemble is a separate realisation. Typically, particles
    will have been released from the same points in space in each simulation. Particles
    may or may not have been released at different times.
    
    To compute the probability density a gaussian kernel density estimator is used.
    
    Parameters
    ----------
    file_names : list[str]
        List of sorted PyLag output files. Each output file corresponds to one member
        of the ensemble.
    
    time_deltas : 1D NumPy array, timedelta
        Time deltas corresponding to times after the start of each ensemble
        simulation at which the probability density should be computed. For
        example, if time_deltas = [1 (day), 2 (days)], then the probability
        density will be computed one and two days after the start of each
        simulation. NB - it is important that data has been saved at
        the specified time point. If it hasn't, an index error will be
        thrown. For example, if the data has been saved at 15 minute time
        intervals, and time_delta = 30 minutes, there should be no problems.
        However, if time_delta = 40 minutes, which cannot be divided exactly by
        15, an error will be generated.
    
    x_points : 1D NumPy array, float
        x points at which to compute the ensemble mean concentration.

    y_points : 1D NumPy array, float
        y points at which to compute the ensemble mean concentration.

    pylag_time_rounding : int
        The number of seconds PyLag outputs should be rounded to.

    group_id : int, optional
        Calculate the probability density for a single group.

    Returns
    -------
    dens: 2D Numpy array (float)
        The concentration at the specified times and x/y positions.
    """
    
    if len(x_points.shape) != 1 or len(y_points.shape) != 1:
        raise ValueError('x and y input arrays should be 1D')
    
    if x_points.shape[0] != y_points.shape[0]:
        raise ValueError('x and y input arrays should be the same length')
    
    # Array sizes
    n_trials = len(file_names)
    n_times = time_deltas.shape[0]
    n_points = x_points.shape[0]

    # Positions at which to estimate the particle density
    positions = np.vstack([x_points, y_points])

    # Use a kernel method to estimate the density
    dens = np.zeros((n_trials, n_times, n_points), dtype=float)
    for i, file_name in enumerate(file_names):
        viewer = Viewer(file_name, time_rounding=pylag_time_rounding)

        ref_date = viewer.date[0]
        dates = [ref_date + time_delta for time_delta in time_deltas]
        time_indices = [viewer.date.tolist().index(date) for date in dates]

        if group_id is not None:
            group_indices = np.where(viewer('group_id')[:] == group_id)[0]

            for j, t_idx in enumerate(time_indices):
                xpos = viewer('xpos')[t_idx, group_indices].squeeze()
                ypos = viewer('ypos')[t_idx, group_indices].squeeze()
                values = np.vstack([xpos, ypos])
                kernel = stats.gaussian_kde(values)
                dens[i, j, :] = kernel(positions)
        else:
            for j, t_idx in enumerate(time_indices):
                xpos = viewer('xpos')[t_idx, :].squeeze()
                ypos = viewer('ypos')[t_idx, :].squeeze()
                values = np.vstack([xpos, ypos])
                kernel = stats.gaussian_kde(values)
                dens[i, j, :] = kernel(positions)
    return np.mean(dens, axis=0)