# Source code for aisynphys.database.synphys_database

import datetime
import os.path
from sqlalchemy.orm import aliased, contains_eager, selectinload
import pandas as pd
from collections import OrderedDict
from .database import Database
from .schema import schema_version, default_sample_rate
from ..synphys_cache import get_db_path, list_db_versions


class SynphysDatabase(Database):
    """Augments the Database class with convenience methods for querying the synphys database.
    """
    default_sample_rate = default_sample_rate
    schema_version = schema_version

    mouse_projects = ["mouse V1 coarse matrix", "mouse V1 pre-production"]
    human_projects = ["human coarse matrix"]

    @classmethod
    def load_sqlite(cls, sqlite_file, readonly=True):
        """Return a SynphysDatabase instance connected to an existing sqlite file.

        Parameters
        ----------
        sqlite_file : str
            Path of the sqlite file; passed to the constructor as ``db_name``.
        readonly : bool
            If True (default), no read-write host is configured (``rw_host=None``).
        """
        ro_host = 'sqlite:///'
        rw_host = None if readonly else ro_host
        return cls(ro_host, rw_host, db_name=sqlite_file)

    @classmethod
    def load_current(cls, db_size):
        """Load the most recent version of the database that is supported by this version of aisynphys.

        The database file will be downloaded and cached, if an existing cache file is not found.

        Parameters
        ----------
        db_size : str
            Must be one of 'small', 'medium', or 'full'.

        Raises
        ------
        AssertionError
            If *db_size* is not one of the accepted names.
        Exception
            If no database file is published for the current schema version,
            or none is published in the requested size.
        """
        assert db_size in ('small', 'medium', 'full'), "db_size argument must be one of 'small', 'medium', or 'full'"

        current = cls.list_current_versions()
        # list_current_versions() always fills in every size (with None when nothing is
        # published), so "no release at all for this schema" shows up as all-None values.
        if db_size not in current or all(desc is None for desc in current.values()):
            raise Exception(f"This version of aisynphys requires database schema version {cls.schema_version}, "
                            "but no released database files were found with this schema version.")
        if current[db_size] is None:
            raise Exception(f"There are no published {db_size} database files for the current schema version ({cls.schema_version}).")

        return cls.load_version(current[db_size]['db_file'])

    @classmethod
    def list_current_versions(cls):
        """Return a dict of the most recent DB versions for each size.

        If no published DB file is available for a particular size, then the value will be
        set to None in the returned dictionary.
        """
        versions_available = list_db_versions()
        current_versions = {}
        for size in ('small', 'medium', 'full'):
            versions_with_size = [
                desc for desc in versions_available
                if desc['db_size'] == size and desc['schema_version'] == cls.schema_version
            ]
            # The last matching entry is taken as the most recent one.
            # NOTE(review): this assumes list_db_versions() is ordered oldest-first -- confirm.
            current_versions[size] = versions_with_size[-1] if versions_with_size else None
        return current_versions

    @classmethod
    def list_versions(cls, only_supported=False):
        """Return a list of all available database versions.

        Each item in the list is a dictionary with keys db_file, release_version, db_size, and schema_version.

        Parameters
        ----------
        only_supported : bool
            If True, return only versions whose schema_version matches this class.
        """
        vers = list_db_versions()
        if only_supported:
            vers = [v for v in vers if v['schema_version'] == cls.schema_version]
        return vers

    @classmethod
    def load_version(cls, db_version):
        """Load a named database version.

        Available database names can be listed using :func:`list_versions`.
        The database file will be downloaded and cached, if an existing cache file is not found.

        Example::

            >>> from aisynphys.database import SynphysDatabase
            >>> SynphysDatabase.list_versions()
            [
                {'db_file': 'synphys_r1.0_small.sqlite'},
                {'db_file': 'synphys_r1.0_medium.sqlite'},
                {'db_file': 'synphys_r1.0_full.sqlite'},
                ...
            ]
            >>> db = SynphysDatabase.load_version('synphys_r1.0_small.sqlite')
            Downloading http://api.brain-map.org/api/v2/well_known_file_download/937779595 =>
              /home/luke/docs/aisynphys/doc/cache/database/synphys_r1.0_2019-08-29_small.sqlite
              [####################]  100.00% (73.13 MB / 73.1 MB)  4.040 MB/s  0:00:00 remaining
              done.
        """
        db_file = get_db_path(db_version)
        # Use cls (not the hard-coded base class) so subclasses get an instance of themselves.
        return cls.load_sqlite(db_file, readonly=True)

    def __init__(self, ro_host, rw_host, db_name, check_schema=True):
        """Connect to a synphys database.

        Parameters
        ----------
        ro_host, rw_host, db_name
            Passed through unchanged to ``Database.__init__``.
        check_schema : bool
            If True (default), verify the schema version recorded in the database and
            re-raise any failure after releasing the connection.
        """
        from .schema import ORMBase
        Database.__init__(self, ro_host, rw_host, db_name, ORMBase)
        self._project_names = None

        if check_schema:
            try:
                self._check_version()
            except Exception:
                # need to clean up carefully since this can introduce a persistent connection
                self.default_session.rollback()
                self.dispose_engines()
                raise

    @property
    def version_name(self):
        """The version name of this database, as accepted by SynphysDatabase.load_version()
        """
        return os.path.split(self.db_name)[-1]

    def initialize_database(self):
        """Optionally called after create_tables

        Writes the metadata record (schema version, creation date, origin) if the database
        has none yet; otherwise verifies the recorded schema version.
        """
        # initialize or verify db version
        mrec = self.metadata_record()
        if mrec is not None:
            self._check_version()
            return

        mrec = self.Metadata(meta={
            'db_version': schema_version,
            'creation_date': datetime.datetime.now().strftime('%Y-%m-%d'),
            'origin': "Allen Institute for Brain Science / Synaptic Physiology",
        })
        s = self.session(readonly=False)
        try:
            s.add(mrec)
            s.commit()
        finally:
            # This session is created here, so release it here
            # (same ownership rule as metadata_record).
            s.close()

    def _check_version(self):
        """Assert that the schema version stored in the database matches this code.

        Does nothing if the database has no metadata record.
        """
        mrec = self.metadata_record()
        if mrec is None:
            return
        ver = mrec.meta['db_version']
        # f-string: the previous "...".format(locals()) raised KeyError instead of
        # producing this message.
        assert ver == schema_version, f"Database {self} has unsupported schema version {ver} (expected {schema_version})"

    def metadata_record(self, session=None):
        """Return the single metadata record, or None if the table is empty.

        Parameters
        ----------
        session : Session | None
            Session to query with. If None, a temporary session is created and closed
            before returning.

        Raises
        ------
        Exception
            If more than one metadata record exists.
        """
        close_session = session is None
        if close_session:
            session = self.session()

        try:
            recs = session.query(self.Metadata).all()
            if len(recs) == 0:
                return None
            elif len(recs) > 1:
                raise Exception("Multiple metadata records found.")
            return recs[0]
        finally:
            if close_session:
                session.close()

    def slice_from_timestamp(self, ts, session=None):
        """Return the slice whose acq_timestamp equals *ts*.

        Raises KeyError if no slice, or more than one slice, matches.
        """
        session = session or self.default_session
        slices = session.query(self.Slice).filter(self.Slice.acq_timestamp==ts).all()
        if len(slices) == 0:
            raise KeyError("No slice found for timestamp %0.3f" % ts)
        elif len(slices) > 1:
            raise KeyError("Multiple slices found for timestamp %0.3f" % ts)
        return slices[0]

    def experiment_from_timestamp(self, ts, session=None):
        """Return the experiment whose acq_timestamp equals *ts*.

        If there is no exact match, the first experiment whose timestamp is within 0.01 of
        *ts* is returned instead.

        Raises KeyError if nothing matches, RuntimeError if several experiments match exactly.
        """
        session = session or self.default_session
        expts = session.query(self.Experiment).filter(self.Experiment.acq_timestamp==ts).all()
        if len(expts) == 0:
            # For backward compatibility, check for timestamp truncated to 2 decimal places
            for expt in session.query(self.Experiment).all():
                if abs((expt.acq_timestamp - ts)) < 0.01:
                    return expt

            raise KeyError("No experiment found for timestamp %0.3f" % ts)
        elif len(expts) > 1:
            raise RuntimeError("Multiple experiments found for timestamp %0.3f" % ts)

        return expts[0]

    def experiment_from_ext_id(self, ext_id, session=None):
        """Return the experiment with the given ext_id.

        Raises KeyError if none is found, RuntimeError if several are found.
        """
        session = session or self.default_session
        expts = session.query(self.Experiment).filter(self.Experiment.ext_id==ext_id).all()
        if len(expts) == 0:
            raise KeyError('No experiment found for ext_id %s' %ext_id)
        elif len(expts) > 1:
            raise RuntimeError("Multiple experiments found for ext_id %s" %ext_id)

        return expts[0]

    def slice_from_ext_id(self, ext_id, session=None):
        """Return the slice with the given ext_id.

        Raises KeyError if no slice, or more than one slice, matches.
        """
        session = session or self.default_session
        slices = session.query(self.Slice).filter(self.Slice.ext_id==ext_id).all()
        if len(slices) == 0:
            raise KeyError("No slice found for ext_id %s" % ext_id)
        elif len(slices) > 1:
            raise KeyError("Multiple slices found for ext_id %s" % ext_id)

        return slices[0]

    def list_experiments(self, session=None):
        """Return all experiment records."""
        session = session or self.default_session
        return session.query(self.Experiment).all()

    def list_project_names(self, session=None, cache=True):
        """Return the distinct values of experiment.project_name.

        The result is stored on the instance and reused on later calls unless
        ``cache=False`` is given, which forces a fresh query.
        """
        session = session or self.default_session
        if cache is False or self._project_names is None:
            self._project_names = [rec[0] for rec in session.query(self.Experiment.project_name).distinct().all()]
        return self._project_names

    def pair_query(self, pre_class=None, post_class=None, synapse=None, synapse_type=None, synapse_probed=None,
                   electrical=None, experiment_type=None, project_name=None, acsf=None, age=None, species=None,
                   distance=None, internal=None, preload=(), session=None, filter_exprs=None):
        """Generate a query for selecting pairs from the database.

        Parameters
        ----------
        pre_class : :class:`aisynphys.cell_class.CellClass` | None
            Filter for pairs where the presynaptic cell belongs to this class
        post_class : :class:`aisynphys.cell_class.CellClass` | None
            Filter for pairs where the postsynaptic cell belongs to this class
        synapse : bool | None
            Include only pairs that are (or are not) connected by a chemical synapse
        synapse_type : str | None
            Include only synapses of a particular type ('ex', 'in', or 'mixed')
        synapse_probed : bool | None
            If True, include only pairs that were probed for a synaptic connection
            (regardless of whether a connection was found)
        electrical : bool | None
            Include only pairs that are (or are not) connected by an electrical synapse (gap junction)
        experiment_type : str | None
            Include only data from specific types of experiments. Only the value
            'standard multipatch' is acted on here; it selects the projects listed in
            ``mouse_projects`` and ``human_projects`` and cannot be combined with *project_name*.
        project_name : str | list | None
            Value(s) to match from experiment.project_name (e.g. "mouse V1 coarse matrix" or "human coarse matrix")
        acsf : str | list | None
            Filter for ACSF recipe name(s)
        age : tuple | None
            (min, max) age ranges to filter for. Either limit may be None to disable
            that check.
        species : str | None
            Species ('mouse' or 'human') to filter for
        distance : tuple | None
            (min, max) intersomatic distance in meters
        internal : str | list | None
            Electrode internal solution recipe name(s)
        preload : list
            List of strings specifying resources to preload along with the queried pairs.
            This can speed up performance in cases where these would otherwise be
            individually queried later on. Options are:

            - "experiment" (includes experiment and slice)
            - "cell" (includes cell, morphology, cortical_location, and patch_seq)
            - "synapse" (includes synapse, resting_state, dynamics, and synapse_model)
            - "synapse_prediction" (includes only synapse_prediction)
        session : Session | None
            Session used to build the query; defaults to ``self.default_session``.
        filter_exprs : list | None
            List of sqlalchemy expressions, each of which will restrict the query
            via a call to query.filter(expr)

        Returns
        -------
        query
            The pair query. The aliased entities used in the joins are attached to it as
            attributes (``pre_cell``, ``post_cell``, ``pre_morphology``, ``post_morphology``,
            ``pre_location``, ``post_location``, ``pre_intrinsic``, ``post_intrinsic``,
            ``pre_patch_seq``, ``post_patch_seq``) so callers can filter on them.
        """
        if experiment_type == 'standard multipatch':
            assert project_name is None, "cannot specify both experiment_type and project_name"
            # Same names as the class-level project lists; reuse them instead of repeating the literals.
            project_name = list(self.mouse_projects) + list(self.human_projects)

        session = session or self.default_session

        # Each per-cell table is joined twice (once per side of the pair), so alias both sides.
        pre_cell = aliased(self.Cell, name='pre_cell')
        post_cell = aliased(self.Cell, name='post_cell')
        pre_morphology = aliased(self.Morphology, name='pre_morphology')
        post_morphology = aliased(self.Morphology, name='post_morphology')
        pre_patch_seq = aliased(self.PatchSeq, name='pre_patch_seq')
        post_patch_seq = aliased(self.PatchSeq, name='post_patch_seq')
        pre_intrinsic = aliased(self.Intrinsic, name='pre_intrinsic')
        post_intrinsic = aliased(self.Intrinsic, name='post_intrinsic')
        pre_location = aliased(self.CorticalCellLocation, name='pre_cortical_cell_location')
        post_location = aliased(self.CorticalCellLocation, name='post_cortical_cell_location')

        query = (
            session.query(self.Pair)
            .join(pre_cell, pre_cell.id==self.Pair.pre_cell_id)
            .join(post_cell, post_cell.id==self.Pair.post_cell_id)
            .outerjoin(pre_morphology, pre_morphology.cell_id==pre_cell.id)
            .outerjoin(post_morphology, post_morphology.cell_id==post_cell.id)
            .outerjoin(pre_patch_seq, pre_patch_seq.cell_id==pre_cell.id)
            .outerjoin(post_patch_seq, post_patch_seq.cell_id==post_cell.id)
            .outerjoin(pre_intrinsic, pre_intrinsic.cell_id==pre_cell.id)
            .outerjoin(post_intrinsic, post_intrinsic.cell_id==post_cell.id)
            .outerjoin(pre_location, pre_location.cell_id==pre_cell.id)
            .outerjoin(post_location, post_location.cell_id==post_cell.id)
            .join(self.Experiment, self.Pair.experiment_id==self.Experiment.id)
            .outerjoin(self.Slice, self.Experiment.slice_id==self.Slice.id)  ## don't want to drop all pairs if we don't have slice or connection strength entries
            .outerjoin(self.Synapse, self.Synapse.pair_id==self.Pair.id)
            .outerjoin(self.SynapsePrediction, self.SynapsePrediction.pair_id==self.Pair.id)
            .outerjoin(self.Dynamics, self.Dynamics.pair_id==self.Pair.id)
            .outerjoin(self.RestingStateFit, self.RestingStateFit.synapse_id==self.Synapse.id)
            .outerjoin(self.SynapseModel, self.SynapseModel.pair_id==self.Pair.id)
            .outerjoin(self.Conductance, self.Conductance.synapse_id==self.Synapse.id)
            # .outerjoin(self.PolySynapse)
            # .outerjoin(self.GapJunction)
        )

        if pre_class is not None:
            query = pre_class.filter_query(query, pre_cell, db=self)

        if post_class is not None:
            query = post_class.filter_query(query, post_cell, db=self)

        if synapse is not None:
            query = query.filter(self.Pair.has_synapse==synapse)

        if synapse_type is not None:
            assert synapse_type in ['ex', 'in', 'mixed'], "synapse_type must be 'ex', 'in', or 'mixed'"
            query = query.filter(self.Synapse.synapse_type==synapse_type)

        if synapse_probed is True:
            from ..connectivity import probed_pair_test_spike_limit
            # A pair counts as probed when the test-spike count matching the presynaptic
            # cell class exceeds the limit, or when both counts exceed it.
            query = query.filter(
                ((pre_cell.cell_class == 'ex') & (self.Pair.n_ex_test_spikes > probed_pair_test_spike_limit)) |
                ((pre_cell.cell_class == 'in') & (self.Pair.n_in_test_spikes > probed_pair_test_spike_limit)) |
                ((self.Pair.n_ex_test_spikes > probed_pair_test_spike_limit) & (self.Pair.n_in_test_spikes > probed_pair_test_spike_limit))
            )

        if electrical is not None:
            query = query.filter(self.Pair.has_electrical==electrical)

        if project_name is not None:
            names = [project_name] if isinstance(project_name, str) else project_name
            # Look the known names up once, using the same session as the query.
            known_names = self.list_project_names(session=session)
            for name in names:
                assert name in known_names, "project_name %r not found in database (see SynphysDatabase.list_project_names)" % name
            if isinstance(project_name, str):
                query = query.filter(self.Experiment.project_name==project_name)
            else:
                query = query.filter(self.Experiment.project_name.in_(project_name))

        if acsf is not None:
            if isinstance(acsf, str):
                query = query.filter(self.Experiment.acsf==acsf)
            else:
                query = query.filter(self.Experiment.acsf.in_(acsf))

        if age is not None:
            if age[0] is not None:
                query = query.filter(self.Slice.age>=age[0])
            if age[1] is not None:
                query = query.filter(self.Slice.age<=age[1])

        if distance is not None:
            if distance[0] is not None:
                query = query.filter(self.Pair.distance>=distance[0])
            if distance[1] is not None:
                query = query.filter(self.Pair.distance<=distance[1])

        if species is not None:
            query = query.filter(self.Slice.species==species)

        if internal is not None:
            if isinstance(internal, str):
                query = query.filter(self.Experiment.internal==internal)
            else:
                query = query.filter(self.Experiment.internal.in_(internal))

        if filter_exprs is not None:
            for expr in filter_exprs:
                query = query.filter(expr)

        # Preloading: add the already-joined entities to the result and tell the ORM to
        # populate the relationships from these joined columns.
        if 'experiment' in preload:
            query = (
                query
                .add_entity(self.Experiment)
                .add_entity(self.Slice)
            )
            query = query.options(
                contains_eager(self.Pair.experiment),
                contains_eager(self.Experiment.slice),
            )

        if 'cell' in preload:
            query = (
                query
                .add_entity(pre_cell)
                .add_entity(post_cell)
                .add_entity(pre_morphology)
                .add_entity(post_morphology)
                .add_entity(pre_patch_seq)
                .add_entity(post_patch_seq)
                .add_entity(pre_intrinsic)
                .add_entity(post_intrinsic)
                .add_entity(pre_location)
                .add_entity(post_location)
            )
            query = query.options(
                contains_eager(self.Pair.pre_cell, alias=pre_cell),
                contains_eager(self.Pair.post_cell, alias=post_cell),
                contains_eager(pre_cell.morphology, alias=pre_morphology),
                contains_eager(post_cell.morphology, alias=post_morphology),
                contains_eager(pre_cell.patch_seq, alias=pre_patch_seq),
                contains_eager(post_cell.patch_seq, alias=post_patch_seq),
                contains_eager(pre_cell.intrinsic, alias=pre_intrinsic),
                contains_eager(post_cell.intrinsic, alias=post_intrinsic),
                contains_eager(pre_cell.cortical_location, alias=pre_location),
                contains_eager(post_cell.cortical_location, alias=post_location),
            )

        if 'synapse' in preload:
            query = (
                query
                .add_entity(self.Synapse)
                .add_entity(self.RestingStateFit)
                .add_entity(self.Dynamics)
                .add_entity(self.SynapseModel)
            )
            query = query.options(
                contains_eager(self.Pair.synapse),
                contains_eager(self.Synapse.resting_state_fit),
                contains_eager(self.Pair.dynamics),
                contains_eager(self.Pair.synapse_model),
            )

        if 'synapse_prediction' in preload:
            query = (
                query
                .add_entity(self.SynapsePrediction)
            )
            query = query.options(
                contains_eager(self.Pair.synapse_prediction),
            )

        # package the aliased cells
        query.pre_cell = pre_cell
        query.post_cell = post_cell
        query.pre_morphology = pre_morphology
        query.post_morphology = post_morphology
        query.pre_location = pre_location
        query.post_location = post_location
        query.pre_intrinsic = pre_intrinsic
        query.post_intrinsic = post_intrinsic
        query.pre_patch_seq = pre_patch_seq
        query.post_patch_seq = post_patch_seq

        return query

    def matrix_pair_query(self, pre_classes, post_classes, columns=None, pair_query_args=None):
        """Returns the concatenated result of running pair_query over every combination
        of presynaptic and postsynaptic cell class.

        Parameters
        ----------
        pre_classes : dict
            Maps a name to the presynaptic cell class passed to pair_query.
        post_classes : dict
            Maps a name to the postsynaptic cell class passed to pair_query.
        columns : list | None
            Extra columns added to each query with ``add_columns``.
        pair_query_args : dict | None
            Extra keyword arguments passed to every pair_query call.

        Returns
        -------
        DataFrame | None
            One frame holding the rows of every combination, with added ``pre_class`` and
            ``post_class`` columns carrying the class names; None if there are no combinations.
        """
        if pair_query_args is None:
            pair_query_args = {}

        frames = []
        for pre_name, pre_class in pre_classes.items():
            for post_name, post_class in post_classes.items():
                pair_query = self.pair_query(
                    pre_class=pre_class,
                    post_class=post_class,
                    **pair_query_args
                )

                if columns is not None:
                    pair_query = pair_query.add_columns(*columns)

                df = pair_query.dataframe(rename_columns=False)
                df['pre_class'] = pre_name
                df['post_class'] = post_name
                frames.append(df)

        if not frames:
            return None
        if len(frames) == 1:
            return frames[0]
        # Concatenate once at the end rather than re-copying the accumulated frame on every iteration.
        return pd.concat(frames, axis=0, join='outer')

    def __getstate__(self):
        """Allows DB to be pickled and passed to subprocesses.
        """
        return {
            'ro_host': self.ro_host,
            'rw_host': self.rw_host,
            'db_name': self.db_name,
        }

    def __setstate__(self, state):
        """Rebuild the connection from the pickled host and database names."""
        self.__init__(ro_host=state['ro_host'], rw_host=state['rw_host'], db_name=state['db_name'])