"""
parser.sql.alchemyadapter module (imdb.parser.sql package).
This module adapts the SQLAlchemy ORM to the internal mechanism.
Copyright 2008-2009 Davide Alberani <da@erlug.linux.it>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""
import re
import warnings
from sqlalchemy import *
from sqlalchemy import schema
# Bind SQLAlchemy's exceptions module to the name `exc`, which the rest of
# this module uses (exc.ProgrammingError, exc.OperationalError).
try:
    from sqlalchemy import exc  # 0.5
except ImportError:
    # Older releases ship the same exceptions in sqlalchemy.exceptions.
    from sqlalchemy import exceptions as exc  # 0.4
# migrate.changeset is optional: it is only needed to create foreign keys.
# HAS_MC records whether it could be imported.
try:
    import migrate.changeset
except ImportError:
    HAS_MC = False
    warnings.warn('Unable to import migrate.changeset: Foreign Keys will '
                  'not be created.')
else:
    HAS_MC = True
from imdb._exceptions import IMDbDataAccessError
from dbschema import *
# Matches every upper-case ASCII letter; used to convert table and
# column names.
re_upper = re.compile(r'([A-Z])')

# XXX: I'm not sure at all that this is the best method to connect
#      to the database and bind that connection to every table.
metadata = MetaData()

# Translation table: schema placeholder -> SQLAlchemy column type.
MAP_COLS = {
    INTCOL: Integer,
    UNICODECOL: UnicodeText,
    STRINGCOL: String,
}
class NotFoundError(IMDbDataAccessError):
    """Raised when Table.get(id) finds no row for the requested ID."""
def _renameTable(tname):
    """Convert a CamelCase table name to the lower_case_with_underscores
    form, as done by SQLObject.

    An underscore is inserted before every upper-case letter, a single
    leading underscore (if any) is dropped and the result is lower-cased.
    """
    underscored = re_upper.sub(r'_\1', tname)
    if underscored[:1] == '_':
        # Produced by a name that starts with an upper-case letter.
        underscored = underscored[1:]
    return underscored.lower()
def _renameColumn(cname):
    """Convert a column name to its database form, as done by SQLObject.

    'ID' is first rewritten as 'Id', so that it yields a single '_id'
    instead of '_i_d'; the rest is handled by _renameTable.
    """
    return _renameTable(cname.replace('ID', 'Id'))
class DNNameObj(object):
    """Tiny holder exposing a .dbName string; it's what
    table.sqlmeta.columns[column] evaluates to."""

    def __init__(self, dbName):
        self.dbName = dbName

    def __repr__(self):
        details = (self.dbName, id(self))
        return '<DNNameObj(dbName=%s) [id=%s]>' % details
class DNNameDict(object):
    """Dictionary-like view behind table.sqlmeta.columns: indexing it
    with a column name returns a DNNameObj for the mapped name."""

    def __init__(self, colMap):
        self.colMap = colMap

    def __getitem__(self, key):
        # A key missing from colMap raises KeyError, like a plain dict.
        return DNNameObj(self.colMap[key])

    def __repr__(self):
        details = (self.colMap, id(self))
        return '<DNNameDict(colMap=%s) [id=%s]>' % details
class SQLMetaAdapter(object):
    """Stand-in for SQLObject's table.sqlmeta: an object with .table,
    .columns and .idName attributes."""

    def __init__(self, table, colMap=None):
        self.table = table
        if colMap is None:
            colMap = {}
        self.colMap = colMap

    def __getattr__(self, name):
        # Reached only for names that the normal attribute lookup did not
        # find; 'table' is set by __init__, so that branch is normally
        # never taken.
        if name == 'idName':
            return self.colMap.get('id', 'id')
        elif name == 'columns':
            return DNNameDict(self.colMap)
        elif name == 'table':
            return getattr(self.table, name)
        # Every other unknown attribute evaluates to None (no
        # AttributeError is raised).
        return None

    def __repr__(self):
        details = (repr(self.table), repr(self.colMap), id(self))
        return '<SQLMetaAdapter(table=%s, colMap=%s) [id=%s]>' % details
class QAdapter(object):
    """Used to access table.q attribute (remapped to SQLAlchemy table.c).

    Attribute access with a schema-level column name (a key of colMap)
    returns the attribute of table.c named by the mapped value.
    """

    def __init__(self, table, colMap=None):
        """Store the table and the name mapping.

        table  -- object whose .c attribute holds the columns.
        colMap -- maps exposed attribute names to names on table.c;
                  defaults to an empty mapping.
        """
        self.table = table
        if colMap is None:
            colMap = {}
        self.colMap = colMap

    def __getattr__(self, name):
        """Return the column mapped to `name`.

        Raises AttributeError if `name` is not a key of colMap.
        """
        # __getattr__ runs only when normal lookup fails.  State is read
        # from the instance dict so that an instance whose __init__ never
        # ran (e.g. built with __new__) raises AttributeError instead of
        # recursing forever on self.table / self.colMap.
        state = self.__dict__
        try:
            return getattr(state['table'].c, state['colMap'][name])
        except KeyError:
            raise AttributeError("unable to get '%s'" % name)

    def __repr__(self):
        return '<QAdapter(table=%s, colMap=%s) [id=%s]>' % \
                (repr(self.table), repr(self.colMap), id(self))
class RowAdapter(object):
    """Adapter for a SQLAlchemy RowProxy object.

    Column values are read through attribute access, using the
    schema-level names (the keys of colMap).  Assigning to one of those
    names issues an UPDATE on the table for the row with the same id.
    """

    def __init__(self, row, table, colMap=None):
        """Wrap a row.

        row    -- the wrapped row; values are read from it with getattr.
        table  -- object providing .update() and .c.id; used only by
                  __setattr__ to write values back.
        colMap -- maps exposed attribute names to column names; defaults
                  to an empty mapping.
        """
        self.row = row
        # FIXME: it's OBSCENE that 'table' should be passed from
        #        TableAdapter through ResultAdapter only to land here,
        #        where it's used to directly update a row item.
        self.table = table
        if colMap is None:
            colMap = {}
        self.colMap = colMap
        # Values written through this adapter, keyed by column name; the
        # wrapped row is never modified, so reads consult this first.
        self._updated = {}
        # Keep this assignment last: once colMapKeys is set, __setattr__
        # treats every name it contains as a database column.
        self.colMapKeys = colMap.keys()

    def __getattr__(self, name):
        """Return the value of the column mapped to `name`.

        A value previously assigned through this adapter takes precedence
        over the one stored in the wrapped row.
        Raises AttributeError if `name` is not a key of colMap.
        """
        # __getattr__ runs only when normal lookup fails.  State is read
        # from the instance dict so that an instance whose __init__ never
        # ran raises AttributeError instead of recursing forever.
        state = self.__dict__
        try:
            colName = state['colMap'][name]
            updated = state.get('_updated', ())
            if colName in updated:
                return updated[colName]
            return getattr(state['row'], colName)
        except KeyError:
            raise AttributeError("unable to get '%s'" % name)

    def __setattr__(self, name, value):
        """Set an attribute; names listed in colMapKeys are written to
        the database instead of being stored on the instance."""
        # FIXME: I can't even think about how much performances suffer,
        #        for this horrible hack (and it's used so rarely...)
        #        For sure something like a "property" to map column names
        #        to getter/setter functions would be much better, but it's
        #        not possible (or at least not easy) to build them for a
        #        single instance.
        state = self.__dict__
        if name in state.get('colMapKeys', ()):
            # Trying to update a value in the database.
            row = state['row']
            table = state['table']
            colName = state['colMap'][name]
            updated = state.setdefault('_updated', {})
            # If the id itself was changed through this adapter, keep
            # addressing the row by the id it has now.
            rowID = updated.get('id', row.id)
            table.update(table.c.id == rowID).execute(**{colName: value})
            # Remember the new value only after the UPDATE succeeded, so
            # that later reads return it instead of the stale row value.
            updated[colName] = value
            return
        # For every other attribute.
        object.__setattr__(self, name, value)

    def __repr__(self):
        return '<RowAdapter(row=%s, table=%s, colMap=%s) [id=%s]>' % \
                (repr(self.row), repr(self.table), repr(self.colMap),
                id(self))
class ResultAdapter(object):
    """Adapter for a SQLAlchemy ResultProxy object: every row handed out
    is wrapped in a RowAdapter."""

    def __init__(self, result, table, colMap=None):
        self.result = result
        self.table = table
        if colMap is None:
            colMap = {}
        self.colMap = colMap

    def _wrap(self, row):
        """Wrap a single row of the result in a RowAdapter."""
        return RowAdapter(row, self.table, colMap=self.colMap)

    def count(self):
        """Return the number of rows; same as len(self)."""
        return len(self)

    def __len__(self):
        # FIXME: why sqlite returns -1? (that's wrooong!)
        rowcount = self.result.rowcount
        if rowcount == -1:
            # A rowcount of -1 is reported as zero rows.
            return 0
        return rowcount

    def __getitem__(self, key):
        # The result is turned into a list, so that both integer
        # indexes and slices can be used.
        selected = list(self.result)[key]
        if isinstance(key, slice):
            # A (possibly empty) list of items.
            return [self._wrap(row) for row in selected]
        # A single item.
        return self._wrap(selected)

    def __iter__(self):
        for row in self.result:
            yield self._wrap(row)

    def __repr__(self):
        details = (repr(self.result), repr(self.table),
                   repr(self.colMap), id(self))
        return '<ResultAdapter(result=%s, table=%s, colMap=%s) [id=%s]>' % \
                details
class TableAdapter(object):
    """Adapter for a SQLAlchemy Table object, to mimic a SQLObject class."""

    def __init__(self, table, uri=None):
        """Initialize a TableAdapter object.

        table -- a schema table description; its .name, and the .name,
                 .kind, .index and .params of every item of its .cols,
                 are read here.
        uri   -- optional connection URI, used only to tailor column
                 types to specific database servers.
        """
        self._imdbpySchema = table
        self._imdbpyName = table.name
        self.connectionURI = uri
        # Maps schema column names to database column names.
        self.colMap = {}
        columns = []
        for col in table.cols:
            # Column's parameters.
            params = {'nullable': True}
            params.update(col.params)
            if col.name == 'id':
                params['primary_key'] = True
            if 'notNone' in params:
                # SQLObject's notNone is the opposite of nullable.
                params['nullable'] = not params['notNone']
                del params['notNone']
            cname = _renameColumn(col.name)
            self.colMap[col.name] = cname
            colClass = MAP_COLS[col.kind]
            colKindParams = {}
            if 'length' in params:
                # 'length' belongs to the column type, not to the Column.
                colKindParams['length'] = params['length']
                del params['length']
            elif colClass is UnicodeText and col.index:
                # XXX: limit length for UNICODECOLs that will have an index.
                #      this can result in name.name and title.title
                #      truncations!
                colClass = Unicode
                # Should work for most of the database servers.
                length = 511
                if self.connectionURI:
                    if self.connectionURI.startswith('mysql'):
                        # To stay compatible with MySQL 4.x.
                        length = 255
                colKindParams['length'] = length
            elif self._imdbpyName == 'PersonInfo' and col.name == 'info':
                if self.connectionURI:
                    if self.connectionURI.startswith('ibm'):
                        # There are some entries longer than 32KB.
                        colClass = CLOB
                        # I really do hope that this space isn't wasted
                        # for each other shorter entry... <g>
                        colKindParams['length'] = 68*1024
            colKind = colClass(**colKindParams)
            if 'alternateID' in params:
                # There's no need to handle them here.
                del params['alternateID']
            # Create a column.
            colObj = Column(cname, colKind, **params)
            columns.append(colObj)
        self.tableName = _renameTable(table.name)
        # Create the table.
        self.table = Table(self.tableName, metadata, *columns)
        self._ta_insert = self.table.insert()
        self._ta_select = self.table.select
        # Adapters for special attributes.
        self.q = QAdapter(self.table, colMap=self.colMap)
        self.sqlmeta = SQLMetaAdapter(self.table, colMap=self.colMap)

    def select(self, conditions=None):
        """Execute a select on the table, optionally restricted by
        `conditions`, and return the result wrapped in a ResultAdapter."""
        result = self._ta_select(conditions).execute()
        return ResultAdapter(result, self.table, colMap=self.colMap)

    def get(self, theID):
        """Get an object given its ID.

        Raises NotFoundError if no row has that ID.
        """
        result = self.select(self.table.c.id == theID)
        #if not result:
        #    raise NotFoundError, 'no data for ID %s' % theID
        # FIXME: isn't this a bit risky?  We can't check len(result),
        #        because sqlite returns -1...
        #        What about converting it to a list and getting the first item?
        try:
            return result[0]
        except (IndexError, KeyError):
            # ResultAdapter.__getitem__ indexes a list, so an empty
            # result raises IndexError; KeyError is still accepted for
            # backward compatibility.
            raise NotFoundError('no data for ID %s' % theID)

    def dropTable(self, checkfirst=True):
        """Drop the table."""
        isIbmDb = self.table.bind.engine.url.drivername.startswith('ibm_db')
        dropParams = {'checkfirst': checkfirst}
        # Guess what?  Another work-around for a ibm_db bug.
        if isIbmDb:
            del dropParams['checkfirst']
        try:
            self.table.drop(**dropParams)
        except exc.ProgrammingError:
            # As above: re-raise the exception, but only if it's not ibm_db.
            if not isIbmDb:
                raise

    def createTable(self, checkfirst=True):
        """Create the table."""
        self.table.create(checkfirst=checkfirst)
        # Create indexes for alternateID columns (other indexes will be
        # created later, at explicit request for performances reasons).
        for col in self._imdbpySchema.cols:
            if col.name == 'id':
                continue
            if col.params.get('alternateID', False):
                self._createIndex(col, checkfirst=checkfirst)

    def _createIndex(self, col, checkfirst=True):
        """Create an index for a given (schema) column.

        With checkfirst set, nothing is done if the table already lists
        an index with the same name.  An OperationalError raised by the
        creation is turned into a warning.
        """
        # XXX: indexLen is ignored in SQLAlchemy, and that means that
        #      indexes will be over the whole 255 chars strings...
        # NOTE: don't use a dot as a separator, or DB2 will do
        #       nasty things.
        idx_name = '%s_%s' % (self.table.name, col.index or col.name)
        if checkfirst:
            for index in self.table.indexes:
                if index.name == idx_name:
                    return
        idx = Index(idx_name, getattr(self.table.c, self.colMap[col.name]))
        # XXX: beware that exc.OperationalError can be raised, is some
        #      strange circumstances; that's why the index name doesn't
        #      follow the SQLObject convention, but includes the table name:
        #      sqlite, for example, expects index names to be unique at
        #      db-level.
        try:
            idx.create()
        except exc.OperationalError as e:
            warnings.warn('Skipping creation of the %s.%s index: %s' %
                            (self.sqlmeta.table, col.name, e))

    def addIndexes(self, ifNotExists=True):
        """Create all required indexes."""
        for col in self._imdbpySchema.cols:
            if col.index:
                self._createIndex(col, checkfirst=ifNotExists)

    def addForeignKeys(self, mapTables, ifNotExists=True):
        """Create all required foreign keys.

        mapTables   -- maps schema table names to TableAdapter objects.
        ifNotExists -- accepted but not used by this method.

        Does nothing if migrate.changeset is not available.
        """
        if not HAS_MC:
            return
        # It seems that there's no reason to prevent the creation of
        # indexes for columns with FK constrains: if there's already
        # an index, the FK index is not created.
        countCols = 0
        for col in self._imdbpySchema.cols:
            countCols += 1
            if not col.foreignKey:
                continue
            # The foreign key is written as 'Table' or 'Table.column'.
            fks = col.foreignKey.split('.', 1)
            foreignTableName = fks[0]
            if len(fks) == 2:
                foreignColName = fks[1]
            else:
                foreignColName = 'id'
            foreignColName = mapTables[foreignTableName].colMap.get(
                                                foreignColName, foreignColName)
            thisColName = self.colMap.get(col.name, col.name)
            thisCol = self.table.columns[thisColName]
            foreignTable = mapTables[foreignTableName].table
            foreignCol = getattr(foreignTable.c, foreignColName)
            # Need to explicitly set an unique name, otherwise it will
            # explode, if two cols points to the same table.
            fkName = 'fk_%s_%s_%d' % (foreignTable.name, foreignColName,
                                        countCols)
            constrain = migrate.changeset.ForeignKeyConstraint([thisCol],
                                                        [foreignCol],
                                                        name=fkName)
            try:
                constrain.create()
            except exc.OperationalError:
                continue

    def __call__(self, *args, **kwds):
        """To insert a new row with the syntax: TableClass(key=value, ...)"""
        taArgs = {}
        for key, value in kwds.items():
            # Unknown keys are passed through unchanged.
            taArgs[self.colMap.get(key, key)] = value
        self._ta_insert.execute(*args, **taArgs)

    def __repr__(self):
        return '<TableAdapter(table=%s) [id=%s]>' % (repr(self.table), id(self))
# Module-level "cache" of the TableAdapter objects already built, keyed by
# schema table name.  It prevents "Table 'tableName' is already defined for
# this MetaData instance" errors, when two or more connections to the
# database are made.
# XXX: is this the best way to act?
TABLES_REPOSITORY = {}

def getDBTables(uri=None):
    """Return a list of TableAdapter objects to be used to access the
    database through the SQLAlchemy ORM.  The connection uri is optional, and
    can be used to tailor the db schema to specific needs.

    Adapters are built once per table name and reused afterwards; the uri
    is therefore ignored for tables that are already in the repository."""
    adapters = []
    for schemaTable in DB_SCHEMA:
        if schemaTable.name not in TABLES_REPOSITORY:
            TABLES_REPOSITORY[schemaTable.name] = TableAdapter(schemaTable,
                                                               uri)
        adapters.append(TABLES_REPOSITORY[schemaTable.name])
    return adapters
# Functions used to emulate SQLObject's logical operators.
def AND(*params):
    """Emulate SQLObject's AND: combine every argument with
    SQLAlchemy's and_()."""
    conjunction = and_(*params)
    return conjunction
def OR(*params):
    """Emulate SQLObject's OR: combine every argument with
    SQLAlchemy's or_()."""
    disjunction = or_(*params)
    return disjunction
def IN(item, inList):
    """Emulate SQLObject's IN.

    A Column is tested with its own in_() method; for anything else the
    test is spelled out as an OR of equality comparisons."""
    if isinstance(item, schema.Column):
        return item.in_(inList)
    comparisons = [x == item for x in inList]
    return OR(*comparisons)
def ISNULL(x):
    """Emulate SQLObject's ISNULL."""
    # XXX: Should we use null()?  Can null() be a global instance?
    # XXX: Is it safe to test None with the == operator, in this case?
    # The == operator is required here (not "is"): the comparison is
    # delegated to x, which may overload it to build an expression.
    isNull = (x == None)
    return isNull
def ISNOTNULL(x):
    """Emulate SQLObject's ISNOTNULL."""
    # The != operator is required here (not "is not"): the comparison is
    # delegated to x, which may overload it to build an expression.
    isNotNull = (x != None)
    return isNotNull
def CONTAINSSTRING(expr, pattern):
    """Emulate SQLObject's CONTAINSSTRING: a LIKE test on expr, with the
    pattern enclosed between two '%' wildcards (the pattern itself is
    not escaped)."""
    likePattern = '%%%s%%' % pattern
    return expr.like(likePattern)
def toUTF8(s):
    """Return s unchanged.

    For some strange reason, sometimes SQLObject wants utf8 strings
    instead of unicode; with SQLAlchemy no conversion is needed, so this
    is just a placeholder that gives the text back as it is."""
    return s
class _AlchemyConnection(object):
"""A proxy for the connection object, required since _ConnectionFairy
uses __slots__."""
def __init__(self, conn):
self.conn = conn
def __getattr__(self, name):
return getattr(self.conn, name)
def setConnection(uri, tables, encoding='utf8', debug=False):
    """Set connection for every table.

    An engine is created for uri and bound to the module-level metadata;
    the object returned is a proxy of the underlying connection, carrying
    the .module, .paramstyle, .getConnection and .dbName attributes.

    uri      -- connection URI.
    tables   -- not used by this function.
    encoding -- passed to the engine (and, for MySQL, added to the URI
                as the charset parameter).
    debug    -- if true, the engine is created with echo=True.
    """
    # FIXME: why on earth MySQL requires an additional parameter,
    #        is well beyond my understanding...
    if uri.startswith('mysql'):
        separator = '?'
        if '?' in uri:
            # The URI already has a query string: extend it.
            separator = '&'
        uri = uri + separator + 'charset=%s' % encoding
    engineParams = {'encoding': encoding}
    if debug:
        engineParams['echo'] = True
    if uri.startswith('ibm_db'):
        # Try to work-around a possible bug of the ibm_db DB2 driver.
        engineParams['convert_unicode'] = True
    # XXX: is this the best way to connect?
    engine = create_engine(uri, **engineParams)
    metadata.bind = engine
    engineConn = engine.connect()
    # XXX: OH MY, THAT'S A MESS!
    #      We need to return a "connection" object, with the .dbName
    #      attribute set to the db engine name (e.g. "mysql"), .paramstyle
    #      set to the style of the parameters for query() calls, and the
    #      .module attribute set to a module (?) with .OperationalError and
    #      .IntegrityError attributes.
    #      Another attribute of "connection" is the getConnection() function,
    #      used to return an object with a .cursor() method.
    connection = _AlchemyConnection(engineConn.connection)
    dialect = engineConn.dialect
    paramstyle = dialect.paramstyle
    connection.module = dialect.dbapi
    connection.paramstyle = paramstyle
    connection.getConnection = lambda: connection.connection
    connection.dbName = engine.url.drivername
    return connection
|