Saturday, October 10, 2009

Caseless (case insensitive) Dictionaries

I had a need for a case insensitive dictionary, started out subclassing the dict class, then I discovered DictMixin. DictMixin is awesome! It lets you define the bare minimum to implement a dictionary, however if you just do that some of your operations might be slower than necessary, to speed your dict up you should write these functions too: __contains__, __iter__, iteritems.

import UserDict
class bareminimumoverloads(UserDict.DictMixin):
def __getitem__(self, key):
pass
#end def

def __setitem__(self, key, value):
pass
#end def

def __delitem__(self, key):
pass
#end def

def keys(self):
pass
#end def
#end class


I wrote two types of case insensitive dictionaries, they behave a little differently, you can modify them to suit your needs, here they are:


#locals
import UserDict

class caseless(UserDict.DictMixin):
def __init__(self): # could probably allow a normal dict to be passed in, and call self.update(adict)
self.caseless = True
self.m_dict = {}
#end def

def __getitem__(self, key):
"""Retrieve the value associated with 'key' (in any case)."""
return self.m_dict[key.lower()][1]
#end def

def __setitem__(self, key, value):
"""Associate 'value' with 'key'. If 'key' already exists, but
in different case, it will be replaced."""
self.m_dict[key.lower()] = (key, value)
#end def

def __delitem__(self, key):
del self.m_dict[key.lower()]
#end def

def keys(self):
return [v[0] for v in self.m_dict.values()]
#end def

def __contains__(self, key):
return self.m_dict.has_key(key.lower)
#end def

def __iter__(self):
for k,v in self.m_dict.iteritems():
yield v[0]
#end for
#end def

def iteritems(self):
for k,v in self.m_dict.iteritems():
yield v
#end for
#end def

def keys(self):
return [v[0] for v in self.m_dict.values()]
#end def
#end class

def iscaseless(adict):
return isinstance(adict, caselessdict)
#end def

class semi(UserDict.DictMixin):
def __init__(self): # could probably allow a normal dict to be passed in, and call self.update(adict)
self.m_lower = {} # each value is a dictionary of real keys in the real dict
self.m_real = {}
#end def

# case sensitive lookup
def __getitem__(self, key):
"""Retrieve the value associated with 'key' (in any case)."""
return self.m_real[key]
#end def

def __setitem__(self, key, value):
"""Associate 'value' with 'key'. If 'key' already exists, but
in different case, it will be replaced."""
keylower = key.lower()

# update the lower dictionary
if keylower in self.m_lower:
self.m_lower[keylower][key] = value
else:
self.m_lower[keylower] = {}
self.m_lower[keylower][key] = value
#end if

# update the real dictionary
self.m_real[key] = value
#end def

def __delitem__(self, key):
keylower = key.lower()
realkeys = self.m_lower[keylower]
for k in realkeys:
del self.m_real[k]
#end if
del self.m_lower[keylower]
#end def

def keys(self):
return self.m_real.keys()
#end def

def __contains__(self, key):
return key.lower() in self.m_lower
#end def

def __iter__(self):
return self.m_real.__iter__()
#end def

def iteritems(self):
return self.m_real.iteritems()
#end def
#end class

class semi2(semi):

# This version does a case insensitive lookup
def __getitem__(self, key):
"""Retrieve the value associated with 'key' (in any case)."""
# first try to look up the real key
if key in self.m_real:
return self.m_real[key]
else:
keylower = key.lower()
realkeys = self.m_lower[keylower]
for k in realkeys:
result = self.m_real[k]
break
#end for
#end if
return result
#end def
#end def

No comments: