1077 lines
35 KiB
Python
1077 lines
35 KiB
Python
# We did not author this file nor maintain it. Skip linting it.
|
|
#pylint: skip-file
|
|
# Copyright (c) 1999-2007 Gary Strangman; All Rights Reserved.
|
|
#
|
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
# of this software and associated documentation files (the "Software"), to deal
|
|
# in the Software without restriction, including without limitation the rights
|
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
# copies of the Software, and to permit persons to whom the Software is
|
|
# furnished to do so, subject to the following conditions:
|
|
#
|
|
# The above copyright notice and this permission notice shall be included in
|
|
# all copies or substantial portions of the Software.
|
|
#
|
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
# THE SOFTWARE.
|
|
#
|
|
# Comments and/or additions are welcome (send e-mail to:
|
|
# strang@nmr.mgh.harvard.edu).
|
|
#
|
|
"""pstat.py module
|
|
|
|
#################################################
|
|
####### Written by: Gary Strangman ###########
|
|
####### Last modified: Dec 18, 2007 ###########
|
|
#################################################
|
|
|
|
This module provides some useful list and array manipulation routines
|
|
modeled after those found in the |Stat package by Gary Perlman, plus a
|
|
number of other useful list/file manipulation functions. The list-based
|
|
functions include:
|
|
|
|
abut (source,*args)
|
|
simpleabut (source, addon)
|
|
colex (listoflists,cnums)
|
|
collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
|
|
dm (listoflists,criterion)
|
|
flat (l)
|
|
linexand (listoflists,columnlist,valuelist)
|
|
linexor (listoflists,columnlist,valuelist)
|
|
linedelimited (inlist,delimiter)
|
|
lineincols (inlist,colsize)
|
|
lineincustcols (inlist,colsizes)
|
|
list2string (inlist)
|
|
makelol(inlist)
|
|
makestr(x)
|
|
printcc (lst,extra=2)
|
|
printincols (listoflists,colsize)
|
|
pl (listoflists)
|
|
printl(listoflists)
|
|
replace (lst,oldval,newval)
|
|
recode (inlist,listmap,cols=None)
|
|
remap (listoflists,criterion)
|
|
roundlist (inlist,num_digits_to_round_floats_to)
|
|
sortby(listoflists,sortcols)
|
|
unique (inlist)
|
|
duplicates(inlist)
|
|
writedelimited (listoflists, delimiter, file, writetype='w')
|
|
|
|
Some of these functions have alternate versions which are defined only if
|
|
Numeric (NumPy) can be imported. These functions are generally named as
|
|
above, with an 'a' prefix.
|
|
|
|
aabut (source, *args)
|
|
acolex (a,indices,axis=1)
|
|
acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
|
|
adm (a,criterion)
|
|
alinexand (a,columnlist,valuelist)
|
|
alinexor (a,columnlist,valuelist)
|
|
areplace (a,oldval,newval)
|
|
arecode (a,listmap,col='all')
|
|
arowcompare (row1, row2)
|
|
arowsame (row1, row2)
|
|
asortrows(a,axis=0)
|
|
aunique(inarray)
|
|
aduplicates(inarray)
|
|
|
|
Currently, the code is all but completely un-optimized. In many cases, the
|
|
array versions of functions amount simply to aliases to built-in array
|
|
functions/methods. Their inclusion here is for function name consistency.
|
|
"""
|
|
|
|
## CHANGE LOG:
|
|
## ==========
|
|
## 07-11-26 ... edited to work with numpy
|
|
## 01-11-15 ... changed list2string() to accept a delimiter
|
|
## 01-06-29 ... converted exec()'s to eval()'s to make compatible with Py2.1
|
|
## 01-05-31 ... added duplicates() and aduplicates() functions
|
|
## 00-12-28 ... license made GPL, docstring and import requirements
|
|
## 99-11-01 ... changed version to 0.3
|
|
## 99-08-30 ... removed get, getstrings, put, aget, aput (into io.py)
|
|
## 03/27/99 ... added areplace function, made replace fcn recursive
|
|
## 12/31/98 ... added writefc function for output to fixed column sizes
|
|
## 12/07/98 ... fixed import problem (failed on collapse() fcn)
|
|
## added __version__ variable (now 0.2)
|
|
## 12/05/98 ... updated doc-strings
|
|
## added features to collapse() function
|
|
## added flat() function for lists
|
|
## fixed a broken asortrows()
|
|
## 11/16/98 ... fixed minor bug in aput for 1D arrays
|
|
##
|
|
## 11/08/98 ... fixed aput to output large arrays correctly
|
|
|
|
import stats # required 3rd party module
|
|
import string, copy
|
|
from types import *
|
|
|
|
__version__ = 0.4
|
|
|
|
###=========================== LIST FUNCTIONS ==========================
|
|
###
|
|
### Here are the list functions, DEFINED FOR ALL SYSTEMS.
|
|
### Array functions (for NumPy-enabled computers) appear below.
|
|
###
|
|
|
|
|
|
def abut(source, *args):
    """
    Like the |Stat abut command.  It concatenates two or more lists
    side-by-side and returns the result.  '2D' lists are also accommodated
    for any argument.  CAUTION: If one list is shorter, it will be repeated
    (cyclically) until it is as long as the longest list.  If this behavior
    is not desired, use pstat.simpleabut().

    Usage:   abut(source, args)   where args=any # of lists
    Returns: a list of lists as long as the LONGEST list passed, source on
             the 'left', lists in <args> attached consecutively on the 'right'
    """

    def _stretch(seq, length):
        # Repeat seq cyclically and cut it to exactly `length` items.
        if len(seq) == 0:
            return seq  # nothing to repeat; also avoids dividing by zero
        # Ceiling division done with // so the count stays an int even
        # under true division.
        repeats = (length + len(seq) - 1) // len(seq)
        return (seq * repeats)[0:length]

    if not isinstance(source, (list, tuple)):
        source = [source]
    for addon in args:
        if not isinstance(addon, (list, tuple)):
            addon = [addon]
        if len(addon) < len(source):  # is source list longer?
            addon = _stretch(addon, len(source))
        elif len(source) < len(addon):  # is addon list longer?
            source = _stretch(source, len(addon))
        source = simpleabut(source, addon)
    return source
|
|
|
|
|
|
def simpleabut(source, addon):
    """
    Concatenates two lists as columns and returns the result.  '2D' lists
    are also accommodated for either argument (source or addon).  This DOES
    NOT repeat either list to make the 2 lists of equal length.  Beware of
    list pairs with different lengths ... the resulting list will be the
    length of the FIRST list passed; rows of source that have no partner in
    addon are carried over (deep-copied) unchanged.

    Whether an argument is treated as '2D' is decided from its first element
    only.

    Usage:   simpleabut(source,addon)  where source, addon=list (or list-of-lists)
    Returns: a list as long as source, with source on the 'left' and
             addon on the 'right'
    """
    if not isinstance(source, (list, tuple)):
        source = [source]
    if not isinstance(addon, (list, tuple)):
        addon = [addon]
    minlen = min(len(source), len(addon))
    # Rows beyond the shorter input are kept from source untouched.
    leftover = copy.deepcopy(list(source[minlen:]))
    if minlen == 0:
        # Nothing to pair up; also avoids indexing [0] of an empty input.
        return leftover
    source_is_2d = isinstance(source[0], (list, tuple))
    addon_is_2d = isinstance(addon[0], (list, tuple))
    rows = []
    for i in range(minlen):
        left = source[i] if source_is_2d else [source[i]]
        right = addon[i] if addon_is_2d else [addon[i]]
        rows.append(left + right)
    return rows + leftover
|
|
|
|
|
|
def colex(listoflists, cnums):
    """
    Extracts from listoflists the columns specified by 'cnums'.  cnums can
    be an integer, a sequence of integers, or a string holding an index or
    slice suffix that is applied to each row ... e.g., '[3:]' will colex
    columns 3 onward from the listoflists.

    A single integer (or a one-element sequence) yields a flat list of
    values; a longer sequence yields rows built by abutting the columns.

    Usage:   colex (listoflists,cnums)
    Returns: a list corresponding to the columns from listoflists
             specified by cnums, in the order the column numbers appear in
             cnums
    """
    if isinstance(cnums, (list, tuple)):  # if multiple columns to get
        column = [row[cnums[0]] for row in listoflists]
        for col in cnums[1:]:
            column = abut(column, [row[col] for row in listoflists])
    elif isinstance(cnums, str):  # if a '[3:]' type expr.
        # SECURITY: cnums is evaluated as Python source.  Never pass a string
        # that comes from an untrusted origin.
        getter = eval('lambda x: x' + cnums)
        column = [getter(row) for row in listoflists]
    else:  # else it's just 1 col to get
        column = [row[cnums] for row in listoflists]
    return column
|
|
|
|
|
|
def collapse(listoflists,
             keepcols,
             collapsecols,
             fcn1=None,
             fcn2=None,
             cfcn=None):
    """
    Averages data in collapsecols, keeping all unique items in keepcols
    (using unique, which keeps unique LISTS of column values), retaining the
    unique sets of values in keepcols and the mean for each.  Setting fcn1
    and/or fcn2 to point to a function rather than None (e.g., stats.sterr,
    len) will append those results (e.g., the sterr, N) after each calculated
    mean; if such a function raises, 'N/A' is appended instead.  cfcn is the
    collapse function to apply (defaults to mean, defined here in the pstat
    module to avoid circular imports with stats.py, but harmonicmean or
    others could be passed).

    Usage:   collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
    Returns: a list of lists with all unique permutations of entries appearing
             in columns ("conditions") specified by keepcols, abutted with the
             result of cfcn (if cfcn=None, defaults to the mean) of each
             column specified by collapsecols.  With no keepcols, a list with
             one entry per collapse column is returned instead.
    """

    def collmean(inlist):
        s = 0
        for item in inlist:
            s = s + item
        return s / float(len(inlist))

    def _attempt(fcn, column):
        # Extra statistics are best-effort: a failure becomes 'N/A'.
        try:
            return fcn(column)
        except Exception:
            return 'N/A'

    if not isinstance(keepcols, (list, tuple)):
        keepcols = [keepcols]
    if not isinstance(collapsecols, (list, tuple)):
        collapsecols = [collapsecols]
    if cfcn is None:
        cfcn = collmean

    if len(keepcols) == 0:
        means = [0] * len(collapsecols)
        for i in range(len(collapsecols)):
            avgcol = colex(listoflists, collapsecols[i])
            entry = cfcn(avgcol)
            extras = []
            if fcn1 is not None:
                extras.append(_attempt(fcn1, avgcol))
            if fcn2 is not None:
                # Append fcn2's own result, matching the grouped branch below.
                extras.append(_attempt(fcn2, avgcol))
            if extras:
                means[i] = [entry] + extras
            else:
                means[i] = entry
        return means

    values = colex(listoflists, keepcols)
    uniques = unique(values)
    uniques.sort()
    newlist = []
    for item in uniques:
        # Work on a private list so results can be appended to it.
        if isinstance(item, (list, tuple)):
            item = list(item)
        else:
            item = [item]
        tmprows = linexand(listoflists, keepcols, item)
        for col in collapsecols:
            avgcol = colex(tmprows, col)
            item.append(cfcn(avgcol))
            if fcn1 is not None:
                item.append(_attempt(fcn1, avgcol))
            if fcn2 is not None:
                item.append(_attempt(fcn2, avgcol))
        newlist.append(item)
    return newlist
|
|
|
|
|
|
def dm(listoflists, criterion):
    """
    Returns rows from the passed list of lists that meet the criteria in
    the passed criterion expression (a string as a function of x; e.g.,
    'x[3]>=9' will return all rows where the 4th column>=9 and "x[2]=='N'"
    will return rows with column 2 equal to the string 'N').

    SECURITY: criterion is evaluated as Python source.  Never pass a string
    that comes from an untrusted origin.

    Usage:   dm (listoflists, criterion)
    Returns: a list of the rows from listoflists that meet the specified
             criterion.
    """
    keep = eval('lambda x: ' + criterion)
    return [row for row in listoflists if keep(row)]
|
|
|
|
|
|
def flat(l):
    """
    Returns the flattened version of a '2D' list (one level of nesting is
    removed).  List-correlate to the a.ravel() method of NumPy arrays.

    Usage:   flat(l)
    Returns: a new 1D list holding the items of every row, in order
    """
    return [item for row in l for item in row]
|
|
|
|
|
|
def linexand(listoflists, columnlist, valuelist):
    """
    Returns the rows of a list of lists where col (from columnlist) = val
    (from valuelist) for EVERY pair of values (columnlist[i],valuelist[i]).
    len(columnlist) must equal len(valuelist); an IndexError is raised when
    valuelist is shorter.

    Values are compared directly with ==, so strings containing quotes and
    floats are matched exactly (no round trip through their text form).

    Usage:   linexand (listoflists,columnlist,valuelist)
    Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ALL i
    """
    if not isinstance(columnlist, (list, tuple)):
        columnlist = [columnlist]
    if not isinstance(valuelist, (list, tuple)):
        valuelist = [valuelist]
    # Pair by index (not zip) so a too-short valuelist still raises.
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    return [row for row in listoflists
            if all(row[col] == val for col, val in pairs)]
|
|
|
|
|
|
def linexor(listoflists, columnlist, valuelist):
    """
    Returns the rows of a list of lists where col (from columnlist) = val
    (from valuelist) for ANY pair of values (columnlist[i],valuelist[i]).
    One value is required for each column in columnlist.  If only one value
    exists for columnlist but multiple values appear in valuelist, the
    valuelist values are all assumed to pertain to the same column.

    Values are compared directly with ==, so strings containing quotes and
    floats are matched exactly (no round trip through their text form).

    Usage:   linexor (listoflists,columnlist,valuelist)
    Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ANY i
    """
    if not isinstance(columnlist, (list, tuple)):
        columnlist = [columnlist]
    if not isinstance(valuelist, (list, tuple)):
        valuelist = [valuelist]
    if len(columnlist) == 1 and len(valuelist) > 1:
        columnlist = columnlist * len(valuelist)
    # Pair by index (not zip) so a too-short valuelist still raises.
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    return [row for row in listoflists
            if any(row[col] == val for col, val in pairs)]
|
|
|
|
|
|
def linedelimited(inlist, delimiter):
    """
    Returns a string composed of elements in inlist, with each element
    separated by 'delimiter.'  Used by function writedelimited.  Use '\\t'
    for tab-delimiting.  The delimiter may be any length, including empty.

    Usage:   linedelimited (inlist,delimiter)
    Returns: the joined string (no trailing delimiter)
    """
    pieces = []
    for item in inlist:
        if not isinstance(item, str):
            item = str(item)
        pieces.append(item)
    # join() places the delimiter only BETWEEN items, so nothing has to be
    # trimmed off the end whatever the delimiter's length.
    return delimiter.join(pieces)
|
|
|
|
|
|
def lineincols(inlist, colsize):
    """
    Returns a string composed of elements in inlist, with each element
    right-aligned in columns of (fixed) colsize.  Elements longer than
    colsize are truncated to exactly colsize characters so every column
    keeps the same width.

    Usage:   lineincols (inlist,colsize)   where colsize is an integer
    Returns: formatted string created from inlist
    """
    cells = []
    for item in inlist:
        if not isinstance(item, str):
            item = str(item)
        cells.append(item[0:colsize].rjust(colsize))
    return ''.join(cells)
|
|
|
|
|
|
def lineincustcols(inlist, colsizes):
    """
    Returns a string composed of elements in inlist, with each element
    right-aligned in a column of width specified by a sequence colsizes.  The
    length of colsizes must be greater than or equal to the number of columns
    in inlist.  Elements longer than their column are truncated to exactly
    the column width.

    Usage:   lineincustcols (inlist,colsizes)
    Returns: formatted string created from inlist
    """
    cells = []
    for i, item in enumerate(inlist):
        if not isinstance(item, str):
            item = str(item)
        width = colsizes[i]
        cells.append(item[0:width].rjust(width))
    return ''.join(cells)
|
|
|
|
|
|
def list2string(inlist, delimit=' '):
    """
    Converts a 1D list to a single long string for file output, joining
    the string form of every item with 'delimit'.

    Usage:   list2string (inlist,delimit=' ')
    Returns: the string created from inlist
    """
    # str.join replaces string.join, which no longer exists in Python 3.
    return delimit.join(
        [item if isinstance(item, str) else str(item) for item in inlist])
|
|
|
|
|
|
def makelol(inlist):
    """
    Converts a 1D list to a 2D list (i.e., a list-of-lists).  Useful when you
    want to use put() to write a 1D list one item per line in the file.

    Usage:   makelol(inlist)
    Returns: if l = [1,2,'hi'] then returns [[1],[2],['hi']] etc.
    """
    return [[item] for item in inlist]
|
|
|
|
|
|
def makestr(x):
    """
    Returns x unchanged if it already is a string, otherwise str(x).

    Usage:   makestr(x)
    """
    if not isinstance(x, str):
        x = str(x)
    return x
|
|
|
|
|
|
def printcc(lst, extra=2):
    """
    Prints a list of lists in columns, customized by the max size of items
    within the columns (max size of items in col, plus 'extra' number of spaces).
    Use 'dashes' or '\\n' in the list-of-lists to print dashes or blank lines,
    respectively.  A flat list is printed as a single row.

    Usage:   printcc (lst,extra=2)
    Returns: None
    """
    blank_rows = (['\n'], '\n', '', [''])
    dash_rows = (['dashes'], 'dashes')

    if len(lst) == 0:
        return None
    # Treat the input as one row only when NO element is itself a row, so a
    # table that starts with a 'dashes' or blank marker is still a table.
    if not any(isinstance(row, (list, tuple)) for row in lst):
        lst = [lst]

    # Marker rows (blank lines, dashes) must not influence column widths.
    data_rows = [row for row in lst
                 if row not in blank_rows and row not in dash_rows]
    ncols = len(data_rows[0]) if data_rows else 0
    maxsize = [0] * ncols
    for col in range(ncols):
        widths = []
        for row in data_rows:
            cell = row[col]
            widths.append(len(cell if isinstance(cell, str) else str(cell)))
        maxsize[col] = max(widths) + extra

    for row in lst:
        if row in blank_rows:
            print('')
        elif row in dash_rows:
            dashes = ['-' * (width - 2) for width in maxsize]
            print(lineincustcols(dashes, maxsize))
        else:
            print(lineincustcols(row, maxsize))
    return None
|
|
|
|
|
|
def printincols(listoflists, colsize):
    """
    Prints a list of lists in columns of (fixed) colsize width, where
    colsize is an integer.  Each row is formatted with lineincols().

    Usage:   printincols (listoflists,colsize)
    Returns: None
    """
    for row in listoflists:
        # Single-argument print(...) behaves the same as a statement (Py2)
        # and as a function (Py3).
        print(lineincols(row, colsize))
    return None
|
|
|
|
|
|
def pl(listoflists):
    """
    Prints a list of lists, 1 list (row) at a time.  A row whose last item is
    '\\n' is written without an additional line break.

    Usage:   pl(listoflists)
    Returns: None
    """
    import sys

    for row in listoflists:
        # The length check keeps an empty row from raising on row[-1].
        if len(row) > 0 and row[-1] == '\n':
            sys.stdout.write('%s' % (row,))
        else:
            print(row)
    return None
|
|
|
|
|
|
def printl(listoflists):
    """
    Alias for pl: prints a list of lists, 1 list (row) at a time.

    Usage:   printl(listoflists)
    Returns: None
    """
    pl(listoflists)
    return
|
|
|
|
|
|
def replace(inlst, oldval, newval):
    """
    Replaces all occurrences of 'oldval' with 'newval', recursively
    (nested lists and tuples are searched too).  The input is not modified.

    Usage:   replace (inlst,oldval,newval)
    Returns: a new sequence with the replacements made; a tuple input (at
             any nesting level) yields a tuple, anything else a list
    """
    result = []
    for item in inlst:
        if isinstance(item, (list, tuple)):
            result.append(replace(item, oldval, newval))
        elif item == oldval:
            result.append(newval)
        else:
            result.append(item)
    # Tuples are immutable, so they are rebuilt rather than edited in place.
    if isinstance(inlst, tuple):
        return tuple(result)
    return result
|
|
|
|
|
|
def recode(inlist, listmap, cols=None):
    """
    Changes the values in a list to a new set of values (useful when
    you need to recode data from (e.g.) strings to numbers).  cols defaults
    to None (meaning all columns are recoded).  Values that do not appear in
    the first column of listmap are left as they are; when a value appears
    more than once there, the first entry wins.

    Usage:   recode (inlist,listmap,cols=None)  cols=recode cols, listmap=2D list
    Returns: a deep copy of inlist with the appropriate values replaced with
             new ones
    """
    lst = copy.deepcopy(inlist)
    # Build the lookup column once instead of once per cell.
    oldvals = [pair[0] for pair in listmap]
    if cols is not None and not isinstance(cols, (list, tuple)):
        cols = [cols]
    for row in lst:
        # With no cols given, cover every column of THIS row.
        targets = range(len(row)) if cols is None else cols
        for col in targets:
            try:
                idx = oldvals.index(row[col])
            except ValueError:
                continue  # value has no mapping; keep it
            row[col] = listmap[idx][1]
    return lst
|
|
|
|
|
|
def remap(listoflists, criterion):
    """
    Evaluates an expression for every row of a 2D list (listoflists).  This
    requires a criterion as a function of 'x' so that the result of the
    following is returned ... map(lambda x: 'criterion',listoflists).

    SECURITY: criterion is evaluated as Python source.  Never pass a string
    that comes from an untrusted origin.

    Usage:   remap(listoflists,criterion)    criterion=string
    Returns: a list holding the value of criterion for each row
    """
    transform = eval('lambda x: ' + criterion)
    return [transform(row) for row in listoflists]
|
|
|
|
|
|
def roundlist(inlist, digits):
    """
    Goes through each element in a 1D or 2D inlist, and applies the following
    function to all elements that are floats ... round(element,digits).
    The input is not modified.  A 1D input (detected by its first element
    being an int or float) is treated as a single row, so the result is
    always a list of rows.

    Usage:   roundlist(inlist,digits)
    Returns: new list of lists with rounded floats ([] for empty input)
    """
    if len(inlist) == 0:
        return []
    if isinstance(inlist[0], (int, float)):
        inlist = [inlist]
    # Rows are rebuilt so the caller's rows are never edited in place.
    return [[round(item, digits) if isinstance(item, float) else item
             for item in row]
            for row in inlist]
|
|
|
|
|
|
def sortby(listoflists, sortcols):
    """
    Sorts a list of lists on the column(s) specified in the sequence
    sortcols (a single column number is also accepted).  Rows that tie on
    the sort columns are ordered by comparing the full rows.

    Usage:   sortby(listoflists,sortcols)
    Returns: sorted list of new row lists, unchanged column ordering
    """
    if isinstance(sortcols, (list, tuple)):
        cols = list(sortcols)
    else:
        cols = [sortcols]
    # Decorate each row with its sort-key columns in front, sort, then strip
    # the keys off again.
    decorated = [[row[col] for col in cols] + list(row)
                 for row in listoflists]
    decorated.sort()
    return [entry[len(cols):] for entry in decorated]
|
|
|
|
|
|
def unique(inlist):
    """
    Returns all unique items in the passed list, in order of first
    appearance.  If a list-of-lists is passed, unique LISTS are found
    (i.e., items in the first dimension are compared).

    Usage:   unique (inlist)
    Returns: the unique elements (or rows) in inlist
    """
    uniques = []
    for item in inlist:
        # Membership uses ==, so unhashable items such as lists work too.
        if item not in uniques:
            uniques.append(item)
    return uniques
|
|
|
|
|
|
def duplicates(inlist):
    """
    Returns duplicate items in the FIRST dimension of the passed list.  An
    item is reported once for every later repetition of it, so an item that
    occurs three times appears twice in the result.

    Usage:   duplicates (inlist)
    Returns: list of the items that occur again later in inlist
    """
    return [item for i, item in enumerate(inlist) if item in inlist[i + 1:]]
|
|
|
|
|
|
def nonrepeats(inlist):
    """
    Returns items that are NOT duplicated in the first dim of the passed list.

    Usage:   nonrepeats (inlist)
    Returns: list of the items that occur exactly once, in original order
    """
    return [item for item in inlist if inlist.count(item) == 1]
|
|
|
|
#=================== PSTAT ARRAY FUNCTIONS =====================
|
|
#=================== PSTAT ARRAY FUNCTIONS =====================
|
|
#=================== PSTAT ARRAY FUNCTIONS =====================
|
|
#=================== PSTAT ARRAY FUNCTIONS =====================
|
|
#=================== PSTAT ARRAY FUNCTIONS =====================
|
|
#=================== PSTAT ARRAY FUNCTIONS =====================
|
|
#=================== PSTAT ARRAY FUNCTIONS =====================
|
|
#=================== PSTAT ARRAY FUNCTIONS =====================
|
|
#=================== PSTAT ARRAY FUNCTIONS =====================
|
|
#=================== PSTAT ARRAY FUNCTIONS =====================
|
|
#=================== PSTAT ARRAY FUNCTIONS =====================
|
|
#=================== PSTAT ARRAY FUNCTIONS =====================
|
|
#=================== PSTAT ARRAY FUNCTIONS =====================
|
|
#=================== PSTAT ARRAY FUNCTIONS =====================
|
|
#=================== PSTAT ARRAY FUNCTIONS =====================
|
|
#=================== PSTAT ARRAY FUNCTIONS =====================
|
|
|
|
try: # DEFINE THESE *ONLY* IF numpy IS AVAILABLE
|
|
import numpy as N
|
|
|
|
def aabut(source, *args):
    """
    Like the |Stat abut command.  It concatenates two or more arrays
    column-wise and returns the result.  1D arrays are treated as single
    columns.  CAUTION: If one array is shorter, it will be repeated
    (cyclically, via N.resize) until it is as long as the other.

    Usage:   aabut (source, args)    where args=any # of arrays
    Returns: an array as long as the LONGEST array passed, source appearing
             on the 'left', arrays in <args> attached on the 'right'.
    """
    if len(source.shape) == 1:
        source = N.resize(source, (source.shape[0], 1))
    for addon in args:
        if len(addon.shape) == 1:
            # Turn addon into a column of ITS OWN length first; the length
            # comparison below decides which side has to be repeated.
            addon = N.resize(addon, (addon.shape[0], 1))
        if len(addon) < len(source):
            addon = N.resize(addon, (source.shape[0], addon.shape[1]))
        elif len(source) < len(addon):
            source = N.resize(source, (addon.shape[0], source.shape[1]))
        source = N.concatenate((source, addon), 1)
    return source
|
|
|
|
def acolex(a, indices, axis=1):
    """
    Extracts specified indices (a list) from passed array, along passed
    axis (column extraction is default).  BEWARE: A 1D array is presumed to
    be a column-array (and the whole array will be returned as a column,
    whatever indices says).

    Usage:   acolex (a,indices,axis=1)
    Returns: the columns of a specified by indices
    """
    if not isinstance(indices, (list, tuple, N.ndarray)):
        indices = [indices]
    if len(N.shape(a)) == 1:
        return N.resize(a, (a.shape[0], 1))
    return N.take(a, indices, axis)
|
|
|
|
def acollapse(a, keepcols, collapsecols, fcn1=None, fcn2=None, cfcn=None):
    """
    Averages data in collapsecols, keeping all unique items in keepcols
    (using unique, which keeps unique LISTS of column values), retaining
    the unique sets of values in keepcols and the mean for each.  Setting
    fcn1 and/or fcn2 to a function rather than None appends that function's
    result after each calculated mean ('N/A' if the function raises).  cfcn
    is the collapse function to apply (defaults to the mean).

    Usage:   acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
    Returns: unique 'conditions' specified by the contents of columns specified
             by keepcols, abutted with the mean(s) of column(s) specified by
             collapsecols
    """

    def acollmean(inarray):
        # Mean of all elements: the sum must be divided by the count.
        values = N.ravel(inarray)
        return N.sum(values) / float(len(values))

    def _attempt(fcn, data, fallback):
        # Extra statistics are best-effort: a failure yields the fallback.
        try:
            return fcn(data)
        except Exception:
            return fallback

    seqtypes = (list, tuple, N.ndarray)
    if not isinstance(keepcols, seqtypes):
        keepcols = [keepcols]
    if not isinstance(collapsecols, seqtypes):
        collapsecols = [collapsecols]
    if cfcn is None:
        cfcn = acollmean

    if len(keepcols) == 0:
        avgcol = acolex(a, collapsecols)
        # One collapsed value per extracted column.
        means = N.array([cfcn(avgcol[:, j]) for j in range(avgcol.shape[1])])
        for fcn in (fcn1, fcn2):
            if fcn is not None:
                test = _attempt(fcn, avgcol, N.array(['N/A'] * len(means)))
                # aabut needs an array with at least one dimension.
                means = aabut(means, N.atleast_1d(N.asarray(test)))
        return means

    # Work with plain lists so that results can be appended (below).
    values = colex(a, list(keepcols))
    uniques = unique(values)  # get a LIST, so .sort keeps rows intact
    uniques.sort()
    newlist = []
    for item in uniques:
        if isinstance(item, seqtypes):
            item = list(item)
        else:
            item = [item]
        tmprows = alinexand(a, keepcols, item)
        for col in collapsecols:
            avgcol = acolex(tmprows, col)
            item.append(cfcn(avgcol))  # honour the caller's collapse function
            if fcn1 is not None:
                item.append(_attempt(fcn1, avgcol, 'N/A'))
            if fcn2 is not None:
                item.append(_attempt(fcn2, avgcol, 'N/A'))
        newlist.append(item)
    try:
        new_a = N.array(newlist)
    except TypeError:
        new_a = N.array(newlist, 'O')
    return new_a
|
|
|
|
def adm(a, criterion):
    """
    Returns rows from the passed array that meet the criteria in
    the passed criterion expression (a string as a function of x).

    SECURITY: criterion is evaluated as Python source.  Never pass a string
    that comes from an untrusted origin.

    Usage:   adm (a,criterion)   where criterion is like 'x[2]==37'
    Returns: an array built from the matching rows (object dtype if a
             regular array cannot be built)
    """
    keep = eval('lambda x: ' + criterion)
    # A real list is required: N.array() of a lazy filter object would give
    # a 0-d object array instead of the rows.
    lines = [row for row in a if keep(row)]
    try:
        return N.array(lines)
    except Exception:
        return N.array(lines, dtype='O')
|
|
|
|
def isstring(x):
    """
    Returns 1 if x is a string, 0 otherwise.

    Usage:   isstring(x)
    """
    if isinstance(x, str):
        return 1
    return 0
|
|
|
|
def alinexand(a, columnlist, valuelist):
    """
    Returns the rows of an array where col (from columnlist) = val
    (from valuelist).  One value is required for each column in columnlist.
    Values are compared directly with ==, so no text form of the value is
    involved.

    Usage:   alinexand (a,columnlist,valuelist)
    Returns: the rows of a where columnlist[i]=valuelist[i] for ALL i
    """
    seqtypes = (list, tuple, N.ndarray)
    if not isinstance(columnlist, seqtypes):
        columnlist = [columnlist]
    if not isinstance(valuelist, seqtypes):
        valuelist = [valuelist]
    # Pair by index (not zip) so a too-short valuelist still raises.
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    rows = [row for row in a if all(row[col] == val for col, val in pairs)]
    try:
        return N.array(rows)
    except Exception:
        return N.array(rows, dtype='O')
|
|
|
|
def alinexor(a, columnlist, valuelist):
    """
    Returns the rows of an array where col (from columnlist) = val (from
    valuelist).  One value is required for each column in columnlist.
    The exception is if either columnlist or valuelist has only 1 value,
    in which case that item will be expanded to match the length of the
    other list.  Values are compared directly with ==.

    Usage:   alinexor (a,columnlist,valuelist)
    Returns: the rows of a where columnlist[i]=valuelist[i] for ANY i
    """
    seqtypes = (list, tuple, N.ndarray)
    if not isinstance(columnlist, seqtypes):
        columnlist = [columnlist]
    if not isinstance(valuelist, seqtypes):
        valuelist = [valuelist]
    # list(...) first: multiplying an ndarray would scale its values
    # instead of repeating them.
    if len(columnlist) == 1 and len(valuelist) > 1:
        columnlist = list(columnlist) * len(valuelist)
    elif len(valuelist) == 1 and len(columnlist) > 1:
        valuelist = list(valuelist) * len(columnlist)
    # Pair by index (not zip) so a too-short valuelist still raises.
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    rows = [row for row in a if any(row[col] == val for col, val in pairs)]
    try:
        return N.array(rows)
    except Exception:
        return N.array(rows, dtype='O')
|
|
|
|
def areplace(a, oldval, newval):
    """
    Replaces all occurrences of oldval with newval in array a.  The input
    array is left untouched.

    Usage:   areplace(a,oldval,newval)
    Returns: a new array (built with N.where) holding newval wherever a
             equals oldval
    """
    return N.where(a == oldval, newval, a)
|
|
|
|
def arecode(a, listmap, col='all'):
    """
    Remaps the values in an array to a new set of values (useful when
    you need to recode data from (e.g.) strings to numbers as most stats
    packages require).  Can work on a SINGLE column (of a 2D array), or on
    'all' columns at once.  Every pair in listmap is applied, each against
    the ORIGINAL values, so recodings do not chain (1->2 followed by 2->3
    leaves an original 1 as 2).  When an old value is listed more than once,
    the first pair wins.

    The result is an object array when a already is one or when any new
    value is a string.

    Usage:   arecode (a,listmap,col='all')
    Returns: a version of array a where listmap[i][0] = (instead) listmap[i][1]
    """
    ashape = a.shape
    recode_all = isinstance(col, str) and col == 'all'
    if recode_all:
        original = a.ravel()
    else:
        original = a[:, col]

    use_objects = (a.dtype.char == 'O' or
                   any(isinstance(pair[1], str) for pair in listmap))
    if use_objects:
        a = N.array(a, dtype='O')
        original = N.array(original, dtype='O')

    work = N.array(original)  # private copy; `a` itself is never modified
    # Reverse order lets earlier pairs overwrite later ones (first wins).
    for pair in reversed(list(listmap)):
        mask = (original == pair[0])
        if use_objects:
            work[mask] = pair[1]
        else:
            # N.where widens the dtype if needed (e.g. int data, float value).
            work = N.where(mask, pair[1], work)

    if recode_all:
        return N.reshape(work, ashape)
    return N.concatenate(
        [a[:, 0:col], work[:, N.newaxis], a[:, col + 1:]], 1)
|
|
|
|
def arowcompare(row1, row2):
    """
    Compares two rows from an array element by element, regardless of
    whether it is an array of numbers or of python objects.  Object rows
    are compared pairwise with each element's own == operator; all other
    rows go through N.equal.

    Usage:   arowcompare(row1,row2)
    Returns: a boolean array of equal length containing True (1) where the
             two rows had identical elements and False (0) otherwise
    """
    row1 = N.asarray(row1)
    row2 = N.asarray(row2)
    if row1.dtype.char == 'O' or row2.dtype.char == 'O':
        same = [x == y for x, y in zip(row1, row2)]
        return N.array(same, dtype=bool)
    return N.equal(row1, row2)
|
|
|
|
def arowsame(row1, row2):
    """
    Compares two rows from an array, regardless of whether it is an
    array of numbers or of python objects, by reducing the element-wise
    result of arowcompare() to a single truth value.

    Usage:   arowsame(row1,row2)
    Returns: 1 (True) if the two rows are identical, 0 (False) otherwise.
    """
    # N.all replaces N.alltrue, which no longer exists in NumPy 2.x.
    return N.all(arowcompare(row1, row2))
|
|
|
|
def asortrows(a, axis=0):
    """
    Sorts an array "by rows".  This differs from the Numeric.sort() function,
    which sorts elements WITHIN the given axis.  Instead, this function keeps
    the elements along the given axis intact, but shifts them 'up or down'
    relative to one another.  Slices taken along axis are ordered
    lexicographically (first element, ties broken by the second, and so
    on); equal slices keep their original relative order.  A 1D array is
    simply sorted.

    Usage:   asortrows(a,axis=0)
    Returns: sorted version of a (a new array; a is not modified)
    """
    a = N.asarray(a)
    if a.ndim < 2:
        return N.sort(a, axis=axis, kind='mergesort')

    # Bring the axis being reordered to the front so each "row" is rows[i].
    rows = N.swapaxes(a, axis, 0)
    flat = rows.reshape(rows.shape[0], -1)
    if flat.shape[1] == 0:
        return N.array(a)  # nothing to compare on; lexsort needs >= 1 key

    # lexsort treats its LAST key as the primary one, so feed it the
    # columns in reverse to make column 0 the most significant.
    order = N.lexsort(flat.T[::-1])
    return N.swapaxes(rows[order], 0, axis)
|
|
|
|
def aunique(inarray):
    """
    Returns unique items in the FIRST dimension of the passed array, in
    order of first appearance.  For a 1D array the items are its elements;
    for a 2+D array they are the sub-arrays inarray[i], and two items count
    as the same only when every one of their elements compares equal.
    Lists are accepted and converted with N.asarray.  An empty input
    yields an empty array.

    Usage:   aunique (inarray)
    Returns: array holding the first occurrence of each distinct item
    """
    inarray = N.asarray(inarray)
    nitems = len(inarray)
    if nitems == 0:
        return inarray.copy()

    # One flattened row per item, so 1D, 2D and higher-dimensional inputs
    # (numeric or Object) all share the same comparison.
    flat = inarray.reshape(nitems, -1)
    keep = [0]  # indices of the first occurrence of each distinct item
    for i in range(1, nitems):
        already_seen = N.all(flat[keep] == flat[i], axis=1)
        if not N.any(already_seen):
            keep.append(i)
    return inarray[keep]
|
|
|
|
def aduplicates(inarray):
    """
    Returns duplicate items in the FIRST dimension of the passed array,
    i.e. every item that occurs more than once.  Each such item is reported
    a single time, in order of first appearance.  For a 2+D array the items
    are the sub-arrays inarray[i].  If nothing is duplicated an empty array
    is returned.

    Usage:   aduplicates (inarray)
    Returns: array of the items that appear at least twice in inarray
    """
    items = N.array(inarray).tolist()
    dups = []
    for i, item in enumerate(items):
        # Cheap check first: skip items already recorded as duplicates
        # before scanning the remainder of the list.
        if item not in dups and item in items[i + 1:]:
            dups.append(item)
    return N.array(dups)
|
|
|
|
except ImportError: # IF NUMERIC ISN'T AVAILABLE, SKIP ALL arrayfuncs
|
|
pass
|