If you need to deal with the fixed-width format right now, you can use something like the following:
def fixed_width_to_items(filename, fields, first_column_is_index=False, ignore_first_rows=0):
    """Lazily parse a fixed-width text file into ``(key, values)`` pairs.

    Parameters
    ----------
    filename : path of the text file to read.
    fields : sequence of character offsets marking the column boundaries;
        each pair of consecutive offsets delimits one column.
    first_column_is_index : if true, the key of each pair is the raw
        (unstripped) text from offset 0 up to ``fields[1]`` and the values
        are the remaining columns.  Otherwise the key is the zero-based
        number of the row (counted after the skipped rows) and every column
        is returned as a value.
    ignore_first_rows : number of leading lines to skip (e.g. a header).
        If the file has fewer lines than this, nothing is yielded.

    Returns
    -------
    An iterator of ``(key, [value, ...])`` tuples.  Values are whitespace-
    stripped strings; no data type conversion is attempted.

    Raises
    ------
    IOError / OSError at call time if the file cannot be opened.
    """
    if first_column_is_index:
        index = slice(0, fields[1])
        columns = [slice(start, stop) for start, stop in zip(fields[1:-1], fields[2:])]
    else:
        index = None
        columns = [slice(start, stop) for start, stop in zip(fields[:-1], fields[1:])]

    # Open eagerly so that a missing or unreadable file is reported by the
    # call itself rather than on the first iteration.
    reader = open(filename, 'r')

    def rows():
        try:
            # skip first rows; the default keeps a short file from leaking
            # StopIteration out of the generator
            for _ in range(ignore_first_rows):
                next(reader, None)
            for number, line in enumerate(reader):
                key = number if index is None else line[index]
                yield key, [line[column].strip() for column in columns]
        finally:
            # release the handle once the rows are exhausted or the
            # generator is closed early
            reader.close()

    return rows()
Here's a test program:
import pandas
import numpy
import tempfile
# create a data frame of random values to round-trip through a text file
df = pandas.DataFrame(numpy.random.randn(100, 5))
# Open the temporary file in text mode: to_string() returns str, which the
# default binary mode ('w+b') rejects on Python 3.
with tempfile.NamedTemporaryFile(mode='w', delete=True) as file_:
    file_.write(df.to_string())
    # flush so the text is on disk before the file is reopened by name
    file_.flush()
    # specify fields (column boundary offsets in the to_string() output)
    fields = [0, 3, 12, 22, 32, 42, 52]
    items = fixed_width_to_items(file_.name, fields, first_column_is_index=True, ignore_first_rows=1)
    # DataFrame.from_items was removed in pandas 1.0; from_dict with
    # orient='index' builds the same frame (one row per key) and must
    # consume the items while the temporary file still exists.
    df2 = pandas.DataFrame.from_dict(dict(items), orient='index')
# need to specify the datatypes, otherwise everything is a string
df2 = pandas.DataFrame(df2, dtype=float)
df2.index = [int(x) for x in df2.index]
# check: the subtraction aligns on the index, so row order does not matter
assert (df - df2).abs().max().max() < 1E-6
This should do the trick if you need it right now, but bear in mind that the function above is very simple; in particular, it does nothing about data types, so every value comes back as a string.