Donn Lee Python Cookbook

This article is a collection of code examples based on programming done in the past.

Get the size (in bytes) of a file

import os
import sys

# Report the size of FILE in bytes and flag it when it is large.
FILE = '/home/donn/postip/log.txt'
try:
    filesize = os.path.getsize(FILE)
except OSError:
    # os.path.getsize raises OSError when the file is missing or unreadable.
    print("Error getting size of file %s." % FILE)
    # Exit with a non-zero status so a calling shell can see the failure.
    sys.exit(1)
if filesize > 5000000:
    print("That's a big file!")

Get list of files in a directory (Windows):

import os

# List every entry in the directory. The path must be quoted with plain
# ASCII quotes; typographic quotes are a syntax error in Python.
myfiles = os.listdir('c:\\tmp')
for f in myfiles:
    print(f)
    # Do other stuff to files. Maybe rename them with os.rename(oldpath, newpath).

Make a subdirectory if it doesn’t exist

import os
import os.path

# Directory to create. Named NEWDIR rather than "dir" so that the builtin
# dir() function is not shadowed (or, worse, passed to os.path.exists by
# accident when no variable of that name was ever assigned).
NEWDIR = '/home/donn/postip/archive'   # Put your own directory here.

if not os.path.exists(NEWDIR):
    print("Making dirs for %s." % NEWDIR)
    # makedirs also creates any missing parent directories.
    os.makedirs(NEWDIR)

Get and Print the timestamp of a file (last modify time)

import os
from time import localtime, strftime

# Last-modify time of the file, as seconds since the epoch.
i_tmp = os.path.getmtime('myfile.txt')
# Convert to a local-time struct, then format it as MM/DD/YYYY HH:MM:SS.
t_tmp = localtime(i_tmp)
tstamp = strftime("%m/%d/%Y %H:%M:%S", t_tmp)
print(tstamp)

Run a unix command (do a shell command)

import subprocess

FILE = 'myfile.txt'
DIRROOT = '/home/donn'

# Pass the command as an argument list so that no shell is involved and
# file names containing spaces or shell metacharacters are handled safely.
args = ['cp', '-p', '%s/%s' % (DIRROOT, FILE), '%s/tmp.txt' % DIRROOT]
cmd = ' '.join(args)   # Human-readable form, used only for display.
print('About to execute command: %s' % cmd)

try:
    # Merge stderr into stdout so error text from the command is captured too.
    proc = subprocess.Popen(args,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            universal_newlines=True)
    output = proc.communicate()[0]
    status = proc.returncode
except OSError as err:
    # Raised when the program itself cannot be started (e.g. not installed).
    output = str(err)
    status = -1

# Drop the single trailing newline that most commands emit.
if output.endswith('\n'):
    output = output[:-1]

# Only claim success when the command really exited with status 0.
if status == 0:
    print('File %s copied as tmp.txt' % FILE)
else:
    print('Command failed with exit status %s' % status)
# Optionally, print the output of the command:
print('Output of the command was: %s' % output)

Grab the html of a URL

import re
import sys

try:
    from urllib.request import urlopen   # Python 3 location.
except ImportError:
    from urllib import urlopen           # Python 2 location.

url = "http://search.ebay.com/search/search.dll?query=pentium&qty=y"
# Get search results.
try:
    f = urlopen(url)
except IOError:
    # Connection and name-resolution failures surface as IOError subclasses.
    print("HTTP client, urlopen error.")
    sys.exit(1)
try:
    data = f.read()
finally:
    # Always release the connection, even if the read fails.
    f.close()

# On Python 3 the body arrives as bytes; turn it into text so it can be
# printed and searched with a text regex.
if not isinstance(data, str):
    data = data.decode('utf-8', 'replace')

# Print the html.
print('HTML received was:')
print(data)

# Search the html.
my_regex = r'mint\s+condition'   # Use raw strings with regex!
if re.search(my_regex, data):
    print('String match found!')
else:
    print('No matches.')

Extract text you want from a long string

import re

long_string = 'The quick brown fox on 212 Main Street jumped over 1024 lazy dogs.'
# We want to extract only the numbers from the string.
# findall returns every non-overlapping run of digits, as strings.
num_list = re.findall(r'[0-9]+', long_string)
print("List of numbers found: %s" % num_list)
print('First number is: %s' % num_list[0])
print('All numbers are:')
for num in num_list:
    print(num)

Extracting with more precision using regular expressions (non-capturing parentheses; advanced regex)

import re

long_string = '64.4.233.10, 172.16.166.55, 12.8.32.1 1024 blah 19.95'

# Only extract the first number (first octet) of IP addresses.
# Don't extract other numbers like '1024' and '19'.

# Use "(?:" to make a pair of parentheses non-capturing.
# Use "\." to match a dot (suppress regex meaning of ".")
# "{3}": Look for 3 iterations (3 iterations of a dot followed by a number).
my_regex = r'([0-9]+)(?:\.[0-9]+){3}'

# With exactly one capturing group, findall returns just that group's text
# for every match.
first_digits = re.findall(my_regex, long_string)
print("First number of each IP address: %s" % first_digits)

# Remove "?:" from my_regex and see the difference! (With two capturing
# groups, findall returns a tuple per match instead of a plain string.)

Remove Ctrl-M (^M) characters from a line (remove a char by octal number)

# Ctrl-M is the carriage return character, octal 015 (octal 033 is ESC).
new_line = re.sub('\015', '', line)

Script to remove Ctrl-M (^M) characters from a line

#!/usr/bin/python
"""Strip Ctrl-M (carriage return, octal 015) characters from a file.

Usage: this_script.py <input_file>

The cleaned text is written to <input_file>.new; the input file itself is
left untouched.
"""
import re
import sys

if len(sys.argv) != 2:
    print("Usage: this_script.py <input_file>")
    # Non-zero status so a calling shell can tell that nothing was converted.
    sys.exit(1)

(tmp, FILE) = sys.argv

# Compile once, outside the loop. A bytes pattern is used because the
# files are handled in binary mode.
ctrl_m = re.compile(b'\015')

# Binary mode keeps the platform's newline translation out of the way:
# in text mode Windows would add the carriage returns straight back on write.
# Lines are streamed one at a time, so large files need little memory.
with open(FILE, 'rb') as f:
    with open("%s.new" % FILE, 'wb') as fo:
        for line in f:
            fo.write(ctrl_m.sub(b'', line))

print("Converted text saved as %s.new" % FILE)

Send an email using smtplib module (comes with Python)

import smtplib, string

FROMADDR = 'donn@foo.com'        # Author is Donn Lee.
SMTPSERVER = 'mail.myisp.net'    # Put your own email server here.


def EmailOut(toaddrs, subj, msg):
    """Send a plain-text email through SMTPSERVER.

    toaddrs -- a Python list of recipient addresses. It MUST be a list:
               a bare string would be joined character by character.
    subj    -- text for the Subject: header.
    msg     -- body text of the message.

    Prints an error and exits the program (SystemExit) if the server
    cannot be reached or rejects the message.
    """
    # Imported here because the snippet's import line does not bring in sys.
    import sys

    # Convert list to string.
    s_toaddrs = ",".join(toaddrs)
    # Convert msg to smtp format.
    msg = """\
To: %s
From: %s
Subject: %s

%s
""" % (s_toaddrs, FROMADDR, subj, msg)
    try:
        server = smtplib.SMTP(SMTPSERVER)
        try:
            # If your mail server requires a username/login, you'll need the following line.
            #server.login('donnlee', 'mypassword')
            server.sendmail(FROMADDR, toaddrs, msg)
        finally:
            # Close the connection whether or not the send succeeded.
            server.quit()
    except (smtplib.SMTPException, EnvironmentError):
        # EnvironmentError covers socket-level failures (connection refused,
        # host not found); SMTPException covers protocol-level rejections.
        print("ERROR SENDING EMAIL! SMTP ERROR.")
        sys.exit(1)
# End of fn EmailOut().

# Example call: one message addressed to two recipients.
subj = "Greetings from Hawaii!"
toaddrs = ["shirley@foo.com", "mom@somewhere.com"]
# Body text, one source line per line of the message.
msg = (
    "Hello family,\n"
    "Weather here has been great.\n"
    "Unfortunately, I don't have access to any fancy email clients,\n"
    "so I'm writing this email with this crazy unix server I found.\n"
)

# Hand everything to EmailOut(), which does the actual sending.
EmailOut(toaddrs, subj, msg)

Sending email using Bcc field
Similar idea to the EmailOut() function, but this time there are separate lists for “To:” and “Bcc:”.

import smtplib, string

FROMADDR = 'donn@foo.com'       # Author is Donn Lee.
SMTPSERVER = 'mail.myisp.net'   # Put your own email server here.


def EmailBcc(to_addrs, bcc_addrs, subj, msg):
    """Send a plain-text email with both visible and blind recipients.

    to_addrs  -- list of addresses shown in the To: header.
    bcc_addrs -- list of addresses that receive a copy without being
                 named anywhere in the message.
    subj      -- text for the Subject: header.
    msg       -- body text of the message.

    Prints an error and exits the program (SystemExit) if the server
    cannot be reached or rejects the message.
    """
    # Imported here because the snippet's import line does not bring in sys.
    import sys

    # Combine the lists for smtplib: the SMTP envelope must name every
    # recipient, visible or blind.
    total_addrs = list(to_addrs) + list(bcc_addrs)

    print('All receipients are: %s' % total_addrs)

    # Convert list to string for msg.
    s_toaddrs = ",".join(to_addrs)

    # Convert msg to smtp format.
    # There is deliberately no "Bcc:" header: sendmail() transmits the
    # message text exactly as given, so a Bcc header would reveal the blind
    # recipients to everybody. Delivery to them comes from total_addrs.
    msg = """\
To: %s
From: %s
Subject: %s

%s
""" % (s_toaddrs, FROMADDR, subj, msg)

    print('SMTP message text is:')
    print(msg)
    try:
        print('Sending email...')
        server = smtplib.SMTP(SMTPSERVER)
        try:
            # If your mail server requires a username/login, you'll need the following line.
            #server.login('donnlee', 'mypassword')
            server.sendmail(FROMADDR, total_addrs, msg)
        finally:
            # Close the connection whether or not the send succeeded.
            server.quit()
        print('Email sent!')
    except (smtplib.SMTPException, EnvironmentError):
        # EnvironmentError covers socket-level failures (connection refused,
        # host not found); SMTPException covers protocol-level rejections.
        print("ERROR SENDING EMAIL! SMTP ERROR.")
        sys.exit(1)

# Example call: one visible recipient plus two blind-copied recipients.
subj = "Greetings from Hawaii!"
toaddrs = ["joe@gmail.com"]
bccaddrs = ["bob@gmail.com", "sally@gmail.com"]
# Body text, one source line per line of the message.
msg = (
    "Hello family,\n"
    "Weather here has been great.\n"
    "Unfortunately, I don't have access to any fancy email clients,\n"
    "so I'm writing this email with this crazy unix server I found.\n"
)

# Hand everything to EmailBcc(), which does the actual sending.
EmailBcc(toaddrs, bccaddrs, subj, msg)

Read an email distribution list (simple text file, one address per line)

DIST_LIST = 'mydist.txt'


def ReadDistribution():
    """Return the email addresses listed in DIST_LIST, one per line.

    Surrounding whitespace (including the line ending) is stripped from
    each address and blank lines are skipped. If the file cannot be read,
    a message is printed and a built-in fallback list is returned instead.
    """
    try:
        with open(DIST_LIST, 'r') as f:
            data = f.readlines()
    except IOError:
        print("Problem reading file %s." % DIST_LIST)
        # Don't quit abruptly so script stays alive.
        return ['foo@foo.com', 'donn.lee@foo.com']
    # strip() rather than slicing off the last character: the final line of
    # a file often has no newline, and slicing would then cut into the address.
    addresses = [line.strip() for line in data]
    # Return a list of email addresses.
    return [addr for addr in addresses if addr]

Write a simple UDP server

import socket
import string

try:
    import SocketServer                     # Python 2 name.
except ImportError:
    import socketserver as SocketServer     # Python 3 name.


class EchoHandler(SocketServer.DatagramRequestHandler):
    """Handle one UDP datagram: acknowledge the sender and print the data."""

    def handle(self):
        # Write client's addr and port #. Whatever is written to wfile is
        # sent back to the client once handle() returns.
        reply = "Data from %s" % (self.client_address,)
        self.wfile.write(reply.encode("ascii"))
        # rfile holds exactly the one datagram that triggered this call, so
        # a single read() returns all of it; there is nothing to loop over.
        line = self.rfile.read()
        # On Python 3 the payload is bytes; convert it for printing.
        if not isinstance(line, str):
            line = line.decode("utf-8", "replace")
        print("Got from client %s: %s" % (self.client_address, line))


# Create server & start serving.
serv = SocketServer.UDPServer(("", 60009), EchoHandler)
print("Starting udp server...")
serv.serve_forever()

Write a simple UDP client for the above server

import socket

# This simple client moves a line of text to the server using UDP.

# IP address of the UDP server.
SERVER = '10.1.1.1'
PORT = 60009

LINE = 'Hello world!'

s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
try:
    # connect() on a UDP socket only records the default destination;
    # no packets are exchanged until send().
    s.connect((SERVER, PORT))
    print("Sending: %s" % LINE)
    # Sockets carry bytes, so encode the text before sending.
    s.send(('%s\r\n' % LINE).encode('utf-8'))
    print("Shutting down socket...")
finally:
    # Release the socket even if connect() or send() fails.
    s.close()
print("End.")

Gzip (compress) a list of files

import gzip
import shutil

files = ['file1.txt', 'file2.txt', 'file3.txt']
for f in files:
    print('Compressing file %s' % f)
    # The with-blocks close both files even if the copy fails part-way.
    with open(f, 'rb') as src:
        with gzip.open(f + '.gz', 'wb') as dst:
            # Copy contents of input file, chunk by chunk, so the whole
            # file is never held in memory at once.
            shutil.copyfileobj(src, dst)
print("Done compressing files.")

Print the current time in a nice, human-readable format

import time

# strftime formats the current local time when no time tuple is passed.
now_text = time.strftime('%H:%M:%S %a %b %d %Y')
print(now_text)

Will print:
23:01:22 Thu Jul 01 2004

Iterate through two lists with one FOR loop

>>> a=[1,2,3]
>>> b=['a','b','c']
>>> for (x,y) in map(None,a,b):
... print x,y
...
1 a
2 b
3 c