# files.py
# Files
# While a program is running, its data is in memory. When the program
# ends, or the computer shuts down, data in memory disappears. To store
# data permanently, you have to save it in a file. Files are usually
# stored on a hard drive, USB drive, or CD.
# When there are a large number of files, they are often organized into
# directories (also called folders) in the file system of your computer.
# Each file is identified by a unique name, or a combination of a file
# name and a directory name.
# Working with files is a lot like working with notebooks. To use a
# notebook, you have to open it. When you're done, you have to close
# it. While the notebook is open, you can either write in it or read
# from it. In either case, you know where you are in the notebook.
# Most of the time, you read the whole notebook in its natural order,
# but you can also skip around.
# All of this applies to files as well. To open a file, you specify
# its name and indicate whether you want to read or write.
#
# Opening a file creates a file object. In this example, the variable
# f refers to the new file object.

f = open("test.dat", "w")
print(f)
# e.g. <open file 'test.dat', mode 'w' at fe820>
# (the exact text depends on the Python version)
#
# The open function takes two arguments. The first is the name of
# the file, and the second is the mode. Mode "w" means that we are
# opening the file for writing.
# If there is no file named test.dat, it will be created when you are
# accessing it with the 'w' mode. If there already is one, it will be
# replaced by the file we are writing.
# When we print the file object, we see the name of the file, the
# mode, and the location of the object.
# To put data in the file we invoke the write method on the file object:

f.write("Now is the time")
f.write("to close the file")

# Closing the file tells the system that we are done writing and makes
# the file available for reading:

f.close()

# Now we can open the file again, this time for reading, and read the
# contents into a string. This time, the mode argument is "r" for
# reading:

f = open("test.dat", "r")

# If we try to open a file with the read mode that doesn't exist, we get
# an error. Here we catch the IOError and print it, so that the rest of
# the examples in this file still run:

try:
    f = open("test.cat", "r")
except IOError as err:
    print(err)
# [Errno 2] No such file or directory: 'test.cat'

# Because the open above failed, f still refers to test.dat.
# Not surprisingly, the read method reads data from the file. With no
# arguments, it reads the entire contents of the file:

text = f.read()
print(text)
# Now is the timeto close the file

# We are done with this file object, so close it before reopening the
# file to start again from the beginning.
f.close()

# read can also take an argument that indicates how many characters
# to read:

f = open("test.dat", "r")
print(f.read(5))
# Now i

# If not enough characters are left in the file, read returns the
# remaining characters. When we get to the end of the file, read
# returns the empty string:

print(f.read(1000006))
# s the timeto close the file
print(f.read())
#
f.close()

# Writing a function that copies a file: It reads and writes up to
# fifty characters at a time. The first argument is the name of
# the original file; the second is the name of the new file:


def copyFile(oldFile, newFile):
    """Copy the text file named oldFile to a file named newFile.

    oldFile is opened for reading and newFile for writing (so an
    existing newFile is replaced). The contents are transferred in
    chunks of at most fifty characters. Returns None.

    Raises IOError if either file cannot be opened.
    """
    # The with statement closes both files when the block ends, even if
    # a read or write fails part-way through.
    with open(oldFile, "r") as f1, open(newFile, "w") as f2:
        while True:  # infinite loop
            text = f1.read(50)
            if text == "":
                break  # break out of the current loop
            f2.write(text)


# The break statement is new. Executing it breaks out of the loop;
# the flow of execution moves to the first statement after the loop.
# In this example, the while loop is infinite because the value True
# is always true. The only way to get out of the loop is to execute
# break, which happens when text is the empty string, which happens
# when we get to the end of the file.
#
# Text files
#
# A text file is a file that contains printable characters and
# whitespace, organized into lines separated by newline characters.
# Since Python is specifically designed to process text files, it
# provides methods that make the job easy.
#
# To demonstrate, we'll create a text file with three lines of text
# separated by newlines:

f = open("test1.dat", "w")
f.write("line one\nline two\nline three\n")
f.close()

# The readline method reads all the characters up to and including
# the next newline character ('\n'):

f = open("test1.dat", "r")
print(f.readline())
# line one
#
# readlines returns all of the remaining lines as a list of strings:
# print f.readlines()
# ['line two\012', 'line three\012']
# In this case, the output is in list format, which means that the
# strings appear with quotation marks and the newline character appears
# as the escape sequence \012 (newer Python versions display it as \n).
# At the end of the file, readline returns the empty string and
# readlines returns the empty list:
# >>> print f.readline()
#
# >>> print f.readlines()
# []

# We are done reading, so close the file.
f.close()

# Exercise 1:
#
# The following is an example of a line-processing program. filterFile
# makes a copy of oldFile, omitting any lines that begin with a #:


def filterFile(oldFile, newFile):
    """Copy oldFile to newFile, leaving out lines that begin with '#'.

    oldFile is opened for reading and newFile for writing (so an
    existing newFile is replaced). Only the first character of each
    line is checked, so a line with whitespace before the hash mark is
    copied unchanged. Returns None.

    Raises IOError if either file cannot be opened.
    """
    # The with statement closes both files when the block ends, even if
    # a read or write fails part-way through.
    with open(oldFile, "r") as f1, open(newFile, "w") as f2:
        while True:
            text = f1.readline()
            if text == "":
                break  # end of file
            # text is never empty here, so text[0] is always valid.
            if text[0] == '#':
                continue  # skip this line; go on to the next one
            f2.write(text)


# The continue statement ends the current iteration of the loop, but
# continues looping. The flow of execution moves to the top of the
# loop, checks the condition, and proceeds accordingly.
# Thus, if text is the empty string, the loop exits. If the first
# character of text is a hash mark, the flow of execution goes to
# the top of the loop. Only if both conditions fail do we copy text
# into the new file.

##############################
#
# Writing variables
#
# The argument of 'write' has to be a string, so if we want to put
# other values in a file, we have to convert them to strings first.
# The easiest way to do that is with the str function:
# >>> x = 52
# >>> f.write (str(x))
# An alternative is to use the format operator, %. When applied to
# integers, % is the modulus operator.
# But when the first operand is a string, % is the format operator.
# The first operand is the format string, and the second operand is
# a tuple of expressions (a single expression may be given without
# the tuple). The result is a string that contains the values of the
# expressions, formatted according to the format string.
# As a simple example, the format sequence "%d" means that the first
# expression in the tuple should be formatted as an integer. Here
# the letter d stands for decimal:
# >>> cars = 52
# >>> "%d" % cars
# '52'
# The result is the string '52', which is not to be confused with
# the integer value 52.
# The format sequence "%f" formats the next item in the tuple as a
# floating-point number, and "%s" formats the next item as a string:
#
# >>> "In %d days we made %f million %s." % (34,6.1,'dollars')
# 'In 34 days we made 6.100000 million dollars.'

#################################
#
# Pickling
#
# In order to put values into a file, you have to convert them to
# strings. You have already seen how to do that with str:
# >>> f.write (str(12.3))
# >>> f.write (str([1,2,3]))
# The problem is that when you read the value back, you get a string.
# The original type information has been lost. In fact, you can't
# even tell where one value ends and the next begins, as you can see
# below:
# >>> f.readline()
# '12.3[1, 2, 3]'
# The solution is 'pickling', so called because it preserves data
# structures. The pickle module contains the necessary commands.
# To use it, import pickle and then open the file in the usual way:
# >>> import pickle
# >>> f = open("test.pck", "w")
# (Under Python 3, pickle files must be opened in binary mode:
# "wb" for writing and "rb" for reading.)
# To store a data structure, use the dump function and then close the
# file in the usual way:
# >>> pickle.dump(12.3, f)
# >>> pickle.dump([1,2,3], f)
# >>> f.close()
# Then we can open the file for reading and load the data structures
# we dumped:
# >>> f = open("test.pck","r")
# >>> x = pickle.load(f)
# >>> x
# 12.3
# >>> type(x)
# <type 'float'>
# >>> y = pickle.load(f)
# >>> y
# [1, 2, 3]
# >>> type(y)
# <type 'list'>
# Each time we invoke load, we get a single value from the file,
# complete with its original type.

# Exercise 2:
#
# For more exercises, see Section 10.13 of [DEM].