文件操作

文件操作是编程中常见的任务，Python 提供了丰富的文件处理功能。

文件读写

打开文件

使用 open() 函数打开文件，返回文件对象。

# 基本语法
# file = open(file_path, mode='r', encoding='utf-8')

# 打开文件（相对路径）
file = open("example.txt", "r", encoding="utf-8")

# 打开文件（绝对路径）
file = open("C:/Users/name/Desktop/example.txt", "r", encoding="utf-8")

# 使用原始字符串避免转义（Windows 路径）
file = open(r"C:\Users\name\Desktop\example.txt", "r", encoding="utf-8")

# 检查文件是否存在
import os

if os.path.exists("example.txt"):
    file = open("example.txt", "r", encoding="utf-8")
    print("文件打开成功")
else:
    print("文件不存在")

with 语句（推荐）

使用 with 语句自动管理文件的打开和关闭。

# 推荐做法：使用 with 语句
with open("example.txt", "r", encoding="utf-8") as file:
    content = file.read()
    print(content)

# 文件会自动关闭，即使发生异常
# 等价于但不推荐的传统写法
file = open("example.txt", "r", encoding="utf-8")
try:
    content = file.read()
    print(content)
finally:
    file.close()

# 同时打开多个文件
with open("input.txt", "r") as f_in, open("output.txt", "w") as f_out:
    content = f_in.read()
    f_out.write(content)

读取文件

read() - 读取全部内容

# 读取整个文件
with open("example.txt", "r", encoding="utf-8") as file:
    content = file.read()
    print(content)

# 限制读取的字符数
with open("example.txt", "r", encoding="utf-8") as file:
    first_100 = file.read(100)  # 只读前100个字符
    print(first_100)

# 注意：read() 会一次性把整个文件读入内存
# 大文件不推荐这样读取，请改用下文的逐行遍历方式

readline() - 读取一行

# 读取一行
with open("example.txt", "r", encoding="utf-8") as file:
    line1 = file.readline()
    print(line1, end="")  # readline 保留换行符

    line2 = file.readline()
    print(line2, end="")

# 逐行读取（手动控制）
with open("example.txt", "r", encoding="utf-8") as file:
    while True:
        line = file.readline()
        if not line:  # 到达文件末尾
            break
        print(line, end="")

readlines() - 读取所有行

# 读取所有行到列表
with open("example.txt", "r", encoding="utf-8") as file:
    lines = file.readlines()
    print(lines)  # ['line1\n', 'line2\n', 'line3\n']

# 去除换行符
lines = [line.strip() for line in lines]
print(lines)  # ['line1', 'line2', 'line3']

# Read only the first 3 lines.
# Note: the argument of readlines() is a size hint (a budget of
# characters/bytes after which no further lines are read), NOT a line count,
# so readlines(3) does not return three lines. islice() stops after exactly
# three lines and never loads the rest of the file.
from itertools import islice

with open("example.txt", "r", encoding="utf-8") as file:
    first_3_lines = list(islice(file, 3))
    print(first_3_lines)

遍历文件（推荐）

# 直接遍历文件对象（推荐）
with open("example.txt", "r", encoding="utf-8") as file:
    for line in file:
        print(line, end="")

# 这种方法内存友好，适合大文件

# 处理大文件的示例
with open("large_file.txt", "r", encoding="utf-8") as file:
    line_count = 0
    for line in file:
        line_count += 1
        if line_count % 1000 == 0:
            print(f"已处理 {line_count} 行")
    print(f"总共 {line_count} 行")

写入文件

write() - 写入字符串

# 写入字符串
with open("output.txt", "w", encoding="utf-8") as file:
    file.write("Hello, World!\n")
    file.write("This is a new file.\n")

# 注意：如果文件不存在会创建，如果存在会覆盖

# 写入多行
lines = ["Line 1\n", "Line 2\n", "Line 3\n"]
with open("output.txt", "w", encoding="utf-8") as file:
    file.writelines(lines)

# 或使用循环
with open("output.txt", "w", encoding="utf-8") as file:
    for line in lines:
        file.write(line)

追加模式

# 追加内容到文件末尾
with open("output.txt", "a", encoding="utf-8") as file:
    file.write("This line is appended.\n")

# 追加多行
with open("output.txt", "a", encoding="utf-8") as file:
    file.writelines(["Line A\n", "Line B\n"])

关闭文件

# 手动关闭（不推荐）
file = open("example.txt", "r", encoding="utf-8")
content = file.read()
file.close()  # 必须手动关闭

# 使用 with 自动关闭（推荐）
with open("example.txt", "r", encoding="utf-8") as file:
    content = file.read()
# 文件自动关闭

# 检查文件是否关闭
print(file.closed)  # True（with 块结束后）

# flush() - push Python's internal write buffer to the operating system
with open("output.txt", "w", encoding="utf-8") as file:
    file.write("Hello")
    # Hands the buffered data to the OS now instead of waiting for the buffer
    # to fill. The OS may still cache it; call os.fsync(file.fileno()) after
    # flush() when the data must actually reach the disk.
    file.flush()

文件指针位置

with open("example.txt", "r", encoding="utf-8") as file:
    # tell() - 获取当前位置
    print(f"当前位置: {file.tell()}")  # 0

    # read(10) - 读取10个字符
    content = file.read(10)
    print(f"内容: {content}")
    print(f"当前位置: {file.tell()}")  # 10

    # seek() - 移动指针
    file.seek(0)  # 移动到文件开头
    print(f"seek后位置: {file.tell()}")  # 0

    # 读取全部
    content = file.read()
    print(content)

    # seek(10) - 移动到第10个字节
    file.seek(10)
    print(f"当前位置: {file.tell()}")  # 10

    # seek(0, 2) - 移动到文件末尾
    file.seek(0, 2)  # 0=相对文件开头, 1=当前位置, 2=相对文件末尾
    print(f"文件末尾: {file.tell()}")

文件模式

基本模式

# 'r' - 只读（默认）
# 文件必须存在，否则报错
with open("example.txt", "r", encoding="utf-8") as file:
    content = file.read()

# 'w' - 只写
# 文件不存在则创建，存在则清空
with open("output.txt", "w", encoding="utf-8") as file:
    file.write("New content")

# 'a' - 追加
# 文件不存在则创建，存在则追加到末尾
with open("output.txt", "a", encoding="utf-8") as file:
    file.write("Appended content")

# 'x' - 排他性创建
# 文件不存在则创建，存在则报错（防止覆盖）
try:
    with open("new_file.txt", "x", encoding="utf-8") as file:
        file.write("New file")
except FileExistsError:
    print("文件已存在")

读写模式

# 'r+' - 读写（文件必须存在）
with open("example.txt", "r+", encoding="utf-8") as file:
    content = file.read()  # 读取
    file.seek(0)           # 回到开头
    file.write("Updated")  # 写入

# 'w+' - 读写（清空文件）
with open("example.txt", "w+", encoding="utf-8") as file:
    file.write("New content")  # 写入
    file.seek(0)               # 回到开头
    content = file.read()      # 读取

# 'a+' - 读写追加（指针在末尾）
with open("example.txt", "a+", encoding="utf-8") as file:
    file.write("Appended\n")
    file.seek(0)  # 回到开头才能读取
    content = file.read()

二进制模式

# 'rb' - 二进制读取
with open("image.jpg", "rb") as file:
    data = file.read()
    print(f"文件大小: {len(data)} 字节")

# 'wb' - 二进制写入
with open("output.bin", "wb") as file:
    data = b'\x00\x01\x02\x03\x04'
    file.write(data)

# 'ab' - 二进制追加
with open("output.bin", "ab") as file:
    file.write(b'\x05\x06\x07')

# 复制文件（二进制模式）
with open("source.jpg", "rb") as f_in:
    data = f_in.read()
    with open("copy.jpg", "wb") as f_out:
        f_out.write(data)

文件模式总结

模式	描述	文件不存在	文件存在	指针位置
'r'	只读	报错	正常读取	开头
'w'	只写	创建	清空	开头
'a'	追加	创建	追加	末尾
'x'	创建	创建	报错	开头
'r+'	读写	报错	正常读写	开头
'w+'	读写	创建	清空	开头
'a+'	读写	创建	追加	末尾
'rb'	二进制读	报错	正常读取	开头
'wb'	二进制写	创建	清空	开头

路径处理

os 模块

os 模块提供了传统的路径操作功能。

import os

# 获取当前工作目录
cwd = os.getcwd()
print(f"当前目录: {cwd}")

# 改变工作目录
os.chdir("/path/to/directory")

# 创建目录
os.mkdir("new_dir")  # 创建单个目录
os.makedirs("parent/child/grandchild")  # 创建多级目录

# 删除目录
os.rmdir("empty_dir")  # 删除空目录
os.removedirs("parent/child/grandchild")  # 删除多级空目录

# 删除文件
os.remove("file.txt")

# 重命名文件或目录
os.rename("old_name.txt", "new_name.txt")

# 检查路径
print(os.path.exists("file.txt"))  # 是否存在
print(os.path.isfile("file.txt"))  # 是否是文件
print(os.path.isdir("folder"))     # 是否是目录

路径拼接

import os

# os.path.join() - 跨平台路径拼接
path = os.path.join("folder", "subfolder", "file.txt")
print(path)  # folder/subfolder/file.txt（Linux/Mac）
        # folder\subfolder\file.txt（Windows）

# 获取文件名和目录名
path = "/home/user/documents/file.txt"
dirname = os.path.dirname(path)   # /home/user/documents
basename = os.path.basename(path)  # file.txt

# 分割路径
dirname, filename = os.path.split(path)
print(dirname)  # /home/user/documents
print(filename)  # file.txt

# 分离文件名和扩展名
filename, ext = os.path.splitext("file.txt")
print(filename)  # file
print(ext)      # .txt

# 获取绝对路径
abs_path = os.path.abspath("file.txt")
print(abs_path)

# Normalize the path (collapses "." and ".." segments)
norm_path = os.path.normpath("folder/./subfolder/../file.txt")
print(norm_path)  # folder/file.txt (folder\file.txt on Windows)

# 判断路径
print(os.path.isabs("/home/user"))  # True（绝对路径）
print(os.path.isabs("file.txt"))    # False（相对路径）

路径信息

import os

path = "/home/user/documents/file.txt"

# 获取文件大小（字节）
print(os.path.getsize(path))

# 获取修改时间
import time
mtime = os.path.getmtime(path)
print(time.ctime(mtime))  # 可读时间格式

# 判断各种属性
print(os.path.exists(path))     # 是否存在
print(os.path.isfile(path))      # 是否是文件
print(os.path.isdir(path))       # 是否是目录
print(os.path.islink(path))      # 是否是符号链接
print(os.path.ismount("/"))      # 是否是挂载点

# 遍历目录
for root, dirs, files in os.walk("folder"):
    print(f"目录: {root}")
    print(f"子目录: {dirs}")
    print(f"文件: {files}")
    print("---")

pathlib 模块（推荐）

pathlib 是 Python 3.4+ 提供的面向对象路径处理库，更易用。

from pathlib import Path

# 创建 Path 对象
path = Path("folder/subfolder/file.txt")

# 路径拼接
path = Path("folder") / "subfolder" / "file.txt"
print(path)  # folder/subfolder/file.txt

# 获取各个部分
print(path.name)       # file.txt（文件名）
print(path.stem)       # file（不含扩展名）
print(path.suffix)     # .txt（扩展名）
print(path.parent)     # folder/subfolder（父目录）
print(path.parents[1]) # folder（上级的上级）

# 获取绝对路径
abs_path = path.absolute()
print(abs_path)

# 检查路径
path.exists()    # 是否存在
path.is_file()   # 是否是文件
path.is_dir()    # 是否是目录

# 读写文件（推荐使用）
path = Path("example.txt")

# 读取文件
content = path.read_text(encoding="utf-8")

# 写入文件
path.write_text("Hello, World!", encoding="utf-8")

# 读写二进制
data = path.read_bytes()
path.write_bytes(b"Binary data")

# 创建目录
Path("new_dir").mkdir()
Path("parent/child").mkdir(parents=True, exist_ok=True)

pathlib 遍历目录

from pathlib import Path

# 遍历目录
path = Path("folder")

# 遍历所有文件
for file in path.glob("*.txt"):
    print(file)

# 递归遍历（包括子目录）
for file in path.rglob("*.txt"):
    print(file)

# 遍历所有内容
for item in path.iterdir():
    print(item)
    if item.is_file():
        print(f"文件: {item.name}")
    elif item.is_dir():
        print(f"目录: {item.name}")

# 递归遍历所有文件
for file in path.rglob("*"):
    if file.is_file():
        print(file)

实用示例

from pathlib import Path

# 批量重命名文件
folder = Path("photos")
for file in folder.glob("*.jpg"):
    new_name = file.stem + "_old" + file.suffix
    file.rename(folder / new_name)

# 查找并处理文件
for file in Path(".").rglob("*.log"):
    if file.stat().st_size > 1024 * 1024:  # 大于1MB
        print(f"大文件: {file}")

# 创建目录结构
project = Path("project")
project.mkdir(exist_ok=True)
(project / "src").mkdir(exist_ok=True)
(project / "tests").mkdir(exist_ok=True)
(project / "docs").mkdir(exist_ok=True)

# 清理空目录
for folder in Path(".").rglob("*"):
    if folder.is_dir() and not any(folder.iterdir()):
        print(f"删除空目录: {folder}")
        folder.rmdir()

JSON 操作

JSON（JavaScript Object Notation）是一种轻量级的数据交换格式。

读取 JSON

import json

# 读取 JSON 文件
with open("data.json", "r", encoding="utf-8") as file:
    data = json.load(file)
    print(data)
    print(type(data))  # dict 或 list

# 读取 JSON 字符串
json_string = '{"name": "Alice", "age": 25}'
data = json.loads(json_string)
print(data)  # {'name': 'Alice', 'age': 25}

# 读取带注释的 JSON（需要去除注释）
def _strip_line_comment(line):
    """Return *line* with a trailing ``//`` comment removed, ignoring ``//`` inside strings."""
    in_string = False
    escaped = False
    for index, char in enumerate(line):
        if in_string:
            if escaped:
                # The character after a backslash is always literal.
                escaped = False
            elif char == "\\":
                escaped = True
            elif char == '"':
                in_string = False
        elif char == '"':
            in_string = True
        elif line.startswith("//", index):
            return line[:index]
    return line


def load_json_with_comments(file_path):
    """Load a JSON file that may contain ``//`` single-line comments.

    Both full-line comments and comments trailing a value are removed.
    A ``//`` that appears inside a JSON string (for example a URL such as
    ``"http://example.com"``) is data and is left untouched. Block comments
    (``/* ... */``) are not supported.

    Args:
        file_path: Path of the UTF-8 encoded file to read.

    Returns:
        The decoded JSON value (usually a dict or a list).

    Raises:
        json.JSONDecodeError: If the text is not valid JSON once comments
            have been removed.
        OSError: If the file cannot be opened.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        content = file.read()
    # JSON strings cannot span lines, so each line can be scanned on its own.
    # Lines are kept (possibly empty) so that line numbers reported by
    # JSONDecodeError still match the original file.
    cleaned = "\n".join(_strip_line_comment(line) for line in content.split("\n"))
    return json.loads(cleaned)

写入 JSON

import json

# 写入 JSON 文件
data = {
    "name": "Alice",
    "age": 25,
    "city": "New York"
}

with open("output.json", "w", encoding="utf-8") as file:
    json.dump(data, file, ensure_ascii=False, indent=2)

# ensure_ascii=False: 支持中文
# indent=2: 美化输出

# 转换为 JSON 字符串
json_string = json.dumps(data, ensure_ascii=False, indent=2)
print(json_string)

# 不换行输出（紧凑格式）
json_string = json.dumps(data, separators=(",", ":"))
print(json_string)

处理复杂 JSON

import json
from pathlib import Path

# 读取配置文件
config_file = Path("config.json")
if config_file.exists():
    with open(config_file, "r", encoding="utf-8") as file:
        config = json.load(file)
    print(config)
else:
    # 创建默认配置
    config = {
        "database": {
            "host": "localhost",
            "port": 5432,
            "name": "mydb"
        },
        "debug": True,
        "max_connections": 10
    }
    with open(config_file, "w", encoding="utf-8") as file:
        json.dump(config, file, indent=2, ensure_ascii=False)

# 处理列表数据
users = [
    {"id": 1, "name": "Alice", "email": "alice@example.com"},
    {"id": 2, "name": "Bob", "email": "bob@example.com"},
    {"id": 3, "name": "Charlie", "email": "charlie@example.com"}
]

with open("users.json", "w", encoding="utf-8") as file:
    json.dump(users, file, indent=2, ensure_ascii=False)

# 读取并更新
with open("users.json", "r", encoding="utf-8") as file:
    users = json.load(file)

# 添加新用户
users.append({"id": 4, "name": "David", "email": "david@example.com"})

# 保存
with open("users.json", "w", encoding="utf-8") as file:
    json.dump(users, file, indent=2, ensure_ascii=False)

JSON 错误处理

import json

# 捕获 JSON 解析错误
try:
    with open("data.json", "r", encoding="utf-8") as file:
        data = json.load(file)
except json.JSONDecodeError as e:
    print(f"JSON 解析错误: {e}")
    print(f"错误位置: 行 {e.lineno}, 列 {e.colno}")
except FileNotFoundError:
    print("文件不存在")

# 验证 JSON 格式
def validate_json(file_path):
    """Check whether *file_path* is a UTF-8 text file containing valid JSON.

    Args:
        file_path: Path of the file to check.

    Returns:
        True if the file exists, decodes as UTF-8 and parses as JSON.
        False if the file is missing, is not valid UTF-8, or is malformed
        JSON. Other OS errors (for example missing permissions) propagate
        to the caller.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            json.load(file)
    except (json.JSONDecodeError, UnicodeDecodeError, FileNotFoundError):
        # UnicodeDecodeError: the bytes are not UTF-8 text, so the file
        # cannot be valid JSON under this reader either.
        return False
    return True

# 使用
if validate_json("data.json"):
    print("JSON 格式正确")
else:
    print("JSON 格式错误")

CSV 操作

CSV（Comma-Separated Values）是一种常用的表格数据存储格式。

读取 CSV

import csv

# 基本读取
with open("data.csv", "r", encoding="utf-8") as file:
    reader = csv.reader(file)
    for row in reader:
        print(row)

# 读取到列表
with open("data.csv", "r", encoding="utf-8") as file:
    reader = csv.reader(file)
    data = list(reader)
    print(data)

# 跳过表头
with open("data.csv", "r", encoding="utf-8") as file:
    reader = csv.reader(file)
    header = next(reader)  # 读取表头
    for row in reader:
        print(row)

# 使用 DictReader（推荐）
with open("data.csv", "r", encoding="utf-8") as file:
    reader = csv.DictReader(file)
    for row in reader:
        print(row["name"], row["age"])

# 获取字段名
with open("data.csv", "r", encoding="utf-8") as file:
    reader = csv.DictReader(file)
    print(reader.fieldnames)  # ['name', 'age', 'city']

写入 CSV

import csv

# 写入列表数据
data = [
    ["Alice", 25, "New York"],
    ["Bob", 30, "London"],
    ["Charlie", 35, "Paris"]
]

with open("output.csv", "w", encoding="utf-8", newline="") as file:
    writer = csv.writer(file)
    writer.writerows(data)

# 写入带表头的数据
with open("output.csv", "w", encoding="utf-8", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(["Name", "Age", "City"])
    writer.writerows(data)

# 使用 DictWriter（推荐）
with open("output.csv", "w", encoding="utf-8", newline="") as file:
    fieldnames = ["name", "age", "city"]
    writer = csv.DictWriter(file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows([
        {"name": "Alice", "age": 25, "city": "New York"},
        {"name": "Bob", "age": 30, "city": "London"}
    ])

# 注意：newline="" 防止在 Windows 上出现空行

CSV 进阶操作

import csv

# 指定分隔符
with open("data.csv", "r", encoding="utf-8") as file:
    reader = csv.reader(file, delimiter=';')  # 分号分隔
    for row in reader:
        print(row)

# 处理引号
with open("data.csv", "r", encoding="utf-8") as file:
    reader = csv.reader(file, quotechar='"')
    for row in reader:
        print(row)

# 读取大文件（逐行处理）
with open("large_file.csv", "r", encoding="utf-8") as file:
    reader = csv.reader(file)
    header = next(reader)
    for i, row in enumerate(reader):
        if i >= 1000:  # 只处理前1000行
            break
        print(row)

# 过滤数据
with open("data.csv", "r", encoding="utf-8") as file:
    reader = csv.DictReader(file)
    for row in reader:
        if int(row["age"]) > 30:  # 只处理年龄大于30的
            print(row)

使用 pandas 处理 CSV

import pandas as pd

# 读取 CSV
df = pd.read_csv("data.csv", encoding="utf-8")
print(df.head())  # 查看前5行

# 只读取特定列
df = pd.read_csv("data.csv", usecols=["name", "age"])

# 过滤数据
df_filtered = df[df["age"] > 30]
print(df_filtered)

# 写入 CSV
df.to_csv("output.csv", index=False, encoding="utf-8")

# 追加数据
df.to_csv("output.csv", mode="a", header=False, index=False)

CSV 实用示例

import csv
from pathlib import Path

# Merge several CSV files into one
output_file = Path("merged.csv")
# Collect the inputs BEFORE the output file is created and skip the output by
# name: glob() is lazy, so otherwise merged.csv itself (new, or left over from
# a previous run) would match "*.csv" and be merged into itself.
# Sorting makes the merge order deterministic.
csv_files = sorted(
    path for path in Path(".").glob("*.csv") if path.name != output_file.name
)

with open(output_file, "w", encoding="utf-8", newline="") as f_out:
    writer = csv.writer(f_out)
    for i, csv_file in enumerate(csv_files):
        # newline="" lets the csv module handle line breaks inside quoted fields
        with open(csv_file, "r", encoding="utf-8", newline="") as f_in:
            reader = csv.reader(f_in)
            for j, row in enumerate(reader):
                if i == 0 or j > 0:  # keep the header row of the first file only
                    writer.writerow(row)

# Transform data: copy input.csv to output.csv with a derived column.
# Both files are opened in one `with` under distinct names, so the reader's
# file handle is not shadowed by the writer's. newline="" is used on both
# sides, as the csv module expects.
with open("input.csv", "r", encoding="utf-8", newline="") as f_in, \
        open("output.csv", "w", encoding="utf-8", newline="") as f_out:
    reader = csv.DictReader(f_in)
    fieldnames = ["name", "age_upper", "city"]
    writer = csv.DictWriter(f_out, fieldnames=fieldnames)
    writer.writeheader()

    for row in reader:
        writer.writerow({
            "name": row["name"],
            "age_upper": row["age"].upper(),
            "city": row["city"]
        })

文件操作最佳实践

1. 使用 with 语句

# 推荐
with open("file.txt", "r") as file:
    content = file.read()

# 不推荐
file = open("file.txt", "r")
content = file.read()
file.close()

2. 指定编码

# 推荐
with open("file.txt", "r", encoding="utf-8") as file:
    content = file.read()

# 避免编码问题

3. 处理大文件

# 推荐：逐行处理
with open("large_file.txt", "r") as file:
    for line in file:
        process(line)

# 不推荐：一次性读取
with open("large_file.txt", "r") as file:
    content = file.read()  # 可能占用大量内存

4. 使用 pathlib

# 推荐（现代方式）
from pathlib import Path

path = Path("folder/file.txt")
content = path.read_text()
path.write_text("content")

# 不推荐（传统方式）
import os
path = os.path.join("folder", "file.txt")
with open(path, "r") as file:
    content = file.read()

5. 错误处理

# 推荐
try:
    with open("file.txt", "r") as file:
        content = file.read()
except FileNotFoundError:
    print("文件不存在")
except PermissionError:
    print("没有权限")
except Exception as e:
    print(f"发生错误: {e}")

小结

本章节介绍了 Python 的文件操作：

文件读写: open(), with 语句, read(), write(), 文件指针
文件模式: r, w, a, x, +, 二进制模式
路径处理: os 模块, pathlib 模块（推荐）
JSON 操作: json.load(), json.dump(), 复杂JSON处理
CSV 操作: csv.reader, csv.writer, DictReader, DictWriter

掌握文件操作是处理数据持久化和数据交换的基础。下一章我们将学习异常处理，包括 try-except、自定义异常等。

文件读写​

打开文件​

with 语句（推荐）​

读取文件​

read() - 读取全部内容​

readline() - 读取一行​

readlines() - 读取所有行​

遍历文件（推荐）​

写入文件​

write() - 写入字符串​

追加模式​

关闭文件​

文件指针位置​

文件模式​

基本模式​

读写模式​

二进制模式​

文件模式总结​

路径处理​

os 模块​

路径拼接​

路径信息​

pathlib 模块（推荐）​

pathlib 遍历目录​

实用示例​

JSON 操作​

读取 JSON​

写入 JSON​

处理复杂 JSON​

JSON 错误处理​

CSV 操作​

读取 CSV​

写入 CSV​

CSV 进阶操作​

使用 pandas 处理 CSV​

CSV 实用示例​

文件操作最佳实践​

1. 使用 with 语句​

2. 指定编码​

3. 处理大文件​

4. 使用 pathlib​

5. 错误处理​

小结​