Python: csv, subprocess, pathlib
CSV dict writer:
DictReader and DictWriter are the most flexible ways to manipulate csv data.
Both the reader and writer support generators, so they can work efficiently on low-memory systems.
Example:
import csv
import itertools
from typing import Any, Dict, Iterable, Optional


def read_csv(path: str) -> Iterable[Dict[str, Any]]:
    """Reads a csv file and yields each row as a dict."""
    csv.field_size_limit(int(1e7))
    with open(path, "r", newline="") as f:
        reader = csv.DictReader(f)
        for row in reader:
            yield row


def write_csv(
    path: str,
    data: Iterable[Dict[str, Any]],
    header: Optional[Iterable[str]] = None,
) -> None:
    """Writes rows of dicts into a csv file."""
    rows = iter(data)
    if header is None:
        # Infer the header from the first row when none is given
        first = next(rows)
        header = first.keys()
        rows = itertools.chain([first], rows)
    with open(path, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=header)
        writer.writeheader()
        writer.writerows(rows)
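A rough usage sketch (the file names and the "price" column are assumptions, not part of the helpers above): because read_csv yields rows lazily, it can be chained with a generator expression and fed straight into write_csv without ever holding the whole file in memory.
# Hypothetical usage; "input.csv", "output.csv" and the "price" column are placeholders
rows = read_csv("input.csv")
kept = (row for row in rows if row.get("price"))  # lazy filter, nothing materialized
write_csv("output.csv", kept)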
Subprocess:
Running subprocess with shell=True is often problematic. One reason is that certain commands, such as less, man, and git log, open their output in an interactive pager instead of writing it plainly to stdout. The following is a pretty handy way to execute any command with subprocess.
from typing import List
import subprocess as sp


def get_process_output(command: List[str], timeout: int = 5) -> str:
    """Executes command and returns its output (stdout on success, stderr otherwise)."""
    try:
        # Capture both stdout and stderr as text
        process = sp.run(command, timeout=timeout, text=True, stdout=sp.PIPE, stderr=sp.PIPE)
        return process.stdout if process.returncode == 0 else process.stderr
    except sp.SubprocessError as e:
        return str(e)
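For instance (the commands below are only illustrations), a normally paginated command like git log comes back as plain text, and a command that exceeds the timeout returns the error message instead of raising:
# Illustrative calls; the exact commands are placeholders
print(get_process_output(["git", "log", "--oneline", "-n", "5"]))
print(get_process_output(["sleep", "10"], timeout=1))  # TimeoutExpired is caught and returned as text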
pathlib.Path:
A really handy path library. Some useful methods are:
from pathlib import Path
path = Path("some/path/")
# File/Folder Name
path.name
# Read
path.read_text()
path.read_bytes()
# Write
path.write_text("data")
path.write_bytes(b"data")
# Navigation
path / "some/other/file"
"parent" / path / "some" / "other" / "file"
# Listing all files
paths_matching_pattern = path.glob("*pattern*") # Eg: *.csv, *.md, *.jpg, etc
all_files = list(path.iterdir()) # List all files and folders in `path`
assert set(path.glob("*")) == set(path.iterdir())  # Same entries (ordering is not guaranteed)
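A small sketch tying this back to the csv helpers above (the "data" directory name is an assumption): glob makes it easy to stream every csv file in a folder through read_csv.
# Hypothetical: iterate every csv under a folder; "data" is a placeholder directory
for csv_path in Path("data").glob("*.csv"):
    for row in read_csv(str(csv_path)):
        print(csv_path.name, row)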