Generating LaTeX Tables

An Example Document

Below you see the PDF of the sample LaTeX document generated by the code explained in the following sections.

Setup and Data Preparation

First, let’s load the necessary libraries and prepare the data:

# Import necessary libraries
import numpy as np
import pandas as pd
import pyfixest as pf
import statsmodels.formula.api as smf
import maketables as mt

# Read the sample salary data and derive a categorical gender column
# from the 0/1 "female" dummy
gender_names = {0: "Male", 1: "Female"}
df = pd.read_csv("../data/salaries.csv")
df["gender"] = df["female"].map(gender_names)

# Display labels for the variables used in the tables
labels = {
    "logwage": "ln(Wage)",
    "wage": "Wage",
    "age": "Age",
    "female": "Female",
    "tenure": "Years of Tenure",
    "occupation": "Occupation",
    "worker_type": "Worker Type",
    "education": "Education Level",
    "promoted": "Promotion",
}

# Register the labels as the default for all maketables tables
mt.MTable.DEFAULT_LABELS = labels

Generating Tables

Create a descriptive statistics table:

# Descriptive statistics for wage, age and tenure, grouped by worker type
# (bycol) and gender (byrow)
descriptive_vars = ["wage", "age", "tenure"]
descriptive_stats = ["count", "mean", "std"]
tab1 = mt.DTable(
    df,
    vars=descriptive_vars,
    stats=descriptive_stats,
    bycol=["worker_type"],
    byrow="gender",
    format_spec={"mean": ",.2f", "std": ".2f"},
    caption="Descriptive statistics by worker type and gender",
    tab_label="tab:descriptives",
)

# Write the table to a .tex file
tab1.save(
    type="tex",
    file_name="../latex_output/table1_descriptives.tex",
    tex_style={"first_col_width": "2.5cm"},
    show=False,
    replace=True,
)

Create the wage regression table using PyFixest’s stepwise notation:

# Fit the wage models with PyFixest (multiple outcomes plus sw0 stepwise term)
wage_fits = pf.feols(
    "logwage+wage ~ age + female + sw0(age:female)",
    data=df,
)

# Collect the fitted models in one regression table
tab2 = mt.ETable(
    wage_fits,
    caption="Wage regressions",
    tab_label="tab:regressions",
)

# Write the table to a .tex file
tab2.save(
    type="tex",
    file_name="../latex_output/table2_regressions.tex",
    show=False,
    replace=True,
)

Now use Statsmodels for an OLS and Probit comparison:

# Both promotion models share the same specification
promotion_formula = "promoted ~ tenure + female + worker_type"
est1 = smf.ols(promotion_formula, data=df).fit()
est2 = smf.probit(promotion_formula, data=df).fit(disp=0)

# Put the OLS and Probit estimates side by side
tab3 = mt.ETable(
    [est1, est2],
    model_heads=["OLS", "Probit"],
    keep=["tenure", "female", "worker_type"],
    model_stats=["N", "r2", "pseudo_r2"],
    caption="Predicting Promotions",
    tab_label="tab:promotions",
)

# Write the table to a .tex file
tab3.save(
    type="tex",
    file_name="../latex_output/table3_promotions.tex",
    show=False,
    replace=True,
)

Output Style and Defaults

You can customize the appearance of the LaTeX tables by modifying the mt.MTable.DEFAULT_TEX_STYLE dictionary. Key parameters include:

  • arraystretch: Controls row height (default 1)
  • tabcolsep: Sets column separation spacing (default “3pt”)
  • data_align: Column alignment for data (“l”, “c”, “r”)
  • first_row_addlinespace: Spacing before first row of each group (default “0.5ex”)
  • data_addlinespace: Spacing before and after data rows (default “0.5ex”)
  • rgroup_addlinespace: Spacing between row groups (default None)
  • group_header_format: Format for group headers (default r"\emph{%s}")

Example customization:

# Overrides for row spacing and group-header formatting
style_overrides = {
    "arraystretch": 1.2,
    "first_row_addlinespace": "0.75ex",
    "data_addlinespace": "0.25ex",
    "group_header_format": r"\textbf{%s}",
}

# Merge the overrides into the default LaTeX style
mt.MTable.DEFAULT_TEX_STYLE.update(style_overrides)

Generating and Compiling a LaTeX Document

Of course, you could now import these tables directly into your own LaTeX document or into Overleaf. Here, however, we use pylatex to build the simple LaTeX document shown above and compile it to a PDF.

import pylatex as pl
import os
import glob

# Use pylatex to create a tex file with the tables and compile it
def _read_tex(path):
    """Return the text of ``path``, decoded as UTF-8 with a latin1 fallback."""
    try:
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    except UnicodeDecodeError:
        # latin1 maps every byte to a character, so this retry cannot fail
        # with a decode error.
        with open(path, "r", encoding="latin1") as f:
            return f.read()


def make_pdf(tex_files, output_file):
    """
    Create a PDF document with LaTeX tables from .tex files.

    Each existing file in ``tex_files`` is inserted verbatim (unescaped) into
    a single "LaTeX Tables" section, followed by 1cm of vertical space. Paths
    that do not exist are skipped with a warning. The generated .tex file is
    kept next to the PDF (``clean_tex=False``).

    Parameters:
    tex_files: list of .tex file paths
    output_file: output PDF file name (without extension)

    Returns:
    None
    """
    import warnings

    geometry_options = {"margin": "1.5in"}
    doc = pl.Document(geometry_options=geometry_options)

    # Packages added to the document preamble
    for package in ("booktabs", "threeparttable", "makecell", "tabularx", "array"):
        doc.packages.append(pl.Package(package))

    with doc.create(pl.Section("LaTeX Tables")):
        for tex_file in tex_files:
            if not os.path.exists(tex_file):
                # Keep the best-effort behaviour, but make the omission visible
                warnings.warn(
                    f"Skipping missing table file: {tex_file}", stacklevel=2
                )
                continue

            # Add the table content directly, without LaTeX escaping
            doc.append(pl.NoEscape(_read_tex(tex_file)))
            doc.append(pl.NoEscape(r'\vspace{1cm}'))  # Add space between tables

    doc.generate_pdf(output_file, clean_tex=False)

    # Remove fls files generated by pylatex. An output_file without a
    # directory part yields dirname == "", which must mean the current
    # directory rather than the filesystem root.
    out_dir = os.path.dirname(output_file) or "."
    fls_pattern = os.path.join(glob.escape(out_dir), "pdflatex*.fls")
    for fls_file in glob.glob(fls_pattern):
        os.remove(fls_file)

# Compile the three saved tables into a single PDF
tex_files = [
    "../latex_output/table1_descriptives.tex",
    "../latex_output/table2_regressions.tex",
    "../latex_output/table3_promotions.tex",
]
pdf_target = "../output/LatexOutput"

make_pdf(tex_files, pdf_target)