Regular expressions (regex) are powerful patterns for matching and manipulating text. Python’s re module provides comprehensive regex support.

Importing re

import re

Basic Functions

re.search() - Find First Match

import re

text = "The rain in Spain falls mainly in the plain"

# Search for pattern
match = re.search(r"Spain", text)
if match:
    print(f"Found: {match.group()}")  # Found: Spain
    print(f"Position: {match.start()}-{match.end()}")  # Position: 12-17

# No match returns None
result = re.search(r"France", text)
print(result)  # None

re.match() - Match at Beginning

import re

text = "Hello World"

# Matches at start
match = re.match(r"Hello", text)
print(match.group())  # Hello

# Won't match (not at start)
match = re.match(r"World", text)
print(match)  # None

re.findall() - Find All Matches

import re

text = "cat bat rat cat hat"

# Find all occurrences
matches = re.findall(r"cat", text)
print(matches)  # ['cat', 'cat']

# Find all words ending in 'at'
matches = re.findall(r"\b\w+at\b", text)
print(matches)  # ['cat', 'bat', 'rat', 'cat', 'hat']

re.sub() - Replace Matches

import re

text = "Hello World"

# Replace pattern
new_text = re.sub(r"World", "Python", text)
print(new_text)  # Hello Python

# Replace with function
def uppercase(match):
    """Return the full text matched by *match*, converted to upper case.

    Intended as a replacement callback for ``re.sub()``, which calls it once
    per match and substitutes the returned string.
    """
    matched_text = match.group(0)
    return matched_text.upper()

text = "hello world"
new_text = re.sub(r"\b\w+\b", uppercase, text)
print(new_text)  # HELLO WORLD

Common Patterns

Character Classes

import re

text = "abc123XYZ"

# \d - digits
print(re.findall(r"\d", text))     # ['1', '2', '3']
print(re.findall(r"\d+", text))    # ['123']

# \w - word characters (letters, digits, _; Unicode-aware unless re.ASCII is set)
print(re.findall(r"\w+", text))    # ['abc123XYZ']

# \s - whitespace
text = "Hello World"
print(re.findall(r"\s", text))     # [' ']

# Custom character class
print(re.findall(r"[aeiou]", "hello world"))  # ['e', 'o', 'o']

Quantifiers

import re

# * - zero or more
# + - one or more
# ? - zero or one
# {n} - exactly n
# {n,m} - n to m times

# One sample string scanned with four different quantifiers on the letter "a".
text = "aaa ab a"
print(re.findall(r"a+", text))     # ['aaa', 'a', 'a']
# a* also matches the empty string at every position where no "a" starts,
# including the end of the string, hence the '' entries.
print(re.findall(r"a*", text))     # ['aaa', '', 'a', '', '', 'a', '']
print(re.findall(r"a{2}", text))   # ['aa']
print(re.findall(r"a{1,3}", text)) # ['aaa', 'a', 'a']

Anchors

import re

text = "hello world"

# ^ - start of string
print(re.search(r"^hello", text))  # Match
print(re.search(r"^world", text))  # None

# $ - end of string (also matches just before a trailing newline)
print(re.search(r"world$", text))  # Match
print(re.search(r"hello$", text))  # None

# \b - word boundary
text = "cat category"
print(re.findall(r"\bcat\b", text))  # ['cat'] (not 'category')

Groups

Basic Groups

import re

text = "John Smith, Jane Doe"

# Capture groups with ()
pattern = r"(\w+) (\w+)"
matches = re.findall(pattern, text)
print(matches)  # [('John', 'Smith'), ('Jane', 'Doe')]

# Access groups from match object
match = re.search(r"(\w+) (\w+)", text)
print(match.group(0))  # John Smith (entire match)
print(match.group(1))  # John (first group)
print(match.group(2))  # Smith (second group)

Named Groups

import re

text = "john@example.com"

pattern = r"(?P<username>\w+)@(?P<domain>\w+\.\w+)"
match = re.search(pattern, text)

print(match.group("username"))  # john
print(match.group("domain"))    # example.com
print(match.groupdict())        # {'username': 'john', 'domain': 'example.com'}

Practical Examples

Email Validation

import re

def is_valid_email(email):
    """Return True if *email* has a simple ``local@domain.tld`` shape.

    This is a lightweight syntactic check, not full RFC 5322 validation: the
    local part and the domain may contain word characters, dots and hyphens,
    and the domain must end with a dot followed by word characters.

    Args:
        email: The string to check.

    Returns:
        bool: True when the whole string matches the pattern, else False.
    """
    pattern = r"[\w.-]+@[\w.-]+\.\w+"
    # fullmatch() must consume the entire string. With match() and a trailing
    # "$", an address followed by a newline ("user@example.com\n") was accepted
    # because "$" also matches just before a final newline.
    return re.fullmatch(pattern, email) is not None

print(is_valid_email("user@example.com"))  # True
print(is_valid_email("invalid-email"))      # False

Phone Number Extraction

import re

text = "Call me at 123-456-7890 or (555) 123-4567"

# Various phone formats
pattern = r"\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}"
phones = re.findall(pattern, text)
print(phones)  # ['123-456-7890', '(555) 123-4567']

URL Parsing

import re

url = "https://www.example.com/path/page?query=value"

pattern = r"(?P<protocol>https?)://(?P<domain>[\w.]+)(?P<path>/[\w/]*)?(?P<query>\?.*)?"
match = re.search(pattern, url)

print(match.group("protocol"))  # https
print(match.group("domain"))    # www.example.com
print(match.group("path"))      # /path/page

Text Cleaning

import re

text = "Hello,   World!   How   are   you?"

# Remove extra whitespace
clean = re.sub(r"\s+", " ", text)
print(clean)  # Hello, World! How are you?

# Remove punctuation (anything that is not a word character or whitespace)
clean = re.sub(r"[^\w\s]", "", text)
print(clean)  # Hello   World   How   are   you

Flags

import re

text = "Hello WORLD"

# Case insensitive
match = re.search(r"world", text, re.IGNORECASE)
print(match.group())  # WORLD

# Multiline (^ and $ match line boundaries)
text = "line1\nline2"
matches = re.findall(r"^line\d", text, re.MULTILINE)
print(matches)  # ['line1', 'line2']

# Verbose (allows comments and whitespace)
pattern = re.compile(r"""
    \d{3}     # Area code
    [-.\s]?   # Separator
    \d{3}     # First 3 digits
    [-.\s]?   # Separator
    \d{4}     # Last 4 digits
""", re.VERBOSE)

Summary

Use re.search() to find first match
Use re.findall() to find all matches
Use re.sub() to replace matches
Use () for groups, (?P<name>) for named groups
Common patterns: \d (digit), \w (word), \s (space)
Quantifiers: *, +, ?, {n}, {n,m}
Anchors: ^ (start), $ (end), \b (word boundary)

Settings

Appearance

Importing re

Basic Functions

re.search() - Find First Match

re.match() - Match at Beginning

re.findall() - Find All Matches

re.sub() - Replace Matches

Common Patterns

Character Classes

Quantifiers

Anchors

Groups

Basic Groups

Named Groups

Practical Examples

Email Validation

Phone Number Extraction

URL Parsing

Text Cleaning

Flags

Summary