- Roman numbers
-
# Case-insensitive test that $_ is a well-formed Roman numeral.
# The leading lookahead demands at least one numeral letter; without it
# every part of the pattern is optional and the empty string would match.
# Captures are unchanged: $1 = hundreds, $2 = tens, $3 = units.
m/^(?=[mdclxvi])m*(d?c{0,3}|c[dm])(l?x{0,3}|x[lc])(v?i{0,3}|i[vx])$/i
- Swap first two words
-
# Captures the first run of non-whitespace ($1), the whitespace that follows
# it ($2) and the next run of non-whitespace ($3), then writes them back as
# $3 $2 $1 so the separating whitespace is preserved. Works on $_; with no
# /g only the first pair of words is swapped.
s/(\S+)(\s+)(\S+)/$3$2$1/
- Keyword = Value
-
# Splits a "keyword = value" line held in $_.
# The value group is non-greedy so that the trailing \s* can absorb any
# whitespace at the end of the line; a greedy (.*) would keep that
# whitespace inside $2.
m/(\w+)\s*=\s*(.*?)\s*$/
# keyword is $1, value is $2
- Line of at least 80 characters
-
# True when $_ contains a run of 80 or more characters other than newline
# (there is no /s flag, so . stops at "\n").
m/.{80,}/
- MM/DD/YY HH:MM:SS
-
# Uses | as the delimiter so the date slashes need no escaping.
# Six digit runs are captured in $1..$6 (month, day, year, hour, minute,
# second per the heading). The pattern is unanchored and does not check
# field widths or value ranges.
m|(\d+)/(\d+)/(\d+) (\d+):(\d+):(\d+)|
- Changing directories
-
# Parentheses serve as delimiters so the slashes need no escaping.
# /g replaces every occurrence of the text /usr/bin in $_, including
# occurrences that are only the prefix of a longer path.
s(/usr/bin)(/usr/local/bin)g
- Expanding %7E (hex) escapes
-
# /e evaluates the replacement as Perl code: hex() turns the two captured
# hex digits into a number and chr() yields the character with that code.
# /g expands every %XX escape in $_.
s/%([0-9A-Fa-f][0-9A-Fa-f])/chr hex $1/ge
- Deleting C comments (imperfectly)
-
# Removes every /* ... */ span from $_.
# /x allows the layout and comments below, /s lets . cross newlines so
# multi-line comments are matched, /g handles all of them. The replacement
# part [] is empty. The pattern is purely textual, so comment-like text
# inside string literals is removed as well (hence "imperfectly").
s{
    /\*     # Match the opening delimiter
    .*?     # Match a minimal number of characters
    \*/     # Match the closing delimiter
} []gsx;
- Removing leading and trailing whitespace
-
# Trim $_ in two passes; two simple anchored substitutions are used rather
# than one alternation.
s/^\s+//;       # drop whitespace at the start
s/\s+$//;       # drop whitespace at the end
- Turning \ followed by n into a real newline
-
# The pattern \\n matches a literal backslash followed by the letter n;
# the replacement \n is an actual newline character. /g converts them all.
s/\\n/\n/g;
- Removing package portion of fully qualified symbols
-
# The greedy .* extends to the last "::" it can reach, so a name such as
# Foo::Bar::baz is reduced to baz.
s/^.*:://
- IP address
-
# Tests that $_ is a dotted quad with each octet in 0..255; the octets are
# captured in $1..$4.
# Each octet accepts one to three digits: [01]?\d\d? covers 0-199 (with
# optional leading zeros), 2[0-4]\d covers 200-249 and 25[0-5] covers
# 250-255. /x lets the pattern span several lines without the line breaks
# becoming part of the pattern.
m/^ ([01]?\d\d?|2[0-4]\d|25[0-5]) \.
    ([01]?\d\d?|2[0-4]\d|25[0-5]) \.
    ([01]?\d\d?|2[0-4]\d|25[0-5]) \.
    ([01]?\d\d?|2[0-4]\d|25[0-5]) $/x;
- Removing leading path from filename
-
# Parentheses as delimiters avoid escaping the slash. The greedy .* runs to
# the last "/" it can reach and that prefix is replaced by nothing, leaving
# the final path component in $_.
s(^.*/)()
- Extracting columns setting from TERMCAP
-
# Look for a ":co#<digits>:" entry in $ENV{TERMCAP}. A single space stands
# in when the variable is unset or false, so the match runs against a
# defined string. Fall back to 80 columns when no entry is found.
if (($ENV{TERMCAP} || " ") =~ m/:co#(\d+):/) {
    $cols = $1;
}
else {
    $cols = 80;
}
- Removing directory components from program name and arguments
-
# Builds one string from the program name and arguments, with a space
# before the first word, then edits that copy in place. With , as the
# delimiter, the pattern is: space, slash, non-whitespace run, slash. The
# greedy \S+ reaches the last slash of a word, so the whole directory prefix
# is replaced by a single space. Only words that begin with / are affected.
($name = " $0 @ARGV") =~ s, /\S+/, ,g;
- Checking your operating system
-
# $^O holds the operating system name; abort unless it contains "linux"
# in any letter case.
if ($^O !~ m/linux/i) {
    die "This isn't Linux";
}
- Joining continuation lines in multiline string
-
# A newline followed by one or more whitespace characters marks a
# continuation; each such run is collapsed to a single space. Because \s
# also matches newlines, blank lines followed by whitespace are absorbed too.
s/\n\s+/ /g
- Extracting all numbers from a string
-
# In list context with /g the match returns every captured substring of $_.
# Two forms are accepted: digits with an optional decimal point and
# fraction (12, 12., 12.5) or a fraction with no leading digit (.5).
# Signs and exponents are not part of the pattern.
@nums = m/(\d+\.?\d*|\.\d+)/g;
- Finding all-caps words
-
# [^\Wa-z0-9_] is a double negative: a word character that is not a-z, a
# digit or an underscore. For ASCII text that leaves A-Z. The \b anchors
# require the run to be a whole word; /g in list context returns them all.
@capwords = m/(\b[^\Wa-z0-9_]+\b)/g;
- Finding all-lowercase words
-
# [^\WA-Z0-9_] is a double negative: a word character that is not A-Z, a
# digit or an underscore. For ASCII text that leaves a-z. The \b anchors
# require the run to be a whole word; /g in list context returns them all.
@lowords = m/(\b[^\WA-Z0-9_]+\b)/g;
- Finding initial-caps word
-
# One character from [^\Wa-z0-9_] (for ASCII text, A-Z) followed by zero or
# more from [^\WA-Z0-9_] (for ASCII text, a-z), as a whole word.
# There is no /g flag, so at most the first such word is returned.
@icwords = m/(\b[^\Wa-z0-9_][^\WA-Z0-9_]*\b)/;
- Finding links in simple HTML
-
# Collects the HREF value of every <A ...> tag in $_ (/g, list context),
# ignoring letter case (/i). The value may be quoted or bare and is captured
# up to the first quote, space or ">". After the value only one optional
# quote or space may precede the closing ">", so tags with further
# attributes after HREF do not match. /s has no effect here because the
# pattern contains no "." metacharacter.
@links = m/<A[^>]+?HREF\s*=\s*["']?([^'" >]+?)[ '"]?>/sig;
- Finding middle initial in $_
-
# Expect at least three whitespace-separated words at the start of $_:
# skip the first, capture the first character of the second, and require a
# third to begin. No such match means there is no middle initial.
if (m/^\S+\s+(\S)\S*\s+\S/) {
    $initial = $1;
}
else {
    $initial = "";
}
- Changing inch marks to quotes
-
# Each pair of double quotes around text that contains no double quote is
# rewritten as ``text'' (two backticks to open, two apostrophes to close).
# /g converts every pair in $_.
s/"([^"]*)"/``$1''/g
- Extracting sentences (two spaces required)
-
# Reads input a paragraph at a time and appends each sentence found to
# @sentences. A sentence ends with !, ? or . followed by TWO spaces or the
# end of the paragraph, so a period followed by a single space (as in an
# abbreviation) does not end a sentence.
{
    local $/ = "";                  # paragraph mode: records end at blank lines
    while (<>) {
        s/\n/ /g;                   # unwrap the paragraph onto one line
        s/ {3,}/  /g;               # runs of 3+ spaces become exactly two
        push @sentences, m/(\S.*?[!?.])(?=  |\Z)/g;
    }
}
- YYYY-MM-DD
-
# Four digits, hyphen, two digits, hyphen, two digits. The pattern is
# unanchored, so it also matches inside longer digit runs, and it does not
# check that the month and day are in range.
m/(\d{4})-(\d\d)-(\d\d)/
# YYYY in $1, MM in $2, DD in $3
- North American telephone numbers
-
# Tests that $_ is a North American telephone number in one of three forms:
#   1 [AAA] PPP EEEE, (AAA) PPP EEEE, or [+CC] AAA<sep>PPP<sep>EEEE
# /x permits the layout below, so literal whitespace is written as \s.
# In the third form the separator after the area code is captured as $1;
# the separator after the prefix may be whitespace or that same character.
# In the other two forms $1 is unset, so only whitespace is accepted there.
m/ ^
    (?:
        1 \s (?: \d\d\d \s)?        # 1, or 1 and area code
      |                             # ... or ...
        \(\d\d\d\) \s               # area code with parens
      |                             # ... or ...
        (?: \+\d\d?\d? \s)?         # optional +country code
        \d\d\d ([\s\-])             # and area code
    )
    \d\d\d (\s|\1)                  # prefix (and area code separator)
    \d\d\d\d                        # exchange
    $
 /x
- Exclamations
-
# Matches "oh my" followed by god, gods, goddess, goddesses, goodness or
# gosh (each also with the gh- spelling), as whole words and in any letter
# case. Any amount of whitespace may separate the words.
m/\boh\s+my\s+gh?o(d(dess(es)?|s?)|odness|sh)\b/i
- Extracting lines regardless of line terminator
-
# Peel one line at a time off the front of $input and append its text
# (without the terminator) to @lines. A terminator is LF, LF CR, CR or
# CR LF. Text after the last terminator stays in $input.
while ($input =~ s/^([^\012\015]*)(\012\015?|\015\012?)//) {
    push @lines, $1;
}
Saturday, January 15, 2011
Regular Expression Grabbag
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment