import re re.search('n', '\n') # first item is pattern, second item is string
1 2 3
# there are two ways to handle this: one is to escape every backslash with another \ (the other is to prefix the string with r) # 有两种处理方法,一种方法是对每个反斜杠使用\ (另一种是在前面加 r) re.search('n', '\\n')
<re.Match object; span=(1, 2), match='n'>
1 2 3
# not the best way if we have too many \s # 如果我们有太多的\,这不是最好的方法 re.search('n', '\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
1 2 3
# r converts to raw string # r 转换为原始字符串 re.search('n', r'\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
<re.Match object; span=(1, 2), match='n'>
1 2 3 4 5 6 7 8
""" There are some nuances that you should be aware of: regular expressions have their own special characters as well, so a regex written as '\n' and one written as r'\n' both look for a newline. 你应该注意到一些细微的差别,正则表达式也有自己的特殊字符, 带有'\n'和 r'\n'的正则表达式都查找换行符 """ re.search('\n', '\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
<re.Match object; span=(0, 1), match='\n'>
1 2 3
# this works as well because r'\n' also looks for new line # 同样有效,因为 pattern 中 r'\n' 也会查找新行 re.search(r'\n', '\n\n')
<re.Match object; span=(0, 1), match='\n'>
1 2 3
# no match: the raw string r'\n\n' contains literal backslashes and n's rather than newline characters, while the pattern r'\n' looks for a newline # 不能工作,因为 string 不使用换行符,而 pattern 中 r'\n'查找换行符 re.search(r'\n', r'\n\n')
MATCH and SEARCH EXAMPLES
REs common methods - Match and Search
searches anywhere in the sentence 搜索句子中的任何位置
flags: special options 标志:特殊选项
re.search(pattern, string, flags)
only beginning of the string 只搜索字符串的开始
re.match(pattern, string, flags)
1 2 3
# returns None because match only looks at the start of the string # 返回 none,因为只查看字符串的开头 re.match("c", "abcdef")
1
re.search("c", "abcdef") # searches anywhere
<re.Match object; span=(2, 3), match='c'>
1
bool(re.match("c", "abcdef")) # no match returns boolean false
False
1
bool(re.match("a", "abcdef")) # match returns true
True
1 2 3
# tells you where it matched first and only first # 告诉你它首先匹配的位置 re.search("c", "abcdef")
<re.Match object; span=(2, 3), match='c'>
1
re.search("c", "abcdefc") # multiple 'c's first instance only 返回多个 c 的第一个实例
<re.Match object; span=(2, 3), match='c'>
1
re.search("c", "abdef\nc") # multiline works with search 多行与搜索一起工作
<re.Match object; span=(6, 7), match='c'>
1
re.match("c", "\nc") # match fails here: it only checks the very start of the string, which is a newline rather than 'c' 匹配对换行符无效
string = '''Robots are branching out. A new prototype soft robot takes inspiration from plants by growing to explore its environment.
Vines and some fungi extend from their tips to explore their surroundings. Elliot Hawkes of the University of California in Santa Barbara and his colleagues designed a bot that works on similar principles. Its mechanical body sits inside a plastic tube reel that extends through pressurized inflation, a method that some invertebrates like peanut worms (Sipunculus nudus) also use to extend their appendages. The plastic tubing has two compartments, and inflating one side or the other changes the extension direction. A camera sensor at the tip alerts the bot when it’s about to run into something.
In the lab, Hawkes and his colleagues programmed the robot to form 3-D structures such as a radio antenna, turn off a valve, navigate a maze, swim through glue, act as a fire extinguisher, squeeze through tight gaps, shimmy through fly paper and slither across a bed of nails. The soft bot can extend up to 72meters, and unlike plants, it can grow at a speed of 10meters per second, the team reports July 19 in Science Robotics. The design could serve as a model for building robots that can traverse constrained environments
This isn’t the first robot to take inspiration from plants. One plantlike predecessor was a robot modeled on roots.'''
'Robots are branching out. A new prototype soft robot takes inspiration from plants by growing to explore its environment. Vines and some fungi extend from their tips to explore their surroundings. Elliot Hawkes of the University of California in Santa Barbara and his colleagues designed a bot that works on similar principles. Its mechanical body sits inside a plastic tube reel that extends through pressurized inflation, a method that some invertebrates like peanut worms (Sipunculus nudus) also use to extend their appendages. The plastic tubing has two compartments, and inflating one side or the other changes the extension direction. A camera sensor at the tip alerts the bot when it’s about to run into something. In the lab, Hawkes and his colleagues programmed the robot to form 3-D structures such as a radio antenna, turn off a valve, navigate a maze, swim through glue, act as a fire extinguisher, squeeze through tight gaps, shimmy through fly paper and slither across a bed of nails. The soft bot can extend up to 72meters, and unlike plants, it can grow at a speed of 10meters per second, the team reports July 19 in Science Robotics. The design could serve as a model for building robots that can traverse constrained environments This isn’t the first robot to take inspiration from plants. One plantlike predecessor was a robot modeled on roots.'
. the dot matches any character except the newline. 点匹配除换行符以外的任何字符。
1 2 3 4 5 6 7
string = '''Robots are branching out. A new prototype soft robot takes inspiration from plants by growing to explore its environment.
Vines and some fungi extend from their tips to explore their surroundings. Elliot Hawkes of the University of California in Santa Barbara and his colleagues designed a bot that works on similar principles. Its mechanical body sits inside a plastic tube reel that extends through pressurized inflation, a method that some invertebrates like peanut worms (Sipunculus nudus) also use to extend their appendages. The plastic tubing has two compartments, and inflating one side or the other changes the extension direction. A camera sensor at the tip alerts the bot when it’s about to run into something.
In the lab, Hawkes and his colleagues programmed the robot to form 3-D structures such as a radio antenna, turn off a valve, navigate a maze, swim through glue, act as a fire extinguisher, squeeze through tight gaps, shimmy through fly paper and slither across a bed of nails. The soft bot can extend up to 72meters, and unlike plants, it can grow at a speed of 10meters per second, the team reports July 19 in Science Robotics. The design could serve as a model for building robots that can traverse constrained environments
This isn’t the first robot to take inspiration from plants. One plantlike predecessor was a robot modeled on roots.'''
1
re.search('.+', string).group() # no new line
'Robots are branching out. A new prototype soft robot takes inspiration from plants by growing to explore its environment.'
'Robots are branching out. A new prototype soft robot takes inspiration from plants by growing to explore its environment.\n\nVines and some fungi extend from their tips to explore their surroundings. Elliot Hawkes of the University of California in Santa Barbara and his colleagues designed a bot that works on similar principles. Its mechanical body sits inside a plastic tube reel that extends through pressurized inflation, a method that some invertebrates like peanut worms (Sipunculus nudus) also use to extend their appendages. The plastic tubing has two compartments, and inflating one side or the other changes the extension direction. A camera sensor at the tip alerts the bot when it’s about to run into something.\n\nIn the lab, Hawkes and his colleagues programmed the robot to form 3-D structures such as a radio antenna, turn off a valve, navigate a maze, swim through glue, act as a fire extinguisher, squeeze through tight gaps, shimmy through fly paper and slither across a bed of nails. The soft bot can extend up to 72meters, and unlike plants, it can grow at a speed of 10meters per second, the team reports July 19 in Science Robotics. The design could serve as a model for building robots that can traverse constrained environments\n\nThis isn’t the first robot to take inspiration from plants. One plantlike predecessor was a robot modeled on roots.'
Creating your own character sets
[A-Z]: '-' is a metacharacter when used in [] (custom character sets) '-'在[](自定义字符集)中使用时是一个元字符
1
string = 'Hello, There, How, Are, You'
1
re.findall('[A-Z]', string) # pulls out all capital letters 取出所有大写字母
['H', 'T', 'H', 'A', 'Y']
1 2 3
re.findall('[A-Z,]', string) # here we search for any capital letters or a comma # 这里我们搜索大写字母或逗号
# NOTE: the outputs that follow were produced with string = 'HELLO, There, How, Are, You...' # one or more of 4 types of characters # 四种字符中的一种或多种 re.search('[A-Za-z\s,]+', string).group()
'HELLO, There, How, Are, You'
1
re.findall('[A-Z]?[a-z\s,]+', string)
['O, ', 'There, ', 'How, ', 'Are, ', 'You']
1 2 3 4
# ^ is a metacharacter within brackets # ^是括号中的元字符 # 表示相反 re.search('[^A-Za-z\s,]+', string).group()
'...'
1
re.findall('[^A-Z]+', string) # 匹配所有非大写字符
[', ', 'here, ', 'ow, ', 're, ', 'ou...']
GROUPS
groups allow us to pull out sections of a match and store them
groups 允许我们提取匹配的部分并存储它们
1 2 3
# contrived example 举例 import re string = 'John has 6 cats but I think my friend Susan has 3 dogs and Mike has 8 fishes'
1
re.findall('[A-Za-z]+ \w+ \d+ \w+', string)
['John has 6 cats', 'Susan has 3 dogs', 'Mike has 8 fishes']
the use of parentheses denotes a group 使用括号表示一个组
() = metacharacter 元字符
1
re.findall('([A-Za-z]+) \w+ \d+ \w+', string) # to pull out just the names 只把名字取出来
['John', 'Susan', 'Mike']
1
re.findall('[A-Za-z]+ \w+ \d+ (\w+)', string) # pull out animals 取出所有动物
['cats', 'dogs', 'fishes']
1 2 3
re.findall('([A-Za-z]+) \w+ (\d+) (\w+)', string) # use original string to make sure matching is correct, then use groups to pull out the info you want # 使用原始字符串确保匹配是正确的,然后使用组拉出你想要的信息
match = re.search('([A-Za-z]+) \w+ (\d+) (\w+)', string) # pulls out three groups 抽出三组 match
<re.Match object; span=(0, 15), match='John has 6 cats'>
1
match.group(0)
'John has 6 cats'
1
match.groups()
('John', '6', 'cats')
1
match.group(1)
'John'
1
match.group(2)
'6'
1
match.group(3)
'cats'
1
match.group(1, 3) # multiple groups 多个组
('John', 'cats')
1
match.group(3, 2, 1, 1) # change the order 改变顺序
('cats', '6', 'John', 'John')
1
match.span()
(0, 15)
1
match.span(2)
(9, 10)
1
match.span(3)
(11, 15)
1
match.start(3)
11
1 2 3
# find all has no group function # re.findall 没有 group 函数 re.findall('([A-Za-z]+) \w+ (\d+) (\w+)', string).group(1)
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Input In [101], in <cell line: 3>()
1 # find all has no group function
2 # re.findall 没有 group 函数
----> 3 re.findall('([A-Za-z]+) \w+ (\d+) (\w+)', string).group(1)
AttributeError: 'list' object has no attribute 'group'
string = 'abababababab'# original string match = re.search('(ab)+', string)
match.group(1) # capturing only one group; value is overwritten each time # 只捕获一个群体; 值每次都会被覆盖
'ab'
1
match.group(2) # no value 没有值
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
Input In [10], in <cell line: 1>()
----> 1match.group(2)
IndexError: no such group
1
match.groups() # only one group, group just overwritten 只有一个组,组被覆盖了
('ab',)
1
match.group(0) # the full match, not related to groups 完全匹配,与组无关
'abababababab'
Another simple example with two groups using quantifiers