python-生成随机包含要分析字符的字符串
时间:2010-03-14 来源:oychw
File information
2010-3-14
磁针石:xurongzhong#gmail.com
博客:oychw.cublog.cn
这个脚本难在编码这块,尤其是设置默认编码。由于linux和windows的默认编码不同,出现了程序在windows可以运行,但是在linux无法运行的情况。
折腾了我几个小时。以下是脚本:
#!/usr/bin/env python
# -*- coding: gbk -*-
# Script that generates the source text used for fingerprint testing.
import random
import string
import sys
# Set the interpreter-wide default string encoding to GBK.
# reload(sys) comes first because Python 2 removes setdefaultencoding()
# from the sys module during start-up; reloading the module restores it.
reload(sys)
sys.setdefaultencoding('gbk')
def GBK1():
    """Return one random two-byte code from the GBK/1 region.

    GBK/1 holds the GB2312 non-hanzi symbols: lead byte 0xA1-0xA9, trail
    byte 0xA1-0xFE. The result is a 2-byte ``bytes`` object (a plain ``str``
    on Python 2). The code is not checked against the assigned GBK table.
    """
    head = random.randint(0xA1, 0xA9)
    tail = random.randint(0xA1, 0xFE)
    # Build the two raw bytes directly instead of formatting the value as
    # hex text and decoding it with the Python 2-only 'hex' codec.
    return bytes(bytearray((head, tail)))
def GBK2():
    """Return one random two-byte code from the GBK/2 region.

    GBK/2 holds the GB2312 hanzi: lead byte 0xB0-0xF7, trail byte
    0xA1-0xFE. The result is a 2-byte ``bytes`` object (a plain ``str`` on
    Python 2). The code is not checked against the assigned GBK table.
    """
    head = random.randint(0xB0, 0xF7)
    tail = random.randint(0xA1, 0xFE)
    # Build the two raw bytes directly instead of formatting the value as
    # hex text and decoding it with the Python 2-only 'hex' codec.
    return bytes(bytearray((head, tail)))
def GBK3():
    """Return one random two-byte code from the GBK/3 region.

    GBK/3 holds extension hanzi: lead byte 0x81-0xA0, trail byte 0x40-0xFE.
    The result is a 2-byte ``bytes`` object (a plain ``str`` on Python 2).
    """
    head = random.randint(0x81, 0xA0)
    # NOTE(review): this range includes 0x7F, which GBK does not use as a
    # trail byte -- confirm whether such codes are wanted in the test data.
    tail = random.randint(0x40, 0xFE)
    # Build the two raw bytes directly instead of formatting the value as
    # hex text and decoding it with the Python 2-only 'hex' codec.
    return bytes(bytearray((head, tail)))
def GBK4():
    """Return one random two-byte code from the GBK/4 region.

    GBK/4 holds extension hanzi: lead byte 0xAA-0xFE, trail byte 0x40-0xA0.
    The result is a 2-byte ``bytes`` object (a plain ``str`` on Python 2).
    """
    head = random.randint(0xAA, 0xFE)
    # NOTE(review): this range includes 0x7F, which GBK does not use as a
    # trail byte -- confirm whether such codes are wanted in the test data.
    tail = random.randint(0x40, 0xA0)
    # Build the two raw bytes directly instead of formatting the value as
    # hex text and decoding it with the Python 2-only 'hex' codec.
    return bytes(bytearray((head, tail)))
def GBK5():
    """Return one random two-byte code from the GBK/5 region.

    GBK/5 holds extension non-hanzi symbols: lead byte 0xA8-0xA9, trail
    byte 0x40-0xA0. The result is a 2-byte ``bytes`` object (a plain ``str``
    on Python 2).
    """
    head = random.randint(0xA8, 0xA9)
    # NOTE(review): this range includes 0x7F, which GBK does not use as a
    # trail byte -- confirm whether such codes are wanted in the test data.
    tail = random.randint(0x40, 0xA0)
    # Build the two raw bytes directly instead of formatting the value as
    # hex text and decoding it with the Python 2-only 'hex' codec.
    return bytes(bytearray((head, tail)))
def ASCII():
    """Return one random ASCII letter or digit as a one-character string.

    One call in ten yields a digit ('0'-'9'); the other nine yield a
    letter, with upper and lower case equally likely.
    """
    if not random.randint(0, 9):
        # randint(0, 9) == 0: the 1-in-10 digit branch.
        code = random.randint(48, 57)
    else:
        code = random.randint(65, 90)
        # The case shift belongs to the letter branch only; applied to a
        # digit it would turn '0'-'9' into 'P'-'Y'.
        if random.randint(0, 1):
            code += 32
    return chr(code)
def GBK_NON():
    """Return one random GBK symbol code that needs special handling.

    The code is picked uniformly from a fixed table of two-byte values in
    the 0xA1xx and 0xA3xx rows. It is returned as a 2-byte ``bytes`` object
    (a plain ``str`` on Python 2).
    """
    l_gbk = (
        0xA1A2, 0xA1A3, 0xA1A4, 0xA1AA, 0xA1AB, 0xA1AD, 0xA1AE, 0xA1AF,
        0xA1B0, 0xA1B1, 0xA1B2, 0xA1B3, 0xA1B4, 0xA1B5, 0xA1B6, 0xA1B7,
        0xA1B8, 0xA1B9, 0xA1BA, 0xA1BB, 0xA1BC, 0xA1BD, 0xA1BE, 0xA1BF,
        0xA3A1, 0xA3A2, 0xA3AC, 0xA3A8, 0xA3A9, 0xA3BA, 0xA3BB, 0xA3BF,
    )
    # random.choice keeps the pick in step with the table length, unlike a
    # hard-coded randint(0, 31) index.
    code = random.choice(l_gbk)
    # Split into lead/trail bytes directly rather than going through hex
    # text and the Python 2-only 'hex' codec.
    return bytes(bytearray((code >> 8, code & 0xFF)))
def ASCII_NON():
    """Return one random single-byte character that needs special handling.

    Picks uniformly between chr(20) and chr(42) ('*').
    """
    # NOTE(review): 20 is decimal here (the DC4 control character), not
    # 0x20 (space) -- confirm which value was intended.
    l_ascii = (20, 42)
    # random.choice avoids a hard-coded index bound tied to the table size.
    return chr(random.choice(l_ascii))
def GBK():
    """Return a random GBK code, half of the time a common GB2312 hanzi.

    A roll of 0-9 selects the region: 0-4 -> GBK2, 6 -> GBK1, 7 -> GBK3,
    8 -> GBK4, 9 -> GBK5. A roll of 5 selects nothing and the function
    returns None, so callers must be prepared to skip a falsy result.
    """
    roll = random.randint(0, 9)
    if roll < 5:
        return GBK2()
    by_roll = {6: GBK1, 7: GBK3, 8: GBK4, 9: GBK5}
    make = by_roll.get(roll)
    if make is None:
        # NOTE(review): roll == 5 has no region assigned; it is unclear
        # whether this gap is deliberate.
        return None
    return make()
def _special_char():
    """Return a special character: GBK symbol or ASCII one, 50/50."""
    if random.randint(0, 1):
        return GBK_NON()
    return ASCII_NON()


# Write 1000 lines of random test text to word.txt. Each line gets a random
# nominal length (0-300 characters) and a random composition chosen by
# char_type. GBK() may return None, in which case nothing is written for
# that position and the line ends up shorter than `length`.
# The with-block closes the file even if a generator raises.
with open("word.txt", 'w') as f:
    for _ in range(1000):
        length = random.randint(0, 300)
        # Pick the kind of line to generate; Chinese text dominates.
        char_type = random.randint(0, 9)
        if char_type < 5:
            # 0-4: GBK characters only.
            for i in range(length):
                word = GBK()
                if word:
                    f.write(word)
        elif char_type == 5:
            # 5: GBK characters with special characters mixed in. About
            # one position in six is special, split evenly between the
            # GBK and ASCII tables.
            for i in range(length):
                if not random.randint(0, 5):
                    word = _special_char()
                else:
                    word = GBK()
                if word:
                    f.write(word)
        elif char_type == 6:
            # 6: mixed Chinese and English with special characters. This
            # branch used to test for 5 a second time, so a roll of 5 ran
            # two loops and a roll of 6 produced an empty line.
            for i in range(length):
                if not random.randint(0, 5):
                    word = _special_char()
                else:
                    word = GBK()
                    # NOTE(review): the flattened original does not show
                    # whether this swap also applied to special characters;
                    # it is limited to the GBK branch here.
                    if not random.randint(0, 5):
                        word = ASCII()
                if word:
                    f.write(word)
        elif char_type == 7:
            # 7: ASCII letters and digits only.
            for i in range(length):
                word = ASCII()
                if word:
                    f.write(word)
        else:
            # 8-9: ASCII letters and digits with special characters mixed in.
            for i in range(length):
                if not random.randint(0, 5):
                    word = _special_char()
                else:
                    word = ASCII()
                if word:
                    f.write(word)
        f.write("\n")
附上GBK的编码简表:
GBK字符集范围
分区 高位 低位
----------------------------------------------
●GBK/1:GB2312非汉字符号: A1~A9 || A1~FE
●GBK/2:GB2312汉字 : B0~F7 || A1~FE
●GBK/3:扩充汉字 : 81~A0 || 40~FE
●GBK/4:扩充汉字 : AA~FE || 40~A0
●GBK/5:扩充非汉字 : A8~A9 || 40~A0
汉字在Unicode中的分布大致如下表:
首字编码 | 尾字编码 | 个数 | |
基本汉字 | U4E00 | U9FBF | 20928 |
兼容汉字(异体字) | UF900 | UFAFF | 512 |
扩展A | U3400 | U4DBF | 6592 |
扩展B | U20000 | U2A6DF | 42720 |
补充 | U2F800 | U2FA1F | 544 |
其他 |
随机生成中文验证码的python代码
python, 由admin创建.
来源:http://www.3gcnbeta.com/wordpress/2010/02/09/%E9%9A%8F%E6%9C%BA%E7%94%9F%E6%88%90%E4%B8%AD%E6%96%87%E9%AA%8C%E8%AF%81%E7%A0%81%E7%9A%84python%E4%BB%A3%E7%A0%81/
随机生成中文验证码的python代码
需要准备字体文件,或者通过fontPath参数指定。
默认是我用的字体wqy.ttc.
代码如下:
# -*- coding: utf-8 -*-
import Image,ImageDraw,ImageFont
import random
import math, string
class RandomChar(object):
    """Generators for a single random Chinese character."""

    @staticmethod
    def Unicode():
        """Return a random character with a code point in U+4E00..U+9FBF."""
        val = random.randint(0x4E00, 0x9FBF)
        try:
            return unichr(val)
        except NameError:
            # Python 3 has no unichr(); chr() returns text there.
            return chr(val)

    @staticmethod
    def GB2312():
        """Return a random GB2312 hanzi as a unicode string.

        The lead byte is drawn from 0xB0-0xCF and the trail byte from
        0xA1-0xFE, then the pair is decoded with the 'gb2312' codec.
        """
        head = random.randint(0xB0, 0xCF)
        # The trail byte must stay within 0xA1-0xFE. The previous
        # nibble-based construction could also produce 0xA0 and 0xFF,
        # which are not valid GB2312 trail bytes and made the decode
        # below raise UnicodeDecodeError.
        tail = random.randint(0xA1, 0xFE)
        # Build the raw bytes directly; no hex-text round trip and no
        # local named `str` shadowing the builtin.
        return bytes(bytearray((head, tail))).decode('gb2312')
class ImageChar(object):
    """Draw random Chinese characters plus noise lines onto an RGB image."""

    def __init__(self, fontColor=(0, 0, 0),
                 size=(100, 40),
                 fontPath='wqy.ttc',
                 bgColor=(255, 255, 255),
                 fontSize=20):
        """Create the blank canvas and load the font.

        fontColor -- RGB tuple; stored on the instance.
        size      -- (width, height) of the image.
        fontPath  -- path of the TrueType font file to load.
        bgColor   -- RGB tuple used to fill the new image.
        fontSize  -- font size passed to ImageFont.truetype.
        """
        self.size = size
        self.fontPath = fontPath
        self.bgColor = bgColor
        self.fontSize = fontSize
        self.fontColor = fontColor
        self.font = ImageFont.truetype(self.fontPath, self.fontSize)
        self.image = Image.new('RGB', size, bgColor)

    def rotate(self):
        """Call Image.rotate with a random angle of 0-30 degrees."""
        # NOTE(review): Image.rotate returns a new image and the result is
        # discarded here, so self.image is left unchanged. Assigning it
        # back would visibly change the generated pictures; left as-is
        # pending confirmation.
        self.image.rotate(random.randint(0, 30), expand=0)

    def drawText(self, pos, txt, fill):
        """Draw txt at pos on the image using the loaded font and fill."""
        draw = ImageDraw.Draw(self.image)
        draw.text(pos, txt, font=self.font, fill=fill)

    def randRGB(self):
        """Return a random (r, g, b) tuple, each component 0-255."""
        return (random.randint(0, 255),
                random.randint(0, 255),
                random.randint(0, 255))

    def randPoint(self):
        """Return a random (x, y) with 0 <= x <= width, 0 <= y <= height."""
        (width, height) = self.size
        return (random.randint(0, width), random.randint(0, height))

    def randLine(self, num):
        """Draw num lines with random end points and random colours."""
        draw = ImageDraw.Draw(self.image)
        for i in range(0, num):
            draw.line([self.randPoint(), self.randPoint()], self.randRGB())

    def randChinese(self, num):
        """Draw num random GB2312 characters, then 18 noise lines.

        Characters are laid out left to right, one font width plus a 5px
        gap apart, each with up to 5px of random horizontal offset and a
        vertical offset between -5 and 5 pixels. Each gets a random colour.
        """
        gap = 5
        start = 0
        for i in range(0, num):
            char = RandomChar.GB2312()
            x = start + self.fontSize * i + random.randint(0, gap) + gap * i
            # Draw the character generated above; previously it was thrown
            # away and a second random character was drawn instead.
            self.drawText((x, random.randint(-5, 5)), char, self.randRGB())
            self.rotate()
        self.randLine(18)

    def save(self, path):
        """Save the image to path."""
        self.image.save(path)
调用方法:
# Build a generator with the default size, font and background.
# fontColor is stored on the instance, but randChinese() colours each
# character with randRGB() instead.
ic = ImageChar(fontColor=(100,211, 90))
# Draw four random characters followed by the noise lines.
ic.randChinese(4)
# Write the finished image to 1.jpeg.
ic.save("1.jpeg")
效果图:
python 测试与应用:41357415
深圳IT招聘求职:105095215
武冈深圳高级群:66250781
相关阅读 更多 +