Taking the program's results, survey practice, and cost factors together, a sample of 1,000 people is sufficient.
A sample of 1,000 people meets a 95% confidence level with a margin of error d = 3% (standard error 1.5%).
More exact case:
At a 99% confidence level with a margin of error d = 3% (standard error 1%), the sample needs about 2,000 people (exact value 1,842).
# coding=utf-8
# Compute the minimum sample size
# Compute the confidence interval
# These are approximate values, but they give the largest (most conservative) sample estimate
# 68% confidence interval: margin of error (d) = 1 * standard error (SE)
# 95% confidence interval: margin of error (d) = 2 * standard error (SE)
# 99% confidence interval: margin of error (d) = 3 * standard error (SE)
import math
# Sample size passed to standard_error() further down in this script.
n=2500
# d is the survey margin of error; the confidence interval is (p - d, p + d).
d=0.03
# Confidence level (the functions below handle 0.95 and 0.99).
# Alternative setting: confidence = 0.95
confidence=0.99
# Proportion used as the centre of the confidence interval.
p=0.3
def standard_error(n):
    """Return the worst-case standard error of a sample proportion.

    The variance term p * (1 - p) is replaced by its maximum, 0.25, so the
    result is sqrt(0.25 / n).

    Args:
        n: Sample size (must be positive).

    Returns:
        The standard error as a float.
    """
    return math.sqrt(0.25 / n)
# Compute the minimum sample size.
# Typical use: margin of error 3%, confidence level 95%.
def least_sample(d, confidence):
    """Return a conservative minimum sample size for a margin of error.

    The margin of error ``d`` is treated as a whole multiple of the standard
    error (1x for 68%, 2x for 95%, 3x for 99% confidence). The variance term
    p * (1 - p) is replaced by its maximum, 0.25, so the result is an upper
    bound on the sample size needed for any proportion.

    Args:
        d: Margin of error, e.g. 0.03 for +/-3%.
        confidence: Confidence level; one of 0.68, 0.95 or 0.99.

    Returns:
        The minimum sample size as a float (not rounded up).

    Raises:
        ValueError: If ``confidence`` is not one of the supported levels.
    """
    # Number of standard errors spanned by the margin of error at each level.
    multipliers = {0.68: 1.0, 0.95: 2.0, 0.99: 3.0}
    if confidence not in multipliers:
        raise ValueError("unsupported confidence level: %r" % (confidence,))
    # Float multipliers keep the division exact on Python 2 as well.
    required_se = d / multipliers[confidence]
    return 0.25 / required_se ** 2
# Confidence interval around a proportion (95% or 99%).
def confidence_interval(confidence, p, standard_error):
    """Return the (start, end) confidence interval around ``p``.

    The half-width is 2 standard errors at 95% confidence and 3 standard
    errors at 99% confidence.

    Args:
        confidence: Confidence level; 0.95 or 0.99.
        p: Centre of the interval (the observed proportion).
        standard_error: Standard error of the proportion.

    Returns:
        A ``(start, end)`` tuple of floats.

    Raises:
        ValueError: If ``confidence`` is not 0.95 or 0.99.
    """
    multipliers = {0.95: 2, 0.99: 3}
    if confidence not in multipliers:
        raise ValueError("unsupported confidence level: %r" % (confidence,))
    half_width = standard_error * multipliers[confidence]
    return (p - half_width, p + half_width)
# Worst-case standard error for the configured sample size n.
# The result gets its own name so the standard_error() function stays callable.
standard_error_value = standard_error(n)
# Interval around p at the configured confidence level.
interval = confidence_interval(confidence, p, standard_error_value)
# %-formatting prints the same text on Python 2 and Python 3.
print("N: %s" % n)
print(" P:  %s" % p)
print("Standard_error: %s" % standard_error_value)
print("Confidence_interval: %s" % (interval,))
Exact version:
# coding=utf-8
# Confidence interval
# Given the defect probability p, the total count n, and the z-score (z) corresponding to the confidence level, the allowable margin of error d can be estimated, and the confidence interval is then computed from it.
import math

import normal_distribution

# Total number of items: n
n = 300
# Number of defective items: n1
n1 = 5
# Confidence level (alternative setting: 0.95).
confidence = 0.99
# Half of the significance level.
a_half = normal_distribution.a_half(confidence)
# Complementary cumulative z table (>), as (probability, z-score) pairs.
complementary_cumulative = normal_distribution.complementary_cumulative
# Observed defect proportion; "* 1.0" forces float division on Python 2.
p = n1 * 1.0 / n
# z-score corresponding to the confidence level.
z = normal_distribution.z_score(complementary_cumulative, a_half)
# Estimate the allowable margin of error d.
def D(n, p, z):
    """Return the margin of error for a proportion, rounded to 4 decimals.

    Computes z * sqrt(p * (1 - p) / n).

    Args:
        n: Sample size (must be positive).
        p: Observed proportion.
        z: z-score for the chosen confidence level.

    Returns:
        The margin of error as a float rounded to 4 decimal places.
    """
    # "* 1.0" forces float division on Python 2 when p and n are integers.
    margin = z * math.sqrt(p * (1 - p) * 1.0 / n)
    return round(margin, 4)
def confidence_interval(p, d):
    """Return the confidence interval (p - d, p + d), rounded to 4 decimals.

    Args:
        p: Observed proportion (centre of the interval).
        d: Margin of error (half-width of the interval).

    Returns:
        A ``(start, end)`` tuple with both bounds rounded to 4 decimal places.
    """
    return (round(p - d, 4), round(p + d, 4))
# Minimum number of samples required:
#   n = (z**2 * p * (1 - p)) / d**2
# d is usually 3%.
def least_sample(d, z):
    """Return the conservative minimum sample size for margin of error ``d``.

    Uses 0.25, the maximum value of p * (1 - p), so the result is an upper
    bound that holds for any proportion.

    Args:
        d: Margin of error (must be non-zero), e.g. 0.03.
        z: z-score for the chosen confidence level.

    Returns:
        The minimum sample size as a float (not rounded up).
    """
    # The divisor is the ``d`` argument, not a module-level name.
    return (z ** 2 * 0.25) / (d ** 2)
# Margin of error d for the observed sample.
# Results get their own names so the functions above stay callable.
margin_of_error = D(n, p, z)
# Confidence interval.
interval = confidence_interval(p, margin_of_error)
# Minimum sample size for that margin of error.
minimum_sample = least_sample(margin_of_error, z)
# coding=utf-8
# Normal distribution
import math

# Text file holding the cumulative normal table (probability of X <= z).
# NOTE(review): confirm the letter case of both file names against the files on disk.
Filename = "Normal_distribution.txt"
# Text file holding the complementary cumulative table (probability of X > z).
# No padding spaces inside the literal: they would become part of the path.
Filename_complementary_cumulative = "Complementary_cumulative.txt"
# Build the normal distribution table (cumulative probabilities, i.e. P(X <= z)).
def make_list_normaldistribution(fileName):
    """Read a z table from a text file and return (z, value) pairs.

    Each non-blank line is split on whitespace. The first token (the row
    label) is dropped and every remaining token is parsed as a float. Values
    are numbered in file order with z starting at 0 and growing by 0.01 per
    value.

    Args:
        fileName: Path of the table file to read.

    Returns:
        A list of ``(z, value)`` tuples in file order.
    """
    z_values = []
    table_values = []
    z = 0
    # "with" closes the file even if a token fails to parse.
    with open(fileName) as table_file:
        for line in table_file:
            tokens = line.split()
            if not tokens:
                # A blank line has no row label to drop; skip it.
                continue
            for word in tokens[1:]:
                z_values.append(z)
                table_values.append(float(word))
                # Round each step so float drift never reaches the z keys.
                z = round(z + 0.01, 3)
    # list() keeps the result reusable on Python 3, where zip() is one-shot.
    return list(zip(z_values, table_values))
# Normal distribution density: the value at one specific x, not the
# probability of an interval.
def normal_distribution(x, u=0, q=1):
    """Return the normal probability density at ``x``.

    Evaluates (1 / (sqrt(2*pi) * q)) * exp(-(x - u)**2 / (2 * q**2)).

    Args:
        x: Point at which to evaluate the density.
        u: Expected value (mean).
        q: Standard deviation (must be non-zero).

    Returns:
        The density value as a float.
    """
    coefficient = 1.0 / (math.sqrt(2 * math.pi) * q)
    # The exponent is divided by 2*q**2, as the normal density requires.
    exponent = -((x - u) ** 2) / (2.0 * q ** 2)
    return coefficient * math.exp(exponent)
# Probability that a normal variable is at most x, e.g. X <= 1.52.
def normal_distribution_innerarea(xlist, u=0, q=1):
    """Return P(X <= x) for a normal variable, looked up in the z table.

    The value is standardized with z = (x - u) / q and rounded to two
    decimals, the step of the table built by make_list_normaldistribution.
    Negative z uses the symmetry P(Z <= -z) = 1 - P(Z <= z).

    Args:
        xlist: One-element list holding x.
        u: Expected value (mean).
        q: Standard deviation (must be non-zero).

    Returns:
        The probability as a float, or None when z is not in the table.
    """
    # float() avoids integer division on Python 2.
    z = round((xlist[0] - u) / float(q), 2)
    if z < 0:
        # z is already standardized, so recurse with the default u=0, q=1.
        mirrored = normal_distribution_innerarea([-z])
        return None if mirrored is None else 1 - mirrored
    for entry in make_list_normaldistribution(Filename):
        if entry[0] == z:
            return entry[1]
    return None
# Probability that a normal variable lies beyond x, e.g. X >= 1.52.
def normal_distribution_outerarea(xlist, u=0, q=1):
    """Return P(X >= x) as 1 - normal_distribution_innerarea(xlist, u, q).

    Args:
        xlist: One-element list holding x.
        u: Expected value (mean).
        q: Standard deviation.

    Returns:
        The upper-tail probability as a float.
    """
    return 1 - normal_distribution_innerarea(xlist, u, q)
# Probability that the random variable X falls inside an interval,
# e.g. X within (2, 4).
def normal_distribution_range(xlist, u=0, q=1):
    """Return P(min(xlist) <= X <= max(xlist)) for a normal variable.

    Args:
        xlist: Interval bounds in any order.
        u: Expected value (mean).
        q: Standard deviation.

    Returns:
        The cumulative probability at the larger bound minus the cumulative
        probability at the smaller bound.
    """
    # normal_distribution_innerarea expects its value in a one-element list.
    probability_upper = normal_distribution_innerarea([max(xlist)], u, q)
    probability_lower = normal_distribution_innerarea([min(xlist)], u, q)
    return probability_upper - probability_lower
# Probability for a normal random variable, combining three cases:
# (1) X <= n
# (2) X >= n
# (3) X inside an interval (n1, n2)
def normal_distribution_area(xlist, u=0, q=1, compare="smaller"):
    """Return a normal probability for a one-sided or interval query.

    Args:
        xlist: A list with one value (one-sided query) or two values
            (interval query).
        u: Expected value (mean).
        q: Standard deviation.
        compare: For a one-value ``xlist``, "smaller" gives P(X <= x) and
            "greater" gives P(X >= x). Ignored for interval queries.

    Returns:
        The probability, or None when ``xlist`` is not a list of length 1
        or 2.

    Raises:
        ValueError: If ``compare`` is neither "smaller" nor "greater" for a
            one-value query.
    """
    if not isinstance(xlist, list):
        return None
    if len(xlist) == 2:
        return normal_distribution_range(xlist, u, q)
    if len(xlist) == 1:
        if compare == "smaller":
            return normal_distribution_innerarea(xlist, u, q)
        if compare == "greater":
            return normal_distribution_outerarea(xlist, u, q)
        raise ValueError("compare must be 'smaller' or 'greater', got %r"
                         % (compare,))
    return None
# Confidence region: probability to z-score lookup.
# Computes a / 2, half of the significance level.
def a_half(confidence):
    """Return half of the significance level, rounded to 8 decimals.

    Args:
        confidence: Confidence level, e.g. 0.95.

    Returns:
        ``(1 - confidence) / 2`` rounded to 8 decimal places.
    """
    significance = 1 - confidence
    # Rounding removes float noise so the result can be matched by equality.
    return round(significance / 2.0, 8)
# Given a tail probability, return the matching z-score.
def z_score(complementary_cumulative, a_half):
    """Look up the z-score for a tail probability in a table.

    An entry whose probability equals ``a_half`` exactly wins immediately and
    its z-score is returned rounded to 4 decimals. Otherwise every entry
    whose probability rounds (3 decimals) to ``a_half`` is a candidate and
    the largest candidate z-score is returned unrounded.

    Args:
        complementary_cumulative: Iterable of ``(probability, z)`` entries.
        a_half: Tail probability to look up.

    Returns:
        The matching z-score.

    Raises:
        ValueError: If no entry matches ``a_half`` exactly or after rounding.
    """
    candidates = []
    for entry in complementary_cumulative:
        if entry[0] == a_half:
            return round(entry[1], 4)
        if round(entry[0], 3) == a_half:
            candidates.append(entry[1])
    if not candidates:
        raise ValueError("no z-score found for tail probability %r"
                         % (a_half,))
    return max(candidates)
# Confidence level.
confidence = 0.95
# Half of the significance level. The result gets its own name so the
# a_half() function stays callable by importers.
half_alpha = a_half(confidence)
# Cumulative z table (<=).
list_normaldistribution = make_list_normaldistribution(Filename)
# Complementary cumulative z table (>).
list_complementary_cumulative = make_list_normaldistribution(
    Filename_complementary_cumulative)
# The same table flipped into (probability, z-score) pairs for lookup.
complementary_cumulative = [(entry[1], entry[0])
                            for entry in list_complementary_cumulative]
# z-score for the configured confidence level (z_score() stays callable).
z_value = z_score(complementary_cumulative, half_alpha)
Minimum sample size, confidence interval, standard error