TenderAutomateSystem/notebook/process_data.ipynb

463 lines
19 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2024-12-23T14:27:05.940840Z",
"start_time": "2024-12-23T14:27:05.123967Z"
}
},
"source": [
"import os\n",
"import re\n",
"\n",
"import pandas as pd"
],
"outputs": [],
"execution_count": 4
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-23T14:27:06.297003Z",
"start_time": "2024-12-23T14:27:05.974046Z"
}
},
"cell_type": "code",
"source": [
"# Location of the tender-parameter classification workbook. The default is the\n",
"# original local path; set TENDER_CLASSIFICATION_XLSX to point the notebook at a\n",
"# copy stored elsewhere without editing this cell.\n",
"classification_path = os.environ.get(\n",
"    \"TENDER_CLASSIFICATION_XLSX\",\n",
"    r\"D:\\workspace\\AI for Medical Devices\\标书参数分类(初版).xlsx\",\n",
")\n",
"classification = pd.read_excel(classification_path)\n",
"# Preview the first rows to confirm the workbook loaded.\n",
"classification.head()"
],
"id": "b0beb023177a75c9",
"outputs": [
{
"data": {
"text/plain": [
" 中标医疗单位 中标时间 中标产品商品名 中标价格(元) \\\n",
"0 哈尔滨元新医疗器械有限公司 2023年8月 迈瑞sv300呼吸机 159500 \n",
"1 北京博辉康健科技有限公司 2023年12 迈瑞sv300呼吸机 176991.15 \n",
"2 国药器械(唐山)有限公司 2023年12月 迈瑞sv300呼吸机 2981250(包总价) \n",
"3 江西恩力医疗器械经营有限公司 2023年12月 迈瑞sv300呼吸机 298000 \n",
"4 河南省欧瑞医疗器械有限公司 2024年1月 迈瑞sv300呼吸机 185000 \n",
"\n",
" 外观参数 \\\n",
"0 1.采用≥12.1英寸彩色TFT触摸控制屏分辨率≥1280*800。\\n2.中文操作界面... \n",
"1 1.彩色液晶触摸显示屏≥10英寸分辨率≥1024×768。\\n2.波形显示通道≥3道。 \n",
"2 1.中文操作界面。\\n2.采用≥12英寸彩色TFT触摸控制屏幕分辨率≥1280*800。\\... \n",
"3 1.中文操作界面。\\n2.采用≥12 英寸彩色触摸控制屏幕分辨率≥1280*800。\\n2... \n",
"4 1.具备中文操作界面。\\n2.彩色触摸控制屏幕≥12 英寸分辨率≥1280*800\\n... \n",
"\n",
" 基本功能参数 \\\n",
"0 1.适用于成人、小儿和婴幼儿进行通气辅助及呼吸支持的呼吸机。\\n2.中文操作界面、中文报警、... \n",
"1 1.网络传输端口具备LAN、WIFI接口。\\n2.具备呼吸波形及环图冻结功能。\\n3.具备... \n",
"2 1.自检功能,检查系统管道阻力、泄漏量和顺应性,测试流量传感器、呼气阀和安全阀等部件。\\n2... \n",
"3 1.配备开机自检功能,能进行系统顺应性补偿及泄露补偿;具有图形化和文字提示功能。\\n2.参数... \n",
"4 1.呼吸环可存储不少于 4 个、具有对比显示功能,可冻结及通过 U 盘导出\\n模式,容量控制... \n",
"\n",
" 工学机械性能参数 \\\n",
"0 1.电动电控呼吸机,涡轮驱动产生空气气源,方便进行转运。\\n2.不小于120分钟内置后备可充... \n",
"1 1.后备电池续航能力≥90min\\n2.参数设置范围:\\n 潮气量25mL—2000... \n",
"2 1.标配1块锂电池≥120分钟内置后备可充电电池方便进行批1转运。\\n2.整机重量≤11... \n",
"3 1.电动电控呼吸机。\\n2.设置参数要求\\n 潮气量至少支持20ml-2000ml\\n ... \n",
"4 1.电动电控呼吸机。\\n2.呼吸机带内置锂电池,方便手提移动。\\n3.内置压缩空气发生器。\\... \n",
"\n",
" 延展功能参数(如附加软件功能等) 证件认证参数 \n",
"0 1.吸气安全阀呼气阀组件可拆卸并能高温高压蒸汽消毒134℃以防止交叉感染。\\n2.可... NaN \n",
"1 1.吸气阀、呼吸阀组件可拆卸可耐受≥134℃高温高压消毒。\\n2.具备自动吸痰功能。\\n3... NaN \n",
"2 1.吸气安全阀组件可拆卸并能高温高压蒸汽消毒134℃以防止交叉感染。\\n2.呼气阀组... NaN \n",
"3 1.吸气阀、呼气阀均可拆卸可高温消毒≥134°C。\\n2.参数设置时具有自动计算关联参... NaN \n",
"4 1.吸气阀、呼气阀可徒手拆卸并能高温消毒。\\n2.呼气触发灵敏度可调节,呼气灵敏度具有自动触... NaN "
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>中标医疗单位</th>\n",
" <th>中标时间</th>\n",
" <th>中标产品商品名</th>\n",
" <th>中标价格(元)</th>\n",
" <th>外观参数</th>\n",
" <th>基本功能参数</th>\n",
" <th>工学机械性能参数</th>\n",
" <th>延展功能参数(如附加软件功能等)</th>\n",
" <th>证件认证参数</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>哈尔滨元新医疗器械有限公司</td>\n",
" <td>2023年8月</td>\n",
" <td>迈瑞sv300呼吸机</td>\n",
" <td>159500</td>\n",
" <td>1.采用≥12.1英寸彩色TFT触摸控制屏分辨率≥1280*800。\\n2.中文操作界面...</td>\n",
" <td>1.适用于成人、小儿和婴幼儿进行通气辅助及呼吸支持的呼吸机。\\n2.中文操作界面、中文报警、...</td>\n",
" <td>1.电动电控呼吸机,涡轮驱动产生空气气源,方便进行转运。\\n2.不小于120分钟内置后备可充...</td>\n",
" <td>1.吸气安全阀呼气阀组件可拆卸并能高温高压蒸汽消毒134℃以防止交叉感染。\\n2.可...</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>北京博辉康健科技有限公司</td>\n",
" <td>2023年12</td>\n",
" <td>迈瑞sv300呼吸机</td>\n",
" <td>176991.15</td>\n",
" <td>1.彩色液晶触摸显示屏≥10英寸分辨率≥1024×768。\\n2.波形显示通道≥3道。</td>\n",
" <td>1.网络传输端口具备LAN、WIFI接口。\\n2.具备呼吸波形及环图冻结功能。\\n3.具备...</td>\n",
" <td>1.后备电池续航能力≥90min\\n2.参数设置范围:\\n 潮气量25mL—2000...</td>\n",
" <td>1.吸气阀、呼吸阀组件可拆卸可耐受≥134℃高温高压消毒。\\n2.具备自动吸痰功能。\\n3...</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>国药器械(唐山)有限公司</td>\n",
" <td>2023年12月</td>\n",
" <td>迈瑞sv300呼吸机</td>\n",
" <td>2981250(包总价)</td>\n",
" <td>1.中文操作界面。\\n2.采用≥12英寸彩色TFT触摸控制屏幕分辨率≥1280*800。\\...</td>\n",
" <td>1.自检功能,检查系统管道阻力、泄漏量和顺应性,测试流量传感器、呼气阀和安全阀等部件。\\n2...</td>\n",
" <td>1.标配1块锂电池≥120分钟内置后备可充电电池方便进行批1转运。\\n2.整机重量≤11...</td>\n",
" <td>1.吸气安全阀组件可拆卸并能高温高压蒸汽消毒134℃以防止交叉感染。\\n2.呼气阀组...</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>江西恩力医疗器械经营有限公司</td>\n",
" <td>2023年12月</td>\n",
" <td>迈瑞sv300呼吸机</td>\n",
" <td>298000</td>\n",
" <td>1.中文操作界面。\\n2.采用≥12 英寸彩色触摸控制屏幕分辨率≥1280*800。\\n2...</td>\n",
" <td>1.配备开机自检功能,能进行系统顺应性补偿及泄露补偿;具有图形化和文字提示功能。\\n2.参数...</td>\n",
" <td>1.电动电控呼吸机。\\n2.设置参数要求\\n 潮气量至少支持20ml-2000ml\\n ...</td>\n",
" <td>1.吸气阀、呼气阀均可拆卸可高温消毒≥134°C。\\n2.参数设置时具有自动计算关联参...</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>河南省欧瑞医疗器械有限公司</td>\n",
" <td>2024年1月</td>\n",
" <td>迈瑞sv300呼吸机</td>\n",
" <td>185000</td>\n",
" <td>1.具备中文操作界面。\\n2.彩色触摸控制屏幕≥12 英寸分辨率≥1280*800\\n...</td>\n",
" <td>1.呼吸环可存储不少于 4 个、具有对比显示功能,可冻结及通过 U 盘导出\\n模式,容量控制...</td>\n",
" <td>1.电动电控呼吸机。\\n2.呼吸机带内置锂电池,方便手提移动。\\n3.内置压缩空气发生器。\\...</td>\n",
" <td>1.吸气阀、呼气阀可徒手拆卸并能高温消毒。\\n2.呼气触发灵敏度可调节,呼气灵敏度具有自动触...</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 5
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-23T14:36:34.026153Z",
"start_time": "2024-12-23T14:36:34.017240Z"
}
},
"cell_type": "code",
"source": [
"# Patterns used by extract() to recognise numbered lines in the raw tender text.\n",
"# NOTE(review): this file is flagged as containing ambiguous Unicode characters and the\n",
"# character class in re_extract_term renders with no visible members - confirm the exact\n",
"# patterns against the raw notebook before editing them.\n",
"# Heading line: one Chinese numeral (一 to 七) followed by '、'; group 1 sits inside an\n",
"# optional trailing group.\n",
"re_recognize_title = re.compile(r'^[一二三四五六七]、.+?(?:(.*))?$')\n",
"# Item line: optional ★, ▲ and # markers, a 1-2 digit number and '、'; group 1 is the\n",
"# trailing text kept as the term.\n",
"re_extract_term = re.compile(r'^★?▲?#?\\d{1,2}、(?:.+?)?([^]*)$')\n",
"# Sub-item line numbered like '1.1、' with the same optional markers; group 1 is\n",
"# everything after '、'.\n",
"re_extract_term1 = re.compile(r'^★?▲?#?\\d{1,2}\\.\\d{1,2}、(.+)$')\n",
"# Directory scanned for the raw tender text files.\n",
"datadir = r'../data/raw/SV300'\n",
"# os.listdir() returns names in arbitrary order; sort them so the file handled\n",
"# first by the loop below is the same on every run and platform.\n",
"datafile = sorted(os.listdir(datadir))\n",
"def extract(s):\n",
"    \"\"\"Classify one line of tender text.\n",
"\n",
"    The heading, item and sub-item patterns are tried in that order. For the\n",
"    first pattern that matches, returns ``(1, text)`` where ``text`` is that\n",
"    pattern's group 1 (it may be empty). When nothing matches, returns\n",
"    ``(0, s)`` with the line unchanged.\n",
"    \"\"\"\n",
"    for pattern in (re_recognize_title, re_extract_term, re_extract_term1):\n",
"        hit = pattern.match(s)\n",
"        if hit is not None:\n",
"            return 1, hit.group(1)\n",
"    return 0, s\n",
"# Preview the extraction on one raw file: print each numbered term as it is found,\n",
"# then show the first three collected terms.\n",
"for filename in datafile:\n",
"    terms = []\n",
"    with open(os.path.join(datadir, filename), mode='r', encoding='utf-8') as f:\n",
"        # Iterate the file directly so a blank line is skipped instead of being\n",
"        # mistaken for end-of-file (readline().rstrip() is '' in both cases).\n",
"        for raw_line in f:\n",
"            s = raw_line.rstrip()\n",
"            if not s:\n",
"                continue\n",
"            matched, term = extract(s)\n",
"            if not term:\n",
"                # Numbered line whose captured text is empty: nothing to record.\n",
"                continue\n",
"            if matched:\n",
"                print(term)\n",
"                terms.append(term)\n",
"            elif terms:\n",
"                # Unnumbered line: treat it as a continuation of the previous term.\n",
"                terms[-1] += term\n",
"            else:\n",
"                # Unnumbered text before any numbered term: keep it as its own entry\n",
"                # rather than indexing into an empty list.\n",
"                terms.append(term)\n",
"    print(terms[:3])\n",
"    # NOTE(review): only the first file is processed; drop this break to parse them all.\n",
"    break"
],
"id": "f971958e704d5118",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"适用于成人、小儿和婴幼儿患者通气辅助及呼吸支持\n",
"彩色液晶触摸显示屏≥10英寸分辨率≥1024×768\n",
"波形显示通道≥3道\n",
"具备呼吸波形及环图冻结功能;\n",
"具备动态肺视图;\n",
"监测参数回顾可回顾≥72h全部监测参数趋势图\n",
"报警及操作日志存储≥5000条\n",
"具备USB接口导出数据功能\n",
"后备电池续航能力≥90min\n",
"吸气阀、呼吸阀组件可拆卸可耐受≥134℃高温高压消毒\n",
"具备开机自检和图形化及文字提示功能\n",
"网络传输端口具备LAN、WIFI接口\n",
"呼吸模式及功能\n",
"通气模式≥6种至少包括容量控制/辅助通气模式、容量同步间歇指令通气模式容量模式流速波形可调方波、50%递减波和100%递减波)、压力控制/辅助通气模式、压力同步间歇指令通气模式。\n",
"具备高流速氧疗功能;\n",
"氧疗模式流速≥80L/min可计时\n",
"具备呼吸同步技术;\n",
"具备手动呼吸、吸气保持、呼气保持、同步雾化、纯氧灌注、智能吸痰功能;\n",
"具有气管插管阻力自动补偿功能,导管孔径和补偿百分比可设;\n",
"潮气量25mL—2000mL\n",
"呼吸频率2—100次/min\n",
"吸气流速10—180L/min\n",
"SIMV频率2—60次/min\n",
"吸呼比4-1—1-10\n",
"最大峰值流速≥210L/min\n",
"吸气压力6-80 cmH2O\n",
"压力支持0-80cmH2O\n",
"PEEP范围0-50cmH2O\n",
"吸气时间范围0.1-10s\n",
"监测参数\n",
"具备气道压力监测功能;\n",
"具备潮气量监测功能;\n",
"具备呼吸频率监测功能,包括总呼吸频率、自主呼吸频率、机控呼吸频率;\n",
"具备肺力学参数监测功能;\n",
"具备气道压力过高报警功能;\n",
"具备分钟通气量过高/过低报警功能;\n",
"具备呼出潮气量过高/过低报警功能;\n",
"具备呼吸频率过高/过低报警功能;\n",
"具备窒息报警功能报警时间5-60s可调\n",
"5台\n",
"3个\n",
"10套\n",
"5个\n",
"['适用于成人、小儿和婴幼儿患者通气辅助及呼吸支持', '彩色液晶触摸显示屏≥10英寸分辨率≥1024×768', '波形显示通道≥3道']\n"
]
}
],
"execution_count": 18
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-23T14:36:35.005647Z",
"start_time": "2024-12-23T14:36:35.002496Z"
}
},
"cell_type": "code",
"source": [
"# Spot-check re_extract_term against a short numbered line; `g` is inspected in the next cell.\n",
"g = re_extract_term.match('1、基本要求')\n",
"print(g)"
],
"id": "768a61cabaa0de01",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<re.Match object; span=(0, 7), match='1、基本要求'>\n"
]
}
],
"execution_count": 19
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-23T14:36:38.424669Z",
"start_time": "2024-12-23T14:36:38.421062Z"
}
},
"cell_type": "code",
"source": [
"# Text captured by group 1 of the match from the previous cell.\n",
"g.group(1)"
],
"id": "1691d82060cdf339",
"outputs": [
{
"data": {
"text/plain": [
"''"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 20
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-23T10:02:35.443676Z",
"start_time": "2024-12-23T10:02:35.440726Z"
}
},
"cell_type": "code",
"source": "",
"id": "c4e5067760988489",
"outputs": [],
"execution_count": 83
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-23T10:03:20.061896Z",
"start_time": "2024-12-23T10:03:20.057321Z"
}
},
"cell_type": "code",
"source": [
"# Show the file names that os.listdir() found in datadir.\n",
"datafile"
],
"id": "e2cbed3207d7d46c",
"outputs": [
{
"data": {
"text/plain": [
"['2023年12月北京清华长庚医院自主采购.txt',\n",
" '2023年12月天津市第一中心医院自主采购.txt',\n",
" '2023年12月广西医科大学第一附属医院自主采购.txt',\n",
" '2023年8月黑龙江中医药大学附属第二医院自主采购.txt',\n",
" '2024年10月西南大学自主采购.txt',\n",
" '2024年11月青岛市市立医院自主采购.txt',\n",
" '2024年1月新乡医学院自主采购.txt',\n",
" '2024年1月首都医科大学附属北京友谊医院 顺义院区自主采购.txt',\n",
" '2024年9月云南省肿瘤医院自主采购.txt',\n",
" '2024年9月贵州医科大学附属医院自主采购.txt']"
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 88
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-23T10:34:18.178181Z",
"start_time": "2024-12-23T10:34:18.171860Z"
}
},
"cell_type": "code",
"source": [
"def _year_month_key(name):\n",
"    \"\"\"Sort key ordering names such as '2023年8月...' by numeric year and month.\n",
"\n",
"    Names without a leading 'YYYY年M' prefix sort after all dated names, by name.\n",
"    \"\"\"\n",
"    dated = re.match(r'(\\d{4})年(\\d{1,2})', name)\n",
"    if dated is None:\n",
"        return (1, 0, 0, name)\n",
"    return (0, int(dated.group(1)), int(dated.group(2)), name)\n",
"\n",
"\n",
"# A plain string sort puts '2023年12月' before '2023年8月' and '2024年10月' before\n",
"# '2024年1月'; sort in place on the parsed (year, month) instead.\n",
"datafile.sort(key=_year_month_key)\n",
"datafile"
],
"id": "d845f16ca0762041",
"outputs": [
{
"data": {
"text/plain": [
"['2023年12月北京清华长庚医院自主采购.txt',\n",
" '2023年12月天津市第一中心医院自主采购.txt',\n",
" '2023年12月广西医科大学第一附属医院自主采购.txt',\n",
" '2023年8月黑龙江中医药大学附属第二医院自主采购.txt',\n",
" '2024年10月西南大学自主采购.txt',\n",
" '2024年11月青岛市市立医院自主采购.txt',\n",
" '2024年1月新乡医学院自主采购.txt',\n",
" '2024年1月首都医科大学附属北京友谊医院 顺义院区自主采购.txt',\n",
" '2024年9月云南省肿瘤医院自主采购.txt',\n",
" '2024年9月贵州医科大学附属医院自主采购.txt']"
]
},
"execution_count": 95,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 95
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-23T10:35:08.728504Z",
"start_time": "2024-12-23T10:35:08.723103Z"
}
},
"cell_type": "code",
"source": [
"# Numeric strings compare character by character, so '9' sorts above '12' and '1'.\n",
"temp = sorted((str(n) for n in range(1, 13)), reverse=True)\n",
"temp"
],
"id": "5f0cbbd84c2e2dbb",
"outputs": [
{
"data": {
"text/plain": [
"['9', '8', '7', '6', '5', '4', '3', '2', '12', '11', '10', '1']"
]
},
"execution_count": 97,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 97
},
{
"metadata": {},
"cell_type": "code",
"outputs": [],
"execution_count": null,
"source": "",
"id": "8fd7e3611b128add"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}