[Question] Django PostgreSQL data write stalls

Author: BlgAtlfans (BLG_Eric)   2016-10-04 23:37:55
Hello everyone,
I've recently been writing a file upload module.
It takes uploaded csv, xls, xlsx, etc. files and turns each one into a PostgreSQL table.
Testing with small files has never been a problem;
they upload and land in PostgreSQL within a few seconds.
But when I recently uploaded a ~100 MB xlsx file (about 130,000 rows), the page hangs.
Looking into it, the upload itself is fast and finishes in a few seconds,
but the subsequent import into PostgreSQL never responds and just sits there.
No new table shows up in the database either (my bad, I never set up any timeout).
I hope someone can take a look at the code, thanks.
Here is the code in upload.py (probably not the problem, but attached anyway):
# -*- coding: utf-8 -*-
from django.shortcuts import render_to_response
from django.template import RequestContext
from django.http import HttpResponseRedirect
from django.core.urlresolvers import reverse
from django.contrib import messages
from django.conf import settings
from django.db import connection
from django.views.decorators.csrf import csrf_exempt
import csv
import sys
import os
import random
import psycopg2
from .models import Document, Folder
from .forms import DocumentForm
from data.write import *


def upload(request, fkey):  # upload multiple files and create a new table for each in postgresql
    tct = Document.objects.count()  # table count
    fct = Folder.objects.count()  # folder count
    f = DocumentForm()
    folder = Folder.objects.filter(groupkey=fkey).values_list('grouptitle', flat=True).first()
    if request.method == 'POST':
        f = DocumentForm(request.POST, request.FILES)
        if f.is_valid():
            file_count = 0  # how many files are in this upload
            tc = 0  # index into the split title list
            for u in request.FILES.getlist('file'):
                file_count = file_count + 1
            check_title = request.POST['title']  # get titles (one or multiple)
            info = request.POST['details']
            tag = request.POST['tags']
            oauth = request.POST['public']
            source = request.POST['source']
            print(file_count)
            print(len(check_title.split(",")))
            if len(check_title.split(",")) != file_count:  # title count and file count must match
                messages.warning(request, "Title Format Error!!")
            else:
                title = check_title.split(",")
                for up in request.FILES.getlist('file'):  # multiple file upload loop
                    # check whether a file with the same title already exists in this folder
                    samefilercheck = Document.objects.filter(title=title[tc], folder=fkey).count()
                    if samefilercheck > 0:
                        messages.warning(request, "Same File Already Exists!!")
                        return HttpResponseRedirect(reverse('data.views.upload', kwargs={'fkey': fkey}))
                    else:
                        pname = settings.MEDIAPATH + '\\%s\\' % folder + up.name.replace(" ", "_").replace("(", "").replace(")", "")
                        # up.name = up.name.replace('','_')
                        filekey = ''.join([random.SystemRandom().choice('abcdefghijklmnopqrstuvwxyz0123456789') for i in range(25)])
                        n, ext = os.path.splitext(up.name)
                        newdoc = Document(title=title[tc], filekey=filekey, path=u'%s' % pname,
                                          filename=u'%s' % up.name, tablename=u'%s' % n,
                                          filetype=ext, info=info, tag=tag, oauth=oauth,
                                          source=source, folder=fkey, version='1.0')
                        newdoc.file = handle_uploaded_file(up, pname)
                        newdoc.save()
                        tc = tc + 1
                messages.warning(request, "Upload Success!!!")
                tc = 0
                for filename in request.FILES.getlist('file'):  # call a different write function per file type
                    documents = Document.objects.get(filename=filename)  # pick the writer by filetype
                    if documents.filetype == '.csv':
                        csvwritein(documents)
                    if documents.filetype == '.xls':
                        xlswritein(documents)
                    if documents.filetype == '.xlsx':
                        xlsxwritein(documents)
                    if documents.filetype == '.json':
                        jsonwritein(documents)
                url = '/datasets/%s' % (fkey)
                return HttpResponseRedirect(url)
        else:  # if validation fails, return to an empty form
            messages.warning(request, "Upload Fail!!!")
            f = DocumentForm()
    return render_to_response('upload.html', RequestContext(request, locals()))


def handle_uploaded_file(f, pn):
    with open(pn, 'wb+') as destination:
        for chunk in f.chunks():
            destination.write(chunk)
    return pn
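
Since the upload itself finishes in a few seconds, the stall almost certainly happens in the write functions the view calls at the end, inside the same request. A quick way to confirm that is to time the import directly from a Django shell, outside the request/response cycle. This is only a diagnostic sketch; it assumes the app is named data (as the from data.write import * line suggests) and that a Document row for the big xlsx already exists:

# run inside `python manage.py shell`
import time
from data.models import Document
from data.write import xlsxwritein

doc = Document.objects.order_by('-id').first()  # hypothetical: grab the most recent upload
start = time.time()
xlsxwritein(doc)
print('import took %.1f seconds' % (time.time() - start))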
Here is the write.py code that writes into PostgreSQL (it's long, so only the xlsx part is shown):
from django.shortcuts import render_to_response
from django.template import RequestContext
from django.http import HttpResponseRedirect
from django.core.urlresolvers import reverse
from django.contrib import messages
from django.conf import settings
from django.db import connection
from django.views.decorators.csrf import csrf_exempt
import re
import sys
import random
import psycopg2
import xlrd
import openpyxl as pyxl
from .models import Document
from .forms import DocumentForm


def xlsxwritein(doc):  # write a file of type xlsx into the database
    xlsxt = 0  # 0 means the table has not been created yet (first pass)
    conn = psycopg2.connect("dbname='apidb' user='postgres' host='localhost' password='eric40502' port='8000'")
    maincur = conn.cursor()
    readcur = conn.cursor()
    writecur = conn.cursor()
    # check whether a table with the same name is already in the database
    readcur.execute("select exists(select * from information_schema.tables where table_name='%s')" % doc.tablename)
    check = readcur.fetchone()[0]
    row_id = 1  # used for the id field below
    wb = pyxl.load_workbook(doc.path)
    sheetnames = wb.get_sheet_names()
    ws = wb.get_sheet_by_name(sheetnames[0])
    print(ws.max_row)
    for rown in range(ws.max_row):
        if xlsxt == 0:
            if check == True:  # if the table exists, rename it in postgresql and change the settings
                checktrue(doc)
            else:
                tablename = '"%s"' % doc.tablename
                field = [ws.cell(row=1, column=col_index).value for col_index in range(1, ws.max_column + 1)]
                maincur.execute("CREATE TABLE %s (id SERIAL PRIMARY KEY);" % tablename)
                for coln in range(ws.max_column):
                    field[coln] = '"%s"' % field[coln]  # quote the header value so it can be used as a column name
                    if field[coln] == 'ID':
                        field[coln] = 'original_id'
                    print(field)
                    print(field[coln])
                    maincur.execute("ALTER TABLE %s ADD %s CITEXT;" % (tablename, field[coln]))
                xlsxt = xlsxt + 1
        elif xlsxt > 0 and check == False:  # not the first pass (insert data) and no table with the same name exists
            for coln in range(ws.max_column):
                if coln == 0:
                    writecur.execute("INSERT INTO %s (%s) VALUES ('%s');"
                                     % (tablename, field[coln], str(ws.cell(row=rown, column=coln + 1).value)))
                else:
                    writecur.execute("UPDATE %s SET %s = '%s' WHERE id = '%d';"
                                     % (tablename, field[coln], str(ws.cell(row=rown + 1, column=coln + 1).value), row_id))
            xlsxt = xlsxt + 1
            row_id = row_id + 1
        else:
            break
    conn.commit()
    maincur.close()
    readcur.close()
    writecur.close()
    conn.close()
    xlsxt = 0
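
For a sense of why this stalls: with roughly 130,000 data rows and, say, 20 columns, the loop above issues about 130,000 INSERTs plus some 2.5 million single-cell UPDATEs, each one its own statement and round trip to PostgreSQL, all inside the web request. Writing each row as a single parameterized INSERT removes the per-cell UPDATEs and also stops cell values from being spliced into the SQL string (which breaks as soon as a value contains a quote). This is only a rough sketch under the same assumptions as above (tablename and field are the already-quoted identifiers built on the first pass, ws is the open openpyxl worksheet); executemany still sends one statement per row, so for files this size COPY would be faster still:

def bulk_insert(conn, tablename, field, ws):
    # insert every data row of worksheet ws with a single INSERT per row
    cur = conn.cursor()
    columns = ', '.join(field)
    placeholders = ', '.join(['%s'] * len(field))
    sql = 'INSERT INTO %s (%s) VALUES (%s);' % (tablename, columns, placeholders)
    rows = []
    for rown in range(2, ws.max_row + 1):  # row 1 holds the header
        rows.append(tuple(str(ws.cell(row=rown, column=c).value)
                          for c in range(1, ws.max_column + 1)))
    cur.executemany(sql, rows)  # cell values travel as query parameters
    conn.commit()
    cur.close()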
Author: kenduest (小州)   2016-10-05 05:47:00
The proper way is to hand this off to background processing with celery; that avoids this kind of problem.
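
A minimal sketch of what kenduest suggests, assuming Celery is already installed and configured for the project; the module name data/tasks.py and the task name import_document are made up for illustration:

# data/tasks.py  (hypothetical module)
from celery import shared_task
from .models import Document
from .write import xlsxwritein

@shared_task
def import_document(doc_id):
    # run the slow xlsx-to-table import in a worker, outside the web request
    doc = Document.objects.get(pk=doc_id)
    xlsxwritein(doc)

# in the upload view, instead of calling xlsxwritein(documents) directly:
# import_document.delay(documents.id)  # returns at once; a celery worker does the import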
Author: a0919610611 (熾)   2016-10-05 13:09:00
Upload/download speed and write speed are two very different things.
Author: BlgAtlfans (BLG_Eric)   2016-10-05 15:59:00
Thanks for the replies, but why does it get stuck? Is it the CPU?
