import re
import time

import pandas as pd
import requests
from tqdm import tqdm
from bs4 import BeautifulSoup

# Site root and the index page that links to the per-day listing pages.
domain = "https://www.data.jma.go.jp/"
index_url = "https://www.data.jma.go.jp/svd/eqev/data/daily_map/index.html"

# Fetch the index page; fail fast on a hung connection or an HTTP error
# instead of silently parsing an error page.
res = requests.get(index_url, timeout=30)
res.raise_for_status()
soup = BeautifulSoup(res.content, "html.parser")

# Keep only hrefs that are exactly 13 characters long (presumably names such
# as "YYYYMMDD.html" -- confirm against the index page).
# `href=True` skips anchors that have no href attribute; calling len() on the
# missing attribute (None) would raise TypeError.
eq_link = [a["href"] for a in soup.find_all("a", href=True) if len(a["href"]) == 13]
print(len(eq_link))
# Extract earthquake records from the text of one listing page.
def data_pick(text):
    """Split the text of a listing page into per-record token lists.

    Lines of length <= 1 are ignored and the first two remaining lines are
    skipped (presumably a header -- confirm against the page). For every other
    line, the space after a degree sign is removed, runs of spaces are
    collapsed, ":" is replaced by a space, and the line is split on single
    spaces. The last token of each line is dropped.

    Args:
        text: Full text content of the page's ``<pre>`` element.

    Returns:
        A list with one list of string tokens per record line.
    """
    row_data = []
    lines = [line for line in text.split("\n") if len(line) > 1]
    for row in lines[2:]:
        # Join "37° 31.0'" style coordinates into a single token.
        row = row.replace("° ", "°")
        # Collapse runs of spaces, longest runs first.
        for i in range(7, 1, -1):
            row = row.replace(" " * i, " ")
        # "hh:mm" becomes two separate tokens.
        row = row.replace(":", " ")
        tmp = row.split(" ")
        row_data.append(tmp[:-1])
    return row_data


all_data = []
for day in tqdm(eq_link):
    url = "https://www.data.jma.go.jp/svd/eqev/data/daily_map/" + day
    res = requests.get(url, timeout=30)
    soup = BeautifulSoup(res.content, "html.parser")
    # Pause between requests so the server is not hammered.
    time.sleep(3.971)
    if soup.pre is None:
        # No <pre> block on this page (e.g. an error page): nothing to parse.
        print(f"no <pre> element found, skipped: {url}")
        continue
    text_data = soup.pre.text
    all_data += data_pick(text_data)
print(len(all_data))
# Column names for the tokens produced per record: date/time parts,
# latitude, longitude, depth, magnitude and epicentre name.
columns = ["年", "月", "日", "時", "分", "秒", "緯度", "経度", "深さ(km)", "M", "震央地名"]
df = pd.DataFrame(all_data, columns=columns)

# Drop records whose magnitude is the placeholder "-", then cast the numeric columns.
df = df[df["M"] != "-"].reset_index(drop=True)
df = df.astype({"M": float, "深さ(km)": int, "年": int, "月": int, "日": int})

# Magnitude floored to a whole number, used as a class label.
df["M2"] = df["M"] // 1
# Zero-padded date string built from year, month and day.
df["年月日"] = df.apply(lambda x: "{0}{1:02}{2:02}".format(x["年"], x["月"], x["日"]), axis=1)


def lat_lon_10(x):
    """Convert a ``D°MM.m'H`` coordinate string to decimal degrees.

    The part between "°" and "'" is read as decimal minutes, so
    ``"37°31.5'N"`` gives ``37 + 31.5 / 60 == 37.525``. The digit after the
    decimal point is a fraction of a minute, not a number of seconds.

    Args:
        x: Coordinate string such as ``"137°14.6'E"``.

    Returns:
        The coordinate in decimal degrees; negative when the trailing
        hemisphere letter is ``S`` or ``W``.
    """
    degree_text, rest = x.split("°")
    minute_text, _, hemisphere = rest.partition("'")
    decimal_degree = int(degree_text) + float(minute_text) / 60
    if hemisphere.strip().upper() in ("S", "W"):
        decimal_degree = -decimal_degree
    return decimal_degree


df["緯度10"] = df["緯度"].map(lat_lon_10)
df["経度10"] = df["経度"].map(lat_lon_10)
# Marker size grows with the cube of the magnitude class.
df["size"] = df["M2"] ** 3
# Count rows per "M2" value, listed from the largest "M2" down.
df.groupby("M2")[["震央地名"]].count().sort_index(ascending=False)
import plotly.express as px

# Only rows whose "M2" value is at least 5 are drawn.
strong_quakes = df[df["M2"] >= 5]

fig = px.scatter_mapbox(
    data_frame=strong_quakes,
    lat="緯度10",
    lon="経度10",
    hover_data=["年月日", "深さ(km)"],
    color="M2",
    size="size",
    size_max=20,
    opacity=0.3,
    zoom=4,
    height=700,
    width=1500,
)
# OpenStreetMap tiles as the base map, with no margin around the figure.
fig.update_layout(
    mapbox_style='open-street-map',
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.show()
# コードのクローン git clone https://github.com/richmanbtc/mlbot_tutorial.git # Dockerの起動 cd mlbot_tutorial docker-compose up -d # Jupyterの起動 # http://localhost:8888 をブラウザで開く
# Jupyterの人は全部
ccxt : 仮想通貨取引用のライブラリ
TA-lib : 指標の計算ライブラリ

# Colabの人は以下は不要
numba : Pythonの高速化ライブラリ
scikit-learn : 機械学習用ライブラリ
lightGBM : 機械学習のモデル構築ライブラリ
import urllib.request
import os
import time
from datetime import datetime, timedelta

# Added as a workaround for SSL problems.
# NOTE(review): this replaces the default HTTPS context factory with the
# unverified one, i.e. certificate verification is turned off for HTTPS
# requests that rely on the default context in this process. Remove these
# two lines if downloads work without them.
import ssl
ssl._create_default_https_context = ssl._create_unverified_context


def get_date_range(start_date, end_date):
    """Return every day from start_date to end_date, both inclusive.

    Args:
        start_date: First day as a 'YYYY-MM-DD' string.
        end_date: Last day as a 'YYYY-MM-DD' string.

    Returns:
        A list of (year, month, day) integer tuples in ascending order;
        empty when end_date is earlier than start_date.
    """
    first_day = datetime.strptime(start_date, '%Y-%m-%d')
    last_day = datetime.strptime(end_date, '%Y-%m-%d')
    day_count = (last_day - first_day).days + 1
    days = (first_day + timedelta(days=offset) for offset in range(day_count))
    return [(d.year, d.month, d.day) for d in days]


def get_data(market, start_date, end_date, data_dir="data"):
    """Download one gzipped trade CSV per day for the given market.

    Files are saved as ``<data_dir>/<market>/<year>/<file name from URL>``.

    Args:
        market: Market symbol inserted into the URL, e.g. "BTC_JPY".
        start_date: First day to download, 'YYYY-MM-DD'.
        end_date: Last day to download (inclusive), 'YYYY-MM-DD'.
        data_dir: Root directory for the downloaded files.
    """
    market_dir = f"{data_dir}/{market}"
    url_base = 'https://api.coin.z.com/data/trades/{0}/{1}/{2:02}/{1}{2:02}{3:02}_{0}.csv.gz'
    for year, month, day in get_date_range(start_date, end_date):
        url = url_base.format(market, year, month, day)
        year_dir = f"{market_dir}/{year}"
        if not os.path.exists(year_dir):
            os.makedirs(year_dir)
        save_path = os.path.join(year_dir, os.path.basename(url))
        urllib.request.urlretrieve(url, save_path)
        # Pause between downloads.
        time.sleep(1.37)
# Market plus the start and end dates to download.
market = "BTC_JPY"
start_date = "2024-01-01"
end_date = "2024-02-22"

get_data(market=market, start_date=start_date, end_date=end_date)
import os
import glob

import numpy as np
import pandas as pd


def make_df(file_path, interval_sec):
    """Build OHLC + volume bars from one trade-history CSV.

    The CSV must provide the columns ``symbol``, ``side``, ``size``,
    ``price`` and ``timestamp``. Trades are bucketed into bars of
    ``interval_sec`` seconds; both the volume sum and the OHLC resampling use
    that same interval (bars aligned to the Unix epoch, matching
    ``dt.floor``).

    Args:
        file_path: Path of the CSV file (anything ``pandas.read_csv`` accepts).
        interval_sec: Bar length in seconds.

    Returns:
        A DataFrame indexed by bar start time (UTC) with the columns
        ``op``, ``hi``, ``lo``, ``cl`` and ``volume``. Bars without trades
        are forward-filled from the previous bar.
    """
    # Lower-case "s" is the seconds alias accepted by current pandas.
    rule = f"{interval_sec}s"

    df = pd.read_csv(file_path)
    df = df.rename(columns={'symbol': 'market'})
    df['price'] = df['price'].astype('float64')
    df['size'] = df['size'].astype('float64')
    df['timestamp'] = pd.to_datetime(df['timestamp'], utc=True)
    # Encode the trade side as +1 (BUY) / -1 (anything else).
    df['side'] = np.where(df['side'] == 'BUY', 1, -1).astype('int8')

    # Snap each trade to the start of its bar.
    df['timestamp'] = df['timestamp'].dt.floor(rule)
    df.set_index("timestamp", inplace=True)

    volume = df.groupby('timestamp')['size'].sum().rename('volume')
    # origin="epoch" keeps the resample bins identical to the floored
    # timestamps, so the left merge below always finds its volume row.
    ohlc = df['price'].resample(rule, origin="epoch").ohlc()

    df_merged = ohlc.merge(volume, left_index=True, right_index=True, how="left")
    # NOTE(review): empty bars inherit the previous bar's values, including
    # its volume -- confirm that this is the intended gap handling.
    df_merged = df_merged.ffill()
    df_merged = df_merged.rename(columns={"open": "op", "high": "hi", "low": "lo", "close": "cl"})
    return df_merged
# Directory that holds the downloaded files, and the bar length in seconds.
data_dir = "data/BTC_JPY/2024/"
interval_sec = 60

# Build one frame per file (in sorted file-name order) and concatenate once;
# growing a DataFrame inside the loop copies all accumulated rows every time.
frames = [
    make_df(file_path, interval_sec)
    for file_path in sorted(glob.glob(data_dir + "*.gz"))
]
# pd.concat raises on an empty list, so fall back to an empty frame.
df = pd.concat(frames, axis=0) if frames else pd.DataFrame()
# Imported here because nothing earlier in the visible file brings `plt`
# into scope before this cell uses it.
import matplotlib.pyplot as plt

# Line chart of the "cl" column.
df[["cl"]].plot(figsize=(12, 4))
plt.show()
# Maker-fee values together with the time each one came into effect.
maker_fee_history = [
    # https://coin.z.com/jp/news/2020/08/6482/
    # The announcement does not state the time of the change; it is assumed
    # to apply after the regular maintenance.
    dict(changed_at='2020/08/05 06:00:00Z', maker_fee=-0.00035),
    # https://coin.z.com/jp/news/2020/08/6541/
    dict(changed_at='2020/09/09 06:00:00Z', maker_fee=-0.00025),
    # https://coin.z.com/jp/news/2020/10/6686/
    dict(changed_at='2020/11/04 06:00:00Z', maker_fee=0.0),
    # Added entry: the current value.
    dict(changed_at='2023/08/05 06:00:00Z', maker_fee=-0.0003),
]
feature_importance = model.feature_importances_

# Pair each feature name with its importance, sort by importance
# (largest first) and renumber the rows.
feature_importance_df = (
    pd.DataFrame({'Feature': features, 'Importance': feature_importance})
    .sort_values(by='Importance', ascending=False)
    .reset_index(drop=True)
)

# Show the first ten rows.
feature_importance_df.head(10)
import pandas_datareader.data as web
web.DataReader(銘柄コード, data_source='stooq', start=開始日, end=終了日)

株価は銘柄ごとのデータになっているので
# Library imports
import os
import datetime as dt

import pandas_datareader.data as web
import plotly.graph_objects as go
import pandas as pd

# Stock code to look up (7203 is Toyota Motor); ".JP" is appended for the symbol.
code = "7203"
symbol = f"{code}.JP"

# Fetch prices from 2023-01-01 up to today.
start = '2023-01-01'
end = dt.date.today()

# Download the data, put the stock code in the first column and sort by index.
df = web.DataReader(symbol, data_source='stooq', start=start, end=end)
df.insert(0, "code", code, allow_duplicates=False)
df = df.sort_index()
df.tail(3)
# Window length for the moving average.
period = 20
ma_column = f'MA{period}'

# Rolling mean and rolling standard deviation of the closing price.
rolling_close = df['Close'].rolling(window=period)
df[ma_column] = rolling_close.mean()
std = rolling_close.std()

# Bands at 1, 2 and 3 standard deviations: first above, then below the mean.
for side, sign in (('Upper', 1), ('Lower', -1)):
    for width in (1, 2, 3):
        df[f'{side} Band{width}'] = df[ma_column] + sign * width * std
# Traces to plot: the candlestick chart, the moving average and the
# 1- and 2-sigma bands.
# The band labels use "σ"; the previous "@" looked like a mis-encoded sigma.
data = [
    go.Candlestick(
        x=df.index,
        open=df['Open'],
        high=df['High'],
        low=df['Low'],
        close=df['Close'],
        name='株価',
    ),
    go.Scatter(x=df.index, y=df['Upper Band2'], mode='lines', name='+2σ'),
    go.Scatter(x=df.index, y=df['Upper Band1'], mode='lines', name='+1σ'),
    go.Scatter(x=df.index, y=df[f'MA{period}'], mode='lines', name='移動平均'),
    go.Scatter(x=df.index, y=df['Lower Band2'], mode='lines', name='-2σ'),
    go.Scatter(x=df.index, y=df['Lower Band1'], mode='lines', name='-1σ'),
]

# Layout: title plus axis titles, dates shown as YYYY-MM-DD.
layout = go.Layout(
    title='株価の移動平均とボリンジャーバンド',
    xaxis=dict(title='日付', tickformat='%Y-%m-%d'),
    yaxis=dict(title='株価'),
)

# Draw the figure.
fig = go.Figure(data=data, layout=layout)
fig.show()
# RSI
def calculate_rsi(data, window=14):
    """Return the RSI of a price series using simple rolling means.

    Args:
        data: Series of prices.
        window: Number of rows in the rolling window.

    Returns:
        A Series holding 100 - 100 / (1 + avg_gain / avg_loss).
    """
    delta = data.diff()
    # Positive moves only / negative moves only; the other side becomes 0.
    gains = delta.clip(lower=0)
    losses = delta.clip(upper=0)
    avg_gain = gains.rolling(window).mean()
    avg_loss = losses.rolling(window).mean().abs()
    rs = avg_gain / avg_loss
    return 100 - (100 / (1 + rs))


# Stochastic oscillator
def calculate_stochastic_oscillator(data, window=14):
    """Return where each value sits inside its rolling min-max range, in percent.

    Args:
        data: Series of prices.
        window: Number of rows in the rolling window.

    Returns:
        A Series of (value - rolling min) / (rolling max - rolling min) * 100.
    """
    windowed = data.rolling(window)
    lowest_low = windowed.min()
    highest_high = windowed.max()
    return (data - lowest_low) / (highest_high - lowest_low) * 100


# MACD
def calculate_macd(data, short_window=12, long_window=26, signal_window=9):
    """Return the MACD line, its signal line and their difference.

    Args:
        data: Series of prices.
        short_window: Span of the short EMA.
        long_window: Span of the long EMA.
        signal_window: Span of the EMA applied to the MACD line.

    Returns:
        A (macd, signal_line, histogram) tuple of Series.
    """
    def ema(series, span):
        return series.ewm(span=span, adjust=False).mean()

    macd = ema(data, short_window) - ema(data, long_window)
    signal_line = ema(macd, signal_window)
    return macd, signal_line, macd - signal_line


# RSI of the closing price
df['RSI'] = calculate_rsi(df['Close'])

# Stochastic oscillator: %K, and %D as its 3-row rolling mean
df['%K'] = calculate_stochastic_oscillator(df['Close'])
df['%D'] = df['%K'].rolling(3).mean()

# MACD columns
macd, signal_line, histogram = calculate_macd(df['Close'])
df['MACD'] = macd
df['Signal'] = signal_line
df['Histogram'] = histogram
# MACD: candlesticks on the left axis, MACD and signal lines on the right axis.
price_trace = go.Candlestick(
    x=df.index,
    open=df['Open'],
    high=df['High'],
    low=df['Low'],
    close=df['Close'],
    name='株価',
    yaxis='y1',
)
macd_trace = go.Scatter(
    x=df.index, y=df['MACD'],
    line=dict(color='red', width=1), name="macd", yaxis='y2',
)
signal_trace = go.Scatter(
    x=df.index, y=df['Signal'],
    line=dict(color='blue', width=1), name="signal", yaxis='y2',
)

# The right axis spans the smallest to the largest value of both lines.
indicator_low = min(df.Signal.min(), df.MACD.min())
indicator_high = max(df.MACD.max(), df.Signal.max())

macd_layout = go.Layout(
    xaxis=dict(title='日付', tickformat='%Y-%m-%d'),
    yaxis=dict(title='株価', side='left', showgrid=False,
               range=[df.Low.min(), df.High.max()]),
    yaxis2=dict(title='指標', side='right', overlaying='y',
                range=[indicator_low, indicator_high]),
)

fig = go.Figure(data=[price_trace, macd_trace, signal_trace], layout=macd_layout)
fig.show()
# RSI: candlesticks on the left axis, the RSI line on the right axis.
price_trace = go.Candlestick(
    x=df.index,
    open=df['Open'],
    high=df['High'],
    low=df['Low'],
    close=df['Close'],
    name='株価',
    yaxis='y1',
)
rsi_trace = go.Scatter(
    x=df.index, y=df['RSI'],
    line=dict(color='blue', width=1), name="RSI", yaxis='y2',
)

rsi_layout = go.Layout(
    xaxis=dict(title='日付', tickformat='%Y-%m-%d'),
    yaxis=dict(title='株価', side='left', showgrid=False,
               range=[df.Low.min(), df.High.max()]),
    # The right axis spans the observed RSI minimum to maximum.
    yaxis2=dict(title='指標', side='right', overlaying='y',
                range=[df.RSI.min(), df.RSI.max()]),
)

fig = go.Figure(data=[price_trace, rsi_trace], layout=rsi_layout)
fig.show()
# Stochastic oscillator: candlesticks on the left axis, %K and %D on the right axis.
price_trace = go.Candlestick(
    x=df.index,
    open=df['Open'],
    high=df['High'],
    low=df['Low'],
    close=df['Close'],
    name='株価',
    yaxis='y1',
)
k_trace = go.Scatter(
    x=df.index, y=df['%K'],
    line=dict(color='blue', width=1), name="%K", yaxis='y2',
)
d_trace = go.Scatter(
    x=df.index, y=df['%D'],
    line=dict(color='red', width=1), name="%D", yaxis='y2',
)

# The right axis spans the smallest to the largest value of both lines.
indicator_low = min(df['%K'].min(), df['%D'].min())
indicator_high = max(df['%K'].max(), df['%D'].max())

stochastic_layout = go.Layout(
    xaxis=dict(title='日付', tickformat='%Y-%m-%d'),
    yaxis=dict(title='株価', side='left', showgrid=False,
               range=[df.Low.min(), df.High.max()]),
    yaxis2=dict(title='指標', side='right', overlaying='y',
                range=[indicator_low, indicator_high]),
)

fig = go.Figure(data=[price_trace, k_trace, d_trace], layout=stochastic_layout)
fig.show()
# Trade simulation driven by MACD / signal-line crossovers.
def simulate_trades(df):
    """Annotate ``df`` with positions and running profit from MACD crossovers.

    A 'Buy' position is opened on the row where MACD moves above the signal
    line and a 'Sell' position on the row where it moves below. On every later
    row without a new crossover the current position is carried forward, that
    row's close is recorded as the exit price, and the profit relative to the
    entry price is computed.

    Args:
        df: DataFrame with 'MACD', 'Signal' and 'Close' columns. It is
            modified in place.

    Returns:
        The same DataFrame with the added columns 'Position', 'Entry Price',
        'Exit Price' and 'P&L' (None where no position is held; exit price and
        P&L are also None on the entry row itself).
    """
    # Work on positional arrays: integer keys on a label-indexed Series
    # (df['MACD'][i]) are treated as labels by newer pandas versions.
    macd = df['MACD'].to_numpy()
    signal = df['Signal'].to_numpy()
    close = df['Close'].to_numpy()

    positions = []      # 'Buy', 'Sell' or None per row
    entry_prices = []   # price at which the current position was opened
    exit_prices = []    # close of the row while a position is held
    pnl = []            # profit/loss of the held position at that row

    position = None
    entry_price = None
    for i in range(len(df)):
        exit_price = None
        profit = None
        # The first row has no previous row, so it can never be a crossover.
        crossed_up = i > 0 and macd[i] > signal[i] and macd[i - 1] <= signal[i - 1]
        crossed_down = i > 0 and macd[i] < signal[i] and macd[i - 1] >= signal[i - 1]

        if crossed_up:
            # MACD rose above the signal line: open a long position.
            position, entry_price = 'Buy', close[i]
        elif crossed_down:
            # MACD fell below the signal line: open a short position.
            position, entry_price = 'Sell', close[i]
        elif position == 'Buy':
            exit_price = close[i]
            profit = close[i] - entry_price
        elif position == 'Sell':
            exit_price = close[i]
            profit = entry_price - close[i]

        positions.append(position)
        entry_prices.append(entry_price)
        exit_prices.append(exit_price)
        pnl.append(profit)

    df['Position'] = positions
    df['Entry Price'] = entry_prices
    df['Exit Price'] = exit_prices
    df['P&L'] = pnl
    return df


# Run the simulation.
df = simulate_trades(df)
# Index labels of the rows holding a Buy / Sell position.
buy_indices = df[df['Position'] == 'Buy'].index
sell_indices = df[df['Position'] == 'Sell'].index

fig = go.Figure()

# Closing price as a line.
fig.add_trace(go.Scatter(x=df.index, y=df['Close'], name='Close', mode='lines'))

# Entry and exit prices as markers: blue for Buy rows, red for Sell rows;
# exit markers are triangles.
marker_specs = [
    ('Entry Price', buy_indices, 'Entry Price (BUY)', dict(color='blue', size=8)),
    ('Entry Price', sell_indices, 'Entry Price (SELL)', dict(color='red', size=8)),
    ('Exit Price', buy_indices, 'Exit Price (BUY)', dict(color='blue', size=8, symbol='triangle-up')),
    ('Exit Price', sell_indices, 'Exit Price (SELL)', dict(color='red', size=8, symbol='triangle-down')),
]
for column, rows, label, marker in marker_specs:
    subset = df.loc[rows]
    fig.add_trace(go.Scatter(x=subset.index, y=subset[column], name=label,
                             mode='markers', marker=marker))

# P&L as a line on the second y axis.
fig.add_trace(go.Scatter(x=df.index, y=df['P&L'], name='P&L', mode='lines', yaxis='y2'))

# Layout
fig.update_layout(
    title='Trading Simulation',
    xaxis_title='Date',
    yaxis=dict(title='Price', side='left'),
    yaxis2=dict(title='P&L', side='right', overlaying='y', showgrid=False),
    legend=dict(x=0, y=1),
    hovermode='x unified',
)

# Show the figure.
fig.show()
import random
import sys

from PIL import Image
import matplotlib.image as mpimg
import matplotlib.pyplot as plt

# Kept so the interpreter-wide setting stays as before; make_maze itself no
# longer recurses and does not depend on it.
sys.setrecursionlimit(10**6)


# Maze generation
def make_maze(size):
    """Generate a random maze by depth-first carving.

    The grid starts as all walls (1). Starting at (1, 1) the carver moves two
    cells at a time in a random direction, opening (0) the wall in between
    and the target cell, and backtracks when no unopened target remains.
    An explicit stack replaces recursion, so the depth of the search is not
    limited by the interpreter's call stack.

    Args:
        size: (height, width) of the grid. NOTE(review): the start and goal
            placement looks designed for odd dimensions -- confirm.

    Returns:
        A list of rows with 1 = wall, 0 = passage, 2 = start at (1, 1) and
        3 = goal at (height - 2, width - 2).
    """
    height, width = size[0], size[1]
    tmp_maze = [[1] * width for _ in range(height)]
    # Offsets per direction index: (offset of the wall, offset of the target cell).
    dx = [(1, 2), (-1, -2), (0, 0), (0, 0)]
    dy = [(0, 0), (0, 0), (1, 2), (-1, -2)]

    def shuffled_directions():
        # Directions are shuffled once when a cell is entered.
        order = list(range(4))
        random.shuffle(order)
        return iter(order)

    # Each stack entry is (row, column, directions still to try from there).
    stack = [(1, 1, shuffled_directions())]
    while stack:
        ny, nx, pending = stack[-1]
        for i in pending:
            target_y = ny + dy[i][1]
            target_x = nx + dx[i][1]
            if target_y < 1 or target_y >= height:
                continue
            if target_x < 1 or target_x >= width:
                continue
            if tmp_maze[target_y][target_x] == 0:
                continue
            # Open the wall between the two cells and the target cell itself.
            for j in range(2):
                tmp_maze[ny + dy[i][j]][nx + dx[i][j]] = 0
            stack.append((target_y, target_x, shuffled_directions()))
            break
        else:
            # Every direction from this cell has been tried: backtrack.
            stack.pop()

    tmp_maze[1][1], tmp_maze[height - 2][width - 2] = 2, 3
    return tmp_maze


# Image rendering
def create_image_from_maze(maze, file_path, pixel_size=20):
    """Render a maze grid as an RGB image, save it and return it.

    Args:
        maze: List of rows of cell codes (0 white, 1 black, 2 blue, 3 green;
            any other code is drawn white).
        file_path: Where the image is saved.
        pixel_size: Edge length in pixels of the square drawn per cell.

    Returns:
        The PIL image that was saved.
    """
    # Cell code -> RGB colour.
    colors = {
        0: (255, 255, 255),  # white
        1: (0, 0, 0),        # black
        2: (0, 0, 255),      # blue
        3: (0, 255, 0),      # green
    }
    n_rows, n_cols = len(maze), len(maze[0])
    image = Image.new("RGB", (n_cols * pixel_size, n_rows * pixel_size), "white")
    pixels = image.load()

    for y in range(n_rows):
        for x in range(n_cols):
            color = colors.get(maze[y][x], (255, 255, 255))
            left, top = x * pixel_size, y * pixel_size
            # Fill the pixel_size x pixel_size square of this cell.
            for offset_y in range(pixel_size):
                for offset_x in range(pixel_size):
                    pixels[left + offset_x, top + offset_y] = color

    image.save(file_path)
    return image
# Build the maze: size is (height, width).
height, width = 31, 101
size = (height, width)
maze = make_maze(size)

# Render it to "image.png" and show the result without axes.
image = create_image_from_maze(maze, "image.png")

plt.figure(figsize=(16, 10))
plt.imshow(image, cmap='gray')
plt.axis('off')
plt.show()
# Install the pedalboard package (notebook shell command).
!pip3 install pedalboard
# librosa is a library for audio.
import librosa
from pedalboard.io import AudioFile
from pedalboard import (
    Pedalboard,
    Gain,
    Chorus,
    Phaser,
    Reverb,
    Compressor,
    LadderFilter,
)
import IPython.display as display
from matplotlib import pyplot as plt
# Load the bundled 'trumpet' example recording at 44.1 kHz.
trumpet_path = librosa.example('trumpet', hq=True)
audio, sr = librosa.load(trumpet_path, sr=44100)

print(type(audio))
print(audio.size)

# Playback widget for the loaded audio.
display.Audio(audio, rate=sr)
ボードの変数 = Pedalboard([エフェクト関数 , ・・・])
変換後の音声の変数 = ボードの変数(音声データ, サンプリングレート)
# Effects applied in order: compressor, gain, chorus.
effects = [
    Compressor(threshold_db=-50, ratio=25),
    Gain(gain_db=30),
    Chorus(),
]
board = Pedalboard(effects)

# Run the audio through the board.
effected_audio = board(audio, sr)
# エフェクトかけた音源を再生 display.Audio(effected_audio, rate=sr)
# Imported explicitly: `import librosa` alone is not guaranteed to load the
# `display` submodule in every librosa release.
import librosa.display

# Waveforms stacked vertically: original on top, effected below.
plt.figure()
panels = (
    (audio, 'b', 'Original'),
    (effected_audio, 'r', 'Effected'),
)
for position, (signal, color, title) in enumerate(panels, start=1):
    plt.subplot(2, 1, position)
    librosa.display.waveshow(signal, sr=sr, color=color)
    plt.title(title)
plt.tight_layout()
plt.show()
# Save the effected audio as a single-channel file.
with AudioFile('ファイル名.wav', 'w', sr, num_channels=1) as out_file:
    out_file.write(effected_audio)
# Run the voicevox_core download script (pinned to a specific commit).
!curl -sSfL https://raw.githubusercontent.com/VOICEVOX/voicevox_core/8cf307df4412dc0db0b03c6957b83b032770c31a/scripts/downloads/download.sh | bash -s
# Change into the directory used by the following commands.
cd voicevox_core/
# Download and install the 0.14.1 CPU wheel.
!wget https://github.com/VOICEVOX/voicevox_core/releases/download/0.14.1/voicevox_core-0.14.1+cpu-cp38-abi3-linux_x86_64.whl
!pip install voicevox_core-0.14.1+cpu-cp38-abi3-linux_x86_64.whl
# Download the example run.py script (pinned to a specific commit).
!wget https://raw.githubusercontent.com/VOICEVOX/voicevox_core/406f6c41408836840b9a38489d0f670fb960f412/example/python/run.py
dirs = './open_jtalk_dic_utf_8-1.11'
out_path = '../out.wav'
text = 'テキスト'
speaker_id = スピーカーID
!python ./run.py --dict-dir $dirs --text $text --out $out_path --speaker-id $speaker_id

out_pathで指定した場所に音声ファイルが出力されます。
キャラクター | 口調 | キャラクターID |
---|---|---|
四国めたん | ノーマル | 2 |
四国めたん | あまあま | 0 |
四国めたん | ツンツン | 6 |
四国めたん | セクシー | 4 |
四国めたん | ささやき | 36 |
四国めたん | ヒソヒソ | 37 |
ずんだもん | ノーマル | 3 |
ずんだもん | あまあま | 1 |
ずんだもん | ツンツン | 7 |
ずんだもん | セクシー | 5 |
ずんだもん | ささやき | 22 |
ずんだもん | ヒソヒソ | 38 |
春日部つむぎ | ノーマル | 8 |
雨晴はう | ノーマル | 10 |
波音リツ | ノーマル | 9 |
玄野武宏 | ノーマル | 11 |
玄野武宏 | 喜び | 39 |
玄野武宏 | ツンギレ | 40 |
玄野武宏 | 悲しみ | 41 |
白上虎太郎 | ふつう | 12 |
白上虎太郎 | わーい | 32 |
白上虎太郎 | びくびく | 33 |
白上虎太郎 | おこ | 34 |
白上虎太郎 | びえーん | 35 |
青山龍星 | ノーマル | 13 |
冥鳴ひまり | ノーマル | 14 |
九州そら | ノーマル | 16 |
九州そら | あまあま | 15 |
九州そら | ツンツン | 18 |
九州そら | セクシー | 17 |
九州そら | ささやき | 19 |
もち子さん | ノーマル | 20 |
剣崎雌雄 | ノーマル | 21 |
WhiteCUL | ノーマル | 23 |
WhiteCUL | たのしい | 24 |
WhiteCUL | かなしい | 25 |
WhiteCUL | びえーん | 26 |
後鬼 | 人間ver. | 27 |
後鬼 | ぬいぐるみver. | 28 |
No.7 | ノーマル | 29 |
No.7 | アナウンス | 30 |
No.7 | 読み聞かせ | 31 |
ちび式じい | ノーマル | 42 |
櫻歌ミコ | ノーマル | 43 |
櫻歌ミコ | 第二形態 | 44 |
櫻歌ミコ | ロリ | 45 |
小夜/SAYO | ノーマル | 46 |
ナースロボ_タイプT | ノーマル | 47 |
ナースロボ_タイプT | 楽々 | 48 |
ナースロボ_タイプT | 恐怖 | 49 |
ナースロボ_タイプT | 内緒話 | 50 |
dirs = './open_jtalk_dic_utf_8-1.11' out_path = '../out.wav' text = 'どうも、ずんだもんです。よろしくね' speaker_id = 1 !python ./run.py --dict-dir $dirs --text $text --out $out_path --speaker-id $speaker_id from IPython.display import Audio Audio('../out.wav', autoplay=True)
# Install AnimatedWordCloudTimelapse, pinned to version 0.9.5 (notebook shell command).
!pip install AnimatedWordCloudTimelapse==0.9.5
from AnimatedWordCloud import animate, Config
import pandas as pd
from datetime import datetime as dt
from sklearn.feature_extraction.text import CountVectorizer

# The config sets where the output is written.
config = Config(output_path="/content/")
# Comma-separated tweet data; "created_at" is parsed into datetimes.
tweets_url = "https://raw.githubusercontent.com/konbraphat51/AnimatedWordCloudExampleElon/main/elonmusk_tweets.csv"
df = pd.read_csv(tweets_url)
df["created_at"] = pd.to_datetime(df["created_at"])
df.head()
# Split the rows by calendar year of "created_at", 2012 through 2017.
dfs_by_year = []
for year in range(2012, 2018):
    in_year = (df["created_at"] >= dt(year, 1, 1)) & (df["created_at"] < dt(year + 1, 1, 1))
    df_year = df[in_year]
    dfs_by_year.append(df_year)
    print(df_year.shape)

# Words excluded from counting, and the minimum total count a word needs
# within a year to be kept.
stopwords = ["the", "co", "http", "to", "of", "and", "on", "for", "just", "that", "with", "by", "is", "in", "at", "will"]
min_df = 10

counter = CountVectorizer(stop_words=stopwords)

# One {word: count} dict per year, words ordered by descending count.
dicts_by_year = []
for yearly_rows in dfs_by_year:
    np_year = counter.fit_transform(yearly_rows["text"]).toarray()
    df_year = pd.DataFrame(np_year, columns=counter.get_feature_names_out())
    sr_sum = df_year.sum(axis=0).sort_values(ascending=False)
    dicts_by_year.append(sr_sum[sr_sum >= min_df].to_dict())

# Pair each dict with its year: [(2012, {...}), (2013, {...}), ...].
wordvector_timelapse = [
    (2012 + offset, word_counts)
    for offset, word_counts in enumerate(dicts_by_year)
]
# Build the animation from the (year, word-count dict) pairs using the config.
# NOTE(review): presumably the output is written under the config's output
# path -- confirm against the AnimatedWordCloud documentation.
result = animate(wordvector_timelapse, config)
from IPython.display import display, Image

# Read the generated file as raw bytes and show it 600 px wide.
# NOTE(review): the file is a .gif but format='png' is passed; this looks
# deliberate -- confirm before changing it.
with open('/content/output.gif', 'rb') as f:
    gif_bytes = f.read()

display(Image(data=gif_bytes, format='png', width=600))